From 04f08eb44b5011493d77b602fdec29ff0f5c6cd5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 8 Sep 2021 17:00:29 -0700
Subject: net/af_unix: fix a data-race in unix_dgram_poll

syzbot reported another data-race in af_unix [1]

Lets change __skb_insert() to use WRITE_ONCE() when changing
skb head qlen.

Also, change unix_dgram_poll() to use lockless version
of unix_recvq_full()

It is verry possible we can switch all/most unix_recvq_full()
to the lockless version, this will be done in a future kernel version.

[1] HEAD commit: 8596e589b787732c8346f0482919e83cc9362db1

BUG: KCSAN: data-race in skb_queue_tail / unix_dgram_poll

write to 0xffff88814eeb24e0 of 4 bytes by task 25815 on cpu 0:
 __skb_insert include/linux/skbuff.h:1938 [inline]
 __skb_queue_before include/linux/skbuff.h:2043 [inline]
 __skb_queue_tail include/linux/skbuff.h:2076 [inline]
 skb_queue_tail+0x80/0xa0 net/core/skbuff.c:3264
 unix_dgram_sendmsg+0xff2/0x1600 net/unix/af_unix.c:1850
 sock_sendmsg_nosec net/socket.c:703 [inline]
 sock_sendmsg net/socket.c:723 [inline]
 ____sys_sendmsg+0x360/0x4d0 net/socket.c:2392
 ___sys_sendmsg net/socket.c:2446 [inline]
 __sys_sendmmsg+0x315/0x4b0 net/socket.c:2532
 __do_sys_sendmmsg net/socket.c:2561 [inline]
 __se_sys_sendmmsg net/socket.c:2558 [inline]
 __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2558
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff88814eeb24e0 of 4 bytes by task 25834 on cpu 1:
 skb_queue_len include/linux/skbuff.h:1869 [inline]
 unix_recvq_full net/unix/af_unix.c:194 [inline]
 unix_dgram_poll+0x2bc/0x3e0 net/unix/af_unix.c:2777
 sock_poll+0x23e/0x260 net/socket.c:1288
 vfs_poll include/linux/poll.h:90 [inline]
 ep_item_poll fs/eventpoll.c:846 [inline]
 ep_send_events fs/eventpoll.c:1683 [inline]
 ep_poll fs/eventpoll.c:1798 [inline]
 do_epoll_wait+0x6ad/0xf00 fs/eventpoll.c:2226
 __do_sys_epoll_wait fs/eventpoll.c:2238 [inline]
 __se_sys_epoll_wait fs/eventpoll.c:2233 [inline]
 __x64_sys_epoll_wait+0xf6/0x120 fs/eventpoll.c:2233
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x3d/0x90 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x0000001b -> 0x00000001

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 25834 Comm: syz-executor.1 Tainted: G        W         5.14.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Fixes: 86b18aaa2b5b ("skbuff: fix a data race in skb_queue_len()")
Cc: Qian Cai <cai@lca.pw>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6bdb0db3e825..841e2f0f5240 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1940,7 +1940,7 @@ static inline void __skb_insert(struct sk_buff *newsk,
 	WRITE_ONCE(newsk->prev, prev);
 	WRITE_ONCE(next->prev, newsk);
 	WRITE_ONCE(prev->next, newsk);
-	list->qlen++;
+	WRITE_ONCE(list->qlen, list->qlen + 1);
 }
 
 static inline void __skb_queue_splice(const struct sk_buff_head *list,
-- 
cgit v1.2.3


From 937f5d5ec642501d2dd3c91918685de30a932b34 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@zonque.org>
Date: Thu, 9 Sep 2021 18:01:28 -0700
Subject: Input: ads7846 - remove custom filter handling functions from pdata

The functions in the platform data struct to initialize, cleanup and
apply custom filters are not in use by any mainline board.

Remove support for them to pave the road for more cleanups to come.

The enum was moved as it has no users outside of the driver code
itself.

Signed-off-by: Daniel Mack <daniel@zonque.org>
Reviewed-by: Marco Felsch <m.felsch@pengutronix.de>
Link: https://lore.kernel.org/r/20210907200726.2034962-3-daniel@zonque.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/touchscreen/ads7846.c | 25 ++++++++-----------------
 include/linux/spi/ads7846.h         | 15 ---------------
 2 files changed, 8 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c
index ce2e14816af5..b9c8496155b7 100644
--- a/drivers/input/touchscreen/ads7846.c
+++ b/drivers/input/touchscreen/ads7846.c
@@ -142,13 +142,18 @@ struct ads7846 {
 
 	int			(*filter)(void *data, int data_idx, int *val);
 	void			*filter_data;
-	void			(*filter_cleanup)(void *data);
 	int			(*get_pendown_state)(void);
 	int			gpio_pendown;
 
 	void			(*wait_for_sync)(void);
 };
 
+enum ads7846_filter {
+	ADS7846_FILTER_OK,
+	ADS7846_FILTER_REPEAT,
+	ADS7846_FILTER_IGNORE,
+};
+
 /* leave chip selected when we're done, for quicker re-select? */
 #if	0
 #define	CS_CHANGE(xfer)	((xfer).cs_change = 1)
@@ -1277,15 +1282,7 @@ static int ads7846_probe(struct spi_device *spi)
 	ts->x_plate_ohms = pdata->x_plate_ohms ? : 400;
 	ts->vref_mv = pdata->vref_mv;
 
-	if (pdata->filter != NULL) {
-		if (pdata->filter_init != NULL) {
-			err = pdata->filter_init(pdata, &ts->filter_data);
-			if (err < 0)
-				goto err_free_mem;
-		}
-		ts->filter = pdata->filter;
-		ts->filter_cleanup = pdata->filter_cleanup;
-	} else if (pdata->debounce_max) {
+	if (pdata->debounce_max) {
 		ts->debounce_max = pdata->debounce_max;
 		if (ts->debounce_max < 2)
 			ts->debounce_max = 2;
@@ -1299,7 +1296,7 @@ static int ads7846_probe(struct spi_device *spi)
 
 	err = ads7846_setup_pendown(spi, ts, pdata);
 	if (err)
-		goto err_cleanup_filter;
+		goto err_free_mem;
 
 	if (pdata->penirq_recheck_delay_usecs)
 		ts->penirq_recheck_delay_usecs =
@@ -1425,9 +1422,6 @@ static int ads7846_probe(struct spi_device *spi)
  err_free_gpio:
 	if (!ts->get_pendown_state)
 		gpio_free(ts->gpio_pendown);
- err_cleanup_filter:
-	if (ts->filter_cleanup)
-		ts->filter_cleanup(ts->filter_data);
  err_free_mem:
 	input_free_device(input_dev);
 	kfree(packet);
@@ -1458,9 +1452,6 @@ static int ads7846_remove(struct spi_device *spi)
 		gpio_free(ts->gpio_pendown);
 	}
 
-	if (ts->filter_cleanup)
-		ts->filter_cleanup(ts->filter_data);
-
 	kfree(ts->packet);
 	kfree(ts);
 
diff --git a/include/linux/spi/ads7846.h b/include/linux/spi/ads7846.h
index 1a5eaef3b7f2..d424c1aadf38 100644
--- a/include/linux/spi/ads7846.h
+++ b/include/linux/spi/ads7846.h
@@ -1,17 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* linux/spi/ads7846.h */
 
-/* Touchscreen characteristics vary between boards and models.  The
- * platform_data for the device's "struct device" holds this information.
- *
- * It's OK if the min/max values are zero.
- */
-enum ads7846_filter {
-	ADS7846_FILTER_OK,
-	ADS7846_FILTER_REPEAT,
-	ADS7846_FILTER_IGNORE,
-};
-
 struct ads7846_platform_data {
 	u16	model;			/* 7843, 7845, 7846, 7873. */
 	u16	vref_delay_usecs;	/* 0 for external vref; etc */
@@ -51,10 +40,6 @@ struct ads7846_platform_data {
 	int	gpio_pendown_debounce;	/* platform specific debounce time for
 					 * the gpio_pendown */
 	int	(*get_pendown_state)(void);
-	int	(*filter_init)	(const struct ads7846_platform_data *pdata,
-				 void **filter_data);
-	int	(*filter)	(void *filter_data, int data_idx, int *val);
-	void	(*filter_cleanup)(void *filter_data);
 	void	(*wait_for_sync)(void);
 	bool	wakeup;
 	unsigned long irq_flags;
-- 
cgit v1.2.3


From 2f1aaf3ea666b737ad717b3d88667225aca23149 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 9 Sep 2021 08:49:59 -0700
Subject: bpf, mm: Fix lockdep warning triggered by
 stack_map_get_build_id_offset()

Currently the bpf selftest "get_stack_raw_tp" triggered the warning:

  [ 1411.304463] WARNING: CPU: 3 PID: 140 at include/linux/mmap_lock.h:164 find_vma+0x47/0xa0
  [ 1411.304469] Modules linked in: bpf_testmod(O) [last unloaded: bpf_testmod]
  [ 1411.304476] CPU: 3 PID: 140 Comm: systemd-journal Tainted: G        W  O      5.14.0+ #53
  [ 1411.304479] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
  [ 1411.304481] RIP: 0010:find_vma+0x47/0xa0
  [ 1411.304484] Code: de 48 89 ef e8 ba f5 fe ff 48 85 c0 74 2e 48 83 c4 08 5b 5d c3 48 8d bf 28 01 00 00 be ff ff ff ff e8 2d 9f d8 00 85 c0 75 d4 <0f> 0b 48 89 de 48 8
  [ 1411.304487] RSP: 0018:ffffabd440403db8 EFLAGS: 00010246
  [ 1411.304490] RAX: 0000000000000000 RBX: 00007f00ad80a0e0 RCX: 0000000000000000
  [ 1411.304492] RDX: 0000000000000001 RSI: ffffffff9776b144 RDI: ffffffff977e1b0e
  [ 1411.304494] RBP: ffff9cf5c2f50000 R08: ffff9cf5c3eb25d8 R09: 00000000fffffffe
  [ 1411.304496] R10: 0000000000000001 R11: 00000000ef974e19 R12: ffff9cf5c39ae0e0
  [ 1411.304498] R13: 0000000000000000 R14: 0000000000000000 R15: ffff9cf5c39ae0e0
  [ 1411.304501] FS:  00007f00ae754780(0000) GS:ffff9cf5fba00000(0000) knlGS:0000000000000000
  [ 1411.304504] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [ 1411.304506] CR2: 000000003e34343c CR3: 0000000103a98005 CR4: 0000000000370ee0
  [ 1411.304508] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [ 1411.304510] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [ 1411.304512] Call Trace:
  [ 1411.304517]  stack_map_get_build_id_offset+0x17c/0x260
  [ 1411.304528]  __bpf_get_stack+0x18f/0x230
  [ 1411.304541]  bpf_get_stack_raw_tp+0x5a/0x70
  [ 1411.305752] RAX: 0000000000000000 RBX: 5541f689495641d7 RCX: 0000000000000000
  [ 1411.305756] RDX: 0000000000000001 RSI: ffffffff9776b144 RDI: ffffffff977e1b0e
  [ 1411.305758] RBP: ffff9cf5c02b2f40 R08: ffff9cf5ca7606c0 R09: ffffcbd43ee02c04
  [ 1411.306978]  bpf_prog_32007c34f7726d29_bpf_prog1+0xaf/0xd9c
  [ 1411.307861] R10: 0000000000000001 R11: 0000000000000044 R12: ffff9cf5c2ef60e0
  [ 1411.307865] R13: 0000000000000005 R14: 0000000000000000 R15: ffff9cf5c2ef6108
  [ 1411.309074]  bpf_trace_run2+0x8f/0x1a0
  [ 1411.309891] FS:  00007ff485141700(0000) GS:ffff9cf5fae00000(0000) knlGS:0000000000000000
  [ 1411.309896] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [ 1411.311221]  syscall_trace_enter.isra.20+0x161/0x1f0
  [ 1411.311600] CR2: 00007ff48514d90e CR3: 0000000107114001 CR4: 0000000000370ef0
  [ 1411.312291]  do_syscall_64+0x15/0x80
  [ 1411.312941] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [ 1411.313803]  entry_SYSCALL_64_after_hwframe+0x44/0xae
  [ 1411.314223] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [ 1411.315082] RIP: 0033:0x7f00ad80a0e0
  [ 1411.315626] Call Trace:
  [ 1411.315632]  stack_map_get_build_id_offset+0x17c/0x260

To reproduce, first build `test_progs` binary:

  make -C tools/testing/selftests/bpf -j60

and then run the binary at tools/testing/selftests/bpf directory:

  ./test_progs -t get_stack_raw_tp

The warning is due to commit 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked()
annotations to find_vma*()") which added mmap_assert_locked() in find_vma()
function. The mmap_assert_locked() function asserts that mm->mmap_lock needs
to be held. But this is not the case for bpf_get_stack() or bpf_get_stackid()
helper (kernel/bpf/stackmap.c), which uses mmap_read_trylock_non_owner()
instead. Since mm->mmap_lock is not held in bpf_get_stack[id]() use case,
the above warning is emitted during test run.

This patch fixed the issue by (1). using mmap_read_trylock() instead of
mmap_read_trylock_non_owner() to satisfy lockdep checking in find_vma(), and
(2). droping lockdep for mmap_lock right before the irq_work_queue(). The
function mmap_read_trylock_non_owner() is also removed since after this
patch nobody calls it any more.

Fixes: 5b78ed24e8ec ("mm/pagemap: add mmap_assert_locked() annotations to find_vma*()")
Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Luigi Rizzo <lrizzo@google.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: linux-mm@kvack.org
Link: https://lore.kernel.org/bpf/20210909155000.1610299-1-yhs@fb.com
---
 include/linux/mmap_lock.h |  9 ---------
 kernel/bpf/stackmap.c     | 10 ++++++++--
 2 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 0540f0156f58..3af8f7fb067d 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -144,15 +144,6 @@ static inline void mmap_read_unlock(struct mm_struct *mm)
 	__mmap_lock_trace_released(mm, false);
 }
 
-static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
-{
-	if (mmap_read_trylock(mm)) {
-		rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
-		return true;
-	}
-	return false;
-}
-
 static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
 {
 	up_read_non_owner(&mm->mmap_lock);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index e8eefdf8cf3e..09a3fd97d329 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -179,7 +179,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	 * with build_id.
 	 */
 	if (!user || !current || !current->mm || irq_work_busy ||
-	    !mmap_read_trylock_non_owner(current->mm)) {
+	    !mmap_read_trylock(current->mm)) {
 		/* cannot access current->mm, fall back to ips */
 		for (i = 0; i < trace_nr; i++) {
 			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
@@ -204,9 +204,15 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
 	}
 
 	if (!work) {
-		mmap_read_unlock_non_owner(current->mm);
+		mmap_read_unlock(current->mm);
 	} else {
 		work->mm = current->mm;
+
+		/* The lock will be released once we're out of interrupt
+		 * context. Tell lockdep that we've released it now so
+		 * it doesn't complain that we forgot to release it.
+		 */
+		rwsem_release(&current->mm->mmap_lock.dep_map, _RET_IP_);
 		irq_work_queue(&work->irq_work);
 	}
 }
-- 
cgit v1.2.3


From cb19c107979b8000825060cb5b84dfe9212f4f86 Mon Sep 17 00:00:00 2001
From: Yongqiang Niu <yongqiang.niu@mediatek.com>
Date: Mon, 2 Aug 2021 16:59:32 +0800
Subject: soc: mediatek: mmsys: add comp OVL_2L2/POSTMASK/RDMA4

This patch add some more ddp component
OVL_2L2 is ovl which include 2 layers overlay
POSTMASK control round corner for display frame
RDMA4 read dma buffer

Signed-off-by: Yongqiang Niu <yongqiang.niu@mediatek.com>
Reviewed-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
Reviewed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Link: https://lore.kernel.org/r/1627894773-23872-2-git-send-email-yongqiang.niu@mediatek.com
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 include/linux/soc/mediatek/mtk-mmsys.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/mediatek/mtk-mmsys.h b/include/linux/soc/mediatek/mtk-mmsys.h
index 2228bf6133da..4bba275e235a 100644
--- a/include/linux/soc/mediatek/mtk-mmsys.h
+++ b/include/linux/soc/mediatek/mtk-mmsys.h
@@ -29,13 +29,16 @@ enum mtk_ddp_comp_id {
 	DDP_COMPONENT_OVL0,
 	DDP_COMPONENT_OVL_2L0,
 	DDP_COMPONENT_OVL_2L1,
+	DDP_COMPONENT_OVL_2L2,
 	DDP_COMPONENT_OVL1,
+	DDP_COMPONENT_POSTMASK0,
 	DDP_COMPONENT_PWM0,
 	DDP_COMPONENT_PWM1,
 	DDP_COMPONENT_PWM2,
 	DDP_COMPONENT_RDMA0,
 	DDP_COMPONENT_RDMA1,
 	DDP_COMPONENT_RDMA2,
+	DDP_COMPONENT_RDMA4,
 	DDP_COMPONENT_UFOE,
 	DDP_COMPONENT_WDMA0,
 	DDP_COMPONENT_WDMA1,
-- 
cgit v1.2.3


From f55e36d5ab76c3097ff36ecea60b91c6b0d80fc8 Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Mon, 13 Sep 2021 10:50:24 +0300
Subject: qed: Improve the stack space of filter_config()

As it was reported and discussed in: https://lore.kernel.org/lkml/CAHk-=whF9F89vsfH8E9TGc0tZA-yhzi2Di8wOtquNB5vRkFX5w@mail.gmail.com/
This patch improves the stack space of qede_config_rx_mode() by
splitting filter_config() to 3 functions and removing the
union qed_filter_type_params.

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_l2.c       | 23 ++-----------
 drivers/net/ethernet/qlogic/qede/qede_filter.c | 47 +++++++++++---------------
 include/linux/qed/qed_eth_if.h                 | 21 +++++-------
 3 files changed, 30 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index dfaf10edfabf..ba8c7a31cce1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2763,25 +2763,6 @@ static int qed_configure_filter_mcast(struct qed_dev *cdev,
 	return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL);
 }
 
-static int qed_configure_filter(struct qed_dev *cdev,
-				struct qed_filter_params *params)
-{
-	enum qed_filter_rx_mode_type accept_flags;
-
-	switch (params->type) {
-	case QED_FILTER_TYPE_UCAST:
-		return qed_configure_filter_ucast(cdev, &params->filter.ucast);
-	case QED_FILTER_TYPE_MCAST:
-		return qed_configure_filter_mcast(cdev, &params->filter.mcast);
-	case QED_FILTER_TYPE_RX_MODE:
-		accept_flags = params->filter.accept_flags;
-		return qed_configure_filter_rx_mode(cdev, accept_flags);
-	default:
-		DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type);
-		return -EINVAL;
-	}
-}
-
 static int qed_configure_arfs_searcher(struct qed_dev *cdev,
 				       enum qed_filter_config_mode mode)
 {
@@ -2904,7 +2885,9 @@ static const struct qed_eth_ops qed_eth_ops_pass = {
 	.q_rx_stop = &qed_stop_rxq,
 	.q_tx_start = &qed_start_txq,
 	.q_tx_stop = &qed_stop_txq,
-	.filter_config = &qed_configure_filter,
+	.filter_config_rx_mode = &qed_configure_filter_rx_mode,
+	.filter_config_ucast = &qed_configure_filter_ucast,
+	.filter_config_mcast = &qed_configure_filter_mcast,
 	.fastpath_stop = &qed_fastpath_stop,
 	.eth_cqe_completion = &qed_fp_cqe_completion,
 	.get_vport_stats = &qed_get_vport_stats,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index a2e4dfb5cb44..f99b085b56a5 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -619,30 +619,28 @@ static int qede_set_ucast_rx_mac(struct qede_dev *edev,
 				 enum qed_filter_xcast_params_type opcode,
 				 unsigned char mac[ETH_ALEN])
 {
-	struct qed_filter_params filter_cmd;
+	struct qed_filter_ucast_params ucast;
 
-	memset(&filter_cmd, 0, sizeof(filter_cmd));
-	filter_cmd.type = QED_FILTER_TYPE_UCAST;
-	filter_cmd.filter.ucast.type = opcode;
-	filter_cmd.filter.ucast.mac_valid = 1;
-	ether_addr_copy(filter_cmd.filter.ucast.mac, mac);
+	memset(&ucast, 0, sizeof(ucast));
+	ucast.type = opcode;
+	ucast.mac_valid = 1;
+	ether_addr_copy(ucast.mac, mac);
 
-	return edev->ops->filter_config(edev->cdev, &filter_cmd);
+	return edev->ops->filter_config_ucast(edev->cdev, &ucast);
 }
 
 static int qede_set_ucast_rx_vlan(struct qede_dev *edev,
 				  enum qed_filter_xcast_params_type opcode,
 				  u16 vid)
 {
-	struct qed_filter_params filter_cmd;
+	struct qed_filter_ucast_params ucast;
 
-	memset(&filter_cmd, 0, sizeof(filter_cmd));
-	filter_cmd.type = QED_FILTER_TYPE_UCAST;
-	filter_cmd.filter.ucast.type = opcode;
-	filter_cmd.filter.ucast.vlan_valid = 1;
-	filter_cmd.filter.ucast.vlan = vid;
+	memset(&ucast, 0, sizeof(ucast));
+	ucast.type = opcode;
+	ucast.vlan_valid = 1;
+	ucast.vlan = vid;
 
-	return edev->ops->filter_config(edev->cdev, &filter_cmd);
+	return edev->ops->filter_config_ucast(edev->cdev, &ucast);
 }
 
 static int qede_config_accept_any_vlan(struct qede_dev *edev, bool action)
@@ -1057,18 +1055,17 @@ static int qede_set_mcast_rx_mac(struct qede_dev *edev,
 				 enum qed_filter_xcast_params_type opcode,
 				 unsigned char *mac, int num_macs)
 {
-	struct qed_filter_params filter_cmd;
+	struct qed_filter_mcast_params mcast;
 	int i;
 
-	memset(&filter_cmd, 0, sizeof(filter_cmd));
-	filter_cmd.type = QED_FILTER_TYPE_MCAST;
-	filter_cmd.filter.mcast.type = opcode;
-	filter_cmd.filter.mcast.num = num_macs;
+	memset(&mcast, 0, sizeof(mcast));
+	mcast.type = opcode;
+	mcast.num = num_macs;
 
 	for (i = 0; i < num_macs; i++, mac += ETH_ALEN)
-		ether_addr_copy(filter_cmd.filter.mcast.mac[i], mac);
+		ether_addr_copy(mcast.mac[i], mac);
 
-	return edev->ops->filter_config(edev->cdev, &filter_cmd);
+	return edev->ops->filter_config_mcast(edev->cdev, &mcast);
 }
 
 int qede_set_mac_addr(struct net_device *ndev, void *p)
@@ -1194,7 +1191,6 @@ void qede_config_rx_mode(struct net_device *ndev)
 {
 	enum qed_filter_rx_mode_type accept_flags;
 	struct qede_dev *edev = netdev_priv(ndev);
-	struct qed_filter_params rx_mode;
 	unsigned char *uc_macs, *temp;
 	struct netdev_hw_addr *ha;
 	int rc, uc_count;
@@ -1220,10 +1216,6 @@ void qede_config_rx_mode(struct net_device *ndev)
 
 	netif_addr_unlock_bh(ndev);
 
-	/* Configure the struct for the Rx mode */
-	memset(&rx_mode, 0, sizeof(struct qed_filter_params));
-	rx_mode.type = QED_FILTER_TYPE_RX_MODE;
-
 	/* Remove all previous unicast secondary macs and multicast macs
 	 * (configure / leave the primary mac)
 	 */
@@ -1271,8 +1263,7 @@ void qede_config_rx_mode(struct net_device *ndev)
 		qede_config_accept_any_vlan(edev, false);
 	}
 
-	rx_mode.filter.accept_flags = accept_flags;
-	edev->ops->filter_config(edev->cdev, &rx_mode);
+	edev->ops->filter_config_rx_mode(edev->cdev, accept_flags);
 out:
 	kfree(uc_macs);
 }
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 812a4d751163..4df0bf0a0864 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -145,12 +145,6 @@ struct qed_filter_mcast_params {
 	unsigned char mac[64][ETH_ALEN];
 };
 
-union qed_filter_type_params {
-	enum qed_filter_rx_mode_type accept_flags;
-	struct qed_filter_ucast_params ucast;
-	struct qed_filter_mcast_params mcast;
-};
-
 enum qed_filter_type {
 	QED_FILTER_TYPE_UCAST,
 	QED_FILTER_TYPE_MCAST,
@@ -158,11 +152,6 @@ enum qed_filter_type {
 	QED_MAX_FILTER_TYPES,
 };
 
-struct qed_filter_params {
-	enum qed_filter_type type;
-	union qed_filter_type_params filter;
-};
-
 struct qed_tunn_params {
 	u16 vxlan_port;
 	u8 update_vxlan_port;
@@ -314,8 +303,14 @@ struct qed_eth_ops {
 
 	int (*q_tx_stop)(struct qed_dev *cdev, u8 rss_id, void *handle);
 
-	int (*filter_config)(struct qed_dev *cdev,
-			     struct qed_filter_params *params);
+	int (*filter_config_rx_mode)(struct qed_dev *cdev,
+				     enum qed_filter_rx_mode_type type);
+
+	int (*filter_config_ucast)(struct qed_dev *cdev,
+				   struct qed_filter_ucast_params *params);
+
+	int (*filter_config_mcast)(struct qed_dev *cdev,
+				   struct qed_filter_mcast_params *params);
 
 	int (*fastpath_stop)(struct qed_dev *cdev);
 
-- 
cgit v1.2.3


From 537d3af1bee8ad1415fda9b622d1ea6d1ae76dfa Mon Sep 17 00:00:00 2001
From: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Date: Mon, 12 Jul 2021 14:39:12 +0200
Subject: rpmsg: Fix rpmsg_create_ept return when RPMSG config is not defined

According to the description of the rpmsg_create_ept in rpmsg_core.c
the function should return NULL on error.

Fixes: 2c8a57088045 ("rpmsg: Provide function stubs for API")
Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Link: https://lore.kernel.org/r/20210712123912.10672-1-arnaud.pouliquen@foss.st.com
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/rpmsg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index d97dcd049f18..a8dcf8a9ae88 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -231,7 +231,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev
 	/* This shouldn't be possible */
 	WARN_ON(1);
 
-	return ERR_PTR(-ENXIO);
+	return NULL;
 }
 
 static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len)
-- 
cgit v1.2.3


From 4eb6bd55cfb22ffc20652732340c4962f3ac9a91 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 10 Sep 2021 16:40:39 -0700
Subject: compiler.h: drop fallback overflow checkers

Once upgrading the minimum supported version of GCC to 5.1, we can drop
the fallback code for !COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW.

This is effectively a revert of commit f0907827a8a9 ("compiler.h: enable
builtin overflow checkers and add fallback code")

Link: https://github.com/ClangBuiltLinux/linux/issues/1438#issuecomment-916745801
Suggested-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-clang.h     |  13 ----
 include/linux/compiler-gcc.h       |   4 --
 include/linux/overflow.h           | 138 +-----------------------------------
 tools/include/linux/compiler-gcc.h |   4 --
 tools/include/linux/overflow.h     | 140 +------------------------------------
 5 files changed, 6 insertions(+), 293 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 49b0ac8b6fd3..3c4de9b6c6e3 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -62,19 +62,6 @@
 #define __no_sanitize_coverage
 #endif
 
-/*
- * Not all versions of clang implement the type-generic versions
- * of the builtin overflow checkers. Fortunately, clang implements
- * __has_builtin allowing us to avoid awkward version
- * checks. Unfortunately, we don't know which version of gcc clang
- * pretends to be, so the macro may or may not be defined.
- */
-#if __has_builtin(__builtin_mul_overflow) && \
-    __has_builtin(__builtin_add_overflow) && \
-    __has_builtin(__builtin_sub_overflow)
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
 #if __has_feature(shadow_call_stack)
 # define __noscs	__attribute__((__no_sanitize__("shadow-call-stack")))
 #endif
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cb9217fc60af..3f7f6fa0e051 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -128,10 +128,6 @@
 #define __no_sanitize_coverage
 #endif
 
-#if GCC_VERSION >= 50100
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
-
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 0f12345c21fb..4669632bd72b 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -6,12 +6,9 @@
 #include <linux/limits.h>
 
 /*
- * In the fallback code below, we need to compute the minimum and
- * maximum values representable in a given type. These macros may also
- * be useful elsewhere, so we provide them outside the
- * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
- *
- * It would seem more obvious to do something like
+ * We need to compute the minimum and maximum values representable in a given
+ * type. These macros may also be useful elsewhere. It would seem more obvious
+ * to do something like:
  *
  * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
  * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
@@ -54,7 +51,6 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 	return unlikely(overflow);
 }
 
-#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
 /*
  * For simplicity and code hygiene, the fallback code below insists on
  * a, b and *d having the same type (similar to the min() and max()
@@ -90,134 +86,6 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 	__builtin_mul_overflow(__a, __b, __d);	\
 }))
 
-#else
-
-
-/* Checking for unsigned overflow is relatively easy without causing UB. */
-#define __unsigned_add_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = __a + __b;			\
-	*__d < __a;				\
-})
-#define __unsigned_sub_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = __a - __b;			\
-	__a < __b;				\
-})
-/*
- * If one of a or b is a compile-time constant, this avoids a division.
- */
-#define __unsigned_mul_overflow(a, b, d) ({		\
-	typeof(a) __a = (a);				\
-	typeof(b) __b = (b);				\
-	typeof(d) __d = (d);				\
-	(void) (&__a == &__b);				\
-	(void) (&__a == __d);				\
-	*__d = __a * __b;				\
-	__builtin_constant_p(__b) ?			\
-	  __b > 0 && __a > type_max(typeof(__a)) / __b : \
-	  __a > 0 && __b > type_max(typeof(__b)) / __a;	 \
-})
-
-/*
- * For signed types, detecting overflow is much harder, especially if
- * we want to avoid UB. But the interface of these macros is such that
- * we must provide a result in *d, and in fact we must produce the
- * result promised by gcc's builtins, which is simply the possibly
- * wrapped-around value. Fortunately, we can just formally do the
- * operations in the widest relevant unsigned type (u64) and then
- * truncate the result - gcc is smart enough to generate the same code
- * with and without the (u64) casts.
- */
-
-/*
- * Adding two signed integers can overflow only if they have the same
- * sign, and overflow has happened iff the result has the opposite
- * sign.
- */
-#define __signed_add_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = (u64)__a + (u64)__b;		\
-	(((~(__a ^ __b)) & (*__d ^ __a))	\
-		& type_min(typeof(__a))) != 0;	\
-})
-
-/*
- * Subtraction is similar, except that overflow can now happen only
- * when the signs are opposite. In this case, overflow has happened if
- * the result has the opposite sign of a.
- */
-#define __signed_sub_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = (u64)__a - (u64)__b;		\
-	((((__a ^ __b)) & (*__d ^ __a))		\
-		& type_min(typeof(__a))) != 0;	\
-})
-
-/*
- * Signed multiplication is rather hard. gcc always follows C99, so
- * division is truncated towards 0. This means that we can write the
- * overflow check like this:
- *
- * (a > 0 && (b > MAX/a || b < MIN/a)) ||
- * (a < -1 && (b > MIN/a || b < MAX/a) ||
- * (a == -1 && b == MIN)
- *
- * The redundant casts of -1 are to silence an annoying -Wtype-limits
- * (included in -Wextra) warning: When the type is u8 or u16, the
- * __b_c_e in check_mul_overflow obviously selects
- * __unsigned_mul_overflow, but unfortunately gcc still parses this
- * code and warns about the limited range of __b.
- */
-
-#define __signed_mul_overflow(a, b, d) ({				\
-	typeof(a) __a = (a);						\
-	typeof(b) __b = (b);						\
-	typeof(d) __d = (d);						\
-	typeof(a) __tmax = type_max(typeof(a));				\
-	typeof(a) __tmin = type_min(typeof(a));				\
-	(void) (&__a == &__b);						\
-	(void) (&__a == __d);						\
-	*__d = (u64)__a * (u64)__b;					\
-	(__b > 0   && (__a > __tmax/__b || __a < __tmin/__b)) ||	\
-	(__b < (typeof(__b))-1  && (__a > __tmin/__b || __a < __tmax/__b)) || \
-	(__b == (typeof(__b))-1 && __a == __tmin);			\
-})
-
-
-#define check_add_overflow(a, b, d)	__must_check_overflow(		\
-	__builtin_choose_expr(is_signed_type(typeof(a)),		\
-			__signed_add_overflow(a, b, d),			\
-			__unsigned_add_overflow(a, b, d)))
-
-#define check_sub_overflow(a, b, d)	__must_check_overflow(		\
-	__builtin_choose_expr(is_signed_type(typeof(a)),		\
-			__signed_sub_overflow(a, b, d),			\
-			__unsigned_sub_overflow(a, b, d)))
-
-#define check_mul_overflow(a, b, d)	__must_check_overflow(		\
-	__builtin_choose_expr(is_signed_type(typeof(a)),		\
-			__signed_mul_overflow(a, b, d),			\
-			__unsigned_mul_overflow(a, b, d)))
-
-#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
-
 /** check_shl_overflow() - Calculate a left-shifted value and check overflow
  *
  * @a: Value to be shifted
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 95c072b70d0e..a590a1dfafd9 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -38,7 +38,3 @@
 #endif
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 #define __scanf(a, b)	__attribute__((format(scanf, a, b)))
-
-#if GCC_VERSION >= 50100
-#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
-#endif
diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h
index 8712ff70995f..dcb0c1bf6866 100644
--- a/tools/include/linux/overflow.h
+++ b/tools/include/linux/overflow.h
@@ -5,12 +5,9 @@
 #include <linux/compiler.h>
 
 /*
- * In the fallback code below, we need to compute the minimum and
- * maximum values representable in a given type. These macros may also
- * be useful elsewhere, so we provide them outside the
- * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
- *
- * It would seem more obvious to do something like
+ * We need to compute the minimum and maximum values representable in a given
+ * type. These macros may also be useful elsewhere. It would seem more obvious
+ * to do something like:
  *
  * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
  * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
@@ -36,8 +33,6 @@
 #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
 #define type_min(T) ((T)((T)-type_max(T)-(T)1))
 
-
-#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
 /*
  * For simplicity and code hygiene, the fallback code below insists on
  * a, b and *d having the same type (similar to the min() and max()
@@ -73,135 +68,6 @@
 	__builtin_mul_overflow(__a, __b, __d);	\
 })
 
-#else
-
-
-/* Checking for unsigned overflow is relatively easy without causing UB. */
-#define __unsigned_add_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = __a + __b;			\
-	*__d < __a;				\
-})
-#define __unsigned_sub_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = __a - __b;			\
-	__a < __b;				\
-})
-/*
- * If one of a or b is a compile-time constant, this avoids a division.
- */
-#define __unsigned_mul_overflow(a, b, d) ({		\
-	typeof(a) __a = (a);				\
-	typeof(b) __b = (b);				\
-	typeof(d) __d = (d);				\
-	(void) (&__a == &__b);				\
-	(void) (&__a == __d);				\
-	*__d = __a * __b;				\
-	__builtin_constant_p(__b) ?			\
-	  __b > 0 && __a > type_max(typeof(__a)) / __b : \
-	  __a > 0 && __b > type_max(typeof(__b)) / __a;	 \
-})
-
-/*
- * For signed types, detecting overflow is much harder, especially if
- * we want to avoid UB. But the interface of these macros is such that
- * we must provide a result in *d, and in fact we must produce the
- * result promised by gcc's builtins, which is simply the possibly
- * wrapped-around value. Fortunately, we can just formally do the
- * operations in the widest relevant unsigned type (u64) and then
- * truncate the result - gcc is smart enough to generate the same code
- * with and without the (u64) casts.
- */
-
-/*
- * Adding two signed integers can overflow only if they have the same
- * sign, and overflow has happened iff the result has the opposite
- * sign.
- */
-#define __signed_add_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = (u64)__a + (u64)__b;		\
-	(((~(__a ^ __b)) & (*__d ^ __a))	\
-		& type_min(typeof(__a))) != 0;	\
-})
-
-/*
- * Subtraction is similar, except that overflow can now happen only
- * when the signs are opposite. In this case, overflow has happened if
- * the result has the opposite sign of a.
- */
-#define __signed_sub_overflow(a, b, d) ({	\
-	typeof(a) __a = (a);			\
-	typeof(b) __b = (b);			\
-	typeof(d) __d = (d);			\
-	(void) (&__a == &__b);			\
-	(void) (&__a == __d);			\
-	*__d = (u64)__a - (u64)__b;		\
-	((((__a ^ __b)) & (*__d ^ __a))		\
-		& type_min(typeof(__a))) != 0;	\
-})
-
-/*
- * Signed multiplication is rather hard. gcc always follows C99, so
- * division is truncated towards 0. This means that we can write the
- * overflow check like this:
- *
- * (a > 0 && (b > MAX/a || b < MIN/a)) ||
- * (a < -1 && (b > MIN/a || b < MAX/a) ||
- * (a == -1 && b == MIN)
- *
- * The redundant casts of -1 are to silence an annoying -Wtype-limits
- * (included in -Wextra) warning: When the type is u8 or u16, the
- * __b_c_e in check_mul_overflow obviously selects
- * __unsigned_mul_overflow, but unfortunately gcc still parses this
- * code and warns about the limited range of __b.
- */
-
-#define __signed_mul_overflow(a, b, d) ({				\
-	typeof(a) __a = (a);						\
-	typeof(b) __b = (b);						\
-	typeof(d) __d = (d);						\
-	typeof(a) __tmax = type_max(typeof(a));				\
-	typeof(a) __tmin = type_min(typeof(a));				\
-	(void) (&__a == &__b);						\
-	(void) (&__a == __d);						\
-	*__d = (u64)__a * (u64)__b;					\
-	(__b > 0   && (__a > __tmax/__b || __a < __tmin/__b)) ||	\
-	(__b < (typeof(__b))-1  && (__a > __tmin/__b || __a < __tmax/__b)) || \
-	(__b == (typeof(__b))-1 && __a == __tmin);			\
-})
-
-
-#define check_add_overflow(a, b, d)					\
-	__builtin_choose_expr(is_signed_type(typeof(a)),		\
-			__signed_add_overflow(a, b, d),			\
-			__unsigned_add_overflow(a, b, d))
-
-#define check_sub_overflow(a, b, d)					\
-	__builtin_choose_expr(is_signed_type(typeof(a)),		\
-			__signed_sub_overflow(a, b, d),			\
-			__unsigned_sub_overflow(a, b, d))
-
-#define check_mul_overflow(a, b, d)					\
-	__builtin_choose_expr(is_signed_type(typeof(a)),		\
-			__signed_mul_overflow(a, b, d),			\
-			__unsigned_mul_overflow(a, b, d))
-
-
-#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
-
 /**
  * array_size() - Calculate size of 2-dimensional array.
  *
-- 
cgit v1.2.3


From 4e59869aa6550657cb148ad49835605660ec9b88 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 10 Sep 2021 16:40:46 -0700
Subject: compiler-gcc.h: drop checks for older GCC versions

Now that GCC 5.1 is the minimally supported default, drop the values we
don't use.

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler-gcc.h       | 4 +---
 tools/include/linux/compiler-gcc.h | 4 +---
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 3f7f6fa0e051..fd82ce169ce9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -98,10 +98,8 @@
 
 #if GCC_VERSION >= 70000
 #define KASAN_ABI_VERSION 5
-#elif GCC_VERSION >= 50000
+#else
 #define KASAN_ABI_VERSION 4
-#elif GCC_VERSION >= 40902
-#define KASAN_ABI_VERSION 3
 #endif
 
 #if __has_attribute(__no_sanitize_address__)
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index a590a1dfafd9..43d9a46d36f0 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -16,9 +16,7 @@
 # define __fallthrough __attribute__ ((fallthrough))
 #endif
 
-#if GCC_VERSION >= 40300
-# define __compiletime_error(message) __attribute__((error(message)))
-#endif /* GCC_VERSION >= 40300 */
+#define __compiletime_error(message) __attribute__((error(message)))
 
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-- 
cgit v1.2.3


From 6d2ef226f2f18d530e48ead0cb5704505628b797 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 13 Sep 2021 10:20:01 -0700
Subject: compiler_attributes.h: drop __has_attribute() support for gcc4

Now that GCC 5.1 is the minimally supported default, the manual
workaround for older gcc versions not having __has_attribute() are no
longer relevant and can be removed.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler_attributes.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 2487be0e7199..ba417a5c80af 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -20,26 +20,6 @@
  * Provide links to the documentation of each supported compiler, if it exists.
  */
 
-/*
- * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
- * In the meantime, to support gcc < 5, we implement __has_attribute
- * by hand.
- */
-#ifndef __has_attribute
-# define __has_attribute(x) __GCC4_has_attribute_##x
-# define __GCC4_has_attribute___assume_aligned__      1
-# define __GCC4_has_attribute___copy__                0
-# define __GCC4_has_attribute___designated_init__     0
-# define __GCC4_has_attribute___externally_visible__  1
-# define __GCC4_has_attribute___no_caller_saved_registers__ 0
-# define __GCC4_has_attribute___noclone__             1
-# define __GCC4_has_attribute___no_profile_instrument_function__ 0
-# define __GCC4_has_attribute___nonstring__           0
-# define __GCC4_has_attribute___no_sanitize_address__ 1
-# define __GCC4_has_attribute___no_sanitize_undefined__ 1
-# define __GCC4_has_attribute___fallthrough__         0
-#endif
-
 /*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute
  */
-- 
cgit v1.2.3


From df26327ea097eb78e7967c45df6b23010c43c28d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 13 Sep 2021 10:29:44 -0700
Subject: Drop some straggling mentions of gcc-4.9 as being stale

Fix up the admin-guide README file to the new gcc-5.1 requirement, and
remove a stale comment about gcc support for the __assume_aligned__
attribute.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/README.rst                    | 2 +-
 Documentation/translations/zh_CN/admin-guide/README.rst | 2 +-
 Documentation/translations/zh_TW/admin-guide/README.rst | 2 +-
 include/linux/compiler_attributes.h                     | 1 -
 4 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst
index 35314b63008c..caa3c09a5c3f 100644
--- a/Documentation/admin-guide/README.rst
+++ b/Documentation/admin-guide/README.rst
@@ -259,7 +259,7 @@ Configuring the kernel
 Compiling the kernel
 --------------------
 
- - Make sure you have at least gcc 4.9 available.
+ - Make sure you have at least gcc 5.1 available.
    For more information, refer to :ref:`Documentation/process/changes.rst <changes>`.
 
    Please note that you can still run a.out user programs with this kernel.
diff --git a/Documentation/translations/zh_CN/admin-guide/README.rst b/Documentation/translations/zh_CN/admin-guide/README.rst
index 669a022f6817..980eb20521cf 100644
--- a/Documentation/translations/zh_CN/admin-guide/README.rst
+++ b/Documentation/translations/zh_CN/admin-guide/README.rst
@@ -223,7 +223,7 @@ Linux内核5.x版本 <http://kernel.org/>
 编译内核
 ---------
 
- - 确保您至少有gcc 4.9可用。
+ - 确保您至少有gcc 5.1可用。
    有关更多信息，请参阅 :ref:`Documentation/process/changes.rst <changes>` 。
 
    请注意，您仍然可以使用此内核运行a.out用户程序。
diff --git a/Documentation/translations/zh_TW/admin-guide/README.rst b/Documentation/translations/zh_TW/admin-guide/README.rst
index b752e50359e6..6ce97edbab37 100644
--- a/Documentation/translations/zh_TW/admin-guide/README.rst
+++ b/Documentation/translations/zh_TW/admin-guide/README.rst
@@ -226,7 +226,7 @@ Linux內核5.x版本 <http://kernel.org/>
 編譯內核
 ---------
 
- - 確保您至少有gcc 4.9可用。
+ - 確保您至少有gcc 5.1可用。
    有關更多信息，請參閱 :ref:`Documentation/process/changes.rst <changes>` 。
 
    請注意，您仍然可以使用此內核運行a.out用戶程序。
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index ba417a5c80af..ee19cebabcf5 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -54,7 +54,6 @@
  * compiler should see some alignment anyway, when the return value is
  * massaged by 'flags = ptr & 3; ptr &= ~3;').
  *
- * Optional: only supported since gcc >= 4.9
  * Optional: not supported by icc
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
-- 
cgit v1.2.3


From 80f0a1f99983296be587325004acf72dd11eccd8 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Mon, 13 Sep 2021 12:02:56 +0200
Subject: workqueue: annotate alloc_workqueue() as printf

This also enables checking of allows alloc_ordered_workqueue().

Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/workqueue.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 2ebef6b1a3d6..74d3c1efd9bb 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -399,9 +399,8 @@ extern struct workqueue_struct *system_freezable_power_efficient_wq;
  * RETURNS:
  * Pointer to the allocated workqueue on success, %NULL on failure.
  */
-struct workqueue_struct *alloc_workqueue(const char *fmt,
-					 unsigned int flags,
-					 int max_active, ...);
+__printf(1, 4) struct workqueue_struct *
+alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...);
 
 /**
  * alloc_ordered_workqueue - allocate an ordered workqueue
-- 
cgit v1.2.3


From c22ac2a3d4bd83411ebf0b1726e9e5fc4f5e7ebf Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 10 Sep 2021 11:33:50 -0700
Subject: perf: Enable branch record for software events

The typical way to access branch record (e.g. Intel LBR) is via hardware
perf_event. For CPUs with FREEZE_LBRS_ON_PMI support, PMI could capture
reliable LBR. On the other hand, LBR could also be useful in non-PMI
scenario. For example, in kretprobe or bpf fexit program, LBR could
provide a lot of information on what happened with the function. Add API
to use branch record for software use.

Note that, when the software event triggers, it is necessary to stop the
branch record hardware asap. Therefore, static_call is used to remove some
branch instructions in this process.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/bpf/20210910183352.3151445-2-songliubraving@fb.com
---
 arch/x86/events/intel/core.c | 67 ++++++++++++++++++++++++++++++++++++++++----
 arch/x86/events/intel/ds.c   |  2 +-
 arch/x86/events/intel/lbr.c  | 20 ++++---------
 arch/x86/events/perf_event.h | 19 +++++++++++++
 include/linux/perf_event.h   | 23 +++++++++++++++
 kernel/events/core.c         |  2 ++
 6 files changed, 111 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7011e87be6d0..1248fc1937f8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2143,19 +2143,19 @@ static __initconst const u64 knl_hw_cache_extra_regs
  * However, there are some cases which may change PEBS status, e.g. PMI
  * throttle. The PEBS_ENABLE should be updated where the status changes.
  */
-static void __intel_pmu_disable_all(void)
+static __always_inline void __intel_pmu_disable_all(bool bts)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
-	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+	if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
 }
 
-static void intel_pmu_disable_all(void)
+static __always_inline void intel_pmu_disable_all(void)
 {
-	__intel_pmu_disable_all();
+	__intel_pmu_disable_all(true);
 	intel_pmu_pebs_disable_all();
 	intel_pmu_lbr_disable_all();
 }
@@ -2186,6 +2186,49 @@ static void intel_pmu_enable_all(int added)
 	__intel_pmu_enable_all(added, false);
 }
 
+static noinline int
+__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries,
+				  unsigned int cnt, unsigned long flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	intel_pmu_lbr_read();
+	cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr);
+
+	memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
+	intel_pmu_enable_all(0);
+	local_irq_restore(flags);
+	return cnt;
+}
+
+static int
+intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+	unsigned long flags;
+
+	/* must not have branches... */
+	local_irq_save(flags);
+	__intel_pmu_disable_all(false); /* we don't care about BTS */
+	__intel_pmu_pebs_disable_all();
+	__intel_pmu_lbr_disable();
+	/*            ... until here */
+	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
+}
+
+static int
+intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+	unsigned long flags;
+
+	/* must not have branches... */
+	local_irq_save(flags);
+	__intel_pmu_disable_all(false); /* we don't care about BTS */
+	__intel_pmu_pebs_disable_all();
+	__intel_pmu_arch_lbr_disable();
+	/*            ... until here */
+	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
+}
+
 /*
  * Workaround for:
  *   Intel Errata AAK100 (model 26)
@@ -2929,7 +2972,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_bts_disable_local();
 	cpuc->enabled = 0;
-	__intel_pmu_disable_all();
+	__intel_pmu_disable_all(true);
 	handled = intel_pmu_drain_bts_buffer();
 	handled += intel_bts_interrupt();
 	status = intel_pmu_get_status();
@@ -6283,9 +6326,21 @@ __init int intel_pmu_init(void)
 			x86_pmu.lbr_nr = 0;
 	}
 
-	if (x86_pmu.lbr_nr)
+	if (x86_pmu.lbr_nr) {
 		pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
 
+		/* only support branch_stack snapshot for perfmon >= v2 */
+		if (x86_pmu.disable_all == intel_pmu_disable_all) {
+			if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) {
+				static_call_update(perf_snapshot_branch_stack,
+						   intel_pmu_snapshot_arch_branch_stack);
+			} else {
+				static_call_update(perf_snapshot_branch_stack,
+						   intel_pmu_snapshot_branch_stack);
+			}
+		}
+	}
+
 	intel_pmu_check_extra_regs(x86_pmu.extra_regs);
 
 	/* Support full width counters using alternative MSR range */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8647713276a7..ac5991fea9ee 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1301,7 +1301,7 @@ void intel_pmu_pebs_disable_all(void)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	if (cpuc->pebs_enabled)
-		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+		__intel_pmu_pebs_disable_all();
 }
 
 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 9e6d6eaeb4cb..6b72e9b55c69 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -228,20 +228,6 @@ static void __intel_pmu_lbr_enable(bool pmi)
 		wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
 }
 
-static void __intel_pmu_lbr_disable(void)
-{
-	u64 debugctl;
-
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
-		wrmsrl(MSR_ARCH_LBR_CTL, 0);
-		return;
-	}
-
-	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
 void intel_pmu_lbr_reset_32(void)
 {
 	int i;
@@ -779,8 +765,12 @@ void intel_pmu_lbr_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	if (cpuc->lbr_users && !vlbr_exclude_host())
+	if (cpuc->lbr_users && !vlbr_exclude_host()) {
+		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+			return __intel_pmu_arch_lbr_disable();
+
 		__intel_pmu_lbr_disable();
+	}
 }
 
 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e3ac05c97b5e..0e3e596e33cd 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1240,6 +1240,25 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
 	return intel_pmu_has_bts_period(event, hwc->sample_period);
 }
 
+static __always_inline void __intel_pmu_pebs_disable_all(void)
+{
+	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+}
+
+static __always_inline void __intel_pmu_arch_lbr_disable(void)
+{
+	wrmsrl(MSR_ARCH_LBR_CTL, 0);
+}
+
+static __always_inline void __intel_pmu_lbr_disable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
 int intel_pmu_save_and_restart(struct perf_event *event);
 
 struct event_constraint *
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fe156a8170aa..0cbc5dfe1110 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -57,6 +57,7 @@ struct perf_guest_info_callbacks {
 #include <linux/cgroup.h>
 #include <linux/refcount.h>
 #include <linux/security.h>
+#include <linux/static_call.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -1612,4 +1613,26 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
 extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
 #endif
 
+/*
+ * Snapshot branch stack on software events.
+ *
+ * Branch stack can be very useful in understanding software events. For
+ * example, when a long function, e.g. sys_perf_event_open, returns an
+ * errno, it is not obvious why the function failed. Branch stack could
+ * provide very helpful information in this type of scenarios.
+ *
+ * On software event, it is necessary to stop the hardware branch recorder
+ * fast. Otherwise, the hardware register/buffer will be flushed with
+ * entries of the triggering event. Therefore, static call is used to
+ * stop the hardware recorder.
+ */
+
+/*
+ * cnt is the number of entries allocated for entries.
+ * Return number of entries copied to .
+ */
+typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
+					   unsigned int cnt);
+DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
+
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 744e8726c5b2..349f80aa9e7d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13435,3 +13435,5 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
 	.threaded	= true,
 };
 #endif /* CONFIG_CGROUP_PERF */
+
+DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
-- 
cgit v1.2.3


From 8520e224f547cd070c7c8f97b1fc6d58cff7ccaa Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 14 Sep 2021 01:07:57 +0200
Subject: bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode

Fix cgroup v1 interference when non-root cgroup v2 BPF programs are used.
Back in the days, commit bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
embedded per-socket cgroup information into sock->sk_cgrp_data and in order
to save 8 bytes in struct sock made both mutually exclusive, that is, when
cgroup v1 socket tagging (e.g. net_cls/net_prio) is used, then cgroup v2
falls back to the root cgroup in sock_cgroup_ptr() (&cgrp_dfl_root.cgrp).

The assumption made was "there is no reason to mix the two and this is in line
with how legacy and v2 compatibility is handled" as stated in bd1060a1d671.
However, with Kubernetes more widely supporting cgroups v2 as well nowadays,
this assumption no longer holds, and the possibility of the v1/v2 mixed mode
with the v2 root fallback being hit becomes a real security issue.

Many of the cgroup v2 BPF programs are also used for policy enforcement, just
to pick _one_ example, that is, to programmatically deny socket related system
calls like connect(2) or bind(2). A v2 root fallback would implicitly cause
a policy bypass for the affected Pods.

In production environments, we have recently seen this case due to various
circumstances: i) a different 3rd party agent and/or ii) a container runtime
such as [0] in the user's environment configuring legacy cgroup v1 net_cls
tags, which triggered implicitly mentioned root fallback. Another case is
Kubernetes projects like kind [1] which create Kubernetes nodes in a container
and also add cgroup namespaces to the mix, meaning programs which are attached
to the cgroup v2 root of the cgroup namespace get attached to a non-root
cgroup v2 path from init namespace point of view. And the latter's root is
out of reach for agents on a kind Kubernetes node to configure. Meaning, any
entity on the node setting cgroup v1 net_cls tag will trigger the bypass
despite cgroup v2 BPF programs attached to the namespace root.

Generally, this mutual exclusiveness does not hold anymore in today's user
environments and makes cgroup v2 usage from BPF side fragile and unreliable.
This fix adds proper struct cgroup pointer for the cgroup v2 case to struct
sock_cgroup_data in order to address these issues; this implicitly also fixes
the tradeoffs being made back then with regards to races and refcount leaks
as stated in bd1060a1d671, and removes the fallback, so that cgroup v2 BPF
programs always operate as expected.

  [0] https://github.com/nestybox/sysbox/
  [1] https://kind.sigs.k8s.io/

Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/bpf/20210913230759.2313-1-daniel@iogearbox.net
---
 include/linux/cgroup-defs.h  | 107 +++++++++++--------------------------------
 include/linux/cgroup.h       |  22 +--------
 kernel/cgroup/cgroup.c       |  50 ++++----------------
 net/core/netclassid_cgroup.c |   7 +--
 net/core/netprio_cgroup.c    |  10 +---
 5 files changed, 41 insertions(+), 155 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index e1c705fdfa7c..db2e147e069f 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -752,107 +752,54 @@ static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}
  * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
  * per-socket cgroup information except for memcg association.
  *
- * On legacy hierarchies, net_prio and net_cls controllers directly set
- * attributes on each sock which can then be tested by the network layer.
- * On the default hierarchy, each sock is associated with the cgroup it was
- * created in and the networking layer can match the cgroup directly.
- *
- * To avoid carrying all three cgroup related fields separately in sock,
- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
- * On boot, sock_cgroup_data records the cgroup that the sock was created
- * in so that cgroup2 matches can be made; however, once either net_prio or
- * net_cls starts being used, the area is overridden to carry prioidx and/or
- * classid.  The two modes are distinguished by whether the lowest bit is
- * set.  Clear bit indicates cgroup pointer while set bit prioidx and
- * classid.
- *
- * While userland may start using net_prio or net_cls at any time, once
- * either is used, cgroup2 matching no longer works.  There is no reason to
- * mix the two and this is in line with how legacy and v2 compatibility is
- * handled.  On mode switch, cgroup references which are already being
- * pointed to by socks may be leaked.  While this can be remedied by adding
- * synchronization around sock_cgroup_data, given that the number of leaked
- * cgroups is bound and highly unlikely to be high, this seems to be the
- * better trade-off.
+ * On legacy hierarchies, net_prio and net_cls controllers directly
+ * set attributes on each sock which can then be tested by the network
+ * layer. On the default hierarchy, each sock is associated with the
+ * cgroup it was created in and the networking layer can match the
+ * cgroup directly.
  */
 struct sock_cgroup_data {
-	union {
-#ifdef __LITTLE_ENDIAN
-		struct {
-			u8	is_data : 1;
-			u8	no_refcnt : 1;
-			u8	unused : 6;
-			u8	padding;
-			u16	prioidx;
-			u32	classid;
-		} __packed;
-#else
-		struct {
-			u32	classid;
-			u16	prioidx;
-			u8	padding;
-			u8	unused : 6;
-			u8	no_refcnt : 1;
-			u8	is_data : 1;
-		} __packed;
+	struct cgroup	*cgroup; /* v2 */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	u32		classid; /* v1 */
+#endif
+#ifdef CONFIG_CGROUP_NET_PRIO
+	u16		prioidx; /* v1 */
 #endif
-		u64		val;
-	};
 };
 
-/*
- * There's a theoretical window where the following accessors race with
- * updaters and return part of the previous pointer as the prioidx or
- * classid.  Such races are short-lived and the result isn't critical.
- */
 static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
 {
-	/* fallback to 1 which is always the ID of the root cgroup */
-	return (skcd->is_data & 1) ? skcd->prioidx : 1;
+#ifdef CONFIG_CGROUP_NET_PRIO
+	return READ_ONCE(skcd->prioidx);
+#else
+	return 1;
+#endif
 }
 
 static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
 {
-	/* fallback to 0 which is the unconfigured default classid */
-	return (skcd->is_data & 1) ? skcd->classid : 0;
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	return READ_ONCE(skcd->classid);
+#else
+	return 0;
+#endif
 }
 
-/*
- * If invoked concurrently, the updaters may clobber each other.  The
- * caller is responsible for synchronization.
- */
 static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
 					   u16 prioidx)
 {
-	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
-	if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
-		return;
-
-	if (!(skcd_buf.is_data & 1)) {
-		skcd_buf.val = 0;
-		skcd_buf.is_data = 1;
-	}
-
-	skcd_buf.prioidx = prioidx;
-	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_PRIO
+	WRITE_ONCE(skcd->prioidx, prioidx);
+#endif
 }
 
 static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
 					   u32 classid)
 {
-	struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
-
-	if (sock_cgroup_classid(&skcd_buf) == classid)
-		return;
-
-	if (!(skcd_buf.is_data & 1)) {
-		skcd_buf.val = 0;
-		skcd_buf.is_data = 1;
-	}
-
-	skcd_buf.classid = classid;
-	WRITE_ONCE(skcd->val, skcd_buf.val);	/* see sock_cgroup_ptr() */
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	WRITE_ONCE(skcd->classid, classid);
+#endif
 }
 
 #else	/* CONFIG_SOCK_CGROUP_DATA */
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7bf60454a313..75c151413fda 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -829,33 +829,13 @@ static inline void cgroup_account_cputime_field(struct task_struct *task,
  */
 #ifdef CONFIG_SOCK_CGROUP_DATA
 
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-extern spinlock_t cgroup_sk_update_lock;
-#endif
-
-void cgroup_sk_alloc_disable(void);
 void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
 void cgroup_sk_clone(struct sock_cgroup_data *skcd);
 void cgroup_sk_free(struct sock_cgroup_data *skcd);
 
 static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
 {
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-	unsigned long v;
-
-	/*
-	 * @skcd->val is 64bit but the following is safe on 32bit too as we
-	 * just need the lower ulong to be written and read atomically.
-	 */
-	v = READ_ONCE(skcd->val);
-
-	if (v & 3)
-		return &cgrp_dfl_root.cgrp;
-
-	return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
-#else
-	return (struct cgroup *)(unsigned long)skcd->val;
-#endif
+	return skcd->cgroup;
 }
 
 #else	/* CONFIG_CGROUP_DATA */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 881ce1470beb..8afa8690d288 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6572,74 +6572,44 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
  */
 #ifdef CONFIG_SOCK_CGROUP_DATA
 
-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
-
-DEFINE_SPINLOCK(cgroup_sk_update_lock);
-static bool cgroup_sk_alloc_disabled __read_mostly;
-
-void cgroup_sk_alloc_disable(void)
-{
-	if (cgroup_sk_alloc_disabled)
-		return;
-	pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
-	cgroup_sk_alloc_disabled = true;
-}
-
-#else
-
-#define cgroup_sk_alloc_disabled	false
-
-#endif
-
 void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 {
-	if (cgroup_sk_alloc_disabled) {
-		skcd->no_refcnt = 1;
-		return;
-	}
-
 	/* Don't associate the sock with unrelated interrupted task's cgroup. */
 	if (in_interrupt())
 		return;
 
 	rcu_read_lock();
-
 	while (true) {
 		struct css_set *cset;
 
 		cset = task_css_set(current);
 		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
-			skcd->val = (unsigned long)cset->dfl_cgrp;
+			skcd->cgroup = cset->dfl_cgrp;
 			cgroup_bpf_get(cset->dfl_cgrp);
 			break;
 		}
 		cpu_relax();
 	}
-
 	rcu_read_unlock();
 }
 
 void cgroup_sk_clone(struct sock_cgroup_data *skcd)
 {
-	if (skcd->val) {
-		if (skcd->no_refcnt)
-			return;
-		/*
-		 * We might be cloning a socket which is left in an empty
-		 * cgroup and the cgroup might have already been rmdir'd.
-		 * Don't use cgroup_get_live().
-		 */
-		cgroup_get(sock_cgroup_ptr(skcd));
-		cgroup_bpf_get(sock_cgroup_ptr(skcd));
-	}
+	struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+	/*
+	 * We might be cloning a socket which is left in an empty
+	 * cgroup and the cgroup might have already been rmdir'd.
+	 * Don't use cgroup_get_live().
+	 */
+	cgroup_get(cgrp);
+	cgroup_bpf_get(cgrp);
 }
 
 void cgroup_sk_free(struct sock_cgroup_data *skcd)
 {
 	struct cgroup *cgrp = sock_cgroup_ptr(skcd);
 
-	if (skcd->no_refcnt)
-		return;
 	cgroup_bpf_put(cgrp);
 	cgroup_put(cgrp);
 }
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index b49c57d35a88..1a6a86693b74 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
 	struct update_classid_context *ctx = (void *)v;
 	struct socket *sock = sock_from_file(file);
 
-	if (sock) {
-		spin_lock(&cgroup_sk_update_lock);
+	if (sock)
 		sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
-		spin_unlock(&cgroup_sk_update_lock);
-	}
 	if (--ctx->batch == 0) {
 		ctx->batch = UPDATE_CLASSID_BATCH;
 		return n + 1;
@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
 	struct css_task_iter it;
 	struct task_struct *p;
 
-	cgroup_sk_alloc_disable();
-
 	cs->classid = (u32)value;
 
 	css_task_iter_start(css, 0, &it);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 99a431c56f23..8456dfbe2eb4 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
 	if (!dev)
 		return -ENODEV;
 
-	cgroup_sk_alloc_disable();
-
 	rtnl_lock();
 
 	ret = netprio_set_prio(of_css(of), dev, prio);
@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
 static int update_netprio(const void *v, struct file *file, unsigned n)
 {
 	struct socket *sock = sock_from_file(file);
-	if (sock) {
-		spin_lock(&cgroup_sk_update_lock);
+
+	if (sock)
 		sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
 					(unsigned long)v);
-		spin_unlock(&cgroup_sk_update_lock);
-	}
 	return 0;
 }
 
@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset)
 	struct task_struct *p;
 	struct cgroup_subsys_state *css;
 
-	cgroup_sk_alloc_disable();
-
 	cgroup_taskset_for_each(p, css, tset) {
 		void *v = (void *)(unsigned long)css->id;
 
-- 
cgit v1.2.3


From efeff6b39b9de4480572c7b0c5eb77204795cb57 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 5 Aug 2021 13:28:24 -0700
Subject: rcutorture: Warn on individual rcu_torture_init() error conditions

When running rcutorture as a module, any rcu_torture_init() issues will be
reflected in the error code from modprobe or insmod, as the case may be.
However, these error codes are not available when running rcutorture
built-in, for example, when using the kvm.sh script.  This commit
therefore adds WARN_ON_ONCE() to allow distinguishing rcu_torture_init()
errors when running rcutorture built-in.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/torture.h |  8 ++++++++
 kernel/rcu/rcutorture.c | 30 +++++++++++++++---------------
 2 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 0910c5803f35..24f58e50a94b 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -47,6 +47,14 @@ do {										\
 } while (0)
 void verbose_torout_sleep(void);
 
+#define torture_init_error(firsterr)						\
+({										\
+	int ___firsterr = (firsterr);						\
+										\
+	WARN_ONCE(!IS_MODULE(CONFIG_RCU_TORTURE_TEST) && ___firsterr < 0, "Torture-test initialization failed with error code %d\n", ___firsterr); \
+	___firsterr < 0;								\
+})
+
 /* Definitions for online/offline exerciser. */
 #ifdef CONFIG_HOTPLUG_CPU
 int torture_num_online_cpus(void);
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 59254fa15cc6..b90cd4d98a20 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -3037,7 +3037,7 @@ rcu_torture_init(void)
 	rcu_torture_write_types();
 	firsterr = torture_create_kthread(rcu_torture_writer, NULL,
 					  writer_task);
-	if (firsterr)
+	if (torture_init_error(firsterr))
 		goto unwind;
 	if (nfakewriters > 0) {
 		fakewriter_tasks = kcalloc(nfakewriters,
@@ -3052,7 +3052,7 @@ rcu_torture_init(void)
 	for (i = 0; i < nfakewriters; i++) {
 		firsterr = torture_create_kthread(rcu_torture_fakewriter,
 						  NULL, fakewriter_tasks[i]);
-		if (firsterr)
+		if (torture_init_error(firsterr))
 			goto unwind;
 	}
 	reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
@@ -3068,7 +3068,7 @@ rcu_torture_init(void)
 		rcu_torture_reader_mbchk[i].rtc_chkrdr = -1;
 		firsterr = torture_create_kthread(rcu_torture_reader, (void *)i,
 						  reader_tasks[i]);
-		if (firsterr)
+		if (torture_init_error(firsterr))
 			goto unwind;
 	}
 	nrealnocbers = nocbs_nthreads;
@@ -3088,18 +3088,18 @@ rcu_torture_init(void)
 	}
 	for (i = 0; i < nrealnocbers; i++) {
 		firsterr = torture_create_kthread(rcu_nocb_toggle, NULL, nocb_tasks[i]);
-		if (firsterr)
+		if (torture_init_error(firsterr))
 			goto unwind;
 	}
 	if (stat_interval > 0) {
 		firsterr = torture_create_kthread(rcu_torture_stats, NULL,
 						  stats_task);
-		if (firsterr)
+		if (torture_init_error(firsterr))
 			goto unwind;
 	}
 	if (test_no_idle_hz && shuffle_interval > 0) {
 		firsterr = torture_shuffle_init(shuffle_interval * HZ);
-		if (firsterr)
+		if (torture_init_error(firsterr))
 			goto unwind;
 	}
 	if (stutter < 0)
@@ -3109,7 +3109,7 @@ rcu_torture_init(void)
 
 		t = cur_ops->stall_dur ? cur_ops->stall_dur() : stutter * HZ;
 		firsterr = torture_stutter_init(stutter * HZ, t);
-		if (firsterr)
+		if (torture_init_error(firsterr))
 			goto unwind;
 	}
 	if (fqs_duration < 0)
@@ -3118,7 +3118,7 @@ rcu_torture_init(void)
 		/* Create the fqs thread */
 		firsterr = torture_create_kthread(rcu_torture_fqs, NULL,
 						  fqs_task);
-		if (firsterr)
+		if (torture_init_error(firsterr))
 			goto unwind;
 	}
 	if (test_boost_interval < 1)
@@ -3132,7 +3132,7 @@ rcu_torture_init(void)
 		firsterr = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "RCU_TORTURE",
 					     rcutorture_booster_init,
 					     rcutorture_booster_cleanup);
-		if (firsterr < 0)
+		if (torture_init_error(firsterr))
 			goto unwind;
 		rcutor_hp = firsterr;
 
@@ -3153,23 +3153,23 @@ rcu_torture_init(void)
 	}
 	shutdown_jiffies = jiffies + shutdown_secs * HZ;
 	firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup);
-	if (firsterr)
+	if (torture_init_error(firsterr))
 		goto unwind;
 	firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval,
 				      rcutorture_sync);
-	if (firsterr)
+	if (torture_init_error(firsterr))
 		goto unwind;
 	firsterr = rcu_torture_stall_init();
-	if (firsterr)
+	if (torture_init_error(firsterr))
 		goto unwind;
 	firsterr = rcu_torture_fwd_prog_init();
-	if (firsterr)
+	if (torture_init_error(firsterr))
 		goto unwind;
 	firsterr = rcu_torture_barrier_init();
-	if (firsterr)
+	if (torture_init_error(firsterr))
 		goto unwind;
 	firsterr = rcu_torture_read_exit_init();
-	if (firsterr)
+	if (torture_init_error(firsterr))
 		goto unwind;
 	if (object_debug)
 		rcu_test_debug_objects();
-- 
cgit v1.2.3


From f4c87dbbef2638f6da6e29b5e998e3b1dcdb08ee Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Mon, 9 Aug 2021 13:25:13 +0200
Subject: kcsan: Save instruction pointer for scoped accesses

Save the instruction pointer for scoped accesses, so that it becomes
possible for the reporting code to construct more accurate stack traces
that will show the start of the scope.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/kcsan-checks.h |  3 +++
 kernel/kcsan/core.c          | 12 +++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h
index 9fd0ad80fef6..5f5965246877 100644
--- a/include/linux/kcsan-checks.h
+++ b/include/linux/kcsan-checks.h
@@ -100,9 +100,12 @@ void kcsan_set_access_mask(unsigned long mask);
 /* Scoped access information. */
 struct kcsan_scoped_access {
 	struct list_head list;
+	/* Access information. */
 	const volatile void *ptr;
 	size_t size;
 	int type;
+	/* Location where scoped access was set up. */
+	unsigned long ip;
 };
 /*
  * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index bffd1d95addb..8b20af541776 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -202,6 +202,9 @@ static __always_inline struct kcsan_ctx *get_ctx(void)
 	return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
 }
 
+static __always_inline void
+check_access(const volatile void *ptr, size_t size, int type, unsigned long ip);
+
 /* Check scoped accesses; never inline because this is a slow-path! */
 static noinline void kcsan_check_scoped_accesses(void)
 {
@@ -210,8 +213,10 @@ static noinline void kcsan_check_scoped_accesses(void)
 	struct kcsan_scoped_access *scoped_access;
 
 	ctx->scoped_accesses.prev = NULL;  /* Avoid recursion. */
-	list_for_each_entry(scoped_access, &ctx->scoped_accesses, list)
-		__kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type);
+	list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) {
+		check_access(scoped_access->ptr, scoped_access->size,
+			     scoped_access->type, scoped_access->ip);
+	}
 	ctx->scoped_accesses.prev = prev_save;
 }
 
@@ -767,6 +772,7 @@ kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type,
 	sa->ptr = ptr;
 	sa->size = size;
 	sa->type = type;
+	sa->ip = _RET_IP_;
 
 	if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */
 		INIT_LIST_HEAD(&ctx->scoped_accesses);
@@ -798,7 +804,7 @@ void kcsan_end_scoped_access(struct kcsan_scoped_access *sa)
 
 	ctx->disable_count--;
 
-	__kcsan_check_access(sa->ptr, sa->size, sa->type);
+	check_access(sa->ptr, sa->size, sa->type, sa->ip);
 }
 EXPORT_SYMBOL(kcsan_end_scoped_access);
 
-- 
cgit v1.2.3


From 7a8aa39d44564703620d937bb54cdea2d003657f Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Mon, 13 Sep 2021 17:05:51 +0100
Subject: nvmem: core: Add stubs for nvmem_cell_read_variable_le_u32/64 if
 !CONFIG_NVMEM

When I added nvmem_cell_read_variable_le_u32() and
nvmem_cell_read_variable_le_u64() I forgot to add the "static inline"
stub functions for when CONFIG_NVMEM wasn't defined. Add them
now. This was causing problems with randconfig builds that compiled
`drivers/soc/qcom/cpr.c`.

Fixes: 6feba6a62c57 ("PM: AVS: qcom-cpr: Use nvmem_cell_read_variable_le_u32()")
Fixes: a28e824fb827 ("nvmem: core: Add functions to make number reading easy")
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20210913160551.12907-1-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/nvmem-consumer.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 923dada24eb4..c0c0cefc3b92 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -150,6 +150,20 @@ static inline int nvmem_cell_read_u64(struct device *dev,
 	return -EOPNOTSUPP;
 }
 
+static inline int nvmem_cell_read_variable_le_u32(struct device *dev,
+						 const char *cell_id,
+						 u32 *val)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int nvmem_cell_read_variable_le_u64(struct device *dev,
+						  const char *cell_id,
+						  u64 *val)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline struct nvmem_device *nvmem_device_get(struct device *dev,
 						    const char *name)
 {
-- 
cgit v1.2.3


From 81065b35e2486c024c7aa86caed452e1f01a59d4 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 13 Sep 2021 14:52:39 -0700
Subject: x86/mce: Avoid infinite loop for copy from user recovery

There are two cases for machine check recovery:

1) The machine check was triggered by ring3 (application) code.
   This is the simpler case. The machine check handler simply queues
   work to be executed on return to user. That code unmaps the page
   from all users and arranges to send a SIGBUS to the task that
   triggered the poison.

2) The machine check was triggered in kernel code that is covered by
   an exception table entry. In this case the machine check handler
   still queues a work entry to unmap the page, etc. but this will
   not be called right away because the #MC handler returns to the
   fix up code address in the exception table entry.

Problems occur if the kernel triggers another machine check before the
return to user processes the first queued work item.

Specifically, the work is queued using the ->mce_kill_me callback
structure in the task struct for the current thread. Attempting to queue
a second work item using this same callback results in a loop in the
linked list of work functions to call. So when the kernel does return to
user, it enters an infinite loop processing the same entry for ever.

There are some legitimate scenarios where the kernel may take a second
machine check before returning to the user.

1) Some code (e.g. futex) first tries a get_user() with page faults
   disabled. If this fails, the code retries with page faults enabled
   expecting that this will resolve the page fault.

2) Copy from user code retries a copy in byte-at-time mode to check
   whether any additional bytes can be copied.

On the other side of the fence are some bad drivers that do not check
the return value from individual get_user() calls and may access
multiple user addresses without noticing that some/all calls have
failed.

Fix by adding a counter (current->mce_count) to keep track of repeated
machine checks before task_work() is called. First machine check saves
the address information and calls task_work_add(). Subsequent machine
checks before that task_work call back is executed check that the address
is in the same page as the first machine check (since the callback will
offline exactly one page).

Expected worst case is four machine checks before moving on (e.g. one
user access with page faults disabled, then a repeat to the same address
with page faults enabled ... repeat in copy tail bytes). Just in case
there is some code that loops forever enforce a limit of 10.

 [ bp: Massage commit message, drop noinstr, fix typo, extend panic
   messages. ]

Fixes: 5567d11c21a1 ("x86/mce: Send #MC singal from task work")
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/YT/IJ9ziLqmtqEPu@agluck-desk2.amr.corp.intel.com
---
 arch/x86/kernel/cpu/mce/core.c | 43 +++++++++++++++++++++++++++++++-----------
 include/linux/sched.h          |  1 +
 2 files changed, 33 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 8cb7816d03b4..193204aee880 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1253,6 +1253,9 @@ static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *fin
 
 static void kill_me_now(struct callback_head *ch)
 {
+	struct task_struct *p = container_of(ch, struct task_struct, mce_kill_me);
+
+	p->mce_count = 0;
 	force_sig(SIGBUS);
 }
 
@@ -1262,6 +1265,7 @@ static void kill_me_maybe(struct callback_head *cb)
 	int flags = MF_ACTION_REQUIRED;
 	int ret;
 
+	p->mce_count = 0;
 	pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr);
 
 	if (!p->mce_ripv)
@@ -1290,17 +1294,34 @@ static void kill_me_maybe(struct callback_head *cb)
 	}
 }
 
-static void queue_task_work(struct mce *m, int kill_current_task)
+static void queue_task_work(struct mce *m, char *msg, int kill_current_task)
 {
-	current->mce_addr = m->addr;
-	current->mce_kflags = m->kflags;
-	current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
-	current->mce_whole_page = whole_page(m);
+	int count = ++current->mce_count;
 
-	if (kill_current_task)
-		current->mce_kill_me.func = kill_me_now;
-	else
-		current->mce_kill_me.func = kill_me_maybe;
+	/* First call, save all the details */
+	if (count == 1) {
+		current->mce_addr = m->addr;
+		current->mce_kflags = m->kflags;
+		current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+		current->mce_whole_page = whole_page(m);
+
+		if (kill_current_task)
+			current->mce_kill_me.func = kill_me_now;
+		else
+			current->mce_kill_me.func = kill_me_maybe;
+	}
+
+	/* Ten is likely overkill. Don't expect more than two faults before task_work() */
+	if (count > 10)
+		mce_panic("Too many consecutive machine checks while accessing user data", m, msg);
+
+	/* Second or later call, make sure page address matches the one from first call */
+	if (count > 1 && (current->mce_addr >> PAGE_SHIFT) != (m->addr >> PAGE_SHIFT))
+		mce_panic("Consecutive machine checks to different user pages", m, msg);
+
+	/* Do not call task_work_add() more than once */
+	if (count > 1)
+		return;
 
 	task_work_add(current, &current->mce_kill_me, TWA_RESUME);
 }
@@ -1438,7 +1459,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		/* If this triggers there is no way to recover. Die hard. */
 		BUG_ON(!on_thread_stack() || !user_mode(regs));
 
-		queue_task_work(&m, kill_current_task);
+		queue_task_work(&m, msg, kill_current_task);
 
 	} else {
 		/*
@@ -1456,7 +1477,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
 		}
 
 		if (m.kflags & MCE_IN_KERNEL_COPYIN)
-			queue_task_work(&m, kill_current_task);
+			queue_task_work(&m, msg, kill_current_task);
 	}
 out:
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1780260f237b..361c7bc72cbb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1468,6 +1468,7 @@ struct task_struct {
 					mce_whole_page : 1,
 					__mce_reserved : 62;
 	struct callback_head		mce_kill_me;
+	int				mce_count;
 #endif
 
 #ifdef CONFIG_KRETPROBES
-- 
cgit v1.2.3


From 58877b0824da15698bd85a0a9dbfa8c354e6ecb7 Mon Sep 17 00:00:00 2001
From: Kishon Vijay Abraham I <kishon@ti.com>
Date: Thu, 9 Sep 2021 12:11:58 +0530
Subject: usb: core: hcd: Add support for deferring roothub registration

It has been observed with certain PCIe USB cards (like Inateck connected
to AM64 EVM or J7200 EVM) that as soon as the primary roothub is
registered, port status change is handled even before xHC is running
leading to cold plug USB devices not detected. For such cases, registering
both the root hubs along with the second HCD is required. Add support for
deferring roothub registration in usb_add_hcd(), so that both primary and
secondary roothubs are registered along with the second HCD.

CC: stable@vger.kernel.org # 5.4+
Suggested-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Tested-by: Chris Chiu <chris.chiu@canonical.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
Link: https://lore.kernel.org/r/20210909064200.16216-2-kishon@ti.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c  | 29 +++++++++++++++++++++++------
 include/linux/usb/hcd.h |  2 ++
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 0f8b7c93310e..99ff2d23be05 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2775,6 +2775,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
 {
 	int retval;
 	struct usb_device *rhdev;
+	struct usb_hcd *shared_hcd;
 
 	if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) {
 		hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev);
@@ -2935,13 +2936,26 @@ int usb_add_hcd(struct usb_hcd *hcd,
 		goto err_hcd_driver_start;
 	}
 
+	/* starting here, usbcore will pay attention to the shared HCD roothub */
+	shared_hcd = hcd->shared_hcd;
+	if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) {
+		retval = register_root_hub(shared_hcd);
+		if (retval != 0)
+			goto err_register_root_hub;
+
+		if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd))
+			usb_hcd_poll_rh_status(shared_hcd);
+	}
+
 	/* starting here, usbcore will pay attention to this root hub */
-	retval = register_root_hub(hcd);
-	if (retval != 0)
-		goto err_register_root_hub;
+	if (!HCD_DEFER_RH_REGISTER(hcd)) {
+		retval = register_root_hub(hcd);
+		if (retval != 0)
+			goto err_register_root_hub;
 
-	if (hcd->uses_new_polling && HCD_POLL_RH(hcd))
-		usb_hcd_poll_rh_status(hcd);
+		if (hcd->uses_new_polling && HCD_POLL_RH(hcd))
+			usb_hcd_poll_rh_status(hcd);
+	}
 
 	return retval;
 
@@ -2985,6 +2999,7 @@ EXPORT_SYMBOL_GPL(usb_add_hcd);
 void usb_remove_hcd(struct usb_hcd *hcd)
 {
 	struct usb_device *rhdev = hcd->self.root_hub;
+	bool rh_registered;
 
 	dev_info(hcd->self.controller, "remove, state %x\n", hcd->state);
 
@@ -2995,6 +3010,7 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 
 	dev_dbg(hcd->self.controller, "roothub graceful disconnect\n");
 	spin_lock_irq (&hcd_root_hub_lock);
+	rh_registered = hcd->rh_registered;
 	hcd->rh_registered = 0;
 	spin_unlock_irq (&hcd_root_hub_lock);
 
@@ -3004,7 +3020,8 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 	cancel_work_sync(&hcd->died_work);
 
 	mutex_lock(&usb_bus_idr_lock);
-	usb_disconnect(&rhdev);		/* Sets rhdev to NULL */
+	if (rh_registered)
+		usb_disconnect(&rhdev);		/* Sets rhdev to NULL */
 	mutex_unlock(&usb_bus_idr_lock);
 
 	/*
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 548a028f2dab..2c1fc9212cf2 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -124,6 +124,7 @@ struct usb_hcd {
 #define HCD_FLAG_RH_RUNNING		5	/* root hub is running? */
 #define HCD_FLAG_DEAD			6	/* controller has died? */
 #define HCD_FLAG_INTF_AUTHORIZED	7	/* authorize interfaces? */
+#define HCD_FLAG_DEFER_RH_REGISTER	8	/* Defer roothub registration */
 
 	/* The flags can be tested using these macros; they are likely to
 	 * be slightly faster than test_bit().
@@ -134,6 +135,7 @@ struct usb_hcd {
 #define HCD_WAKEUP_PENDING(hcd)	((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING))
 #define HCD_RH_RUNNING(hcd)	((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING))
 #define HCD_DEAD(hcd)		((hcd)->flags & (1U << HCD_FLAG_DEAD))
+#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER))
 
 	/*
 	 * Specifies if interfaces are authorized by default
-- 
cgit v1.2.3


From 8988bacd6045adf39719e5057e43170f83bd1709 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 31 Aug 2021 17:30:44 +0800
Subject: kobject: unexport kobject_create() in kobject.h

The function kobject_create() is only used by one caller,
kobject_create_and_add(), no other driver uses it, nor is exported to
other modules.

However it's still exported in kobject.h, and can sometimes confuse
users of kobject.h.

Since all users should call kobject_create_and_add(), or if extra
attributes are needed, should alloc the memory manually then call
kobject_init_and_add().

Signed-off-by: Qu Wenruo <wqu@suse.com>
Link: https://lore.kernel.org/r/20210831093044.110729-1-wqu@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 1 -
 lib/kobject.c           | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index ea30529fba08..efd56f990a46 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -101,7 +101,6 @@ int kobject_init_and_add(struct kobject *kobj,
 
 extern void kobject_del(struct kobject *kobj);
 
-extern struct kobject * __must_check kobject_create(void);
 extern struct kobject * __must_check kobject_create_and_add(const char *name,
 						struct kobject *parent);
 
diff --git a/lib/kobject.c b/lib/kobject.c
index ea53b30cf483..4a56f519139d 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -777,7 +777,7 @@ static struct kobj_type dynamic_kobj_ktype = {
  * call to kobject_put() and not kfree(), as kobject_init() has
  * already been called on this structure.
  */
-struct kobject *kobject_create(void)
+static struct kobject *kobject_create(void)
 {
 	struct kobject *kobj;
 
-- 
cgit v1.2.3


From 82bcb7fb649844a561ff1ac2e2ace4252bdff793 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Mon, 23 Aug 2021 14:22:01 +0300
Subject: iio: st_sensors: remove st_sensors_deallocate_trigger() function

This change converts the st_sensors_allocate_trigger() to use
device-managed functions.

The parent device of the IIO device object is used. This is based on the
assumption that all other devm_ calls in the ST sensors use this reference.

That makes the st_sensors_deallocate_trigger() function un-needed, so it
can be removed.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210823112204.243255-3-aardelean@deviqon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_core.c                  | 18 +--------
 drivers/iio/common/st_sensors/st_sensors_trigger.c | 45 ++++++++--------------
 drivers/iio/gyro/st_gyro_core.c                    | 18 +--------
 drivers/iio/magnetometer/st_magn_core.c            | 18 +--------
 drivers/iio/pressure/st_pressure_core.c            | 18 +--------
 include/linux/iio/common/st_sensors.h              |  5 ---
 6 files changed, 19 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index f1e6ec380667..a7be1633bff1 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1380,29 +1380,13 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	err = iio_device_register(indio_dev);
-	if (err)
-		goto st_accel_device_register_error;
-
-	dev_info(&indio_dev->dev, "registered accelerometer %s\n",
-		 indio_dev->name);
-
-	return 0;
-
-st_accel_device_register_error:
-	if (adata->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
-	return err;
+	return iio_device_register(indio_dev);
 }
 EXPORT_SYMBOL(st_accel_common_probe);
 
 void st_accel_common_remove(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *adata = iio_priv(indio_dev);
-
 	iio_device_unregister(indio_dev);
-	if (adata->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
 }
 EXPORT_SYMBOL(st_accel_common_remove);
 
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index 64e0a748a855..d022157b66a2 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -119,11 +119,12 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops)
 {
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	struct device *parent = indio_dev->dev.parent;
 	unsigned long irq_trig;
 	int err;
 
-	sdata->trig = iio_trigger_alloc(sdata->dev, "%s-trigger",
-					indio_dev->name);
+	sdata->trig = devm_iio_trigger_alloc(parent, "%s-trigger",
+					     indio_dev->name);
 	if (sdata->trig == NULL) {
 		dev_err(&indio_dev->dev, "failed to allocate iio trigger.\n");
 		return -ENOMEM;
@@ -153,7 +154,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				sdata->sensor_settings->drdy_irq.addr_ihl,
 				sdata->sensor_settings->drdy_irq.mask_ihl, 1);
 			if (err < 0)
-				goto iio_trigger_free;
+				return err;
 			dev_info(&indio_dev->dev,
 				 "interrupts on the falling edge or active low level\n");
 		}
@@ -179,8 +180,7 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		if (!sdata->sensor_settings->drdy_irq.stat_drdy.addr) {
 			dev_err(&indio_dev->dev,
 				"edge IRQ not supported w/o stat register.\n");
-			err = -EOPNOTSUPP;
-			goto iio_trigger_free;
+			return -EOPNOTSUPP;
 		}
 		sdata->edge_irq = true;
 	} else {
@@ -205,44 +205,29 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 	    sdata->sensor_settings->drdy_irq.stat_drdy.addr)
 		irq_trig |= IRQF_SHARED;
 
-	err = request_threaded_irq(sdata->irq,
-				   st_sensors_irq_handler,
-				   st_sensors_irq_thread,
-				   irq_trig,
-				   sdata->trig->name,
-				   sdata->trig);
+	err = devm_request_threaded_irq(parent,
+					sdata->irq,
+					st_sensors_irq_handler,
+					st_sensors_irq_thread,
+					irq_trig,
+					sdata->trig->name,
+					sdata->trig);
 	if (err) {
 		dev_err(&indio_dev->dev, "failed to request trigger IRQ.\n");
-		goto iio_trigger_free;
+		return err;
 	}
 
-	err = iio_trigger_register(sdata->trig);
+	err = devm_iio_trigger_register(parent, sdata->trig);
 	if (err < 0) {
 		dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
-		goto iio_trigger_register_error;
+		return err;
 	}
 	indio_dev->trig = iio_trigger_get(sdata->trig);
 
 	return 0;
-
-iio_trigger_register_error:
-	free_irq(sdata->irq, sdata->trig);
-iio_trigger_free:
-	iio_trigger_free(sdata->trig);
-	return err;
 }
 EXPORT_SYMBOL(st_sensors_allocate_trigger);
 
-void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
-{
-	struct st_sensor_data *sdata = iio_priv(indio_dev);
-
-	iio_trigger_unregister(sdata->trig);
-	free_irq(sdata->irq, sdata->trig);
-	iio_trigger_free(sdata->trig);
-}
-EXPORT_SYMBOL(st_sensors_deallocate_trigger);
-
 int st_sensors_validate_device(struct iio_trigger *trig,
 			       struct iio_dev *indio_dev)
 {
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index e8fc8af65143..cb539b47cdf4 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -515,29 +515,13 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	err = iio_device_register(indio_dev);
-	if (err)
-		goto st_gyro_device_register_error;
-
-	dev_info(&indio_dev->dev, "registered gyroscope %s\n",
-		 indio_dev->name);
-
-	return 0;
-
-st_gyro_device_register_error:
-	if (gdata->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
-	return err;
+	return iio_device_register(indio_dev);
 }
 EXPORT_SYMBOL(st_gyro_common_probe);
 
 void st_gyro_common_remove(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *gdata = iio_priv(indio_dev);
-
 	iio_device_unregister(indio_dev);
-	if (gdata->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
 }
 EXPORT_SYMBOL(st_gyro_common_remove);
 
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 9ffd50d796bf..5be85e2405a5 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -650,29 +650,13 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	err = iio_device_register(indio_dev);
-	if (err)
-		goto st_magn_device_register_error;
-
-	dev_info(&indio_dev->dev, "registered magnetometer %s\n",
-		 indio_dev->name);
-
-	return 0;
-
-st_magn_device_register_error:
-	if (mdata->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
-	return err;
+	return iio_device_register(indio_dev);
 }
 EXPORT_SYMBOL(st_magn_common_probe);
 
 void st_magn_common_remove(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *mdata = iio_priv(indio_dev);
-
 	iio_device_unregister(indio_dev);
-	if (mdata->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
 }
 EXPORT_SYMBOL(st_magn_common_remove);
 
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index ab1c17fac807..17ebb5171d4c 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -721,29 +721,13 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	err = iio_device_register(indio_dev);
-	if (err)
-		goto st_press_device_register_error;
-
-	dev_info(&indio_dev->dev, "registered pressure sensor %s\n",
-		 indio_dev->name);
-
-	return err;
-
-st_press_device_register_error:
-	if (press_data->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
-	return err;
+	return iio_device_register(indio_dev);
 }
 EXPORT_SYMBOL(st_press_common_probe);
 
 void st_press_common_remove(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *press_data = iio_priv(indio_dev);
-
 	iio_device_unregister(indio_dev);
-	if (press_data->irq > 0)
-		st_sensors_deallocate_trigger(indio_dev);
 }
 EXPORT_SYMBOL(st_press_common_remove);
 
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index 8bdbaf3f3796..e74b55244f35 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -273,7 +273,6 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p);
 int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops);
 
-void st_sensors_deallocate_trigger(struct iio_dev *indio_dev);
 int st_sensors_validate_device(struct iio_trigger *trig,
 			       struct iio_dev *indio_dev);
 #else
@@ -282,10 +281,6 @@ static inline int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 {
 	return 0;
 }
-static inline void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
-{
-	return;
-}
 #define st_sensors_validate_device NULL
 #endif
 
-- 
cgit v1.2.3


From 5363c6c17b1014f696bf0b2984af4b694062ce8f Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Mon, 23 Aug 2021 14:22:02 +0300
Subject: iio: st_sensors: remove st_sensors_power_disable() function

This change converts the st_sensors_power_enable() function to use
devm_add_action_or_reset() handlers to register regulator_disable hooks for
when the drivers get unloaded.

The parent device of the IIO device object is used. This is based on the
assumption that all other devm_ calls in the ST sensors use this reference.

This makes the st_sensors_power_disable() un-needed.
Removing this also changes unload order a bit, as all ST drivers would call
st_sensors_power_disable() first and iio_device_unregister() after that.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210823112204.243255-4-aardelean@deviqon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_i2c.c                | 13 +---------
 drivers/iio/accel/st_accel_spi.c                | 13 +---------
 drivers/iio/common/st_sensors/st_sensors_core.c | 34 +++++++++++--------------
 drivers/iio/gyro/st_gyro_i2c.c                  | 13 +---------
 drivers/iio/gyro/st_gyro_spi.c                  | 13 +---------
 drivers/iio/magnetometer/st_magn_i2c.c          | 13 +---------
 drivers/iio/magnetometer/st_magn_spi.c          | 13 +---------
 drivers/iio/pressure/st_pressure_i2c.c          | 13 +---------
 drivers/iio/pressure/st_pressure_spi.c          | 13 +---------
 include/linux/iio/common/st_sensors.h           |  2 --
 10 files changed, 23 insertions(+), 117 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c
index cba57459e90a..b377575efc41 100644
--- a/drivers/iio/accel/st_accel_i2c.c
+++ b/drivers/iio/accel/st_accel_i2c.c
@@ -177,16 +177,7 @@ static int st_accel_i2c_probe(struct i2c_client *client)
 	if (ret)
 		return ret;
 
-	ret = st_accel_common_probe(indio_dev);
-	if (ret < 0)
-		goto st_accel_power_off;
-
-	return 0;
-
-st_accel_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return ret;
+	return st_accel_common_probe(indio_dev);
 }
 
 static int st_accel_i2c_remove(struct i2c_client *client)
@@ -195,8 +186,6 @@ static int st_accel_i2c_remove(struct i2c_client *client)
 
 	st_accel_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c
index 5167fae1ee8e..4ca87e73bdb3 100644
--- a/drivers/iio/accel/st_accel_spi.c
+++ b/drivers/iio/accel/st_accel_spi.c
@@ -127,16 +127,7 @@ static int st_accel_spi_probe(struct spi_device *spi)
 	if (err)
 		return err;
 
-	err = st_accel_common_probe(indio_dev);
-	if (err < 0)
-		goto st_accel_power_off;
-
-	return 0;
-
-st_accel_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return err;
+	return st_accel_common_probe(indio_dev);
 }
 
 static int st_accel_spi_remove(struct spi_device *spi)
@@ -145,8 +136,6 @@ static int st_accel_spi_remove(struct spi_device *spi)
 
 	st_accel_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 0bbb090b108c..a5a140de9a23 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -215,13 +215,19 @@ int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable)
 }
 EXPORT_SYMBOL(st_sensors_set_axis_enable);
 
+static void st_reg_disable(void *reg)
+{
+	regulator_disable(reg);
+}
+
 int st_sensors_power_enable(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *pdata = iio_priv(indio_dev);
+	struct device *parent = indio_dev->dev.parent;
 	int err;
 
 	/* Regulators not mandatory, but if requested we should enable them. */
-	pdata->vdd = devm_regulator_get(indio_dev->dev.parent, "vdd");
+	pdata->vdd = devm_regulator_get(parent, "vdd");
 	if (IS_ERR(pdata->vdd)) {
 		dev_err(&indio_dev->dev, "unable to get Vdd supply\n");
 		return PTR_ERR(pdata->vdd);
@@ -233,36 +239,26 @@ int st_sensors_power_enable(struct iio_dev *indio_dev)
 		return err;
 	}
 
-	pdata->vdd_io = devm_regulator_get(indio_dev->dev.parent, "vddio");
+	err = devm_add_action_or_reset(parent, st_reg_disable, pdata->vdd);
+	if (err)
+		return err;
+
+	pdata->vdd_io = devm_regulator_get(parent, "vddio");
 	if (IS_ERR(pdata->vdd_io)) {
 		dev_err(&indio_dev->dev, "unable to get Vdd_IO supply\n");
-		err = PTR_ERR(pdata->vdd_io);
-		goto st_sensors_disable_vdd;
+		return PTR_ERR(pdata->vdd_io);
 	}
 	err = regulator_enable(pdata->vdd_io);
 	if (err != 0) {
 		dev_warn(&indio_dev->dev,
 			 "Failed to enable specified Vdd_IO supply\n");
-		goto st_sensors_disable_vdd;
+		return err;
 	}
 
-	return 0;
-
-st_sensors_disable_vdd:
-	regulator_disable(pdata->vdd);
-	return err;
+	return devm_add_action_or_reset(parent, st_reg_disable, pdata->vdd_io);
 }
 EXPORT_SYMBOL(st_sensors_power_enable);
 
-void st_sensors_power_disable(struct iio_dev *indio_dev)
-{
-	struct st_sensor_data *pdata = iio_priv(indio_dev);
-
-	regulator_disable(pdata->vdd);
-	regulator_disable(pdata->vdd_io);
-}
-EXPORT_SYMBOL(st_sensors_power_disable);
-
 static int st_sensors_set_drdy_int_pin(struct iio_dev *indio_dev,
 					struct st_sensors_platform_data *pdata)
 {
diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c
index a8164fe48b85..0bd80dfd389f 100644
--- a/drivers/iio/gyro/st_gyro_i2c.c
+++ b/drivers/iio/gyro/st_gyro_i2c.c
@@ -90,16 +90,7 @@ static int st_gyro_i2c_probe(struct i2c_client *client,
 	if (err)
 		return err;
 
-	err = st_gyro_common_probe(indio_dev);
-	if (err < 0)
-		goto st_gyro_power_off;
-
-	return 0;
-
-st_gyro_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return err;
+	return st_gyro_common_probe(indio_dev);
 }
 
 static int st_gyro_i2c_remove(struct i2c_client *client)
@@ -108,8 +99,6 @@ static int st_gyro_i2c_remove(struct i2c_client *client)
 
 	st_gyro_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/drivers/iio/gyro/st_gyro_spi.c b/drivers/iio/gyro/st_gyro_spi.c
index 9d8916871b4b..f74b09fa5cde 100644
--- a/drivers/iio/gyro/st_gyro_spi.c
+++ b/drivers/iio/gyro/st_gyro_spi.c
@@ -94,16 +94,7 @@ static int st_gyro_spi_probe(struct spi_device *spi)
 	if (err)
 		return err;
 
-	err = st_gyro_common_probe(indio_dev);
-	if (err < 0)
-		goto st_gyro_power_off;
-
-	return 0;
-
-st_gyro_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return err;
+	return st_gyro_common_probe(indio_dev);
 }
 
 static int st_gyro_spi_remove(struct spi_device *spi)
@@ -112,8 +103,6 @@ static int st_gyro_spi_remove(struct spi_device *spi)
 
 	st_gyro_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c
index fa78f0a3b53e..0a5117dffcf4 100644
--- a/drivers/iio/magnetometer/st_magn_i2c.c
+++ b/drivers/iio/magnetometer/st_magn_i2c.c
@@ -86,16 +86,7 @@ static int st_magn_i2c_probe(struct i2c_client *client,
 	if (err)
 		return err;
 
-	err = st_magn_common_probe(indio_dev);
-	if (err < 0)
-		goto st_magn_power_off;
-
-	return 0;
-
-st_magn_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return err;
+	return st_magn_common_probe(indio_dev);
 }
 
 static int st_magn_i2c_remove(struct i2c_client *client)
@@ -104,8 +95,6 @@ static int st_magn_i2c_remove(struct i2c_client *client)
 
 	st_magn_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c
index ff43cbf61b05..1f3bf02b24e0 100644
--- a/drivers/iio/magnetometer/st_magn_spi.c
+++ b/drivers/iio/magnetometer/st_magn_spi.c
@@ -80,16 +80,7 @@ static int st_magn_spi_probe(struct spi_device *spi)
 	if (err)
 		return err;
 
-	err = st_magn_common_probe(indio_dev);
-	if (err < 0)
-		goto st_magn_power_off;
-
-	return 0;
-
-st_magn_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return err;
+	return st_magn_common_probe(indio_dev);
 }
 
 static int st_magn_spi_remove(struct spi_device *spi)
@@ -98,8 +89,6 @@ static int st_magn_spi_remove(struct spi_device *spi)
 
 	st_magn_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c
index 6215de677017..afeeab485c0d 100644
--- a/drivers/iio/pressure/st_pressure_i2c.c
+++ b/drivers/iio/pressure/st_pressure_i2c.c
@@ -103,16 +103,7 @@ static int st_press_i2c_probe(struct i2c_client *client,
 	if (ret)
 		return ret;
 
-	ret = st_press_common_probe(indio_dev);
-	if (ret < 0)
-		goto st_press_power_off;
-
-	return 0;
-
-st_press_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return ret;
+	return st_press_common_probe(indio_dev);
 }
 
 static int st_press_i2c_remove(struct i2c_client *client)
@@ -121,8 +112,6 @@ static int st_press_i2c_remove(struct i2c_client *client)
 
 	st_press_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/drivers/iio/pressure/st_pressure_spi.c b/drivers/iio/pressure/st_pressure_spi.c
index 5001aae8f00b..834ad6d40a70 100644
--- a/drivers/iio/pressure/st_pressure_spi.c
+++ b/drivers/iio/pressure/st_pressure_spi.c
@@ -86,16 +86,7 @@ static int st_press_spi_probe(struct spi_device *spi)
 	if (err)
 		return err;
 
-	err = st_press_common_probe(indio_dev);
-	if (err < 0)
-		goto st_press_power_off;
-
-	return 0;
-
-st_press_power_off:
-	st_sensors_power_disable(indio_dev);
-
-	return err;
+	return st_press_common_probe(indio_dev);
 }
 
 static int st_press_spi_remove(struct spi_device *spi)
@@ -104,8 +95,6 @@ static int st_press_spi_remove(struct spi_device *spi)
 
 	st_press_common_remove(indio_dev);
 
-	st_sensors_power_disable(indio_dev);
-
 	return 0;
 }
 
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index e74b55244f35..fc90c202d15e 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -293,8 +293,6 @@ int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable);
 
 int st_sensors_power_enable(struct iio_dev *indio_dev);
 
-void st_sensors_power_disable(struct iio_dev *indio_dev);
-
 int st_sensors_debugfs_reg_access(struct iio_dev *indio_dev,
 				  unsigned reg, unsigned writeval,
 				  unsigned *readval);
-- 
cgit v1.2.3


From 6b658c31bb6bc033f6ae60141c676383fb78784c Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Mon, 23 Aug 2021 14:22:03 +0300
Subject: iio: st_sensors: remove all driver remove functions

At this point all ST driver remove functions do iio_device_unregister().
This change removes them from them and replaces all iio_device_register()
with devm_iio_device_register().

This can be done in a single change relatively easy, since all these remove
functions are define in st_sensors.h.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210823112204.243255-5-aardelean@deviqon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_core.c            |  9 ++-------
 drivers/iio/accel/st_accel_i2c.c             | 10 ----------
 drivers/iio/accel/st_accel_spi.c             | 10 ----------
 drivers/iio/gyro/st_gyro_core.c              |  9 ++-------
 drivers/iio/gyro/st_gyro_i2c.c               | 10 ----------
 drivers/iio/gyro/st_gyro_spi.c               | 10 ----------
 drivers/iio/imu/st_lsm9ds0/st_lsm9ds0.h      |  1 -
 drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c | 15 +--------------
 drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c  |  6 ------
 drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c  |  6 ------
 drivers/iio/magnetometer/st_magn_core.c      |  9 ++-------
 drivers/iio/magnetometer/st_magn_i2c.c       | 10 ----------
 drivers/iio/magnetometer/st_magn_spi.c       | 10 ----------
 drivers/iio/pressure/st_pressure_core.c      |  9 ++-------
 drivers/iio/pressure/st_pressure_i2c.c       | 10 ----------
 drivers/iio/pressure/st_pressure_spi.c       | 10 ----------
 include/linux/iio/common/st_sensors.h        |  4 ----
 17 files changed, 9 insertions(+), 139 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index a7be1633bff1..01695abd9d2f 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1335,6 +1335,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *adata = iio_priv(indio_dev);
 	struct st_sensors_platform_data *pdata = dev_get_platdata(adata->dev);
+	struct device *parent = indio_dev->dev.parent;
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -1380,16 +1381,10 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	return iio_device_register(indio_dev);
+	return devm_iio_device_register(parent, indio_dev);
 }
 EXPORT_SYMBOL(st_accel_common_probe);
 
-void st_accel_common_remove(struct iio_dev *indio_dev)
-{
-	iio_device_unregister(indio_dev);
-}
-EXPORT_SYMBOL(st_accel_common_remove);
-
 MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics accelerometers driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c
index b377575efc41..c0ce78eebad9 100644
--- a/drivers/iio/accel/st_accel_i2c.c
+++ b/drivers/iio/accel/st_accel_i2c.c
@@ -180,15 +180,6 @@ static int st_accel_i2c_probe(struct i2c_client *client)
 	return st_accel_common_probe(indio_dev);
 }
 
-static int st_accel_i2c_remove(struct i2c_client *client)
-{
-	struct iio_dev *indio_dev = i2c_get_clientdata(client);
-
-	st_accel_common_remove(indio_dev);
-
-	return 0;
-}
-
 static struct i2c_driver st_accel_driver = {
 	.driver = {
 		.name = "st-accel-i2c",
@@ -196,7 +187,6 @@ static struct i2c_driver st_accel_driver = {
 		.acpi_match_table = ACPI_PTR(st_accel_acpi_match),
 	},
 	.probe_new = st_accel_i2c_probe,
-	.remove = st_accel_i2c_remove,
 	.id_table = st_accel_id_table,
 };
 module_i2c_driver(st_accel_driver);
diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c
index 4ca87e73bdb3..b74a1c6d03de 100644
--- a/drivers/iio/accel/st_accel_spi.c
+++ b/drivers/iio/accel/st_accel_spi.c
@@ -130,15 +130,6 @@ static int st_accel_spi_probe(struct spi_device *spi)
 	return st_accel_common_probe(indio_dev);
 }
 
-static int st_accel_spi_remove(struct spi_device *spi)
-{
-	struct iio_dev *indio_dev = spi_get_drvdata(spi);
-
-	st_accel_common_remove(indio_dev);
-
-	return 0;
-}
-
 static const struct spi_device_id st_accel_id_table[] = {
 	{ LIS3DH_ACCEL_DEV_NAME },
 	{ LSM330D_ACCEL_DEV_NAME },
@@ -166,7 +157,6 @@ static struct spi_driver st_accel_driver = {
 		.of_match_table = st_accel_of_match,
 	},
 	.probe = st_accel_spi_probe,
-	.remove = st_accel_spi_remove,
 	.id_table = st_accel_id_table,
 };
 module_spi_driver(st_accel_driver);
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index cb539b47cdf4..3609082a6778 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -478,6 +478,7 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *gdata = iio_priv(indio_dev);
 	struct st_sensors_platform_data *pdata;
+	struct device *parent = indio_dev->dev.parent;
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -515,16 +516,10 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	return iio_device_register(indio_dev);
+	return devm_iio_device_register(parent, indio_dev);
 }
 EXPORT_SYMBOL(st_gyro_common_probe);
 
-void st_gyro_common_remove(struct iio_dev *indio_dev)
-{
-	iio_device_unregister(indio_dev);
-}
-EXPORT_SYMBOL(st_gyro_common_remove);
-
 MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics gyroscopes driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c
index 0bd80dfd389f..163c7ba300c1 100644
--- a/drivers/iio/gyro/st_gyro_i2c.c
+++ b/drivers/iio/gyro/st_gyro_i2c.c
@@ -93,15 +93,6 @@ static int st_gyro_i2c_probe(struct i2c_client *client,
 	return st_gyro_common_probe(indio_dev);
 }
 
-static int st_gyro_i2c_remove(struct i2c_client *client)
-{
-	struct iio_dev *indio_dev = i2c_get_clientdata(client);
-
-	st_gyro_common_remove(indio_dev);
-
-	return 0;
-}
-
 static const struct i2c_device_id st_gyro_id_table[] = {
 	{ L3G4200D_GYRO_DEV_NAME },
 	{ LSM330D_GYRO_DEV_NAME },
@@ -122,7 +113,6 @@ static struct i2c_driver st_gyro_driver = {
 		.of_match_table = st_gyro_of_match,
 	},
 	.probe = st_gyro_i2c_probe,
-	.remove = st_gyro_i2c_remove,
 	.id_table = st_gyro_id_table,
 };
 module_i2c_driver(st_gyro_driver);
diff --git a/drivers/iio/gyro/st_gyro_spi.c b/drivers/iio/gyro/st_gyro_spi.c
index f74b09fa5cde..b0023f9b9771 100644
--- a/drivers/iio/gyro/st_gyro_spi.c
+++ b/drivers/iio/gyro/st_gyro_spi.c
@@ -97,15 +97,6 @@ static int st_gyro_spi_probe(struct spi_device *spi)
 	return st_gyro_common_probe(indio_dev);
 }
 
-static int st_gyro_spi_remove(struct spi_device *spi)
-{
-	struct iio_dev *indio_dev = spi_get_drvdata(spi);
-
-	st_gyro_common_remove(indio_dev);
-
-	return 0;
-}
-
 static const struct spi_device_id st_gyro_id_table[] = {
 	{ L3G4200D_GYRO_DEV_NAME },
 	{ LSM330D_GYRO_DEV_NAME },
@@ -126,7 +117,6 @@ static struct spi_driver st_gyro_driver = {
 		.of_match_table = st_gyro_of_match,
 	},
 	.probe = st_gyro_spi_probe,
-	.remove = st_gyro_spi_remove,
 	.id_table = st_gyro_id_table,
 };
 module_spi_driver(st_gyro_driver);
diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0.h b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0.h
index 146393afd9a7..76678cdefb07 100644
--- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0.h
+++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0.h
@@ -18,6 +18,5 @@ struct st_lsm9ds0 {
 };
 
 int st_lsm9ds0_probe(struct st_lsm9ds0 *lsm9ds0, struct regmap *regmap);
-int st_lsm9ds0_remove(struct st_lsm9ds0 *lsm9ds0);
 
 #endif /* ST_LSM9DS0_H */
diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
index 5e6625140db7..d276f663fe57 100644
--- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
+++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
@@ -142,23 +142,10 @@ int st_lsm9ds0_probe(struct st_lsm9ds0 *lsm9ds0, struct regmap *regmap)
 		return ret;
 
 	/* Setup magnetometer device */
-	ret = st_lsm9ds0_probe_magn(lsm9ds0, regmap);
-	if (ret)
-		st_accel_common_remove(lsm9ds0->accel);
-
-	return ret;
+	return st_lsm9ds0_probe_magn(lsm9ds0, regmap);
 }
 EXPORT_SYMBOL_GPL(st_lsm9ds0_probe);
 
-int st_lsm9ds0_remove(struct st_lsm9ds0 *lsm9ds0)
-{
-	st_magn_common_remove(lsm9ds0->magn);
-	st_accel_common_remove(lsm9ds0->accel);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(st_lsm9ds0_remove);
-
 MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>");
 MODULE_DESCRIPTION("STMicroelectronics LSM9DS0 IMU core driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c
index 78bede358747..8f205c477e6f 100644
--- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c
+++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_i2c.c
@@ -64,18 +64,12 @@ static int st_lsm9ds0_i2c_probe(struct i2c_client *client)
 	return st_lsm9ds0_probe(lsm9ds0, regmap);
 }
 
-static int st_lsm9ds0_i2c_remove(struct i2c_client *client)
-{
-	return st_lsm9ds0_remove(i2c_get_clientdata(client));
-}
-
 static struct i2c_driver st_lsm9ds0_driver = {
 	.driver = {
 		.name = "st-lsm9ds0-i2c",
 		.of_match_table = st_lsm9ds0_of_match,
 	},
 	.probe_new = st_lsm9ds0_i2c_probe,
-	.remove = st_lsm9ds0_i2c_remove,
 	.id_table = st_lsm9ds0_id_table,
 };
 module_i2c_driver(st_lsm9ds0_driver);
diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c
index 180b54e66438..0ddfa53166af 100644
--- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c
+++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_spi.c
@@ -63,18 +63,12 @@ static int st_lsm9ds0_spi_probe(struct spi_device *spi)
 	return st_lsm9ds0_probe(lsm9ds0, regmap);
 }
 
-static int st_lsm9ds0_spi_remove(struct spi_device *spi)
-{
-	return st_lsm9ds0_remove(spi_get_drvdata(spi));
-}
-
 static struct spi_driver st_lsm9ds0_driver = {
 	.driver = {
 		.name = "st-lsm9ds0-spi",
 		.of_match_table = st_lsm9ds0_of_match,
 	},
 	.probe = st_lsm9ds0_spi_probe,
-	.remove = st_lsm9ds0_spi_remove,
 	.id_table = st_lsm9ds0_id_table,
 };
 module_spi_driver(st_lsm9ds0_driver);
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 5be85e2405a5..1458906a3765 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -612,6 +612,7 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *mdata = iio_priv(indio_dev);
 	struct st_sensors_platform_data *pdata = dev_get_platdata(mdata->dev);
+	struct device *parent = indio_dev->dev.parent;
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -650,16 +651,10 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	return iio_device_register(indio_dev);
+	return devm_iio_device_register(parent, indio_dev);
 }
 EXPORT_SYMBOL(st_magn_common_probe);
 
-void st_magn_common_remove(struct iio_dev *indio_dev)
-{
-	iio_device_unregister(indio_dev);
-}
-EXPORT_SYMBOL(st_magn_common_remove);
-
 MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics magnetometers driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c
index 0a5117dffcf4..7237711fc09b 100644
--- a/drivers/iio/magnetometer/st_magn_i2c.c
+++ b/drivers/iio/magnetometer/st_magn_i2c.c
@@ -89,15 +89,6 @@ static int st_magn_i2c_probe(struct i2c_client *client,
 	return st_magn_common_probe(indio_dev);
 }
 
-static int st_magn_i2c_remove(struct i2c_client *client)
-{
-	struct iio_dev *indio_dev = i2c_get_clientdata(client);
-
-	st_magn_common_remove(indio_dev);
-
-	return 0;
-}
-
 static const struct i2c_device_id st_magn_id_table[] = {
 	{ LSM303DLH_MAGN_DEV_NAME },
 	{ LSM303DLHC_MAGN_DEV_NAME },
@@ -117,7 +108,6 @@ static struct i2c_driver st_magn_driver = {
 		.of_match_table = st_magn_of_match,
 	},
 	.probe = st_magn_i2c_probe,
-	.remove = st_magn_i2c_remove,
 	.id_table = st_magn_id_table,
 };
 module_i2c_driver(st_magn_driver);
diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c
index 1f3bf02b24e0..489d4462862f 100644
--- a/drivers/iio/magnetometer/st_magn_spi.c
+++ b/drivers/iio/magnetometer/st_magn_spi.c
@@ -83,15 +83,6 @@ static int st_magn_spi_probe(struct spi_device *spi)
 	return st_magn_common_probe(indio_dev);
 }
 
-static int st_magn_spi_remove(struct spi_device *spi)
-{
-	struct iio_dev *indio_dev = spi_get_drvdata(spi);
-
-	st_magn_common_remove(indio_dev);
-
-	return 0;
-}
-
 static const struct spi_device_id st_magn_id_table[] = {
 	{ LIS3MDL_MAGN_DEV_NAME },
 	{ LSM303AGR_MAGN_DEV_NAME },
@@ -108,7 +99,6 @@ static struct spi_driver st_magn_driver = {
 		.of_match_table = st_magn_of_match,
 	},
 	.probe = st_magn_spi_probe,
-	.remove = st_magn_spi_remove,
 	.id_table = st_magn_id_table,
 };
 module_spi_driver(st_magn_driver);
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 17ebb5171d4c..cebcc1d93d0b 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -678,6 +678,7 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *press_data = iio_priv(indio_dev);
 	struct st_sensors_platform_data *pdata = dev_get_platdata(press_data->dev);
+	struct device *parent = indio_dev->dev.parent;
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -721,16 +722,10 @@ int st_press_common_probe(struct iio_dev *indio_dev)
 			return err;
 	}
 
-	return iio_device_register(indio_dev);
+	return devm_iio_device_register(parent, indio_dev);
 }
 EXPORT_SYMBOL(st_press_common_probe);
 
-void st_press_common_remove(struct iio_dev *indio_dev)
-{
-	iio_device_unregister(indio_dev);
-}
-EXPORT_SYMBOL(st_press_common_remove);
-
 MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics pressures driver");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c
index afeeab485c0d..1939e999a427 100644
--- a/drivers/iio/pressure/st_pressure_i2c.c
+++ b/drivers/iio/pressure/st_pressure_i2c.c
@@ -106,15 +106,6 @@ static int st_press_i2c_probe(struct i2c_client *client,
 	return st_press_common_probe(indio_dev);
 }
 
-static int st_press_i2c_remove(struct i2c_client *client)
-{
-	struct iio_dev *indio_dev = i2c_get_clientdata(client);
-
-	st_press_common_remove(indio_dev);
-
-	return 0;
-}
-
 static struct i2c_driver st_press_driver = {
 	.driver = {
 		.name = "st-press-i2c",
@@ -122,7 +113,6 @@ static struct i2c_driver st_press_driver = {
 		.acpi_match_table = ACPI_PTR(st_press_acpi_match),
 	},
 	.probe = st_press_i2c_probe,
-	.remove = st_press_i2c_remove,
 	.id_table = st_press_id_table,
 };
 module_i2c_driver(st_press_driver);
diff --git a/drivers/iio/pressure/st_pressure_spi.c b/drivers/iio/pressure/st_pressure_spi.c
index 834ad6d40a70..9b2523c5bc94 100644
--- a/drivers/iio/pressure/st_pressure_spi.c
+++ b/drivers/iio/pressure/st_pressure_spi.c
@@ -89,15 +89,6 @@ static int st_press_spi_probe(struct spi_device *spi)
 	return st_press_common_probe(indio_dev);
 }
 
-static int st_press_spi_remove(struct spi_device *spi)
-{
-	struct iio_dev *indio_dev = spi_get_drvdata(spi);
-
-	st_press_common_remove(indio_dev);
-
-	return 0;
-}
-
 static const struct spi_device_id st_press_id_table[] = {
 	{ LPS001WP_PRESS_DEV_NAME },
 	{ LPS25H_PRESS_DEV_NAME },
@@ -116,7 +107,6 @@ static struct spi_driver st_press_driver = {
 		.of_match_table = st_press_of_match,
 	},
 	.probe = st_press_spi_probe,
-	.remove = st_press_spi_remove,
 	.id_table = st_press_id_table,
 };
 module_spi_driver(st_press_driver);
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index fc90c202d15e..d17ae1e5ca19 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -323,21 +323,17 @@ void st_sensors_dev_name_probe(struct device *dev, char *name, int len);
 /* Accelerometer */
 const struct st_sensor_settings *st_accel_get_settings(const char *name);
 int st_accel_common_probe(struct iio_dev *indio_dev);
-void st_accel_common_remove(struct iio_dev *indio_dev);
 
 /* Gyroscope */
 const struct st_sensor_settings *st_gyro_get_settings(const char *name);
 int st_gyro_common_probe(struct iio_dev *indio_dev);
-void st_gyro_common_remove(struct iio_dev *indio_dev);
 
 /* Magnetometer */
 const struct st_sensor_settings *st_magn_get_settings(const char *name);
 int st_magn_common_probe(struct iio_dev *indio_dev);
-void st_magn_common_remove(struct iio_dev *indio_dev);
 
 /* Pressure */
 const struct st_sensor_settings *st_press_get_settings(const char *name);
 int st_press_common_probe(struct iio_dev *indio_dev);
-void st_press_common_remove(struct iio_dev *indio_dev);
 
 #endif /* ST_SENSORS_H */
-- 
cgit v1.2.3


From e42696515414a15774c80f1d454194ce0cd9f145 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Mon, 23 Aug 2021 14:22:04 +0300
Subject: iio: st_sensors: remove reference to parent device object on
 st_sensor_data

The idea behind it, is that all devm_ calls in ST sensors are bound to the
parent device object.

However, the reference to that object is kept on both the st_sensor_data
struct and the IIO object parent (indio_dev->dev.parent).

This change only adds a bit consistency and uses the reference stored on
indio_dev->dev.parent, to enforce the assumption that all ST sensors' devm_
calls are bound to the same reference as the one store on st_sensor_data.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20210823112204.243255-6-aardelean@deviqon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/st_accel_core.c                  | 6 +++---
 drivers/iio/common/st_sensors/st_sensors_i2c.c     | 1 -
 drivers/iio/common/st_sensors/st_sensors_spi.c     | 1 -
 drivers/iio/common/st_sensors/st_sensors_trigger.c | 8 +++++---
 drivers/iio/gyro/st_gyro_core.c                    | 2 +-
 drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c       | 2 --
 drivers/iio/magnetometer/st_magn_core.c            | 4 ++--
 drivers/iio/pressure/st_pressure_core.c            | 2 +-
 include/linux/iio/common/st_sensors.h              | 2 --
 9 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 01695abd9d2f..31ea19d0ba71 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -1210,7 +1210,7 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev)
 	};
 
 
-	adev = ACPI_COMPANION(adata->dev);
+	adev = ACPI_COMPANION(indio_dev->dev.parent);
 	if (!adev)
 		return 0;
 
@@ -1334,8 +1334,8 @@ EXPORT_SYMBOL(st_accel_get_settings);
 int st_accel_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *adata = iio_priv(indio_dev);
-	struct st_sensors_platform_data *pdata = dev_get_platdata(adata->dev);
 	struct device *parent = indio_dev->dev.parent;
+	struct st_sensors_platform_data *pdata = dev_get_platdata(parent);
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -1355,7 +1355,7 @@ int st_accel_common_probe(struct iio_dev *indio_dev)
 	 */
 	err = apply_acpi_orientation(indio_dev);
 	if (err) {
-		err = iio_read_mount_matrix(adata->dev, &adata->mount_matrix);
+		err = iio_read_mount_matrix(parent, &adata->mount_matrix);
 		if (err)
 			return err;
 	}
diff --git a/drivers/iio/common/st_sensors/st_sensors_i2c.c b/drivers/iio/common/st_sensors/st_sensors_i2c.c
index b3ff88700866..18bd3c3d99bc 100644
--- a/drivers/iio/common/st_sensors/st_sensors_i2c.c
+++ b/drivers/iio/common/st_sensors/st_sensors_i2c.c
@@ -57,7 +57,6 @@ int st_sensors_i2c_configure(struct iio_dev *indio_dev,
 
 	indio_dev->name = client->name;
 
-	sdata->dev = &client->dev;
 	sdata->irq = client->irq;
 
 	return 0;
diff --git a/drivers/iio/common/st_sensors/st_sensors_spi.c b/drivers/iio/common/st_sensors/st_sensors_spi.c
index 0d1d66c77cd8..7c60050e90dc 100644
--- a/drivers/iio/common/st_sensors/st_sensors_spi.c
+++ b/drivers/iio/common/st_sensors/st_sensors_spi.c
@@ -109,7 +109,6 @@ int st_sensors_spi_configure(struct iio_dev *indio_dev,
 
 	indio_dev->name = spi->modalias;
 
-	sdata->dev = &spi->dev;
 	sdata->irq = spi->irq;
 
 	return 0;
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index d022157b66a2..392d74449886 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -42,7 +42,8 @@ static bool st_sensors_new_samples_available(struct iio_dev *indio_dev,
 			  sdata->sensor_settings->drdy_irq.stat_drdy.addr,
 			  &status);
 	if (ret < 0) {
-		dev_err(sdata->dev, "error checking samples available\n");
+		dev_err(indio_dev->dev.parent,
+			"error checking samples available\n");
 		return false;
 	}
 
@@ -87,7 +88,7 @@ static irqreturn_t st_sensors_irq_thread(int irq, void *p)
 	    st_sensors_new_samples_available(indio_dev, sdata)) {
 		iio_trigger_poll_chained(p);
 	} else {
-		dev_dbg(sdata->dev, "spurious IRQ\n");
+		dev_dbg(indio_dev->dev.parent, "spurious IRQ\n");
 		return IRQ_NONE;
 	}
 
@@ -107,7 +108,8 @@ static irqreturn_t st_sensors_irq_thread(int irq, void *p)
 	 */
 	while (sdata->hw_irq_trigger &&
 	       st_sensors_new_samples_available(indio_dev, sdata)) {
-		dev_dbg(sdata->dev, "more samples came in during polling\n");
+		dev_dbg(indio_dev->dev.parent,
+			"more samples came in during polling\n");
 		sdata->hw_timestamp = iio_get_time_ns(indio_dev);
 		iio_trigger_poll_chained(p);
 	}
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index 3609082a6778..201050b76fe5 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -492,7 +492,7 @@ int st_gyro_common_probe(struct iio_dev *indio_dev)
 	indio_dev->channels = gdata->sensor_settings->ch;
 	indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS;
 
-	err = iio_read_mount_matrix(gdata->dev, &gdata->mount_matrix);
+	err = iio_read_mount_matrix(parent, &gdata->mount_matrix);
 	if (err)
 		return err;
 
diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
index d276f663fe57..b3a43a3b04ff 100644
--- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
+++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
@@ -90,7 +90,6 @@ static int st_lsm9ds0_probe_accel(struct st_lsm9ds0 *lsm9ds0, struct regmap *reg
 
 	data = iio_priv(lsm9ds0->accel);
 	data->sensor_settings = (struct st_sensor_settings *)settings;
-	data->dev = dev;
 	data->irq = lsm9ds0->irq;
 	data->regmap = regmap;
 	data->vdd = lsm9ds0->vdd;
@@ -119,7 +118,6 @@ static int st_lsm9ds0_probe_magn(struct st_lsm9ds0 *lsm9ds0, struct regmap *regm
 
 	data = iio_priv(lsm9ds0->magn);
 	data->sensor_settings = (struct st_sensor_settings *)settings;
-	data->dev = dev;
 	data->irq = lsm9ds0->irq;
 	data->regmap = regmap;
 	data->vdd = lsm9ds0->vdd;
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 1458906a3765..0806a1e65ce4 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -611,8 +611,8 @@ EXPORT_SYMBOL(st_magn_get_settings);
 int st_magn_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *mdata = iio_priv(indio_dev);
-	struct st_sensors_platform_data *pdata = dev_get_platdata(mdata->dev);
 	struct device *parent = indio_dev->dev.parent;
+	struct st_sensors_platform_data *pdata = dev_get_platdata(parent);
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -626,7 +626,7 @@ int st_magn_common_probe(struct iio_dev *indio_dev)
 	indio_dev->channels = mdata->sensor_settings->ch;
 	indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS;
 
-	err = iio_read_mount_matrix(mdata->dev, &mdata->mount_matrix);
+	err = iio_read_mount_matrix(parent, &mdata->mount_matrix);
 	if (err)
 		return err;
 
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index cebcc1d93d0b..26a1ee43d56e 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -677,8 +677,8 @@ EXPORT_SYMBOL(st_press_get_settings);
 int st_press_common_probe(struct iio_dev *indio_dev)
 {
 	struct st_sensor_data *press_data = iio_priv(indio_dev);
-	struct st_sensors_platform_data *pdata = dev_get_platdata(press_data->dev);
 	struct device *parent = indio_dev->dev.parent;
+	struct st_sensors_platform_data *pdata = dev_get_platdata(parent);
 	int err;
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index d17ae1e5ca19..22f67845cdd3 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -220,7 +220,6 @@ struct st_sensor_settings {
 
 /**
  * struct st_sensor_data - ST sensor device status
- * @dev: Pointer to instance of struct device (I2C or SPI).
  * @trig: The trigger in use by the core driver.
  * @mount_matrix: The mounting matrix of the sensor.
  * @sensor_settings: Pointer to the specific sensor settings in use.
@@ -240,7 +239,6 @@ struct st_sensor_settings {
  * @buffer_data: Data used by buffer part.
  */
 struct st_sensor_data {
-	struct device *dev;
 	struct iio_trigger *trig;
 	struct iio_mount_matrix mount_matrix;
 	struct st_sensor_settings *sensor_settings;
-- 
cgit v1.2.3


From 8fb0f47a9d7acf620d0fd97831b69da9bc5e22ed Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 10 Sep 2021 11:18:36 -0600
Subject: iov_iter: add helper to save iov_iter state

In an ideal world, when someone is passed an iov_iter and returns X bytes,
then X bytes would have been consumed/advanced from the iov_iter. But we
have use cases that always consume the entire iterator, a few examples
of that are iomap and bdev O_DIRECT. This means we cannot rely on the
state of the iov_iter once we've called ->read_iter() or ->write_iter().

This would be easier if we didn't always have to deal with truncate of
the iov_iter, as rewinding would be trivial without that. We recently
added a commit to track the truncate state, but that grew the iov_iter
by 8 bytes and wasn't the best solution.

Implement a helper to save enough of the iov_iter state to sanely restore
it after we've called the read/write iterator helpers. This currently
only works for IOVEC/BVEC/KVEC as that's all we need, support for other
iterator types are left as an exercise for the reader.

Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wiacKV4Gh-MYjteU0LwNBSGpWrK-Ov25HdqB1ewinrFPg@mail.gmail.com/
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/uio.h | 15 +++++++++++++++
 lib/iov_iter.c      | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 5265024e8b90..984c4ab74859 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -27,6 +27,12 @@ enum iter_type {
 	ITER_DISCARD,
 };
 
+struct iov_iter_state {
+	size_t iov_offset;
+	size_t count;
+	unsigned long nr_segs;
+};
+
 struct iov_iter {
 	u8 iter_type;
 	bool data_source;
@@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 	return i->iter_type;
 }
 
+static inline void iov_iter_save_state(struct iov_iter *iter,
+				       struct iov_iter_state *state)
+{
+	state->iov_offset = iter->iov_offset;
+	state->count = iter->count;
+	state->nr_segs = iter->nr_segs;
+}
+
 static inline bool iter_is_iovec(const struct iov_iter *i)
 {
 	return iov_iter_type(i) == ITER_IOVEC;
@@ -233,6 +247,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
 			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
 
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f2d50d69a6c3..755c10c5138c 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1972,3 +1972,39 @@ int import_single_range(int rw, void __user *buf, size_t len,
 	return 0;
 }
 EXPORT_SYMBOL(import_single_range);
+
+/**
+ * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
+ *     iov_iter_save_state() was called.
+ *
+ * @i: &struct iov_iter to restore
+ * @state: state to restore from
+ *
+ * Used after iov_iter_save_state() to bring restore @i, if operations may
+ * have advanced it.
+ *
+ * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
+ */
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
+{
+	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
+			 !iov_iter_is_kvec(i))
+		return;
+	i->iov_offset = state->iov_offset;
+	i->count = state->count;
+	/*
+	 * For the *vec iters, nr_segs + iov is constant - if we increment
+	 * the vec, then we also decrement the nr_segs count. Hence we don't
+	 * need to track both of these, just one is enough and we can deduct
+	 * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
+	 * size, so we can just increment the iov pointer as they are unionzed.
+	 * ITER_BVEC _may_ be the same size on some archs, but on others it is
+	 * not. Be safe and handle it separately.
+	 */
+	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
+	if (iov_iter_is_bvec(i))
+		i->bvec -= state->nr_segs - i->nr_segs;
+	else
+		i->iov -= state->nr_segs - i->nr_segs;
+	i->nr_segs = state->nr_segs;
+}
-- 
cgit v1.2.3


From 2935662449dfa4467d2e769a70801c608fd510c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 13 Sep 2021 07:41:10 +0200
Subject: kernfs: remove kernfs_create_file and kernfs_create_file_ns

All callers actually use __kernfs_create_file.

Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210913054121.616001-3-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kernfs.h | 24 ------------------------
 1 file changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 1093abf7c28c..cecfeedb7361 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -568,30 +568,6 @@ kernfs_create_dir(struct kernfs_node *parent, const char *name, umode_t mode,
 				    priv, NULL);
 }
 
-static inline struct kernfs_node *
-kernfs_create_file_ns(struct kernfs_node *parent, const char *name,
-		      umode_t mode, kuid_t uid, kgid_t gid,
-		      loff_t size, const struct kernfs_ops *ops,
-		      void *priv, const void *ns)
-{
-	struct lock_class_key *key = NULL;
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	key = (struct lock_class_key *)&ops->lockdep_key;
-#endif
-	return __kernfs_create_file(parent, name, mode, uid, gid,
-				    size, ops, priv, ns, key);
-}
-
-static inline struct kernfs_node *
-kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode,
-		   loff_t size, const struct kernfs_ops *ops, void *priv)
-{
-	return kernfs_create_file_ns(parent, name, mode,
-				     GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
-				     size, ops, priv, NULL);
-}
-
 static inline int kernfs_remove_by_name(struct kernfs_node *parent,
 					const char *name)
 {
-- 
cgit v1.2.3


From eaf501e0d8af691e532b6b9aee511659cf5ee00c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 13 Sep 2021 07:41:11 +0200
Subject: kernfs: remove the unused lockdep_key field in struct kernfs_ops

Not actually used anywhere.

Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210913054121.616001-4-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kernfs.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index cecfeedb7361..3ccce6f24548 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -269,10 +269,6 @@ struct kernfs_ops {
 			 struct poll_table_struct *pt);
 
 	int (*mmap)(struct kernfs_open_file *of, struct vm_area_struct *vma);
-
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	struct lock_class_key	lockdep_key;
-#endif
 };
 
 /*
-- 
cgit v1.2.3


From a2aec2c86ef0cad0fd6be718cfeb5cf5eefbfca9 Mon Sep 17 00:00:00 2001
From: "GONG, Ruiqi" <gongruiqi1@huawei.com>
Date: Mon, 30 Aug 2021 16:33:56 +0800
Subject: mtd: Remove obsolete macros only used by the old nand_ecclayout
 struct

All uses of MTD_MAX_{OOBFREE,ECCPOS}_ENTRIES_LARGE have been removed as
commit ef5eeea6e911 ("mtd: nand: brcm: switch to mtd_ooblayout_ops") and
commit aab616e31d1c ("mtd: kill the nand_ecclayout struct") replaced
struct nand_ecclayout by the new mtd_ooblayout_ops interface. Remove
these two macros therefore.

Reported-by: Yi Yang <yiyang13@huawei.com>
Signed-off-by: GONG, Ruiqi <gongruiqi1@huawei.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20210830083356.31702-1-gongruiqi1@huawei.com
---
 include/linux/mtd/mtd.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 88227044fc86..f5e7dfc2e4e9 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -72,8 +72,6 @@ struct mtd_oob_ops {
 	uint8_t		*oobbuf;
 };
 
-#define MTD_MAX_OOBFREE_ENTRIES_LARGE	32
-#define MTD_MAX_ECCPOS_ENTRIES_LARGE	640
 /**
  * struct mtd_oob_region - oob region definition
  * @offset: region offset
-- 
cgit v1.2.3


From 356ed64991c6847a0c4f2e8fa3b1133f7a14f1fc Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Tue, 14 Sep 2021 10:33:51 +0800
Subject: bpf: Handle return value of BPF_PROG_TYPE_STRUCT_OPS prog

Currently if a function ptr in struct_ops has a return value, its
caller will get a random return value from it, because the return
value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped.

So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline
to save and return the return value of struct_ops prog if ret_size of
the function ptr is greater than 0. Also restricting the flag to be
used alone.

Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210914023351.3664499-1-houtao1@huawei.com
---
 arch/x86/net/bpf_jit_comp.c | 53 ++++++++++++++++++++++++++++++++++-----------
 include/linux/bpf.h         |  3 ++-
 kernel/bpf/bpf_struct_ops.c |  7 ++++--
 3 files changed, 47 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0fe6aacef3db..d24a512fd6f3 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1744,7 +1744,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 }
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
-			   struct bpf_prog *p, int stack_size, bool mod_ret)
+			   struct bpf_prog *p, int stack_size, bool save_ret)
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
@@ -1777,11 +1777,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (emit_call(&prog, p->bpf_func, prog))
 		return -EINVAL;
 
-	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+	/*
+	 * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
 	 * of the previous call which is then passed on the stack to
 	 * the next BPF program.
+	 *
+	 * BPF_TRAMP_FENTRY trampoline may need to return the return
+	 * value of BPF_PROG_TYPE_STRUCT_OPS prog.
 	 */
-	if (mod_ret)
+	if (save_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
 	/* replace 2 nops with JE insn, since jmp target is known */
@@ -1828,13 +1832,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 }
 
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
-		      struct bpf_tramp_progs *tp, int stack_size)
+		      struct bpf_tramp_progs *tp, int stack_size,
+		      bool save_ret)
 {
 	int i;
 	u8 *prog = *pprog;
 
 	for (i = 0; i < tp->nr_progs; i++) {
-		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+				    save_ret))
 			return -EINVAL;
 	}
 	*pprog = prog;
@@ -1877,6 +1883,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+		return false;
+
+	/*
+	 * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+	 * and it must be used alone.
+	 */
+	if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+	    (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+		return false;
+
+	return true;
+}
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1972,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
 	u8 **branches = NULL;
 	u8 *prog;
+	bool save_ret;
 
 	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
 	if (nr_args > 6)
 		return -ENOTSUPP;
 
-	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
-	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+	if (!is_valid_bpf_tramp_flags(flags))
 		return -EINVAL;
 
-	if (flags & BPF_TRAMP_F_CALL_ORIG)
-		stack_size += 8; /* room for return value of orig_call */
+	/* room for return value of orig_call or fentry prog */
+	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+	if (save_ret)
+		stack_size += 8;
 
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2030,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fentry->nr_progs)
-		if (invoke_bpf(m, &prog, fentry, stack_size))
+		if (invoke_bpf(m, &prog, fentry, stack_size,
+			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
 
 	if (fmod_ret->nr_progs) {
@@ -2052,7 +2078,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fexit->nr_progs)
-		if (invoke_bpf(m, &prog, fexit, stack_size)) {
+		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
@@ -2072,9 +2098,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			ret = -EINVAL;
 			goto cleanup;
 		}
-		/* restore original return value back into RAX */
-		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 	}
+	/* restore return value of orig_call or fentry prog back into RAX */
+	if (save_ret)
+		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
 	EMIT1(0x5B); /* pop rbx */
 	EMIT1(0xC9); /* leave */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f4c16f19f83e..020a7d5bf470 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -578,11 +578,12 @@ struct btf_func_model {
  * programs only. Should not be used with normal calls and indirect calls.
  */
 #define BPF_TRAMP_F_SKIP_FRAME		BIT(2)
-
 /* Store IP address of the caller on the trampoline stack,
  * so it's available for trampoline's programs.
  */
 #define BPF_TRAMP_F_IP_ARG		BIT(3)
+/* Return the return value of fentry prog. Only used by bpf_struct_ops. */
+#define BPF_TRAMP_F_RET_FENTRY_RET	BIT(4)
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index d6731c32864e..9abcc33f02cf 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		const struct btf_type *mtype, *ptype;
 		struct bpf_prog *prog;
 		u32 moff;
+		u32 flags;
 
 		moff = btf_member_bit_offset(t, member) / 8;
 		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
@@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
 		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
 		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+		flags = st_ops->func_models[i].ret_size > 0 ?
+			BPF_TRAMP_F_RET_FENTRY_RET : 0;
 		err = arch_prepare_bpf_trampoline(NULL, image,
 						  st_map->image + PAGE_SIZE,
-						  &st_ops->func_models[i], 0,
-						  tprogs, NULL);
+						  &st_ops->func_models[i],
+						  flags, tprogs, NULL);
 		if (err < 0)
 			goto reset_unlock;
 
-- 
cgit v1.2.3


From 77e02cf57b6cff9919949defb7fd9b8ac16399a2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 14 Sep 2021 13:23:22 -0700
Subject: memblock: introduce saner 'memblock_free_ptr()' interface

The boot-time allocation interface for memblock is a mess, with
'memblock_alloc()' returning a virtual pointer, but then you are
supposed to free it with 'memblock_free()' that takes a _physical_
address.

Not only is that all kinds of strange and illogical, but it actually
causes bugs, when people then use it like a normal allocation function,
and it fails spectacularly on a NULL pointer:

   https://lore.kernel.org/all/20210912140820.GD25450@xsang-OptiPlex-9020/

or just random memory corruption if the debug checks don't catch it:

   https://lore.kernel.org/all/61ab2d0c-3313-aaab-514c-e15b7aa054a0@suse.cz/

I really don't want to apply patches that treat the symptoms, when the
fundamental cause is this horribly confusing interface.

I started out looking at just automating a sane replacement sequence,
but because of this mix or virtual and physical addresses, and because
people have used the "__pa()" macro that can take either a regular
kernel pointer, or just the raw "unsigned long" address, it's all quite
messy.

So this just introduces a new saner interface for freeing a virtual
address that was allocated using 'memblock_alloc()', and that was kept
as a regular kernel pointer.  And then it converts a couple of users
that are obvious and easy to test, including the 'xbc_nodes' case in
lib/bootconfig.c that caused problems.

Reported-by: kernel test robot <oliver.sang@intel.com>
Fixes: 40caa127f3c7 ("init: bootconfig: Remove all bootconfig data when the init memory is removed")
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/setup_percpu.c |  2 +-
 arch/x86/mm/kasan_init_64.c    |  6 ++----
 arch/x86/mm/numa.c             |  2 +-
 arch/x86/mm/numa_emulation.c   |  3 +--
 drivers/base/arch_numa.c       |  2 +-
 drivers/macintosh/smu.c        |  2 +-
 include/linux/memblock.h       |  1 +
 init/main.c                    |  2 +-
 kernel/printk/printk.c         |  4 ++--
 lib/bootconfig.c               |  2 +-
 mm/memblock.c                  | 16 +++++++++++++++-
 11 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 78a32b956e81..5afd98559193 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -135,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_free(__pa(ptr), size);
+	memblock_free_ptr(ptr, size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 1a50434c8a4d..ef885370719a 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -49,8 +49,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
 			p = early_alloc(PMD_SIZE, nid, false);
 			if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
 				return;
-			else if (p)
-				memblock_free(__pa(p), PMD_SIZE);
+			memblock_free_ptr(p, PMD_SIZE);
 		}
 
 		p = early_alloc(PAGE_SIZE, nid, true);
@@ -86,8 +85,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
 			p = early_alloc(PUD_SIZE, nid, false);
 			if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
 				return;
-			else if (p)
-				memblock_free(__pa(p), PUD_SIZE);
+			memblock_free_ptr(p, PUD_SIZE);
 		}
 
 		p = early_alloc(PAGE_SIZE, nid, true);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index a1b5c71099e6..1e9b93b088db 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -355,7 +355,7 @@ void __init numa_reset_distance(void)
 
 	/* numa_distance could be 1LU marking allocation failure, test cnt */
 	if (numa_distance_cnt)
-		memblock_free(__pa(numa_distance), size);
+		memblock_free_ptr(numa_distance, size);
 	numa_distance_cnt = 0;
 	numa_distance = NULL;	/* enable table creation */
 }
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 737491b13728..e801e30089c4 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -517,8 +517,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	}
 
 	/* free the copied physical distance table */
-	if (phys_dist)
-		memblock_free(__pa(phys_dist), phys_size);
+	memblock_free_ptr(phys_dist, phys_size);
 	return;
 
 no_emu:
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 46c503486e96..00fb4120a5b3 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -264,7 +264,7 @@ void __init numa_free_distance(void)
 	size = numa_distance_cnt * numa_distance_cnt *
 		sizeof(numa_distance[0]);
 
-	memblock_free(__pa(numa_distance), size);
+	memblock_free_ptr(numa_distance, size);
 	numa_distance_cnt = 0;
 	numa_distance = NULL;
 }
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 94fb63a7b357..fe63d5ee201b 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -570,7 +570,7 @@ fail_msg_node:
 fail_db_node:
 	of_node_put(smu->db_node);
 fail_bootmem:
-	memblock_free(__pa(smu), sizeof(struct smu_device));
+	memblock_free_ptr(smu, sizeof(struct smu_device));
 	smu = NULL;
 fail_np:
 	of_node_put(np);
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index b066024c62e3..34de69b3b8ba 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -118,6 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 
 void memblock_free_all(void);
+void memblock_free_ptr(void *ptr, size_t size);
 void reset_node_managed_pages(pg_data_t *pgdat);
 void reset_all_zones_managed_pages(void);
 
diff --git a/init/main.c b/init/main.c
index 5c9a48df90e1..3f7216934441 100644
--- a/init/main.c
+++ b/init/main.c
@@ -924,7 +924,7 @@ static void __init print_unknown_bootoptions(void)
 		end += sprintf(end, " %s", *p);
 
 	pr_notice("Unknown command line parameters:%s\n", unknown_options);
-	memblock_free(__pa(unknown_options), len);
+	memblock_free_ptr(unknown_options, len);
 }
 
 asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 825277e1e742..a8d0a58deebc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1166,9 +1166,9 @@ void __init setup_log_buf(int early)
 	return;
 
 err_free_descs:
-	memblock_free(__pa(new_descs), new_descs_size);
+	memblock_free_ptr(new_descs, new_descs_size);
 err_free_log_buf:
-	memblock_free(__pa(new_log_buf), new_log_buf_len);
+	memblock_free_ptr(new_log_buf, new_log_buf_len);
 }
 
 static bool __read_mostly ignore_loglevel;
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index f8419cff1147..5ae248b29373 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -792,7 +792,7 @@ void __init xbc_destroy_all(void)
 	xbc_data = NULL;
 	xbc_data_size = 0;
 	xbc_node_num = 0;
-	memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX);
+	memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
 	xbc_nodes = NULL;
 	brace_index = 0;
 }
diff --git a/mm/memblock.c b/mm/memblock.c
index 0ab5a749bfa6..184dcd2e5d99 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -472,7 +472,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 		kfree(old_array);
 	else if (old_array != memblock_memory_init_regions &&
 		 old_array != memblock_reserved_init_regions)
-		memblock_free(__pa(old_array), old_alloc_size);
+		memblock_free_ptr(old_array, old_alloc_size);
 
 	/*
 	 * Reserve the new array if that comes from the memblock.  Otherwise, we
@@ -795,6 +795,20 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
 	return memblock_remove_range(&memblock.memory, base, size);
 }
 
+/**
+ * memblock_free_ptr - free boot memory allocation
+ * @ptr: starting address of the  boot memory allocation
+ * @size: size of the boot memory block in bytes
+ *
+ * Free boot memory block previously allocated by memblock_alloc_xx() API.
+ * The freeing memory will not be released to the buddy allocator.
+ */
+void __init_memblock memblock_free_ptr(void *ptr, size_t size)
+{
+	if (ptr)
+		memblock_free(__pa(ptr), size);
+}
+
 /**
  * memblock_free - free boot memory block
  * @base: phys starting address of the  boot memory block
-- 
cgit v1.2.3


From 67f1e027c27054e641584655020a417eaac9cb3a Mon Sep 17 00:00:00 2001
From: Lukas Prediger <lumip@lumip.de>
Date: Tue, 14 Sep 2021 00:09:42 +0100
Subject: drivers/cdrom: improved ioctl for media change detection

The current implementation of the CDROM_MEDIA_CHANGED ioctl relies on
global state, meaning that only one process can detect a disc change
while the ioctl call will return 0 for other calling processes afterwards
(see bug 213267).

This introduces a new cdrom ioctl, CDROM_TIMED_MEDIA_CHANGE, that
works by maintaining a timestamp of the last detected disc change instead
of a boolean flag: Processes calling this ioctl command can provide
a timestamp of the last disc change known to them and receive
an indication whether the disc was changed since then and the updated
timestamp.

I considered fixing the buggy behavior in the original
CDROM_MEDIA_CHANGED ioctl but that would require maintaining state
for each calling process in the kernel, which seems like a worse
solution than introducing this new ioctl.

Signed-off-by: Lukas Prediger <lumip@lumip.de>
Link: https://lore.kernel.org/all/20210912191207.74449-1-lumip@lumip.de
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20210913230942.1188-1-phil@philpotter.co.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/cdrom/cdrom-standard.rst      | 11 ++++++
 Documentation/userspace-api/ioctl/cdrom.rst |  3 ++
 drivers/cdrom/cdrom.c                       | 59 +++++++++++++++++++++++++++--
 include/linux/cdrom.h                       |  1 +
 include/uapi/linux/cdrom.h                  | 19 ++++++++++
 5 files changed, 89 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst
index 5845960ca382..52ea7b6b2fe8 100644
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -907,6 +907,17 @@ commands can be identified by the underscores in their names.
 	specifies the slot for which the information is given. The special
 	value *CDSL_CURRENT* requests that information about the currently
 	selected slot be returned.
+`CDROM_TIMED_MEDIA_CHANGE`
+	Checks whether the disc has been changed since a user supplied time
+	and returns the time of the last disc change.
+
+	*arg* is a pointer to a *cdrom_timed_media_change_info* struct.
+	*arg->last_media_change* may be set by calling code to signal
+	the timestamp of the last known media change (by the caller).
+	Upon successful return, this ioctl call will set
+	*arg->last_media_change* to the latest media change timestamp (in ms)
+	known by the kernel/driver and set *arg->has_changed* to 1 if
+	that timestamp is more recent than the timestamp set by the caller.
 `CDROM_DRIVE_STATUS`
 	Returns the status of the drive by a call to
 	*drive_status()*. Return values are defined in cdrom_drive_status_.
diff --git a/Documentation/userspace-api/ioctl/cdrom.rst b/Documentation/userspace-api/ioctl/cdrom.rst
index 3b4c0506de46..bac5bbf93ca0 100644
--- a/Documentation/userspace-api/ioctl/cdrom.rst
+++ b/Documentation/userspace-api/ioctl/cdrom.rst
@@ -54,6 +54,9 @@ are as follows:
 	CDROM_SELECT_SPEED	Set the CD-ROM speed
 	CDROM_SELECT_DISC	Select disc (for juke-boxes)
 	CDROM_MEDIA_CHANGED	Check is media changed
+	CDROM_TIMED_MEDIA_CHANGE	Check if media changed
+					since given time
+					(struct cdrom_timed_media_change_info)
 	CDROM_DRIVE_STATUS	Get tray position, etc.
 	CDROM_DISC_STATUS	Get disc type, etc.
 	CDROM_CHANGER_NSLOTS	Get number of slots
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index bd2e5b1560f5..89a68457820a 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -344,6 +344,12 @@ static void cdrom_sysctl_register(void);
 
 static LIST_HEAD(cdrom_list);
 
+static void signal_media_change(struct cdrom_device_info *cdi)
+{
+	cdi->mc_flags = 0x3; /* set media changed bits, on both queues */
+	cdi->last_media_change_ms = ktime_to_ms(ktime_get());
+}
+
 int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi,
 			       struct packet_command *cgc)
 {
@@ -616,6 +622,7 @@ int register_cdrom(struct gendisk *disk, struct cdrom_device_info *cdi)
 	ENSURE(cdo, generic_packet, CDC_GENERIC_PACKET);
 	cdi->mc_flags = 0;
 	cdi->options = CDO_USE_FFLAGS;
+	cdi->last_media_change_ms = ktime_to_ms(ktime_get());
 
 	if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY))
 		cdi->options |= (int) CDO_AUTO_CLOSE;
@@ -1421,8 +1428,7 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
 		cdi->ops->check_events(cdi, 0, slot);
 
 	if (slot == CDSL_NONE) {
-		/* set media changed bits, on both queues */
-		cdi->mc_flags = 0x3;
+		signal_media_change(cdi);
 		return cdrom_load_unload(cdi, -1);
 	}
 
@@ -1455,7 +1461,7 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot)
 		slot = curslot;
 
 	/* set media changed bits on both queues */
-	cdi->mc_flags = 0x3;
+	signal_media_change(cdi);
 	if ((ret = cdrom_load_unload(cdi, slot)))
 		return ret;
 
@@ -1521,7 +1527,7 @@ int media_changed(struct cdrom_device_info *cdi, int queue)
 	cdi->ioctl_events = 0;
 
 	if (changed) {
-		cdi->mc_flags = 0x3;    /* set bit on both queues */
+		signal_media_change(cdi);
 		ret |= 1;
 		cdi->media_written = 0;
 	}
@@ -2336,6 +2342,49 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi,
 	return ret;
 }
 
+/*
+ * Media change detection with timing information.
+ *
+ * arg is a pointer to a cdrom_timed_media_change_info struct.
+ * arg->last_media_change may be set by calling code to signal
+ * the timestamp (in ms) of the last known media change (by the caller).
+ * Upon successful return, ioctl call will set arg->last_media_change
+ * to the latest media change timestamp known by the kernel/driver
+ * and set arg->has_changed to 1 if that timestamp is more recent
+ * than the timestamp set by the caller.
+ */
+static int cdrom_ioctl_timed_media_change(struct cdrom_device_info *cdi,
+		unsigned long arg)
+{
+	int ret;
+	struct cdrom_timed_media_change_info __user *info;
+	struct cdrom_timed_media_change_info tmp_info;
+
+	if (!CDROM_CAN(CDC_MEDIA_CHANGED))
+		return -ENOSYS;
+
+	info = (struct cdrom_timed_media_change_info __user *)arg;
+	cd_dbg(CD_DO_IOCTL, "entering CDROM_TIMED_MEDIA_CHANGE\n");
+
+	ret = cdrom_ioctl_media_changed(cdi, CDSL_CURRENT);
+	if (ret < 0)
+		return ret;
+
+	if (copy_from_user(&tmp_info, info, sizeof(tmp_info)) != 0)
+		return -EFAULT;
+
+	tmp_info.media_flags = 0;
+	if (tmp_info.last_media_change - cdi->last_media_change_ms < 0)
+		tmp_info.media_flags |= MEDIA_CHANGED_FLAG;
+
+	tmp_info.last_media_change = cdi->last_media_change_ms;
+
+	if (copy_to_user(info, &tmp_info, sizeof(*info)) != 0)
+		return -EFAULT;
+
+	return 0;
+}
+
 static int cdrom_ioctl_set_options(struct cdrom_device_info *cdi,
 		unsigned long arg)
 {
@@ -3313,6 +3362,8 @@ int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev,
 		return cdrom_ioctl_eject_sw(cdi, arg);
 	case CDROM_MEDIA_CHANGED:
 		return cdrom_ioctl_media_changed(cdi, arg);
+	case CDROM_TIMED_MEDIA_CHANGE:
+		return cdrom_ioctl_timed_media_change(cdi, arg);
 	case CDROM_SET_OPTIONS:
 		return cdrom_ioctl_set_options(cdi, arg);
 	case CDROM_CLEAR_OPTIONS:
diff --git a/include/linux/cdrom.h b/include/linux/cdrom.h
index c4fef00abdf3..0a89f111e00e 100644
--- a/include/linux/cdrom.h
+++ b/include/linux/cdrom.h
@@ -64,6 +64,7 @@ struct cdrom_device_info {
 	int for_data;
 	int (*exit)(struct cdrom_device_info *);
 	int mrw_mode_page;
+	__s64 last_media_change_ms;
 };
 
 struct cdrom_device_ops {
diff --git a/include/uapi/linux/cdrom.h b/include/uapi/linux/cdrom.h
index 6c34f6e2f1f7..804ff8d98f71 100644
--- a/include/uapi/linux/cdrom.h
+++ b/include/uapi/linux/cdrom.h
@@ -147,6 +147,8 @@
 #define CDROM_NEXT_WRITABLE	0x5394	/* get next writable block */
 #define CDROM_LAST_WRITTEN	0x5395	/* get last block written on disc */
 
+#define CDROM_TIMED_MEDIA_CHANGE   0x5396  /* get the timestamp of the last media change */
+
 /*******************************************************
  * CDROM IOCTL structures
  *******************************************************/
@@ -295,6 +297,23 @@ struct cdrom_generic_command
 	};
 };
 
+/* This struct is used by CDROM_TIMED_MEDIA_CHANGE */
+struct cdrom_timed_media_change_info {
+	__s64	last_media_change;	/* Timestamp of the last detected media
+					 * change in ms. May be set by caller,
+					 * updated upon successful return of
+					 * ioctl.
+					 */
+	__u64	media_flags;		/* Flags returned by ioctl to indicate
+					 * media status.
+					 */
+};
+#define MEDIA_CHANGED_FLAG	0x1	/* Last detected media change was more
+					 * recent than last_media_change set by
+					 * caller.
+					 */
+/* other bits of media_flags available for future use */
+
 /*
  * A CD-ROM physical sector size is 2048, 2052, 2056, 2324, 2332, 2336, 
  * 2340, or 2352 bytes long.  
-- 
cgit v1.2.3


From e25b694bf1d9ef4a3f36c0b85348f8e780f22139 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 24 Jun 2021 11:41:05 +0200
Subject: x86: Always inline context_tracking_guest_enter()

Yes, it really did out-of-line this....

vmlinux.o: warning: objtool: vmx_vcpu_enter_exit()+0x31: call to context_tracking_guest_enter() leaves .noinstr.text section

000000000019f660 <context_tracking_guest_enter>:
  19f660:	e8 00 00 00 00       	callq  19f665 <context_tracking_guest_enter+0x5>	19f661: R_X86_64_PLT32	__sanitizer_cov_trace_pc-0x4
  19f665:	31 c0                	xor    %eax,%eax
  19f667:	c3                   	retq

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210624095148.003928226@infradead.org
---
 include/linux/context_tracking.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index 4d7fced3a39f..7a14807c9d1a 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -105,7 +105,7 @@ static inline void user_exit_irqoff(void) { }
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
-static inline bool context_tracking_guest_enter(void) { return false; }
+static __always_inline bool context_tracking_guest_enter(void) { return false; }
 static inline void context_tracking_guest_exit(void) { }
 
 #endif /* !CONFIG_CONTEXT_TRACKING */
-- 
cgit v1.2.3


From 7dedd3e18077f996a10c47250ac85d080e5f474e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 10 Sep 2021 11:19:58 -0600
Subject: Revert "iov_iter: track truncated size"

This reverts commit 2112ff5ce0c1128fe7b4d19cfe7f2b8ce5b595fa.

We no longer need to track the truncation count, the one user that did
need it has been converted to using iov_iter_restore() instead.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/uio.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 984c4ab74859..207101a9c5c3 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -53,7 +53,6 @@ struct iov_iter {
 		};
 		loff_t xarray_start;
 	};
-	size_t truncated;
 };
 
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
@@ -270,10 +269,8 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
 	 * conversion in assignement is by definition greater than all
 	 * values of size_t, including old i->count.
 	 */
-	if (i->count > count) {
-		i->truncated += i->count - count;
+	if (i->count > count)
 		i->count = count;
-	}
 }
 
 /*
@@ -282,7 +279,6 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
  */
 static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 {
-	i->truncated -= count - i->count;
 	i->count = count;
 }
 
-- 
cgit v1.2.3


From a5c071ccfa1728508f31e61213ee795e4529d0d4 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 28 Jul 2021 12:28:27 -0700
Subject: rcu-tasks: Remove second argument of rcu_read_unlock_trace_special()

The second argument of rcu_read_unlock_trace_special() is always zero.
When called from exit_tasks_rcu_finish_trace(), it is the constant
zero, and rcu_read_unlock_trace_special() doesn't get called from
rcu_read_unlock_trace() unless the value of local variable "nesting"
is zero because in that case the early return is taken instead.

This commit therefore removes the "nesting" argument from the
rcu_read_unlock_trace_special() function, substituting the constant
zero within that function.  This commit also adds a WARN_ON_ONCE()
to rcu_read_lock_trace_held() in case non-zeroness some day appears.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate_trace.h | 5 +++--
 kernel/rcu/tasks.h             | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index 86c8f6c98412..6f9c35817398 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -31,7 +31,7 @@ static inline int rcu_read_lock_trace_held(void)
 
 #ifdef CONFIG_TASKS_TRACE_RCU
 
-void rcu_read_unlock_trace_special(struct task_struct *t, int nesting);
+void rcu_read_unlock_trace_special(struct task_struct *t);
 
 /**
  * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
@@ -80,7 +80,8 @@ static inline void rcu_read_unlock_trace(void)
 		WRITE_ONCE(t->trc_reader_nesting, nesting);
 		return;  // We assume shallow reader nesting.
 	}
-	rcu_read_unlock_trace_special(t, nesting);
+	WARN_ON_ONCE(nesting != 0);
+	rcu_read_unlock_trace_special(t);
 }
 
 void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 8387e70e6b00..a3f4f9bd8c67 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -848,7 +848,7 @@ static void rcu_read_unlock_iw(struct irq_work *iwp)
 static DEFINE_IRQ_WORK(rcu_tasks_trace_iw, rcu_read_unlock_iw);
 
 /* If we are the last reader, wake up the grace-period kthread. */
-void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
+void rcu_read_unlock_trace_special(struct task_struct *t)
 {
 	int nq = READ_ONCE(t->trc_reader_special.b.need_qs);
 
@@ -858,7 +858,7 @@ void rcu_read_unlock_trace_special(struct task_struct *t, int nesting)
 	// Update .need_qs before ->trc_reader_nesting for irq/NMI handlers.
 	if (nq)
 		WRITE_ONCE(t->trc_reader_special.b.need_qs, false);
-	WRITE_ONCE(t->trc_reader_nesting, nesting);
+	WRITE_ONCE(t->trc_reader_nesting, 0);
 	if (nq && atomic_dec_and_test(&trc_n_readers_need_end))
 		irq_work_queue(&rcu_tasks_trace_iw);
 }
@@ -1200,7 +1200,7 @@ static void exit_tasks_rcu_finish_trace(struct task_struct *t)
 	WARN_ON_ONCE(READ_ONCE(t->trc_reader_nesting));
 	WRITE_ONCE(t->trc_reader_nesting, 0);
 	if (WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs)))
-		rcu_read_unlock_trace_special(t, 0);
+		rcu_read_unlock_trace_special(t);
 }
 
 /**
-- 
cgit v1.2.3


From f6b5f1a56987de837f8e25cd560847106b8632a8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 14 Sep 2021 20:52:24 -0700
Subject: compiler.h: Introduce absolute_pointer macro

absolute_pointer() disassociates a pointer from its originating symbol
type and context. Use it to prevent compiler warnings/errors such as

  drivers/net/ethernet/i825xx/82596.c: In function 'i82596_probe':
  arch/m68k/include/asm/string.h:72:25: error:
	'__builtin_memcpy' reading 6 bytes from a region of size 0 [-Werror=stringop-overread]

Such warnings may be reported by gcc 11.x for string and memory
operations on fixed addresses.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b67261a1e3e9..3d5af56337bd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -188,6 +188,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
     (typeof(ptr)) (__ptr + (off)); })
 #endif
 
+#define absolute_pointer(val)	RELOC_HIDE((void *)(val), 0)
+
 #ifndef OPTIMIZER_HIDE_VAR
 /* Make the optimizer believe the variable can be manipulated arbitrarily. */
 #define OPTIMIZER_HIDE_VAR(var)						\
-- 
cgit v1.2.3


From 925da92ba5cb0c82d07cdd5049a07e40f54e9c44 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Thu, 26 Aug 2021 22:21:22 -0400
Subject: rcu: Avoid unneeded function call in rcu_read_unlock()

Since commit aa40c138cc8f3 ("rcu: Report QS for outermost PREEMPT=n
rcu_read_unlock() for strict GPs") the function rcu_read_unlock_strict()
is invoked by the inlined rcu_read_unlock() function.  However,
rcu_read_unlock_strict() is an empty function in production kernels,
which are built with CONFIG_RCU_STRICT_GRACE_PERIOD=n.

There is a mention of rcu_read_unlock_strict() in the BPF verifier,
but this is in a deny-list, meaning that BPF does not care whether
rcu_read_unlock_strict() is ever called.

This commit therefore provides a slight performance improvement
by hoisting the check of CONFIG_RCU_STRICT_GRACE_PERIOD from
rcu_read_unlock_strict() into rcu_read_unlock(), thus avoiding the
pointless call to an empty function.

Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 include/linux/rcupdate.h | 3 ++-
 kernel/rcu/tree_plugin.h | 3 +--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 434d12fe2d4f..5e0beb5c5659 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -71,7 +71,8 @@ static inline void __rcu_read_lock(void)
 static inline void __rcu_read_unlock(void)
 {
 	preempt_enable();
-	rcu_read_unlock_strict();
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		rcu_read_unlock_strict();
 }
 
 static inline int rcu_preempt_depth(void)
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index d070059163d7..1a6fdb03d0a5 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -814,8 +814,7 @@ void rcu_read_unlock_strict(void)
 {
 	struct rcu_data *rdp;
 
-	if (!IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
-	   irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
+	if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
 		return;
 	rdp = this_cpu_ptr(&rcu_data);
 	rcu_report_qs_rdp(rdp);
-- 
cgit v1.2.3


From 8dc84dcd7f74b50f81de3dbf6f6b5b146e3a8eea Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 16 Sep 2021 14:27:41 -0700
Subject: net: phy: broadcom: Enable 10BaseT DAC early wake

Enable the DAC early wake when then link operates at 10BaseT allows
power savings in the hundreds of milli Watts by shutting down the
transmitter. A number of errata have been issued for various Gigabit
PHYs and the recommendation is to enable both the early and forced DAC
wake to be on the safe side. This needs to be done dynamically based
upon the link state, which is why a link_change_notify callback is
utilized.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20210916212742.1653088-1-f.fainelli@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/broadcom.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/brcmphy.h    |  1 +
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 83aea5c5cd03..add0c4e33425 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -702,6 +702,36 @@ static void bcm54xx_get_stats(struct phy_device *phydev,
 	bcm_phy_get_stats(phydev, priv->stats, stats, data);
 }
 
+static void bcm54xx_link_change_notify(struct phy_device *phydev)
+{
+	u16 mask = MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE |
+		   MII_BCM54XX_EXP_EXP08_FORCE_DAC_WAKE;
+	int ret;
+
+	if (phydev->state != PHY_RUNNING)
+		return;
+
+	/* Don't change the DAC wake settings if auto power down
+	 * is not requested.
+	 */
+	if (!(phydev->dev_flags & PHY_BRCM_AUTO_PWRDWN_ENABLE))
+		return;
+
+	ret = bcm_phy_read_exp(phydev, MII_BCM54XX_EXP_EXP08);
+	if (ret < 0)
+		return;
+
+	/* Enable/disable 10BaseT auto and forced early DAC wake depending
+	 * on the negotiated speed, those settings should only be done
+	 * for 10Mbits/sec.
+	 */
+	if (phydev->speed == SPEED_10)
+		ret |= mask;
+	else
+		ret &= ~mask;
+	bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_EXP08, ret);
+}
+
 static struct phy_driver broadcom_drivers[] = {
 {
 	.phy_id		= PHY_ID_BCM5411,
@@ -715,6 +745,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM5421,
 	.phy_id_mask	= 0xfffffff0,
@@ -727,6 +758,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM54210E,
 	.phy_id_mask	= 0xfffffff0,
@@ -739,6 +771,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM5461,
 	.phy_id_mask	= 0xfffffff0,
@@ -751,6 +784,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM54612E,
 	.phy_id_mask	= 0xfffffff0,
@@ -763,6 +797,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM54616S,
 	.phy_id_mask	= 0xfffffff0,
@@ -774,6 +809,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.handle_interrupt = bcm_phy_handle_interrupt,
 	.read_status	= bcm54616s_read_status,
 	.probe		= bcm54616s_probe,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM5464,
 	.phy_id_mask	= 0xfffffff0,
@@ -788,6 +824,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.handle_interrupt = bcm_phy_handle_interrupt,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM5481,
 	.phy_id_mask	= 0xfffffff0,
@@ -801,6 +838,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_aneg	= bcm5481_config_aneg,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id         = PHY_ID_BCM54810,
 	.phy_id_mask    = 0xfffffff0,
@@ -816,6 +854,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.handle_interrupt = bcm_phy_handle_interrupt,
 	.suspend	= genphy_suspend,
 	.resume		= bcm54xx_resume,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id         = PHY_ID_BCM54811,
 	.phy_id_mask    = 0xfffffff0,
@@ -831,6 +870,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.handle_interrupt = bcm_phy_handle_interrupt,
 	.suspend	= genphy_suspend,
 	.resume		= bcm54xx_resume,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM5482,
 	.phy_id_mask	= 0xfffffff0,
@@ -843,6 +883,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM50610,
 	.phy_id_mask	= 0xfffffff0,
@@ -855,6 +896,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM50610M,
 	.phy_id_mask	= 0xfffffff0,
@@ -867,6 +909,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM57780,
 	.phy_id_mask	= 0xfffffff0,
@@ -879,6 +922,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCMAC131,
 	.phy_id_mask	= 0xfffffff0,
@@ -905,6 +949,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.get_strings	= bcm_phy_get_strings,
 	.get_stats	= bcm54xx_get_stats,
 	.probe		= bcm54xx_phy_probe,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id		= PHY_ID_BCM53125,
 	.phy_id_mask	= 0xfffffff0,
@@ -918,6 +963,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init	= bcm54xx_config_init,
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 }, {
 	.phy_id         = PHY_ID_BCM89610,
 	.phy_id_mask    = 0xfffffff0,
@@ -930,6 +976,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_init    = bcm54xx_config_init,
 	.config_intr    = bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
+	.link_change_notify	= bcm54xx_link_change_notify,
 } };
 
 module_phy_driver(broadcom_drivers);
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index c2c2147dfeb8..3308cebe1c19 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -233,6 +233,7 @@
 #define MII_BCM54XX_EXP_EXP08			0x0F08
 #define  MII_BCM54XX_EXP_EXP08_RJCT_2MHZ	0x0001
 #define  MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE	0x0200
+#define  MII_BCM54XX_EXP_EXP08_FORCE_DAC_WAKE	0x0100
 #define MII_BCM54XX_EXP_EXP75			0x0f75
 #define  MII_BCM54XX_EXP_EXP75_VDACCTRL		0x003c
 #define  MII_BCM54XX_EXP_EXP75_CM_OSC		0x0001
-- 
cgit v1.2.3


From 3c9cfb5269f76d447dbadb67835368f3111a91d7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 14:17:35 +0300
Subject: net: update NXP copyright text

NXP Legal insists that the following are not fine:

- Saying "NXP Semiconductors" instead of "NXP", since the company's
  registered name is "NXP"

- Putting a "(c)" sign in the copyright string

- Putting a comma in the copyright string

The only accepted copyright string format is "Copyright <year-range> NXP".

This patch changes the copyright headers in the networking files that
were sent by me, or derived from code sent by me.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c                                 | 2 +-
 drivers/net/dsa/ocelot/felix.h                                 | 2 +-
 drivers/net/dsa/ocelot/felix_vsc9959.c                         | 2 +-
 drivers/net/dsa/sja1105/sja1105_clocking.c                     | 2 +-
 drivers/net/dsa/sja1105/sja1105_devlink.c                      | 2 +-
 drivers/net/dsa/sja1105/sja1105_flower.c                       | 2 +-
 drivers/net/dsa/sja1105/sja1105_mdio.c                         | 2 +-
 drivers/net/dsa/sja1105/sja1105_spi.c                          | 2 +-
 drivers/net/dsa/sja1105/sja1105_static_config.c                | 2 +-
 drivers/net/dsa/sja1105/sja1105_static_config.h                | 2 +-
 drivers/net/dsa/sja1105/sja1105_vl.c                           | 2 +-
 drivers/net/dsa/sja1105/sja1105_vl.h                           | 2 +-
 drivers/net/ethernet/freescale/enetc/enetc_ierb.c              | 2 +-
 drivers/net/ethernet/freescale/enetc/enetc_ierb.h              | 2 +-
 drivers/net/ethernet/mscc/ocelot_devlink.c                     | 2 +-
 drivers/net/ethernet/mscc/ocelot_mrp.c                         | 2 +-
 drivers/net/ethernet/mscc/ocelot_net.c                         | 2 +-
 drivers/net/pcs/pcs-xpcs-nxp.c                                 | 2 +-
 include/linux/dsa/ocelot.h                                     | 2 +-
 include/linux/packing.h                                        | 2 +-
 lib/packing.c                                                  | 2 +-
 net/dsa/tag_ocelot.c                                           | 2 +-
 net/dsa/tag_ocelot_8021q.c                                     | 2 +-
 tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh | 2 +-
 24 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 3656e67af789..a3a9636430d6 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019-2021 NXP Semiconductors
+/* Copyright 2019-2021 NXP
  *
  * This is an umbrella module for all network switches that are
  * register-compatible with Ocelot and that perform I/O to their host CPU
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 5854bab43327..54024b6f9498 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
  */
 #ifndef _MSCC_FELIX_H
 #define _MSCC_FELIX_H
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index f966a253d1c7..9e2ac8e46619 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
 /* Copyright 2017 Microsemi Corporation
- * Copyright 2018-2019 NXP Semiconductors
+ * Copyright 2018-2019 NXP
  */
 #include <linux/fsl/enetc_mdio.h>
 #include <soc/mscc/ocelot_qsys.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 387a1f2f161c..5bbf1707f2af 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #include <linux/packing.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c
index 05c7f4ca3b1a..0569ff066634 100644
--- a/drivers/net/dsa/sja1105/sja1105_devlink.c
+++ b/drivers/net/dsa/sja1105/sja1105_devlink.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
- * Copyright 2020 NXP Semiconductors
+ * Copyright 2020 NXP
  */
 #include "sja1105.h"
 
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 6c10ffa968ce..72b9b39b0989 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020, NXP Semiconductors
+/* Copyright 2020 NXP
  */
 #include "sja1105.h"
 #include "sja1105_vl.h"
diff --git a/drivers/net/dsa/sja1105/sja1105_mdio.c b/drivers/net/dsa/sja1105/sja1105_mdio.c
index 705d3900e43a..215dd17ca790 100644
--- a/drivers/net/dsa/sja1105/sja1105_mdio.c
+++ b/drivers/net/dsa/sja1105/sja1105_mdio.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2021, NXP Semiconductors
+/* Copyright 2021 NXP
  */
 #include <linux/pcs/pcs-xpcs.h>
 #include <linux/of_mdio.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index d60a530d0272..d3c9ad6d39d4 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018, Sensor-Technik Wiedemann GmbH
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 7a422ef4deb6..baba204ad62f 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #include "sja1105_static_config.h"
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index bce0f5c03d0b..6a372d5f22ae 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause */
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _SJA1105_STATIC_CONFIG_H
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index ec7b65daec20..6802f4057cc0 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020, NXP Semiconductors
+/* Copyright 2020 NXP
  */
 #include <net/tc_act/tc_gate.h>
 #include <linux/dsa/8021q.h>
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.h b/drivers/net/dsa/sja1105/sja1105_vl.h
index 173d78963fed..51fba0dce91a 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.h
+++ b/drivers/net/dsa/sja1105/sja1105_vl.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright 2020, NXP Semiconductors
+/* Copyright 2020 NXP
  */
 #ifndef _SJA1105_VL_H
 #define _SJA1105_VL_H
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
index ee1468e3eaa3..91f02c505028 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
-/* Copyright 2021 NXP Semiconductors
+/* Copyright 2021 NXP
  *
  * The Integrated Endpoint Register Block (IERB) is configured by pre-boot
  * software and is supposed to be to ENETC what a NVRAM is to a 'real' PCIe
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
index b3b774e0998a..c2ce47c4be9f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ierb.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/* Copyright 2021 NXP Semiconductors */
+/* Copyright 2021 NXP */
 
 #include <linux/pci.h>
 #include <linux/platform_device.h>
diff --git a/drivers/net/ethernet/mscc/ocelot_devlink.c b/drivers/net/ethernet/mscc/ocelot_devlink.c
index edafbd37d12c..b8737efd2a85 100644
--- a/drivers/net/ethernet/mscc/ocelot_devlink.c
+++ b/drivers/net/ethernet/mscc/ocelot_devlink.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: (GPL-2.0 OR MIT)
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
  */
 #include <net/devlink.h>
 #include "ocelot.h"
diff --git a/drivers/net/ethernet/mscc/ocelot_mrp.c b/drivers/net/ethernet/mscc/ocelot_mrp.c
index 08b481a93460..4b0941f09f71 100644
--- a/drivers/net/ethernet/mscc/ocelot_mrp.c
+++ b/drivers/net/ethernet/mscc/ocelot_mrp.c
@@ -2,7 +2,7 @@
 /* Microsemi Ocelot Switch driver
  *
  * Copyright (c) 2017, 2019 Microsemi Corporation
- * Copyright 2020-2021 NXP Semiconductors
+ * Copyright 2020-2021 NXP
  */
 
 #include <linux/if_bridge.h>
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index c0c465a4a981..e54b9fb2a97a 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -5,7 +5,7 @@
  * mscc_ocelot_switch_lib.
  *
  * Copyright (c) 2017, 2019 Microsemi Corporation
- * Copyright 2020-2021 NXP Semiconductors
+ * Copyright 2020-2021 NXP
  */
 
 #include <linux/if_bridge.h>
diff --git a/drivers/net/pcs/pcs-xpcs-nxp.c b/drivers/net/pcs/pcs-xpcs-nxp.c
index 984c9f7f16a8..d16fc58cd48d 100644
--- a/drivers/net/pcs/pcs-xpcs-nxp.c
+++ b/drivers/net/pcs/pcs-xpcs-nxp.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2021 NXP Semiconductors
+/* Copyright 2021 NXP
  */
 #include <linux/pcs/pcs-xpcs.h>
 #include "pcs-xpcs.h"
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index c6bc45ae5e03..435777a0073c 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0
- * Copyright 2019-2021 NXP Semiconductors
+ * Copyright 2019-2021 NXP
  */
 
 #ifndef _NET_DSA_TAG_OCELOT_H
diff --git a/include/linux/packing.h b/include/linux/packing.h
index 54667735cc67..8d6571feb95d 100644
--- a/include/linux/packing.h
+++ b/include/linux/packing.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2016-2018, NXP Semiconductors
+ * Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #ifndef _LINUX_PACKING_H
diff --git a/lib/packing.c b/lib/packing.c
index 6ed72dccfdb5..9a72f4bbf0e2 100644
--- a/lib/packing.c
+++ b/lib/packing.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
-/* Copyright (c) 2016-2018, NXP Semiconductors
+/* Copyright 2016-2018 NXP
  * Copyright (c) 2018-2019, Vladimir Oltean <olteanv@gmail.com>
  */
 #include <linux/packing.h>
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index d37ab98e7fe1..8025ed778d33 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
  */
 #include <linux/dsa/ocelot.h>
 #include <soc/mscc/ocelot.h>
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 3038a257ba05..59072930cb02 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
  *
  * An implementation of the software-defined tag_8021q.c tagger format, which
  * also preserves full functionality under a vlan_filtering bridge. It does
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..f7d84549cc3e 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
 
 WAIT_TIME=1
 NUM_NETIFS=4
-- 
cgit v1.2.3


From 12235da8c80a1f9909008e4ca6036d5772b81192 Mon Sep 17 00:00:00 2001
From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Date: Thu, 9 Sep 2021 11:32:18 +0200
Subject: kernel/locking: Add context to ww_mutex_trylock()

i915 will soon gain an eviction path that trylock a whole lot of locks
for eviction, getting dmesg failures like below:

  BUG: MAX_LOCK_DEPTH too low!
  turning off the locking correctness validator.
  depth: 48  max: 48!
  48 locks held by i915_selftest/5776:
   #0: ffff888101a79240 (&dev->mutex){....}-{3:3}, at: __driver_attach+0x88/0x160
   #1: ffffc900009778c0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: i915_vma_pin.constprop.63+0x39/0x1b0 [i915]
   #2: ffff88800cf74de8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_vma_pin.constprop.63+0x5f/0x1b0 [i915]
   #3: ffff88810c7f9e38 (&vm->mutex/1){+.+.}-{3:3}, at: i915_vma_pin_ww+0x1c4/0x9d0 [i915]
   #4: ffff88810bad5768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915]
   #5: ffff88810bad60e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915]
  ...
   #46: ffff88811964d768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915]
   #47: ffff88811964e0e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: i915_gem_evict_something+0x110/0x860 [i915]
  INFO: lockdep is turned off.

Fixing eviction to nest into ww_class_acquire is a high priority, but
it requires a rework of the entire driver, which can only be done one
step at a time.

As an intermediate solution, add an acquire context to
ww_mutex_trylock, which allows us to do proper nesting annotations on
the trylocks, making the above lockdep splat disappear.

This is also useful in regulator_lock_nested, which may avoid dropping
regulator_nesting_mutex in the uncontended path, so use it there.

TTM may be another user for this, where we could lock a buffer in a
fastpath with list locks held, without dropping all locks we hold.

[peterz: rework actual ww_mutex_trylock() implementations]
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/YUBGPdDDjKlxAuXJ@hirez.programming.kicks-ass.net
---
 drivers/gpu/drm/drm_modeset_lock.c |  2 +-
 drivers/regulator/core.c           |  2 +-
 include/linux/dma-resv.h           |  2 +-
 include/linux/ww_mutex.h           | 15 +------
 kernel/locking/mutex.c             | 41 ++++++++++++++++++
 kernel/locking/test-ww_mutex.c     | 86 ++++++++++++++++++++++++++++----------
 kernel/locking/ww_rt_mutex.c       | 25 +++++++++++
 lib/locking-selftest.c             |  2 +-
 8 files changed, 137 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index fcfe1a03c4a1..bf8a6e823a15 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -248,7 +248,7 @@ static inline int modeset_lock(struct drm_modeset_lock *lock,
 	if (ctx->trylock_only) {
 		lockdep_assert_held(&ctx->ww_ctx);
 
-		if (!ww_mutex_trylock(&lock->mutex))
+		if (!ww_mutex_trylock(&lock->mutex, NULL))
 			return -EBUSY;
 		else
 			return 0;
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index ca6caba8a191..f4d441b1a8bf 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -145,7 +145,7 @@ static inline int regulator_lock_nested(struct regulator_dev *rdev,
 
 	mutex_lock(&regulator_nesting_mutex);
 
-	if (ww_ctx || !ww_mutex_trylock(&rdev->mutex)) {
+	if (!ww_mutex_trylock(&rdev->mutex, ww_ctx)) {
 		if (rdev->mutex_owner == current)
 			rdev->ref_cnt++;
 		else
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index e1ca2080a1ff..39fefb86780b 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -173,7 +173,7 @@ static inline int dma_resv_lock_slow_interruptible(struct dma_resv *obj,
  */
 static inline bool __must_check dma_resv_trylock(struct dma_resv *obj)
 {
-	return ww_mutex_trylock(&obj->lock);
+	return ww_mutex_trylock(&obj->lock, NULL);
 }
 
 /**
diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h
index 29db736af86d..bb763085479a 100644
--- a/include/linux/ww_mutex.h
+++ b/include/linux/ww_mutex.h
@@ -28,12 +28,10 @@
 #ifndef CONFIG_PREEMPT_RT
 #define WW_MUTEX_BASE			mutex
 #define ww_mutex_base_init(l,n,k)	__mutex_init(l,n,k)
-#define ww_mutex_base_trylock(l)	mutex_trylock(l)
 #define ww_mutex_base_is_locked(b)	mutex_is_locked((b))
 #else
 #define WW_MUTEX_BASE			rt_mutex
 #define ww_mutex_base_init(l,n,k)	__rt_mutex_init(l,n,k)
-#define ww_mutex_base_trylock(l)	rt_mutex_trylock(l)
 #define ww_mutex_base_is_locked(b)	rt_mutex_base_is_locked(&(b)->rtmutex)
 #endif
 
@@ -339,17 +337,8 @@ ww_mutex_lock_slow_interruptible(struct ww_mutex *lock,
 
 extern void ww_mutex_unlock(struct ww_mutex *lock);
 
-/**
- * ww_mutex_trylock - tries to acquire the w/w mutex without acquire context
- * @lock: mutex to lock
- *
- * Trylocks a mutex without acquire context, so no deadlock detection is
- * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise.
- */
-static inline int __must_check ww_mutex_trylock(struct ww_mutex *lock)
-{
-	return ww_mutex_base_trylock(&lock->base);
-}
+extern int __must_check ww_mutex_trylock(struct ww_mutex *lock,
+					 struct ww_acquire_ctx *ctx);
 
 /***
  * ww_mutex_destroy - mark a w/w mutex unusable
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index d456579d0952..2fede72b6af5 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -94,6 +94,9 @@ static inline unsigned long __owner_flags(unsigned long owner)
 	return owner & MUTEX_FLAGS;
 }
 
+/*
+ * Returns: __mutex_owner(lock) on failure or NULL on success.
+ */
 static inline struct task_struct *__mutex_trylock_common(struct mutex *lock, bool handoff)
 {
 	unsigned long owner, curr = (unsigned long)current;
@@ -736,6 +739,44 @@ __ww_mutex_lock(struct mutex *lock, unsigned int state, unsigned int subclass,
 	return __mutex_lock_common(lock, state, subclass, NULL, ip, ww_ctx, true);
 }
 
+/**
+ * ww_mutex_trylock - tries to acquire the w/w mutex with optional acquire context
+ * @ww: mutex to lock
+ * @ww_ctx: optional w/w acquire context
+ *
+ * Trylocks a mutex with the optional acquire context; no deadlock detection is
+ * possible. Returns 1 if the mutex has been acquired successfully, 0 otherwise.
+ *
+ * Unlike ww_mutex_lock, no deadlock handling is performed. However, if a @ctx is
+ * specified, -EALREADY handling may happen in calls to ww_mutex_trylock.
+ *
+ * A mutex acquired with this function must be released with ww_mutex_unlock.
+ */
+int ww_mutex_trylock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
+{
+	if (!ww_ctx)
+		return mutex_trylock(&ww->base);
+
+	MUTEX_WARN_ON(ww->base.magic != &ww->base);
+
+	/*
+	 * Reset the wounded flag after a kill. No other process can
+	 * race and wound us here, since they can't have a valid owner
+	 * pointer if we don't have any locks held.
+	 */
+	if (ww_ctx->acquired == 0)
+		ww_ctx->wounded = 0;
+
+	if (__mutex_trylock(&ww->base)) {
+		ww_mutex_set_context_fastpath(ww, ww_ctx);
+		mutex_acquire_nest(&ww->base.dep_map, 0, 1, &ww_ctx->dep_map, _RET_IP_);
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ww_mutex_trylock);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void __sched
 mutex_lock_nested(struct mutex *lock, unsigned int subclass)
diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c
index 3e82f449b4ff..d63ac411f367 100644
--- a/kernel/locking/test-ww_mutex.c
+++ b/kernel/locking/test-ww_mutex.c
@@ -16,6 +16,15 @@
 static DEFINE_WD_CLASS(ww_class);
 struct workqueue_struct *wq;
 
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+#define ww_acquire_init_noinject(a, b) do { \
+		ww_acquire_init((a), (b)); \
+		(a)->deadlock_inject_countdown = ~0U; \
+	} while (0)
+#else
+#define ww_acquire_init_noinject(a, b) ww_acquire_init((a), (b))
+#endif
+
 struct test_mutex {
 	struct work_struct work;
 	struct ww_mutex mutex;
@@ -36,7 +45,7 @@ static void test_mutex_work(struct work_struct *work)
 	wait_for_completion(&mtx->go);
 
 	if (mtx->flags & TEST_MTX_TRY) {
-		while (!ww_mutex_trylock(&mtx->mutex))
+		while (!ww_mutex_trylock(&mtx->mutex, NULL))
 			cond_resched();
 	} else {
 		ww_mutex_lock(&mtx->mutex, NULL);
@@ -109,19 +118,38 @@ static int test_mutex(void)
 	return 0;
 }
 
-static int test_aa(void)
+static int test_aa(bool trylock)
 {
 	struct ww_mutex mutex;
 	struct ww_acquire_ctx ctx;
 	int ret;
+	const char *from = trylock ? "trylock" : "lock";
 
 	ww_mutex_init(&mutex, &ww_class);
 	ww_acquire_init(&ctx, &ww_class);
 
-	ww_mutex_lock(&mutex, &ctx);
+	if (!trylock) {
+		ret = ww_mutex_lock(&mutex, &ctx);
+		if (ret) {
+			pr_err("%s: initial lock failed!\n", __func__);
+			goto out;
+		}
+	} else {
+		if (!ww_mutex_trylock(&mutex, &ctx)) {
+			pr_err("%s: initial trylock failed!\n", __func__);
+			goto out;
+		}
+	}
 
-	if (ww_mutex_trylock(&mutex))  {
-		pr_err("%s: trylocked itself!\n", __func__);
+	if (ww_mutex_trylock(&mutex, NULL))  {
+		pr_err("%s: trylocked itself without context from %s!\n", __func__, from);
+		ww_mutex_unlock(&mutex);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (ww_mutex_trylock(&mutex, &ctx))  {
+		pr_err("%s: trylocked itself with context from %s!\n", __func__, from);
 		ww_mutex_unlock(&mutex);
 		ret = -EINVAL;
 		goto out;
@@ -129,17 +157,17 @@ static int test_aa(void)
 
 	ret = ww_mutex_lock(&mutex, &ctx);
 	if (ret != -EALREADY) {
-		pr_err("%s: missed deadlock for recursing, ret=%d\n",
-		       __func__, ret);
+		pr_err("%s: missed deadlock for recursing, ret=%d from %s\n",
+		       __func__, ret, from);
 		if (!ret)
 			ww_mutex_unlock(&mutex);
 		ret = -EINVAL;
 		goto out;
 	}
 
+	ww_mutex_unlock(&mutex);
 	ret = 0;
 out:
-	ww_mutex_unlock(&mutex);
 	ww_acquire_fini(&ctx);
 	return ret;
 }
@@ -150,7 +178,7 @@ struct test_abba {
 	struct ww_mutex b_mutex;
 	struct completion a_ready;
 	struct completion b_ready;
-	bool resolve;
+	bool resolve, trylock;
 	int result;
 };
 
@@ -160,8 +188,13 @@ static void test_abba_work(struct work_struct *work)
 	struct ww_acquire_ctx ctx;
 	int err;
 
-	ww_acquire_init(&ctx, &ww_class);
-	ww_mutex_lock(&abba->b_mutex, &ctx);
+	ww_acquire_init_noinject(&ctx, &ww_class);
+	if (!abba->trylock)
+		ww_mutex_lock(&abba->b_mutex, &ctx);
+	else
+		WARN_ON(!ww_mutex_trylock(&abba->b_mutex, &ctx));
+
+	WARN_ON(READ_ONCE(abba->b_mutex.ctx) != &ctx);
 
 	complete(&abba->b_ready);
 	wait_for_completion(&abba->a_ready);
@@ -181,7 +214,7 @@ static void test_abba_work(struct work_struct *work)
 	abba->result = err;
 }
 
-static int test_abba(bool resolve)
+static int test_abba(bool trylock, bool resolve)
 {
 	struct test_abba abba;
 	struct ww_acquire_ctx ctx;
@@ -192,12 +225,18 @@ static int test_abba(bool resolve)
 	INIT_WORK_ONSTACK(&abba.work, test_abba_work);
 	init_completion(&abba.a_ready);
 	init_completion(&abba.b_ready);
+	abba.trylock = trylock;
 	abba.resolve = resolve;
 
 	schedule_work(&abba.work);
 
-	ww_acquire_init(&ctx, &ww_class);
-	ww_mutex_lock(&abba.a_mutex, &ctx);
+	ww_acquire_init_noinject(&ctx, &ww_class);
+	if (!trylock)
+		ww_mutex_lock(&abba.a_mutex, &ctx);
+	else
+		WARN_ON(!ww_mutex_trylock(&abba.a_mutex, &ctx));
+
+	WARN_ON(READ_ONCE(abba.a_mutex.ctx) != &ctx);
 
 	complete(&abba.a_ready);
 	wait_for_completion(&abba.b_ready);
@@ -249,7 +288,7 @@ static void test_cycle_work(struct work_struct *work)
 	struct ww_acquire_ctx ctx;
 	int err, erra = 0;
 
-	ww_acquire_init(&ctx, &ww_class);
+	ww_acquire_init_noinject(&ctx, &ww_class);
 	ww_mutex_lock(&cycle->a_mutex, &ctx);
 
 	complete(cycle->a_signal);
@@ -581,7 +620,9 @@ static int stress(int nlocks, int nthreads, unsigned int flags)
 static int __init test_ww_mutex_init(void)
 {
 	int ncpus = num_online_cpus();
-	int ret;
+	int ret, i;
+
+	printk(KERN_INFO "Beginning ww mutex selftests\n");
 
 	wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0);
 	if (!wq)
@@ -591,17 +632,19 @@ static int __init test_ww_mutex_init(void)
 	if (ret)
 		return ret;
 
-	ret = test_aa();
+	ret = test_aa(false);
 	if (ret)
 		return ret;
 
-	ret = test_abba(false);
+	ret = test_aa(true);
 	if (ret)
 		return ret;
 
-	ret = test_abba(true);
-	if (ret)
-		return ret;
+	for (i = 0; i < 4; i++) {
+		ret = test_abba(i & 1, i & 2);
+		if (ret)
+			return ret;
+	}
 
 	ret = test_cycle(ncpus);
 	if (ret)
@@ -619,6 +662,7 @@ static int __init test_ww_mutex_init(void)
 	if (ret)
 		return ret;
 
+	printk(KERN_INFO "All ww mutex selftests passed\n");
 	return 0;
 }
 
diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c
index 3f1fff7d2780..0e00205cf467 100644
--- a/kernel/locking/ww_rt_mutex.c
+++ b/kernel/locking/ww_rt_mutex.c
@@ -9,6 +9,31 @@
 #define WW_RT
 #include "rtmutex.c"
 
+int ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx)
+{
+	struct rt_mutex *rtm = &lock->base;
+
+	if (!ww_ctx)
+		return rt_mutex_trylock(rtm);
+
+	/*
+	 * Reset the wounded flag after a kill. No other process can
+	 * race and wound us here, since they can't have a valid owner
+	 * pointer if we don't have any locks held.
+	 */
+	if (ww_ctx->acquired == 0)
+		ww_ctx->wounded = 0;
+
+	if (__rt_mutex_trylock(&rtm->rtmutex)) {
+		ww_mutex_set_context_fastpath(lock, ww_ctx);
+		mutex_acquire_nest(&rtm->dep_map, 0, 1, ww_ctx->dep_map, _RET_IP_);
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ww_mutex_trylock);
+
 static int __sched
 __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx,
 		   unsigned int state, unsigned long ip)
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 161108e5d2fe..71652e1c397c 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -258,7 +258,7 @@ static void init_shared_classes(void)
 #define WWAF(x)			ww_acquire_fini(x)
 
 #define WWL(x, c)		ww_mutex_lock(x, c)
-#define WWT(x)			ww_mutex_trylock(x)
+#define WWT(x)			ww_mutex_trylock(x, NULL)
 #define WWL1(x)			ww_mutex_lock(x, NULL)
 #define WWU(x)			ww_mutex_unlock(x)
 
-- 
cgit v1.2.3


From a2e05ddda11b0bd529f443df9089ab498b2c2642 Mon Sep 17 00:00:00 2001
From: Zhouyi Zhou <zhouzhouyi@gmail.com>
Date: Wed, 11 Aug 2021 10:59:20 +0800
Subject: lockdep: Improve comments in wait-type checks

Comments in wait-type checks be improved by mentioning the
PREEPT_RT kernel configure option.

Signed-off-by: Zhouyi Zhou <zhouzhouyi@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lkml.kernel.org/r/20210811025920.20751-1-zhouzhouyi@gmail.com
---
 include/linux/lockdep_types.h | 2 +-
 kernel/locking/lockdep.c      | 2 +-
 kernel/rcu/update.c           | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h
index 3e726ace5c62..d22430840b53 100644
--- a/include/linux/lockdep_types.h
+++ b/include/linux/lockdep_types.h
@@ -21,7 +21,7 @@ enum lockdep_wait_type {
 	LD_WAIT_SPIN,		/* spin loops, raw_spinlock_t etc.. */
 
 #ifdef CONFIG_PROVE_RAW_LOCK_NESTING
-	LD_WAIT_CONFIG,		/* CONFIG_PREEMPT_LOCK, spinlock_t etc.. */
+	LD_WAIT_CONFIG,		/* preemptible in PREEMPT_RT, spinlock_t etc.. */
 #else
 	LD_WAIT_CONFIG = LD_WAIT_SPIN,
 #endif
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index bfa0a347f27c..4e6312977ffb 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4671,7 +4671,7 @@ print_lock_invalid_wait_context(struct task_struct *curr,
 /*
  * Verify the wait_type context.
  *
- * This check validates we takes locks in the right wait-type order; that is it
+ * This check validates we take locks in the right wait-type order; that is it
  * ensures that we do not take mutexes inside spinlocks and do not attempt to
  * acquire spinlocks inside raw_spinlocks and the sort.
  *
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index c21b38cc25e9..690b0cec7459 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -247,7 +247,7 @@ struct lockdep_map rcu_lock_map = {
 	.name = "rcu_read_lock",
 	.key = &rcu_lock_key,
 	.wait_type_outer = LD_WAIT_FREE,
-	.wait_type_inner = LD_WAIT_CONFIG, /* XXX PREEMPT_RCU ? */
+	.wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_RT implies PREEMPT_RCU */
 };
 EXPORT_SYMBOL_GPL(rcu_lock_map);
 
@@ -256,7 +256,7 @@ struct lockdep_map rcu_bh_lock_map = {
 	.name = "rcu_read_lock_bh",
 	.key = &rcu_bh_lock_key,
 	.wait_type_outer = LD_WAIT_FREE,
-	.wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_LOCK also makes BH preemptible */
+	.wait_type_inner = LD_WAIT_CONFIG, /* PREEMPT_RT makes BH preemptible. */
 };
 EXPORT_SYMBOL_GPL(rcu_bh_lock_map);
 
-- 
cgit v1.2.3


From f7427ba5ce9c5438ad392b6cbcc4ca8a0487d7e7 Mon Sep 17 00:00:00 2001
From: Shaokun Zhang <zhangshaokun@hisilicon.com>
Date: Wed, 25 Aug 2021 15:07:04 +0800
Subject: locking/lockdep: Cleanup the repeated declaration

'struct task_struct' has been decleared twice, so keep the top one and
cleanup the repeated one.

Signed-off-by: Shaokun Zhang <zhangshaokun@hisilicon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/1629875224-32751-1-git-send-email-zhangshaokun@hisilicon.com
---
 include/linux/debug_locks.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 3f49e65169c6..dbb409d77d4f 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -47,8 +47,6 @@ extern int debug_locks_off(void);
 # define locking_selftest()	do { } while (0)
 #endif
 
-struct task_struct;
-
 #ifdef CONFIG_LOCKDEP
 extern void debug_show_all_locks(void);
 extern void debug_show_held_locks(struct task_struct *task);
-- 
cgit v1.2.3


From f68d08c437f98ee19a14142b9de2d7afe2032d5c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 17 Sep 2021 11:15:50 -0700
Subject: net: phy: bcm7xxx: Add EPHY entry for 72165

72165 is a 16nm process SoC with a 10/100 integrated Ethernet PHY,
create a new macro and set of functions for this different process type.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20210917181551.2836036-1-f.fainelli@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/bcm7xxx.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/brcmphy.h   |   1 +
 2 files changed, 202 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index e79297a4bae8..3a29a1493ff1 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -398,6 +398,190 @@ static int bcm7xxx_28nm_ephy_config_init(struct phy_device *phydev)
 	return bcm7xxx_28nm_ephy_apd_enable(phydev);
 }
 
+static int bcm7xxx_16nm_ephy_afe_config(struct phy_device *phydev)
+{
+	int tmp, rcalcode, rcalnewcodelp, rcalnewcode11, rcalnewcode11d2;
+
+	/* Reset PHY */
+	tmp = genphy_soft_reset(phydev);
+	if (tmp)
+		return tmp;
+
+	/* Reset AFE and PLL */
+	bcm_phy_write_exp_sel(phydev, 0x0003, 0x0006);
+	/* Clear reset */
+	bcm_phy_write_exp_sel(phydev, 0x0003, 0x0000);
+
+	/* Write PLL/AFE control register to select 54MHz crystal */
+	bcm_phy_write_misc(phydev, 0x0030, 0x0001, 0x0000);
+	bcm_phy_write_misc(phydev, 0x0031, 0x0000, 0x044a);
+
+	/* Change Ka,Kp,Ki to pdiv=1 */
+	bcm_phy_write_misc(phydev, 0x0033, 0x0002, 0x71a1);
+	/* Configuration override */
+	bcm_phy_write_misc(phydev, 0x0033, 0x0001, 0x8000);
+
+	/* Change PLL_NDIV and PLL_NUDGE */
+	bcm_phy_write_misc(phydev, 0x0031, 0x0001, 0x2f68);
+	bcm_phy_write_misc(phydev, 0x0031, 0x0002, 0x0000);
+
+	/* Reference frequency is 54Mhz, config_mode[15:14] = 3 (low
+	 * phase)
+	 */
+	bcm_phy_write_misc(phydev, 0x0030, 0x0003, 0xc036);
+
+	/* Initialize bypass mode */
+	bcm_phy_write_misc(phydev, 0x0032, 0x0003, 0x0000);
+	/* Bypass code, default: VCOCLK enabled */
+	bcm_phy_write_misc(phydev, 0x0033, 0x0000, 0x0002);
+	/* LDOs at default setting */
+	bcm_phy_write_misc(phydev, 0x0030, 0x0002, 0x01c0);
+	/* Release PLL reset */
+	bcm_phy_write_misc(phydev, 0x0030, 0x0001, 0x0001);
+
+	/* Bandgap curvature correction to correct default */
+	bcm_phy_write_misc(phydev, 0x0038, 0x0000, 0x0010);
+
+	/* Run RCAL */
+	bcm_phy_write_misc(phydev, 0x0039, 0x0003, 0x0038);
+	bcm_phy_write_misc(phydev, 0x0039, 0x0003, 0x003b);
+	udelay(2);
+	bcm_phy_write_misc(phydev, 0x0039, 0x0003, 0x003f);
+	mdelay(5);
+
+	/* AFE_CAL_CONFIG_0, Vref=1000, Target=10, averaging enabled */
+	bcm_phy_write_misc(phydev, 0x0039, 0x0001, 0x1c82);
+	/* AFE_CAL_CONFIG_0, no reset and analog powerup */
+	bcm_phy_write_misc(phydev, 0x0039, 0x0001, 0x9e82);
+	udelay(2);
+	/* AFE_CAL_CONFIG_0, start calibration */
+	bcm_phy_write_misc(phydev, 0x0039, 0x0001, 0x9f82);
+	udelay(100);
+	/* AFE_CAL_CONFIG_0, clear start calibration, set HiBW */
+	bcm_phy_write_misc(phydev, 0x0039, 0x0001, 0x9e86);
+	udelay(2);
+	/* AFE_CAL_CONFIG_0, start calibration with hi BW mode set */
+	bcm_phy_write_misc(phydev, 0x0039, 0x0001, 0x9f86);
+	udelay(100);
+
+	/* Adjust 10BT amplitude additional +7% and 100BT +2% */
+	bcm_phy_write_misc(phydev, 0x0038, 0x0001, 0xe7ea);
+	/* Adjust 1G mode amplitude and 1G testmode1 */
+	bcm_phy_write_misc(phydev, 0x0038, 0x0002, 0xede0);
+
+	/* Read CORE_EXPA9 */
+	tmp = bcm_phy_read_exp(phydev, 0x00a9);
+	/* CORE_EXPA9[6:1] is rcalcode[5:0] */
+	rcalcode = (tmp & 0x7e) / 2;
+	/* Correct RCAL code + 1 is -1% rprogr, LP: +16 */
+	rcalnewcodelp = rcalcode + 16;
+	/* Correct RCAL code + 1 is -15 rprogr, 11: +10 */
+	rcalnewcode11 = rcalcode + 10;
+	/* Saturate if necessary */
+	if (rcalnewcodelp > 0x3f)
+		rcalnewcodelp = 0x3f;
+	if (rcalnewcode11 > 0x3f)
+		rcalnewcode11 = 0x3f;
+	/* REXT=1 BYP=1 RCAL_st1<5:0>=new rcal code */
+	tmp = 0x00f8 + rcalnewcodelp * 256;
+	/* Program into AFE_CAL_CONFIG_2 */
+	bcm_phy_write_misc(phydev, 0x0039, 0x0003, tmp);
+	/* AFE_BIAS_CONFIG_0 10BT bias code (Bias: E4) */
+	bcm_phy_write_misc(phydev, 0x0038, 0x0001, 0xe7e4);
+	/* invert adc clock output and 'adc refp ldo current To correct
+	 * default
+	 */
+	bcm_phy_write_misc(phydev, 0x003b, 0x0000, 0x8002);
+	/* 100BT stair case, high BW, 1G stair case, alternate encode */
+	bcm_phy_write_misc(phydev, 0x003c, 0x0003, 0xf882);
+	/* 1000BT DAC transition method per Erol, bits[32], DAC Shuffle
+	 * sequence 1 + 10BT imp adjust bits
+	 */
+	bcm_phy_write_misc(phydev, 0x003d, 0x0000, 0x3201);
+	/* Non-overlap fix */
+	bcm_phy_write_misc(phydev, 0x003a, 0x0002, 0x0c00);
+
+	/* pwdb override (rxconfig<5>) to turn on RX LDO indpendent of
+	 * pwdb controls from DSP_TAP10
+	 */
+	bcm_phy_write_misc(phydev, 0x003a, 0x0001, 0x0020);
+
+	/* Remove references to channel 2 and 3 */
+	bcm_phy_write_misc(phydev, 0x003b, 0x0002, 0x0000);
+	bcm_phy_write_misc(phydev, 0x003b, 0x0003, 0x0000);
+
+	/* Set cal_bypassb bit rxconfig<43> */
+	bcm_phy_write_misc(phydev, 0x003a, 0x0003, 0x0800);
+	udelay(2);
+
+	/* Revert pwdb_override (rxconfig<5>) to 0 so that the RX pwr
+	 * is controlled by DSP.
+	 */
+	bcm_phy_write_misc(phydev, 0x003a, 0x0001, 0x0000);
+
+	/* Drop LSB */
+	rcalnewcode11d2 = (rcalnewcode11 & 0xfffe) / 2;
+	tmp = bcm_phy_read_misc(phydev, 0x003d, 0x0001);
+	/* Clear bits [11:5] */
+	tmp &= ~0xfe0;
+	/* set txcfg_ch0<5>=1 (enable + set local rcal) */
+	tmp |= 0x0020 | (rcalnewcode11d2 * 64);
+	bcm_phy_write_misc(phydev, 0x003d, 0x0001, tmp);
+	bcm_phy_write_misc(phydev, 0x003d, 0x0002, tmp);
+
+	tmp = bcm_phy_read_misc(phydev, 0x003d, 0x0000);
+	/* set txcfg<45:44>=11 (enable Rextra + invert fullscaledetect)
+	 */
+	tmp &= ~0x3000;
+	tmp |= 0x3000;
+	bcm_phy_write_misc(phydev, 0x003d, 0x0000, tmp);
+
+	return 0;
+}
+
+static int bcm7xxx_16nm_ephy_config_init(struct phy_device *phydev)
+{
+	int ret, val;
+
+	ret = bcm7xxx_16nm_ephy_afe_config(phydev);
+	if (ret)
+		return ret;
+
+	ret = bcm_phy_set_eee(phydev, true);
+	if (ret)
+		return ret;
+
+	ret = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR3);
+	if (ret < 0)
+		return ret;
+
+	val = ret;
+
+	/* Auto power down of DLL enabled,
+	 * TXC/RXC disabled during auto power down.
+	 */
+	val &= ~BCM54XX_SHD_SCR3_DLLAPD_DIS;
+	val |= BIT(8);
+
+	ret = bcm_phy_write_shadow(phydev, BCM54XX_SHD_SCR3, val);
+	if (ret < 0)
+		return ret;
+
+	return bcm_phy_enable_apd(phydev, true);
+}
+
+static int bcm7xxx_16nm_ephy_resume(struct phy_device *phydev)
+{
+	int ret;
+
+	/* Re-apply workarounds coming out suspend/resume */
+	ret = bcm7xxx_16nm_ephy_config_init(phydev);
+	if (ret)
+		return ret;
+
+	return genphy_config_aneg(phydev);
+}
+
 static int bcm7xxx_28nm_ephy_resume(struct phy_device *phydev)
 {
 	int ret;
@@ -610,9 +794,25 @@ static void bcm7xxx_28nm_remove(struct phy_device *phydev)
 	.resume         = bcm7xxx_config_init,				\
 }
 
+#define BCM7XXX_16NM_EPHY(_oui, _name)					\
+{									\
+	.phy_id		= (_oui),					\
+	.phy_id_mask	= 0xfffffff0,					\
+	.name		= _name,					\
+	/* PHY_BASIC_FEATURES */					\
+	.flags		= PHY_IS_INTERNAL,				\
+	.probe		= bcm7xxx_28nm_probe,				\
+	.remove		= bcm7xxx_28nm_remove,				\
+	.config_init	= bcm7xxx_16nm_ephy_config_init,		\
+	.config_aneg	= genphy_config_aneg,				\
+	.read_status	= genphy_read_status,				\
+	.resume		= bcm7xxx_16nm_ephy_resume,			\
+}
+
 static struct phy_driver bcm7xxx_driver[] = {
 	BCM7XXX_28NM_EPHY(PHY_ID_BCM72113, "Broadcom BCM72113"),
 	BCM7XXX_28NM_EPHY(PHY_ID_BCM72116, "Broadcom BCM72116"),
+	BCM7XXX_16NM_EPHY(PHY_ID_BCM72165, "Broadcom BCM72165"),
 	BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"),
 	BCM7XXX_28NM_EPHY(PHY_ID_BCM7255, "Broadcom BCM7255"),
 	BCM7XXX_28NM_EPHY(PHY_ID_BCM7260, "Broadcom BCM7260"),
@@ -635,6 +835,7 @@ static struct phy_driver bcm7xxx_driver[] = {
 static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
 	{ PHY_ID_BCM72113, 0xfffffff0 },
 	{ PHY_ID_BCM72116, 0xfffffff0, },
+	{ PHY_ID_BCM72165, 0xfffffff0, },
 	{ PHY_ID_BCM7250, 0xfffffff0, },
 	{ PHY_ID_BCM7255, 0xfffffff0, },
 	{ PHY_ID_BCM7260, 0xfffffff0, },
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 3308cebe1c19..07e1dfadbbdf 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -32,6 +32,7 @@
 
 #define PHY_ID_BCM72113			0x35905310
 #define PHY_ID_BCM72116			0x35905350
+#define PHY_ID_BCM72165			0x35905340
 #define PHY_ID_BCM7250			0xae025280
 #define PHY_ID_BCM7255			0xae025120
 #define PHY_ID_BCM7260			0xae025190
-- 
cgit v1.2.3


From 335ff4990cf3bfa42d8846f9b3d8c09456f51801 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 17 Sep 2021 11:29:03 -0700
Subject: bpf: Merge printk and seq_printf VARARG max macros

MAX_SNPRINTF_VARARGS and MAX_SEQ_PRINTF_VARARGS are used by bpf helpers
bpf_snprintf and bpf_seq_printf to limit their varargs. Both call into
bpf_bprintf_prepare for print formatting logic and have convenience
macros in libbpf (BPF_SNPRINTF, BPF_SEQ_PRINTF) which use the same
helper macros to convert varargs to a byte array.

Changing shared functionality to support more varargs for either bpf
helper would affect the other as well, so let's combine the _VARARGS
macros to make this more obvious.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210917182911.2426606-2-davemarchevsky@fb.com
---
 include/linux/bpf.h      | 2 ++
 kernel/bpf/helpers.c     | 4 +---
 kernel/trace/bpf_trace.c | 4 +---
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f4c16f19f83e..be8d57e6e78a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2216,6 +2216,8 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 struct btf_id_set;
 bool btf_id_set_contains(const struct btf_id_set *set, u32 id);
 
+#define MAX_BPRINTF_VARARGS		12
+
 int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 			u32 **bin_buf, u32 num_args);
 void bpf_bprintf_cleanup(void);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9aabf84afd4b..8f9f392c1322 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -979,15 +979,13 @@ out:
 	return err;
 }
 
-#define MAX_SNPRINTF_VARARGS		12
-
 BPF_CALL_5(bpf_snprintf, char *, str, u32, str_size, char *, fmt,
 	   const void *, data, u32, data_len)
 {
 	int err, num_args;
 	u32 *bin_args;
 
-	if (data_len % 8 || data_len > MAX_SNPRINTF_VARARGS * 8 ||
+	if (data_len % 8 || data_len > MAX_BPRINTF_VARARGS * 8 ||
 	    (data_len && !data))
 		return -EINVAL;
 	num_args = data_len / 8;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 067e88c3d2ee..4ec779fa0c1d 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -414,15 +414,13 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	return &bpf_trace_printk_proto;
 }
 
-#define MAX_SEQ_PRINTF_VARARGS		12
-
 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 	   const void *, data, u32, data_len)
 {
 	int err, num_args;
 	u32 *bin_args;
 
-	if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
 	    (data_len && !data))
 		return -EINVAL;
 	num_args = data_len / 8;
-- 
cgit v1.2.3


From 10aceb629e198429c849d5e995c3bb1ba7a9aaa3 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Fri, 17 Sep 2021 11:29:05 -0700
Subject: bpf: Add bpf_trace_vprintk helper

This helper is meant to be "bpf_trace_printk, but with proper vararg
support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
array. Write to /sys/kernel/debug/tracing/trace_pipe using the same
mechanism as bpf_trace_printk. The functionality of this helper was
requested in the libbpf issue tracker [0].

[0] Closes: https://github.com/libbpf/libbpf/issues/315

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210917182911.2426606-4-davemarchevsky@fb.com
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++++
 kernel/bpf/core.c              |  5 ++++
 kernel/bpf/helpers.c           |  2 ++
 kernel/trace/bpf_trace.c       | 52 +++++++++++++++++++++++++++++++++++++++++-
 tools/include/uapi/linux/bpf.h | 11 +++++++++
 6 files changed, 81 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be8d57e6e78a..b6c45a6cbbba 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1088,6 +1088,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3e9785f1064a..98ca79a67937 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4898,6 +4898,16 @@ union bpf_attr {
  *		**-EINVAL** if *flags* is not zero.
  *
  *		**-ENOENT** if architecture does not support branch records.
+ *
+ * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *	Description
+ *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *		to format and can handle more format args as a result.
+ *
+ *		Arguments are to be used as in **bpf_seq_printf**\ () helper.
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5077,6 +5087,7 @@ union bpf_attr {
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
+	FN(trace_vprintk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9f4636d021b1..6fddc13fe67f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2357,6 +2357,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 	return NULL;
 }
 
+const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
+{
+	return NULL;
+}
+
 u64 __weak
 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 8f9f392c1322..2c604ff8c7fb 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1435,6 +1435,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_proto;
 	case BPF_FUNC_task_pt_regs:
 		return &bpf_task_pt_regs_proto;
+	case BPF_FUNC_trace_vprintk:
+		return bpf_get_trace_vprintk_proto();
 	default:
 		return NULL;
 	}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4ec779fa0c1d..6b3153841a33 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+static void __set_printk_clr_event(void)
 {
 	/*
 	 * This program might be calling bpf_trace_printk,
@@ -410,10 +410,58 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	 */
 	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
 		pr_warn_ratelimited("could not enable bpf_trace_printk events");
+}
 
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+{
+	__set_printk_clr_event();
 	return &bpf_trace_printk_proto;
 }
 
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
+	   u32, data_len)
+{
+	static char buf[BPF_TRACE_PRINTK_SIZE];
+	unsigned long flags;
+	int ret, num_args;
+	u32 *bin_args;
+
+	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
+	    (data_len && !data))
+		return -EINVAL;
+	num_args = data_len / 8;
+
+	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+	if (ret < 0)
+		return ret;
+
+	raw_spin_lock_irqsave(&trace_printk_lock, flags);
+	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+
+	trace_bpf_trace_printk(buf);
+	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+	bpf_bprintf_cleanup();
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_trace_vprintk_proto = {
+	.func		= bpf_trace_vprintk,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
+{
+	__set_printk_clr_event();
+	return &bpf_trace_vprintk_proto;
+}
+
 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 	   const void *, data, u32, data_len)
 {
@@ -1160,6 +1208,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_func_ip_proto_tracing;
 	case BPF_FUNC_get_branch_snapshot:
 		return &bpf_get_branch_snapshot_proto;
+	case BPF_FUNC_trace_vprintk:
+		return bpf_get_trace_vprintk_proto();
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3e9785f1064a..98ca79a67937 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4898,6 +4898,16 @@ union bpf_attr {
  *		**-EINVAL** if *flags* is not zero.
  *
  *		**-ENOENT** if architecture does not support branch records.
+ *
+ * long bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *	Description
+ *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *		to format and can handle more format args as a result.
+ *
+ *		Arguments are to be used as in **bpf_seq_printf**\ () helper.
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5077,6 +5087,7 @@ union bpf_attr {
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
+	FN(trace_vprintk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 55c42fa7fa331f98062c32799456420930b8bf8c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 17 Sep 2021 16:33:19 -0700
Subject: mptcp: add MPTCP_INFO getsockopt

Its not compatible with multipath-tcp.org kernel one.

1. The out-of-tree implementation defines a different 'struct mptcp_info',
   with embedded __user addresses for additional data such as
   endpoint addresses.

2. Mat Martineau points out that embedded __user addresses doesn't work
with BPF_CGROUP_RUN_PROG_GETSOCKOPT() which assumes that copying in
optsize bytes from optval provides all data that got copied to userspace.

This provides mptcp_info data for the given mptcp socket.

Userspace sets optlen to the size of the structure it expects.
The kernel updates it to contain the number of bytes that it copied.

This allows to append more information to the structure later.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h     |  1 +
 include/uapi/linux/mptcp.h |  3 +++
 net/mptcp/sockopt.c        | 40 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 041d6032a348..7612d760b6a9 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -364,6 +364,7 @@ struct ucred {
 #define SOL_KCM		281
 #define SOL_TLS		282
 #define SOL_XDP		283
+#define SOL_MPTCP	284
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h
index f66038b9551f..3e9caeddda7e 100644
--- a/include/uapi/linux/mptcp.h
+++ b/include/uapi/linux/mptcp.h
@@ -193,4 +193,7 @@ enum mptcp_event_attr {
 #define MPTCP_RST_EBADPERF	5
 #define MPTCP_RST_EMIDDLEBOX	6
 
+/* MPTCP socket options */
+#define MPTCP_INFO 1
+
 #endif /* _UAPI_MPTCP_H */
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 54f0d521a399..f7683c22911f 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -673,10 +673,14 @@ out:
 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
 {
 	struct sock *sk = &msk->sk.icsk_inet.sk;
-	bool slow = lock_sock_fast(sk);
 	u32 flags = 0;
+	bool slow;
 	u8 val;
 
+	memset(info, 0, sizeof(*info));
+
+	slow = lock_sock_fast(sk);
+
 	info->mptcpi_subflows = READ_ONCE(msk->pm.subflows);
 	info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled);
 	info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
@@ -702,6 +706,27 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
 }
 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
 
+static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen)
+{
+	struct mptcp_info m_info;
+	int len;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	len = min_t(unsigned int, len, sizeof(struct mptcp_info));
+
+	mptcp_diag_fill_info(msk, &m_info);
+
+	if (put_user(len, optlen))
+		return -EFAULT;
+
+	if (copy_to_user(optval, &m_info, len))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 				    char __user *optval, int __user *optlen)
 {
@@ -716,6 +741,17 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
 	return -EOPNOTSUPP;
 }
 
+static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
+				      char __user *optval, int __user *optlen)
+{
+	switch (optname) {
+	case MPTCP_INFO:
+		return mptcp_getsockopt_info(msk, optval, optlen);
+	}
+
+	return -EOPNOTSUPP;
+}
+
 int mptcp_getsockopt(struct sock *sk, int level, int optname,
 		     char __user *optval, int __user *option)
 {
@@ -738,6 +774,8 @@ int mptcp_getsockopt(struct sock *sk, int level, int optname,
 
 	if (level == SOL_TCP)
 		return mptcp_getsockopt_sol_tcp(msk, optname, optval, option);
+	if (level == SOL_MPTCP)
+		return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option);
 	return -EOPNOTSUPP;
 }
 
-- 
cgit v1.2.3


From cf9579976f724ad517cc15b7caadea728c7e245c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Sep 2021 16:34:32 +0300
Subject: net: mdio: introduce a shutdown method to mdio device drivers

MDIO-attached devices might have interrupts and other things that might
need quiesced when we kexec into a new kernel. Things are even more
creepy when those interrupt lines are shared, and in that case it is
absolutely mandatory to disable all interrupt sources.

Moreover, MDIO devices might be DSA switches, and DSA needs its own
shutdown method to unlink from the DSA master, which is a new
requirement that appeared after commit 2f1e8ea726e9 ("net: dsa: link
interfaces with the DSA master to get rid of lockdep warnings").

So introduce a ->shutdown method in the MDIO device driver structure.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_device.c | 11 +++++++++++
 include/linux/mdio.h          |  3 +++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c
index c94cb5382dc9..250742ffdfd9 100644
--- a/drivers/net/phy/mdio_device.c
+++ b/drivers/net/phy/mdio_device.c
@@ -179,6 +179,16 @@ static int mdio_remove(struct device *dev)
 	return 0;
 }
 
+static void mdio_shutdown(struct device *dev)
+{
+	struct mdio_device *mdiodev = to_mdio_device(dev);
+	struct device_driver *drv = mdiodev->dev.driver;
+	struct mdio_driver *mdiodrv = to_mdio_driver(drv);
+
+	if (mdiodrv->shutdown)
+		mdiodrv->shutdown(mdiodev);
+}
+
 /**
  * mdio_driver_register - register an mdio_driver with the MDIO layer
  * @drv: new mdio_driver to register
@@ -193,6 +203,7 @@ int mdio_driver_register(struct mdio_driver *drv)
 	mdiodrv->driver.bus = &mdio_bus_type;
 	mdiodrv->driver.probe = mdio_probe;
 	mdiodrv->driver.remove = mdio_remove;
+	mdiodrv->driver.shutdown = mdio_shutdown;
 
 	retval = driver_register(&mdiodrv->driver);
 	if (retval) {
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index ffb787d5ebde..5e6dc38f418e 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -80,6 +80,9 @@ struct mdio_driver {
 
 	/* Clears up any memory if needed */
 	void (*remove)(struct mdio_device *mdiodev);
+
+	/* Quiesces the device on system shutdown, turns off interrupts etc */
+	void (*shutdown)(struct mdio_device *mdiodev);
 };
 
 static inline struct mdio_driver *
-- 
cgit v1.2.3


From 5bd2182d58e9d9c6279b7a8a2f9b41add0e7f9cb Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 16 Feb 2021 19:46:48 -0500
Subject: audit,io_uring,io-wq: add some basic audit support to io_uring

This patch adds basic auditing to io_uring operations, regardless of
their context.  This is accomplished by allocating audit_context
structures for the io-wq worker and io_uring SQPOLL kernel threads
as well as explicitly auditing the io_uring operations in
io_issue_sqe().  Individual io_uring operations can bypass auditing
through the "audit_skip" field in the struct io_op_def definition for
the operation; although great care must be taken so that security
relevant io_uring operations do not bypass auditing; please contact
the audit mailing list (see the MAINTAINERS file) with any questions.

The io_uring operations are audited using a new AUDIT_URINGOP record,
an example is shown below:

  type=UNKNOWN[1336] msg=audit(1631800225.981:37289):
    uring_op=19 success=yes exit=0 items=0 ppid=15454 pid=15681
    uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0
    subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
    key=(null)

Thanks to Richard Guy Briggs for review and feedback.

Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/io-wq.c                 |   4 ++
 fs/io_uring.c              |  55 +++++++++++++--
 include/linux/audit.h      |  26 +++++++
 include/uapi/linux/audit.h |   1 +
 kernel/audit.h             |   2 +
 kernel/auditsc.c           | 166 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 248 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 6c55362c1f99..dac5c5961c9d 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -14,6 +14,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/cpu.h>
 #include <linux/tracehook.h>
+#include <linux/audit.h>
 
 #include "io-wq.h"
 
@@ -562,6 +563,8 @@ static int io_wqe_worker(void *data)
 	snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
 	set_task_comm(current, buf);
 
+	audit_alloc_kernel(current);
+
 	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
 		long ret;
 
@@ -601,6 +604,7 @@ loop:
 		io_worker_handle_work(worker);
 	}
 
+	audit_free(current);
 	io_worker_exit(worker);
 	return 0;
 }
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 16fb7436043c..388754b24785 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -79,6 +79,7 @@
 #include <linux/pagemap.h>
 #include <linux/io_uring.h>
 #include <linux/tracehook.h>
+#include <linux/audit.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -917,6 +918,8 @@ struct io_op_def {
 	unsigned		needs_async_setup : 1;
 	/* should block plug */
 	unsigned		plug : 1;
+	/* skip auditing */
+	unsigned		audit_skip : 1;
 	/* size of async data needed, if any */
 	unsigned short		async_size;
 };
@@ -930,6 +933,7 @@ static const struct io_op_def io_op_defs[] = {
 		.buffer_select		= 1,
 		.needs_async_setup	= 1,
 		.plug			= 1,
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_WRITEV] = {
@@ -939,16 +943,19 @@ static const struct io_op_def io_op_defs[] = {
 		.pollout		= 1,
 		.needs_async_setup	= 1,
 		.plug			= 1,
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_FSYNC] = {
 		.needs_file		= 1,
+		.audit_skip		= 1,
 	},
 	[IORING_OP_READ_FIXED] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollin			= 1,
 		.plug			= 1,
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_WRITE_FIXED] = {
@@ -957,15 +964,20 @@ static const struct io_op_def io_op_defs[] = {
 		.unbound_nonreg_file	= 1,
 		.pollout		= 1,
 		.plug			= 1,
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_POLL_ADD] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
+		.audit_skip		= 1,
+	},
+	[IORING_OP_POLL_REMOVE] = {
+		.audit_skip		= 1,
 	},
-	[IORING_OP_POLL_REMOVE] = {},
 	[IORING_OP_SYNC_FILE_RANGE] = {
 		.needs_file		= 1,
+		.audit_skip		= 1,
 	},
 	[IORING_OP_SENDMSG] = {
 		.needs_file		= 1,
@@ -983,18 +995,23 @@ static const struct io_op_def io_op_defs[] = {
 		.async_size		= sizeof(struct io_async_msghdr),
 	},
 	[IORING_OP_TIMEOUT] = {
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_timeout_data),
 	},
 	[IORING_OP_TIMEOUT_REMOVE] = {
 		/* used by timeout updates' prep() */
+		.audit_skip		= 1,
 	},
 	[IORING_OP_ACCEPT] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollin			= 1,
 	},
-	[IORING_OP_ASYNC_CANCEL] = {},
+	[IORING_OP_ASYNC_CANCEL] = {
+		.audit_skip		= 1,
+	},
 	[IORING_OP_LINK_TIMEOUT] = {
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_timeout_data),
 	},
 	[IORING_OP_CONNECT] = {
@@ -1009,14 +1026,19 @@ static const struct io_op_def io_op_defs[] = {
 	},
 	[IORING_OP_OPENAT] = {},
 	[IORING_OP_CLOSE] = {},
-	[IORING_OP_FILES_UPDATE] = {},
-	[IORING_OP_STATX] = {},
+	[IORING_OP_FILES_UPDATE] = {
+		.audit_skip		= 1,
+	},
+	[IORING_OP_STATX] = {
+		.audit_skip		= 1,
+	},
 	[IORING_OP_READ] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollin			= 1,
 		.buffer_select		= 1,
 		.plug			= 1,
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_WRITE] = {
@@ -1025,39 +1047,50 @@ static const struct io_op_def io_op_defs[] = {
 		.unbound_nonreg_file	= 1,
 		.pollout		= 1,
 		.plug			= 1,
+		.audit_skip		= 1,
 		.async_size		= sizeof(struct io_async_rw),
 	},
 	[IORING_OP_FADVISE] = {
 		.needs_file		= 1,
+		.audit_skip		= 1,
 	},
 	[IORING_OP_MADVISE] = {},
 	[IORING_OP_SEND] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollout		= 1,
+		.audit_skip		= 1,
 	},
 	[IORING_OP_RECV] = {
 		.needs_file		= 1,
 		.unbound_nonreg_file	= 1,
 		.pollin			= 1,
 		.buffer_select		= 1,
+		.audit_skip		= 1,
 	},
 	[IORING_OP_OPENAT2] = {
 	},
 	[IORING_OP_EPOLL_CTL] = {
 		.unbound_nonreg_file	= 1,
+		.audit_skip		= 1,
 	},
 	[IORING_OP_SPLICE] = {
 		.needs_file		= 1,
 		.hash_reg_file		= 1,
 		.unbound_nonreg_file	= 1,
+		.audit_skip		= 1,
+	},
+	[IORING_OP_PROVIDE_BUFFERS] = {
+		.audit_skip		= 1,
+	},
+	[IORING_OP_REMOVE_BUFFERS] = {
+		.audit_skip		= 1,
 	},
-	[IORING_OP_PROVIDE_BUFFERS] = {},
-	[IORING_OP_REMOVE_BUFFERS] = {},
 	[IORING_OP_TEE] = {
 		.needs_file		= 1,
 		.hash_reg_file		= 1,
 		.unbound_nonreg_file	= 1,
+		.audit_skip		= 1,
 	},
 	[IORING_OP_SHUTDOWN] = {
 		.needs_file		= 1,
@@ -6591,6 +6624,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 	if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
 		creds = override_creds(req->creds);
 
+	if (!io_op_defs[req->opcode].audit_skip)
+		audit_uring_entry(req->opcode);
+
 	switch (req->opcode) {
 	case IORING_OP_NOP:
 		ret = io_nop(req, issue_flags);
@@ -6706,6 +6742,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 		break;
 	}
 
+	if (!io_op_defs[req->opcode].audit_skip)
+		audit_uring_exit(!ret, ret);
+
 	if (creds)
 		revert_creds(creds);
 	if (ret)
@@ -7360,6 +7399,8 @@ static int io_sq_thread(void *data)
 		set_cpus_allowed_ptr(current, cpu_online_mask);
 	current->flags |= PF_NO_SETAFFINITY;
 
+	audit_alloc_kernel(current);
+
 	mutex_lock(&sqd->lock);
 	while (1) {
 		bool cap_entries, sqt_spin = false;
@@ -7425,6 +7466,8 @@ static int io_sq_thread(void *data)
 	io_run_task_work();
 	mutex_unlock(&sqd->lock);
 
+	audit_free(current);
+
 	complete(&sqd->exited);
 	do_exit(0);
 }
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 82b7c1116a85..d656a06dd909 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -286,7 +286,10 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
 /* These are defined in auditsc.c */
 				/* Public API */
 extern int  audit_alloc(struct task_struct *task);
+extern int  audit_alloc_kernel(struct task_struct *task);
 extern void __audit_free(struct task_struct *task);
+extern void __audit_uring_entry(u8 op);
+extern void __audit_uring_exit(int success, long code);
 extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
 				  unsigned long a2, unsigned long a3);
 extern void __audit_syscall_exit(int ret_success, long ret_value);
@@ -323,6 +326,21 @@ static inline void audit_free(struct task_struct *task)
 	if (unlikely(task->audit_context))
 		__audit_free(task);
 }
+static inline void audit_uring_entry(u8 op)
+{
+	/*
+	 * We intentionally check audit_context() before audit_enabled as most
+	 * Linux systems (as of ~2021) rely on systemd which forces audit to
+	 * be enabled regardless of the user's audit configuration.
+	 */
+	if (unlikely(audit_context() && audit_enabled))
+		__audit_uring_entry(op);
+}
+static inline void audit_uring_exit(int success, long code)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_uring_exit(success, code);
+}
 static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
@@ -554,8 +572,16 @@ static inline int audit_alloc(struct task_struct *task)
 {
 	return 0;
 }
+static inline int audit_alloc_kernel(struct task_struct *task)
+{
+	return 0;
+}
 static inline void audit_free(struct task_struct *task)
 { }
+static inline void audit_uring_entry(u8 op)
+{ }
+static inline void audit_uring_exit(int success, long code)
+{ }
 static inline void audit_syscall_entry(int major, unsigned long a0,
 				       unsigned long a1, unsigned long a2,
 				       unsigned long a3)
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index daa481729e9b..a1997697c8b1 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -118,6 +118,7 @@
 #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
 #define AUDIT_BPF		1334	/* BPF subsystem */
 #define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
+#define AUDIT_URINGOP		1336	/* io_uring operation */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/kernel/audit.h b/kernel/audit.h
index 13abc48de0bd..d1161e3b83e2 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -103,10 +103,12 @@ struct audit_context {
 	enum {
 		AUDIT_CTX_UNUSED,	/* audit_context is currently unused */
 		AUDIT_CTX_SYSCALL,	/* in use by syscall */
+		AUDIT_CTX_URING,	/* in use by io_uring */
 	} context;
 	enum audit_state    state, current_state;
 	unsigned int	    serial;     /* serial number for record */
 	int		    major;      /* syscall number */
+	int		    uring_op;   /* uring operation */
 	struct timespec64   ctime;      /* time of syscall entry */
 	unsigned long	    argv[4];    /* syscall arguments */
 	long		    return_code;/* syscall return code */
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f3d309b05c2d..6dda448fb826 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -959,6 +959,7 @@ static void audit_reset_context(struct audit_context *ctx)
 	ctx->current_state = ctx->state;
 	ctx->serial = 0;
 	ctx->major = 0;
+	ctx->uring_op = 0;
 	ctx->ctime = (struct timespec64){ .tv_sec = 0, .tv_nsec = 0 };
 	memset(ctx->argv, 0, sizeof(ctx->argv));
 	ctx->return_code = 0;
@@ -1044,6 +1045,31 @@ int audit_alloc(struct task_struct *tsk)
 	return 0;
 }
 
+/**
+ * audit_alloc_kernel - allocate an audit_context for a kernel task
+ * @tsk: the kernel task
+ *
+ * Similar to the audit_alloc() function, but intended for kernel private
+ * threads.  Returns zero on success, negative values on failure.
+ */
+int audit_alloc_kernel(struct task_struct *tsk)
+{
+	/*
+	 * At the moment we are just going to call into audit_alloc() to
+	 * simplify the code, but there two things to keep in mind with this
+	 * approach:
+	 *
+	 * 1. Filtering internal kernel tasks is a bit laughable in almost all
+	 * cases, but there is at least one case where there is a benefit:
+	 * the '-a task,never' case allows the admin to effectively disable
+	 * task auditing at runtime.
+	 *
+	 * 2. The {set,clear}_task_syscall_work() ops likely have zero effect
+	 * on these internal kernel tasks, but they probably don't hurt either.
+	 */
+	return audit_alloc(tsk);
+}
+
 static inline void audit_free_context(struct audit_context *context)
 {
 	/* resetting is extra work, but it is likely just noise */
@@ -1546,6 +1572,44 @@ out:
 	audit_log_end(ab);
 }
 
+/**
+ * audit_log_uring - generate a AUDIT_URINGOP record
+ * @ctx: the audit context
+ */
+static void audit_log_uring(struct audit_context *ctx)
+{
+	struct audit_buffer *ab;
+	const struct cred *cred;
+
+	ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_URINGOP);
+	if (!ab)
+		return;
+	cred = current_cred();
+	audit_log_format(ab, "uring_op=%d", ctx->uring_op);
+	if (ctx->return_valid != AUDITSC_INVALID)
+		audit_log_format(ab, " success=%s exit=%ld",
+				 (ctx->return_valid == AUDITSC_SUCCESS ?
+				  "yes" : "no"),
+				 ctx->return_code);
+	audit_log_format(ab,
+			 " items=%d"
+			 " ppid=%d pid=%d uid=%u gid=%u euid=%u suid=%u"
+			 " fsuid=%u egid=%u sgid=%u fsgid=%u",
+			 ctx->name_count,
+			 task_ppid_nr(current), task_tgid_nr(current),
+			 from_kuid(&init_user_ns, cred->uid),
+			 from_kgid(&init_user_ns, cred->gid),
+			 from_kuid(&init_user_ns, cred->euid),
+			 from_kuid(&init_user_ns, cred->suid),
+			 from_kuid(&init_user_ns, cred->fsuid),
+			 from_kgid(&init_user_ns, cred->egid),
+			 from_kgid(&init_user_ns, cred->sgid),
+			 from_kgid(&init_user_ns, cred->fsgid));
+	audit_log_task_context(ab);
+	audit_log_key(ab, ctx->filterkey);
+	audit_log_end(ab);
+}
+
 static void audit_log_exit(void)
 {
 	int i, call_panic = 0;
@@ -1581,6 +1645,9 @@ static void audit_log_exit(void)
 		audit_log_key(ab, context->filterkey);
 		audit_log_end(ab);
 		break;
+	case AUDIT_CTX_URING:
+		audit_log_uring(context);
+		break;
 	default:
 		BUG();
 		break;
@@ -1751,6 +1818,105 @@ static void audit_return_fixup(struct audit_context *ctx,
 	ctx->return_valid = (success ? AUDITSC_SUCCESS : AUDITSC_FAILURE);
 }
 
+/**
+ * __audit_uring_entry - prepare the kernel task's audit context for io_uring
+ * @op: the io_uring opcode
+ *
+ * This is similar to audit_syscall_entry() but is intended for use by io_uring
+ * operations.  This function should only ever be called from
+ * audit_uring_entry() as we rely on the audit context checking present in that
+ * function.
+ */
+void __audit_uring_entry(u8 op)
+{
+	struct audit_context *ctx = audit_context();
+
+	if (ctx->state == AUDIT_STATE_DISABLED)
+		return;
+
+	/*
+	 * NOTE: It's possible that we can be called from the process' context
+	 *       before it returns to userspace, and before audit_syscall_exit()
+	 *       is called.  In this case there is not much to do, just record
+	 *       the io_uring details and return.
+	 */
+	ctx->uring_op = op;
+	if (ctx->context == AUDIT_CTX_SYSCALL)
+		return;
+
+	ctx->dummy = !audit_n_rules;
+	if (!ctx->dummy && ctx->state == AUDIT_STATE_BUILD)
+		ctx->prio = 0;
+
+	ctx->context = AUDIT_CTX_URING;
+	ctx->current_state = ctx->state;
+	ktime_get_coarse_real_ts64(&ctx->ctime);
+}
+
+/**
+ * __audit_uring_exit - wrap up the kernel task's audit context after io_uring
+ * @success: true/false value to indicate if the operation succeeded or not
+ * @code: operation return code
+ *
+ * This is similar to audit_syscall_exit() but is intended for use by io_uring
+ * operations.  This function should only ever be called from
+ * audit_uring_exit() as we rely on the audit context checking present in that
+ * function.
+ */
+void __audit_uring_exit(int success, long code)
+{
+	struct audit_context *ctx = audit_context();
+
+	/*
+	 * TODO: At some point we will likely want to filter on io_uring ops
+	 *       and other things similar to what we do for syscalls, but that
+	 *       is something for another day; just record what we can here.
+	 */
+
+	if (ctx->context == AUDIT_CTX_SYSCALL) {
+		/*
+		 * NOTE: See the note in __audit_uring_entry() about the case
+		 *       where we may be called from process context before we
+		 *       return to userspace via audit_syscall_exit().  In this
+		 *       case we simply emit a URINGOP record and bail, the
+		 *       normal syscall exit handling will take care of
+		 *       everything else.
+		 *       It is also worth mentioning that when we are called,
+		 *       the current process creds may differ from the creds
+		 *       used during the normal syscall processing; keep that
+		 *       in mind if/when we move the record generation code.
+		 */
+
+		/*
+		 * We need to filter on the syscall info here to decide if we
+		 * should emit a URINGOP record.  I know it seems odd but this
+		 * solves the problem where users have a filter to block *all*
+		 * syscall records in the "exit" filter; we want to preserve
+		 * the behavior here.
+		 */
+		audit_filter_syscall(current, ctx);
+		audit_filter_inodes(current, ctx);
+		if (ctx->current_state != AUDIT_STATE_RECORD)
+			return;
+
+		audit_log_uring(ctx);
+		return;
+	}
+
+	/* this may generate CONFIG_CHANGE records */
+	if (!list_empty(&ctx->killed_trees))
+		audit_kill_trees(ctx);
+
+	audit_filter_inodes(current, ctx);
+	if (ctx->current_state != AUDIT_STATE_RECORD)
+		goto out;
+	audit_return_fixup(ctx, success, code);
+	audit_log_exit();
+
+out:
+	audit_reset_context(ctx);
+}
+
 /**
  * __audit_syscall_entry - fill in an audit record at syscall entry
  * @major: major syscall type (function)
-- 
cgit v1.2.3


From 3a862cacf8670729b1ad8fc28e4f7e078f9c570c Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Mon, 1 Feb 2021 19:22:44 -0500
Subject: fs: add anon_inode_getfile_secure() similar to
 anon_inode_getfd_secure()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extending the secure anonymous inode support to other subsystems
requires that we have a secure anon_inode_getfile() variant in
addition to the existing secure anon_inode_getfd() variant.

Thankfully we can reuse the existing __anon_inode_getfile() function
and just wrap it with the proper arguments.

Acked-by: Mickaël Salaün <mic@linux.microsoft.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/anon_inodes.c            | 29 +++++++++++++++++++++++++++++
 include/linux/anon_inodes.h |  4 ++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index a280156138ed..e0c3e33c4177 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -148,6 +148,35 @@ struct file *anon_inode_getfile(const char *name,
 }
 EXPORT_SYMBOL_GPL(anon_inode_getfile);
 
+/**
+ * anon_inode_getfile_secure - Like anon_inode_getfile(), but creates a new
+ *                             !S_PRIVATE anon inode rather than reuse the
+ *                             singleton anon inode and calls the
+ *                             inode_init_security_anon() LSM hook.  This
+ *                             allows for both the inode to have its own
+ *                             security context and for the LSM to enforce
+ *                             policy on the inode's creation.
+ *
+ * @name:    [in]    name of the "class" of the new file
+ * @fops:    [in]    file operations for the new file
+ * @priv:    [in]    private data for the new file (will be file's private_data)
+ * @flags:   [in]    flags
+ * @context_inode:
+ *           [in]    the logical relationship with the new inode (optional)
+ *
+ * The LSM may use @context_inode in inode_init_security_anon(), but a
+ * reference to it is not held.  Returns the newly created file* or an error
+ * pointer.  See the anon_inode_getfile() documentation for more information.
+ */
+struct file *anon_inode_getfile_secure(const char *name,
+				       const struct file_operations *fops,
+				       void *priv, int flags,
+				       const struct inode *context_inode)
+{
+	return __anon_inode_getfile(name, fops, priv, flags,
+				    context_inode, true);
+}
+
 static int __anon_inode_getfd(const char *name,
 			      const struct file_operations *fops,
 			      void *priv, int flags,
diff --git a/include/linux/anon_inodes.h b/include/linux/anon_inodes.h
index 71881a2b6f78..5deaddbd7927 100644
--- a/include/linux/anon_inodes.h
+++ b/include/linux/anon_inodes.h
@@ -15,6 +15,10 @@ struct inode;
 struct file *anon_inode_getfile(const char *name,
 				const struct file_operations *fops,
 				void *priv, int flags);
+struct file *anon_inode_getfile_secure(const char *name,
+				       const struct file_operations *fops,
+				       void *priv, int flags,
+				       const struct inode *context_inode);
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
 		     void *priv, int flags);
 int anon_inode_getfd_secure(const char *name,
-- 
cgit v1.2.3


From cdc1404a40461faba23c5a5ad40adcc7eecc1580 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Mon, 1 Feb 2021 19:56:49 -0500
Subject: lsm,io_uring: add LSM hooks to io_uring

A full expalantion of io_uring is beyond the scope of this commit
description, but in summary it is an asynchronous I/O mechanism
which allows for I/O requests and the resulting data to be queued
in memory mapped "rings" which are shared between the kernel and
userspace.  Optionally, io_uring offers the ability for applications
to spawn kernel threads to dequeue I/O requests from the ring and
submit the requests in the kernel, helping to minimize the syscall
overhead.  Rings are accessed in userspace by memory mapping a file
descriptor provided by the io_uring_setup(2), and can be shared
between applications as one might do with any open file descriptor.
Finally, process credentials can be registered with a given ring
and any process with access to that ring can submit I/O requests
using any of the registered credentials.

While the io_uring functionality is widely recognized as offering a
vastly improved, and high performing asynchronous I/O mechanism, its
ability to allow processes to submit I/O requests with credentials
other than its own presents a challenge to LSMs.  When a process
creates a new io_uring ring the ring's credentials are inhertied
from the calling process; if this ring is shared with another
process operating with different credentials there is the potential
to bypass the LSMs security policy.  Similarly, registering
credentials with a given ring allows any process with access to that
ring to submit I/O requests with those credentials.

In an effort to allow LSMs to apply security policy to io_uring I/O
operations, this patch adds two new LSM hooks.  These hooks, in
conjunction with the LSM anonymous inode support previously
submitted, allow an LSM to apply access control policy to the
sharing of io_uring rings as well as any io_uring credential changes
requested by a process.

The new LSM hooks are described below:

 * int security_uring_override_creds(cred)
   Controls if the current task, executing an io_uring operation,
   is allowed to override it's credentials with @cred.  In cases
   where the current task is a user application, the current
   credentials will be those of the user application.  In cases
   where the current task is a kernel thread servicing io_uring
   requests the current credentials will be those of the io_uring
   ring (inherited from the process that created the ring).

 * int security_uring_sqpoll(void)
   Controls if the current task is allowed to create an io_uring
   polling thread (IORING_SETUP_SQPOLL).  Without a SQPOLL thread
   in the kernel processes must submit I/O requests via
   io_uring_enter(2) which allows us to compare any requested
   credential changes against the application making the request.
   With a SQPOLL thread, we can no longer compare requested
   credential changes against the application making the request,
   the comparison is made against the ring's credentials.

Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/io_uring.c                 | 10 ++++++++++
 include/linux/lsm_hook_defs.h |  5 +++++
 include/linux/lsm_hooks.h     | 13 +++++++++++++
 include/linux/security.h      | 16 ++++++++++++++++
 security/security.c           | 12 ++++++++++++
 5 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 56cc9aba0d01..f89d00af3a67 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -80,6 +80,7 @@
 #include <linux/io_uring.h>
 #include <linux/tracehook.h>
 #include <linux/audit.h>
+#include <linux/security.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -7070,6 +7071,11 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		if (!req->creds)
 			return -EINVAL;
 		get_cred(req->creds);
+		ret = security_uring_override_creds(req->creds);
+		if (ret) {
+			put_cred(req->creds);
+			return ret;
+		}
 		req->flags |= REQ_F_CREDS;
 	}
 	state = &ctx->submit_state;
@@ -8566,6 +8572,10 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		struct io_sq_data *sqd;
 		bool attached;
 
+		ret = security_uring_sqpoll();
+		if (ret)
+			return ret;
+
 		sqd = io_get_sq_data(p, &attached);
 		if (IS_ERR(sqd)) {
 			ret = PTR_ERR(sqd);
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 2adeea44c0d5..b3c525353769 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -402,3 +402,8 @@ LSM_HOOK(void, LSM_RET_VOID, perf_event_free, struct perf_event *event)
 LSM_HOOK(int, 0, perf_event_read, struct perf_event *event)
 LSM_HOOK(int, 0, perf_event_write, struct perf_event *event)
 #endif /* CONFIG_PERF_EVENTS */
+
+#ifdef CONFIG_IO_URING
+LSM_HOOK(int, 0, uring_override_creds, const struct cred *new)
+LSM_HOOK(int, 0, uring_sqpoll, void)
+#endif /* CONFIG_IO_URING */
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 5c4c5c0602cb..0eb0ae95c4c4 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1557,6 +1557,19 @@
  * 	Read perf_event security info if allowed.
  * @perf_event_write:
  * 	Write perf_event security info if allowed.
+ *
+ * Security hooks for io_uring
+ *
+ * @uring_override_creds:
+ *      Check if the current task, executing an io_uring operation, is allowed
+ *      to override it's credentials with @new.
+ *
+ *      @new: the new creds to use
+ *
+ * @uring_sqpoll:
+ *      Check whether the current task is allowed to spawn a io_uring polling
+ *      thread (IORING_SETUP_SQPOLL).
+ *
  */
 union security_list_options {
 	#define LSM_HOOK(RET, DEFAULT, NAME, ...) RET (*NAME)(__VA_ARGS__);
diff --git a/include/linux/security.h b/include/linux/security.h
index 5b7288521300..7979b9629a42 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -2038,4 +2038,20 @@ static inline int security_perf_event_write(struct perf_event *event)
 #endif /* CONFIG_SECURITY */
 #endif /* CONFIG_PERF_EVENTS */
 
+#ifdef CONFIG_IO_URING
+#ifdef CONFIG_SECURITY
+extern int security_uring_override_creds(const struct cred *new);
+extern int security_uring_sqpoll(void);
+#else
+static inline int security_uring_override_creds(const struct cred *new)
+{
+	return 0;
+}
+static inline int security_uring_sqpoll(void)
+{
+	return 0;
+}
+#endif /* CONFIG_SECURITY */
+#endif /* CONFIG_IO_URING */
+
 #endif /* ! __LINUX_SECURITY_H */
diff --git a/security/security.c b/security/security.c
index 9ffa9e9c5c55..c49a2c0cc1c1 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2625,3 +2625,15 @@ int security_perf_event_write(struct perf_event *event)
 	return call_int_hook(perf_event_write, 0, event);
 }
 #endif /* CONFIG_PERF_EVENTS */
+
+#ifdef CONFIG_IO_URING
+int security_uring_override_creds(const struct cred *new)
+{
+	return call_int_hook(uring_override_creds, 0, new);
+}
+
+int security_uring_sqpoll(void)
+{
+	return call_int_hook(uring_sqpoll, 0);
+}
+#endif /* CONFIG_IO_URING */
-- 
cgit v1.2.3


From e840f42a49925707fca90e6c7a4095118fdb8c4d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 19 Sep 2021 14:09:49 +0100
Subject: KVM: arm64: Fix PMU probe ordering

Russell reported that since 5.13, KVM's probing of the PMU has
started to fail on his HW. As it turns out, there is an implicit
ordering dependency between the architectural PMU probing code and
and KVM's own probing. If, due to probe ordering reasons, KVM probes
before the PMU driver, it will fail to detect the PMU and prevent it
from being advertised to guests as well as the VMM.

Obviously, this is one probing too many, and we should be able to
deal with any ordering.

Add a callback from the PMU code into KVM to advertise the registration
of a host CPU PMU, allowing for any probing order.

Fixes: 5421db1be3b1 ("KVM: arm64: Divorce the perf code from oprofile helpers")
Reported-by: "Russell King (Oracle)" <linux@armlinux.org.uk>
Tested-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/YUYRKVflRtUytzy5@shell.armlinux.org.uk
Cc: stable@vger.kernel.org
---
 arch/arm64/kvm/perf.c        | 3 ---
 arch/arm64/kvm/pmu-emul.c    | 9 ++++++++-
 drivers/perf/arm_pmu.c       | 2 ++
 include/kvm/arm_pmu.h        | 3 ---
 include/linux/perf/arm_pmu.h | 6 ++++++
 5 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c
index f9bb3b14130e..c84fe24b2ea1 100644
--- a/arch/arm64/kvm/perf.c
+++ b/arch/arm64/kvm/perf.c
@@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 
 int kvm_perf_init(void)
 {
-	if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled())
-		static_branch_enable(&kvm_arm_pmu_available);
-
 	return perf_register_guest_info_callbacks(&kvm_guest_cbs);
 }
 
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index f5065f23b413..2af3c37445e0 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -740,7 +740,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 	kvm_pmu_create_perf_event(vcpu, select_idx);
 }
 
-int kvm_pmu_probe_pmuver(void)
+void kvm_host_pmu_init(struct arm_pmu *pmu)
+{
+	if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF &&
+	    !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled())
+		static_branch_enable(&kvm_arm_pmu_available);
+}
+
+static int kvm_pmu_probe_pmuver(void)
 {
 	struct perf_event_attr attr = { };
 	struct perf_event *event;
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 3cbc3baf087f..295cc7952d0e 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -952,6 +952,8 @@ int armpmu_register(struct arm_pmu *pmu)
 		pmu->name, pmu->num_events,
 		has_nmi ? ", using NMIs" : "");
 
+	kvm_host_pmu_init(pmu);
+
 	return 0;
 
 out_destroy:
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 864b9997efb2..90f21898aad8 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu,
 			    struct kvm_device_attr *attr);
 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu);
-int kvm_pmu_probe_pmuver(void);
 #else
 struct kvm_pmu {
 };
@@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 	return 0;
 }
 
-static inline int kvm_pmu_probe_pmuver(void) { return 0xf; }
-
 #endif
 
 #endif
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 505480217cf1..2512e2f9cd4e 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
 static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
 #endif
 
+#ifdef CONFIG_KVM
+void kvm_host_pmu_init(struct arm_pmu *pmu);
+#else
+#define kvm_host_pmu_init(x)	do { } while(0)
+#endif
+
 /* Internal functions only for core arm_pmu code */
 struct arm_pmu *armpmu_alloc(void);
 struct arm_pmu *armpmu_alloc_atomic(void);
-- 
cgit v1.2.3


From f279294b329363eb6ada568e494d609ef78e3e8e Mon Sep 17 00:00:00 2001
From: Chunguang Xu <brookxu@tencent.com>
Date: Fri, 17 Sep 2021 20:44:14 +0800
Subject: misc_cgroup: introduce misc.events to count failures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce misc.events to make it easier for us to understand
the pressure of resources. Currently only the 'max' event is
implemented, which indicates the times the resource is about
to exceeds the max limit.

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Reviewed-by: Vipin Sharma <vipinsh@google.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/misc_cgroup.h |  5 +++++
 kernel/cgroup/misc.c        | 24 ++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index da2367e2ac1e..091f2d2a1aec 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -36,6 +36,7 @@ struct misc_cg;
 struct misc_res {
 	unsigned long max;
 	atomic_long_t usage;
+	atomic_long_t events;
 	bool failed;
 };
 
@@ -46,6 +47,10 @@ struct misc_res {
  */
 struct misc_cg {
 	struct cgroup_subsys_state css;
+
+	/* misc.events */
+	struct cgroup_file events_file;
+
 	struct misc_res res[MISC_CG_RES_TYPES];
 };
 
diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c
index ec02d963cad1..4b2b49267384 100644
--- a/kernel/cgroup/misc.c
+++ b/kernel/cgroup/misc.c
@@ -171,6 +171,11 @@ int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg,
 	return 0;
 
 err_charge:
+	for (j = i; j; j = parent_misc(j)) {
+		atomic_long_inc(&j->res[type].events);
+		cgroup_file_notify(&j->events_file);
+	}
+
 	for (j = cg; j != i; j = parent_misc(j))
 		misc_cg_cancel_charge(type, j, amount);
 	misc_cg_cancel_charge(type, i, amount);
@@ -335,6 +340,19 @@ static int misc_cg_capacity_show(struct seq_file *sf, void *v)
 	return 0;
 }
 
+static int misc_events_show(struct seq_file *sf, void *v)
+{
+	struct misc_cg *cg = css_misc(seq_css(sf));
+	unsigned long events, i;
+
+	for (i = 0; i < MISC_CG_RES_TYPES; i++) {
+		events = atomic_long_read(&cg->res[i].events);
+		if (READ_ONCE(misc_res_capacity[i]) || events)
+			seq_printf(sf, "%s.max %lu\n", misc_res_name[i], events);
+	}
+	return 0;
+}
+
 /* Misc cgroup interface files */
 static struct cftype misc_cg_files[] = {
 	{
@@ -353,6 +371,12 @@ static struct cftype misc_cg_files[] = {
 		.seq_show = misc_cg_capacity_show,
 		.flags = CFTYPE_ONLY_ON_ROOT,
 	},
+	{
+		.name = "events",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.file_offset = offsetof(struct misc_cg, events_file),
+		.seq_show = misc_events_show,
+	},
 	{}
 };
 
-- 
cgit v1.2.3


From b03357528fd9516600ff358ab5d4b887b8cb80e8 Mon Sep 17 00:00:00 2001
From: Chunguang Xu <brookxu@tencent.com>
Date: Fri, 17 Sep 2021 20:44:15 +0800
Subject: misc_cgroup: remove error log to avoid log flood
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In scenarios where containers are frequently created and deleted,
a large number of error logs maybe generated. The logs only show
which node is about to go over the max limit, not the node which
resource request failed. As misc.events has provided relevant
information, maybe we can remove this log.

Signed-off-by: Chunguang Xu <brookxu@tencent.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/misc_cgroup.h | 1 -
 kernel/cgroup/misc.c        | 7 -------
 2 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/misc_cgroup.h b/include/linux/misc_cgroup.h
index 091f2d2a1aec..c238207d1615 100644
--- a/include/linux/misc_cgroup.h
+++ b/include/linux/misc_cgroup.h
@@ -37,7 +37,6 @@ struct misc_res {
 	unsigned long max;
 	atomic_long_t usage;
 	atomic_long_t events;
-	bool failed;
 };
 
 /**
diff --git a/kernel/cgroup/misc.c b/kernel/cgroup/misc.c
index 4b2b49267384..fe3e8a0eb7ed 100644
--- a/kernel/cgroup/misc.c
+++ b/kernel/cgroup/misc.c
@@ -157,13 +157,6 @@ int misc_cg_try_charge(enum misc_res_type type, struct misc_cg *cg,
 		new_usage = atomic_long_add_return(amount, &res->usage);
 		if (new_usage > READ_ONCE(res->max) ||
 		    new_usage > READ_ONCE(misc_res_capacity[type])) {
-			if (!res->failed) {
-				pr_info("cgroup: charge rejected by the misc controller for %s resource in ",
-					misc_res_name[type]);
-				pr_cont_cgroup_path(i->css.cgroup);
-				pr_cont("\n");
-				res->failed = true;
-			}
 			ret = -EBUSY;
 			goto err_charge;
 		}
-- 
cgit v1.2.3


From d4ffd5df9d18031b6a53f934388726775b4452d3 Mon Sep 17 00:00:00 2001
From: Jiashuo Liang <liangjs@pku.edu.cn>
Date: Fri, 30 Jul 2021 11:01:52 +0800
Subject: x86/fault: Fix wrong signal when vsyscall fails with pkey

The function __bad_area_nosemaphore() calls kernelmode_fixup_or_oops()
with the parameter @signal being actually @pkey, which will send a
signal numbered with the argument in @pkey.

This bug can be triggered when the kernel fails to access user-given
memory pages that are protected by a pkey, so it can go down the
do_user_addr_fault() path and pass the !user_mode() check in
__bad_area_nosemaphore().

Most cases will simply run the kernel fixup code to make an -EFAULT. But
when another condition current->thread.sig_on_uaccess_err is met, which
is only used to emulate vsyscall, the kernel will generate the wrong
signal.

Add a new parameter @pkey to kernelmode_fixup_or_oops() to fix this.

 [ bp: Massage commit message, fix build error as reported by the 0day
   bot: https://lkml.kernel.org/r/202109202245.APvuT8BX-lkp@intel.com ]

Fixes: 5042d40a264c ("x86/fault: Bypass no_context() for implicit kernel faults from usermode")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Jiashuo Liang <liangjs@pku.edu.cn>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lkml.kernel.org/r/20210730030152.249106-1-liangjs@pku.edu.cn
---
 arch/x86/include/asm/pkeys.h |  2 --
 arch/x86/mm/fault.c          | 26 ++++++++++++++++++--------
 include/linux/pkeys.h        |  2 ++
 3 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index 5c7bcaa79623..1d5f14aff5f6 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -2,8 +2,6 @@
 #ifndef _ASM_X86_PKEYS_H
 #define _ASM_X86_PKEYS_H
 
-#define ARCH_DEFAULT_PKEY	0
-
 /*
  * If more than 16 keys are ever supported, a thorough audit
  * will be necessary to ensure that the types that store key
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b2eefdefc108..84a2c8c4af73 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -710,7 +710,8 @@ oops:
 
 static noinline void
 kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
-			 unsigned long address, int signal, int si_code)
+			 unsigned long address, int signal, int si_code,
+			 u32 pkey)
 {
 	WARN_ON_ONCE(user_mode(regs));
 
@@ -735,8 +736,12 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
 
 			set_signal_archinfo(address, error_code);
 
-			/* XXX: hwpoison faults will set the wrong code. */
-			force_sig_fault(signal, si_code, (void __user *)address);
+			if (si_code == SEGV_PKUERR) {
+				force_sig_pkuerr((void __user *)address, pkey);
+			} else {
+				/* XXX: hwpoison faults will set the wrong code. */
+				force_sig_fault(signal, si_code, (void __user *)address);
+			}
 		}
 
 		/*
@@ -798,7 +803,8 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 
 	if (!user_mode(regs)) {
-		kernelmode_fixup_or_oops(regs, error_code, address, pkey, si_code);
+		kernelmode_fixup_or_oops(regs, error_code, address,
+					 SIGSEGV, si_code, pkey);
 		return;
 	}
 
@@ -930,7 +936,8 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 {
 	/* Kernel mode? Handle exceptions or die: */
 	if (!user_mode(regs)) {
-		kernelmode_fixup_or_oops(regs, error_code, address, SIGBUS, BUS_ADRERR);
+		kernelmode_fixup_or_oops(regs, error_code, address,
+					 SIGBUS, BUS_ADRERR, ARCH_DEFAULT_PKEY);
 		return;
 	}
 
@@ -1396,7 +1403,8 @@ good_area:
 		 */
 		if (!user_mode(regs))
 			kernelmode_fixup_or_oops(regs, error_code, address,
-						 SIGBUS, BUS_ADRERR);
+						 SIGBUS, BUS_ADRERR,
+						 ARCH_DEFAULT_PKEY);
 		return;
 	}
 
@@ -1416,7 +1424,8 @@ good_area:
 		return;
 
 	if (fatal_signal_pending(current) && !user_mode(regs)) {
-		kernelmode_fixup_or_oops(regs, error_code, address, 0, 0);
+		kernelmode_fixup_or_oops(regs, error_code, address,
+					 0, 0, ARCH_DEFAULT_PKEY);
 		return;
 	}
 
@@ -1424,7 +1433,8 @@ good_area:
 		/* Kernel mode? Handle exceptions or die: */
 		if (!user_mode(regs)) {
 			kernelmode_fixup_or_oops(regs, error_code, address,
-						 SIGSEGV, SEGV_MAPERR);
+						 SIGSEGV, SEGV_MAPERR,
+						 ARCH_DEFAULT_PKEY);
 			return;
 		}
 
diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h
index 6beb26b7151d..86be8bf27b41 100644
--- a/include/linux/pkeys.h
+++ b/include/linux/pkeys.h
@@ -4,6 +4,8 @@
 
 #include <linux/mm.h>
 
+#define ARCH_DEFAULT_PKEY	0
+
 #ifdef CONFIG_ARCH_HAS_PKEYS
 #include <asm/pkeys.h>
 #else /* ! CONFIG_ARCH_HAS_PKEYS */
-- 
cgit v1.2.3


From 4373b3dc922038e8924f648506f6556f2afa7e77 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Thu, 9 Sep 2021 11:45:13 -0700
Subject: fscrypt: remove fscrypt_operations::max_namelen

The max_namelen field is unnecessary, as it is set to 255 (NAME_MAX) on
all filesystems that support fscrypt (or plan to support fscrypt).  For
simplicity, just use NAME_MAX directly instead.

Link: https://lore.kernel.org/r/20210909184513.139281-1-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 fs/crypto/fname.c       | 3 +--
 fs/ext4/super.c         | 1 -
 fs/f2fs/super.c         | 1 -
 fs/ubifs/crypto.c       | 1 -
 include/linux/fscrypt.h | 3 ---
 5 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index eb538c28df94..a9be4bc74a94 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -429,8 +429,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
 
 	if (fscrypt_has_encryption_key(dir)) {
 		if (!fscrypt_fname_encrypted_size(&dir->i_crypt_info->ci_policy,
-						  iname->len,
-						  dir->i_sb->s_cop->max_namelen,
+						  iname->len, NAME_MAX,
 						  &fname->crypto_buf.len))
 			return -ENAMETOOLONG;
 		fname->crypto_buf.name = kmalloc(fname->crypto_buf.len,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 0775950ee84e..f37e64f72b5d 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1566,7 +1566,6 @@ static const struct fscrypt_operations ext4_cryptops = {
 	.set_context		= ext4_set_context,
 	.get_dummy_policy	= ext4_get_dummy_policy,
 	.empty_dir		= ext4_empty_dir,
-	.max_namelen		= EXT4_NAME_LEN,
 	.has_stable_inodes	= ext4_has_stable_inodes,
 	.get_ino_and_lblk_bits	= ext4_get_ino_and_lblk_bits,
 };
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 78ebc306ee2b..cf049a042482 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2976,7 +2976,6 @@ static const struct fscrypt_operations f2fs_cryptops = {
 	.set_context		= f2fs_set_context,
 	.get_dummy_policy	= f2fs_get_dummy_policy,
 	.empty_dir		= f2fs_empty_dir,
-	.max_namelen		= F2FS_NAME_LEN,
 	.has_stable_inodes	= f2fs_has_stable_inodes,
 	.get_ino_and_lblk_bits	= f2fs_get_ino_and_lblk_bits,
 	.get_num_devices	= f2fs_get_num_devices,
diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c
index 22be7aeb96c4..c57b46a352d8 100644
--- a/fs/ubifs/crypto.c
+++ b/fs/ubifs/crypto.c
@@ -82,5 +82,4 @@ const struct fscrypt_operations ubifs_crypt_operations = {
 	.get_context		= ubifs_crypt_get_context,
 	.set_context		= ubifs_crypt_set_context,
 	.empty_dir		= ubifs_crypt_empty_dir,
-	.max_namelen		= UBIFS_MAX_NLEN,
 };
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index e912ed9141d9..91ea9477e9bd 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -118,9 +118,6 @@ struct fscrypt_operations {
 	 */
 	bool (*empty_dir)(struct inode *inode);
 
-	/* The filesystem's maximum ciphertext filename length, in bytes */
-	unsigned int max_namelen;
-
 	/*
 	 * Check whether the filesystem's inode numbers and UUID are stable,
 	 * meaning that they will never be changed even by offline operations
-- 
cgit v1.2.3


From 9d881361206ebcf6285c2ec2ef275aff80875347 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 21 Sep 2021 12:42:25 +0300
Subject: bus: ti-sysc: Add quirk handling for reinit on context lost

Some interconnect target modules such as otg and gpmc on am335x need a
re-init after resume. As we also have PM runtime cases where the context
may be lost, let's handle these all with cpu_pm.

For the am335x resume path, we already have cpu_pm_resume() call
cpu_pm_cluster_exit().

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 108 ++++++++++++++++++++++++++++++++--
 include/linux/platform_data/ti-sysc.h |   1 +
 2 files changed, 103 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 9e958343a719..a73c707d14c3 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -6,6 +6,7 @@
 #include <linux/io.h>
 #include <linux/clk.h>
 #include <linux/clkdev.h>
+#include <linux/cpu_pm.h>
 #include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/module.h>
@@ -52,11 +53,18 @@ struct sysc_address {
 	struct list_head node;
 };
 
+struct sysc_module {
+	struct sysc *ddata;
+	struct list_head node;
+};
+
 struct sysc_soc_info {
 	unsigned long general_purpose:1;
 	enum sysc_soc soc;
-	struct mutex list_lock;			/* disabled modules list lock */
+	struct mutex list_lock;	/* disabled and restored modules list lock */
 	struct list_head disabled_modules;
+	struct list_head restored_modules;
+	struct notifier_block nb;
 };
 
 enum sysc_clocks {
@@ -2477,6 +2485,79 @@ static struct dev_pm_domain sysc_child_pm_domain = {
 	}
 };
 
+/* Caller needs to take list_lock if ever used outside of cpu_pm */
+static void sysc_reinit_modules(struct sysc_soc_info *soc)
+{
+	struct sysc_module *module;
+	struct list_head *pos;
+	struct sysc *ddata;
+	int error = 0;
+
+	list_for_each(pos, &sysc_soc->restored_modules) {
+		module = list_entry(pos, struct sysc_module, node);
+		ddata = module->ddata;
+		error = sysc_reinit_module(ddata, ddata->enabled);
+	}
+}
+
+/**
+ * sysc_context_notifier - optionally reset and restore module after idle
+ * @nb: notifier block
+ * @cmd: unused
+ * @v: unused
+ *
+ * Some interconnect target modules need to be restored, or reset and restored
+ * on CPU_PM CPU_PM_CLUSTER_EXIT notifier. This is needed at least for am335x
+ * OTG and GPMC target modules even if the modules are unused.
+ */
+static int sysc_context_notifier(struct notifier_block *nb, unsigned long cmd,
+				 void *v)
+{
+	struct sysc_soc_info *soc;
+
+	soc = container_of(nb, struct sysc_soc_info, nb);
+
+	switch (cmd) {
+	case CPU_CLUSTER_PM_ENTER:
+		break;
+	case CPU_CLUSTER_PM_ENTER_FAILED:	/* No need to restore context */
+		break;
+	case CPU_CLUSTER_PM_EXIT:
+		sysc_reinit_modules(soc);
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+/**
+ * sysc_add_restored - optionally add reset and restore quirk hanlling
+ * @ddata: device data
+ */
+static void sysc_add_restored(struct sysc *ddata)
+{
+	struct sysc_module *restored_module;
+
+	restored_module = kzalloc(sizeof(*restored_module), GFP_KERNEL);
+	if (!restored_module)
+		return;
+
+	restored_module->ddata = ddata;
+
+	mutex_lock(&sysc_soc->list_lock);
+
+	list_add(&restored_module->node, &sysc_soc->restored_modules);
+
+	if (sysc_soc->nb.notifier_call)
+		goto out_unlock;
+
+	sysc_soc->nb.notifier_call = sysc_context_notifier;
+	cpu_pm_register_notifier(&sysc_soc->nb);
+
+out_unlock:
+	mutex_unlock(&sysc_soc->list_lock);
+}
+
 /**
  * sysc_legacy_idle_quirk - handle children in omap_device compatible way
  * @ddata: device driver data
@@ -2976,12 +3057,14 @@ static int sysc_add_disabled(unsigned long base)
 }
 
 /*
- * One time init to detect the booted SoC and disable unavailable features.
+ * One time init to detect the booted SoC, disable unavailable features
+ * and initialize list for optional cpu_pm notifier.
+ *
  * Note that we initialize static data shared across all ti-sysc instances
  * so ddata is only used for SoC type. This can be called from module_init
  * once we no longer need to rely on platform data.
  */
-static int sysc_init_soc(struct sysc *ddata)
+static int sysc_init_static_data(struct sysc *ddata)
 {
 	const struct soc_device_attribute *match;
 	struct ti_sysc_platform_data *pdata;
@@ -2997,6 +3080,7 @@ static int sysc_init_soc(struct sysc *ddata)
 
 	mutex_init(&sysc_soc->list_lock);
 	INIT_LIST_HEAD(&sysc_soc->disabled_modules);
+	INIT_LIST_HEAD(&sysc_soc->restored_modules);
 	sysc_soc->general_purpose = true;
 
 	pdata = dev_get_platdata(ddata->dev);
@@ -3060,15 +3144,24 @@ static int sysc_init_soc(struct sysc *ddata)
 	return 0;
 }
 
-static void sysc_cleanup_soc(void)
+static void sysc_cleanup_static_data(void)
 {
+	struct sysc_module *restored_module;
 	struct sysc_address *disabled_module;
 	struct list_head *pos, *tmp;
 
 	if (!sysc_soc)
 		return;
 
+	if (sysc_soc->nb.notifier_call)
+		cpu_pm_unregister_notifier(&sysc_soc->nb);
+
 	mutex_lock(&sysc_soc->list_lock);
+	list_for_each_safe(pos, tmp, &sysc_soc->restored_modules) {
+		restored_module = list_entry(pos, struct sysc_module, node);
+		list_del(pos);
+		kfree(restored_module);
+	}
 	list_for_each_safe(pos, tmp, &sysc_soc->disabled_modules) {
 		disabled_module = list_entry(pos, struct sysc_address, node);
 		list_del(pos);
@@ -3136,7 +3229,7 @@ static int sysc_probe(struct platform_device *pdev)
 	ddata->dev = &pdev->dev;
 	platform_set_drvdata(pdev, ddata);
 
-	error = sysc_init_soc(ddata);
+	error = sysc_init_static_data(ddata);
 	if (error)
 		return error;
 
@@ -3234,6 +3327,9 @@ static int sysc_probe(struct platform_device *pdev)
 		pm_runtime_put(&pdev->dev);
 	}
 
+	if (ddata->cfg.quirks & SYSC_QUIRK_REINIT_ON_CTX_LOST)
+		sysc_add_restored(ddata);
+
 	return 0;
 
 err:
@@ -3315,7 +3411,7 @@ static void __exit sysc_exit(void)
 {
 	bus_unregister_notifier(&platform_bus_type, &sysc_nb);
 	platform_driver_unregister(&sysc_driver);
-	sysc_cleanup_soc();
+	sysc_cleanup_static_data();
 }
 module_exit(sysc_exit);
 
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 9837fb011f2f..989aa30c598d 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -50,6 +50,7 @@ struct sysc_regbits {
 	s8 emufree_shift;
 };
 
+#define SYSC_QUIRK_REINIT_ON_CTX_LOST	BIT(28)
 #define SYSC_QUIRK_REINIT_ON_RESUME	BIT(27)
 #define SYSC_QUIRK_GPMC_DEBUG		BIT(26)
 #define SYSC_MODULE_QUIRK_ENA_RESETDONE	BIT(25)
-- 
cgit v1.2.3


From 6a52bc2b81fa06b7ab98472c7d80644e8e071af6 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 21 Sep 2021 12:42:25 +0300
Subject: bus: ti-sysc: Add quirk handling for reset on re-init

At least am335x gpmc module needs a reset in addition to re-init on resume.
Let's add a quirk handling for reset on re-init.

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 10 ++++++++++
 include/linux/platform_data/ti-sysc.h |  1 +
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index a73c707d14c3..cacdc4e301fb 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -157,6 +157,7 @@ struct sysc {
 
 static void sysc_parse_dts_quirks(struct sysc *ddata, struct device_node *np,
 				  bool is_child);
+static int sysc_reset(struct sysc *ddata);
 
 static void sysc_write(struct sysc *ddata, int offset, u32 value)
 {
@@ -1440,6 +1441,15 @@ static int sysc_reinit_module(struct sysc *ddata, bool leave_enabled)
 	if (error)
 		dev_warn(dev, "reinit resume failed: %i\n", error);
 
+	/* Some modules like am335x gpmc need reset and restore of sysconfig */
+	if (ddata->cfg.quirks & SYSC_QUIRK_RESET_ON_CTX_LOST) {
+		error = sysc_reset(ddata);
+		if (error)
+			dev_warn(dev, "reinit reset failed: %i\n", error);
+
+		sysc_write_sysconfig(ddata, ddata->sysconfig);
+	}
+
 	if (leave_enabled)
 		return error;
 
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 989aa30c598d..f11244f5c0b6 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -50,6 +50,7 @@ struct sysc_regbits {
 	s8 emufree_shift;
 };
 
+#define SYSC_QUIRK_RESET_ON_CTX_LOST	BIT(29)
 #define SYSC_QUIRK_REINIT_ON_CTX_LOST	BIT(28)
 #define SYSC_QUIRK_REINIT_ON_RESUME	BIT(27)
 #define SYSC_QUIRK_GPMC_DEBUG		BIT(26)
-- 
cgit v1.2.3


From 5c99fa737c693a50419606d8f8266974a3521b32 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Tue, 21 Sep 2021 12:42:25 +0300
Subject: bus: ti-sysc: Handle otg force idle quirk

Let's add handling the otg force idle quirk for the old omap2430 glue layer
used up to omap4 as the musb driver quirk only works if the driver is
loaded. Unlike with the am335x glue layer, looks like we don't need the
quirk handling for SYSC_QUIRK_REINIT_ON_CTX_LOST.

Eventually when all the musb using SoCs are booting with device tree based
configuration, we can just remove the related quirk handling from the
musb driver.

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 24 +++++++++++++++++++++++-
 include/linux/platform_data/ti-sysc.h |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 88524faa61cc..fc6968dc7397 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -1610,7 +1610,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, -ENODEV, 0x50700101, 0xffffffff,
 		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
 	SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050,
-		   0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY),
+		   0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY |
+		   SYSC_MODULE_QUIRK_OTG),
 	SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -ENODEV, 0x4ea2080d, 0xffffffff,
 		   SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY |
 		   SYSC_QUIRK_REINIT_ON_CTX_LOST),
@@ -1969,6 +1970,22 @@ static void sysc_module_lock_quirk_rtc(struct sysc *ddata)
 	sysc_quirk_rtc(ddata, true);
 }
 
+/* OTG omap2430 glue layer up to omap4 needs OTG_FORCESTDBY configured */
+static void sysc_module_enable_quirk_otg(struct sysc *ddata)
+{
+	int offset = 0x414;	/* OTG_FORCESTDBY */
+
+	sysc_write(ddata, offset, 0);
+}
+
+static void sysc_module_disable_quirk_otg(struct sysc *ddata)
+{
+	int offset = 0x414;	/* OTG_FORCESTDBY */
+	u32 val = BIT(0);	/* ENABLEFORCE */
+
+	sysc_write(ddata, offset, val);
+}
+
 /* 36xx SGX needs a quirk for to bypass OCP IPG interrupt logic */
 static void sysc_module_enable_quirk_sgx(struct sysc *ddata)
 {
@@ -2051,6 +2068,11 @@ static void sysc_init_module_quirks(struct sysc *ddata)
 		return;
 	}
 
+	if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_OTG) {
+		ddata->module_enable_quirk = sysc_module_enable_quirk_otg;
+		ddata->module_disable_quirk = sysc_module_disable_quirk_otg;
+	}
+
 	if (ddata->cfg.quirks & SYSC_MODULE_QUIRK_SGX)
 		ddata->module_enable_quirk = sysc_module_enable_quirk_sgx;
 
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index f11244f5c0b6..eb556f988d57 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -50,6 +50,7 @@ struct sysc_regbits {
 	s8 emufree_shift;
 };
 
+#define SYSC_MODULE_QUIRK_OTG		BIT(30)
 #define SYSC_QUIRK_RESET_ON_CTX_LOST	BIT(29)
 #define SYSC_QUIRK_REINIT_ON_CTX_LOST	BIT(28)
 #define SYSC_QUIRK_REINIT_ON_RESUME	BIT(27)
-- 
cgit v1.2.3


From d6da08ed1425180b8d54c828ec06d247fd915d60 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 20 Sep 2021 14:54:14 -0700
Subject: net: phy: broadcom: Add IDDQ-SR mode

Add support for putting the PHY into IDDQ Soft Recovery mode by setting
the TOP_MISC register bits accordingly. This requires us to implement a
custom bcm54xx_suspend() routine which diverges from genphy_suspend() in
order to configure the PHY to enter IDDQ with software recovery as well
as avoid doing a read/modify/write on the BMCR register.

Doing a read/modify/write on the BMCR register means that the
auto-negotation bit may remain which interferes with the ability to put
the PHY into IDDQ-SR mode. We do software reset upon suspend in order to
put the PHY back into its state prior to suspend as recommended by the
datasheet.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/brcmphy.h    |  8 ++++++++
 2 files changed, 59 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index add0c4e33425..f5868a0dee4b 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -392,10 +392,50 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 	return 0;
 }
 
+static int bcm54xx_iddq_set(struct phy_device *phydev, bool enable)
+{
+	int ret = 0;
+
+	if (!(phydev->dev_flags & PHY_BRCM_IDDQ_SUSPEND))
+		return ret;
+
+	ret = bcm_phy_read_exp(phydev, BCM54XX_TOP_MISC_IDDQ_CTRL);
+	if (ret < 0)
+		goto out;
+
+	if (enable)
+		ret |= BCM54XX_TOP_MISC_IDDQ_SR | BCM54XX_TOP_MISC_IDDQ_LP;
+	else
+		ret &= ~(BCM54XX_TOP_MISC_IDDQ_SR | BCM54XX_TOP_MISC_IDDQ_LP);
+
+	ret = bcm_phy_write_exp(phydev, BCM54XX_TOP_MISC_IDDQ_CTRL, ret);
+out:
+	return ret;
+}
+
+static int bcm54xx_suspend(struct phy_device *phydev)
+{
+	int ret;
+
+	/* We cannot use a read/modify/write here otherwise the PHY gets into
+	 * a bad state where its LEDs keep flashing, thus defeating the purpose
+	 * of low power mode.
+	 */
+	ret = phy_write(phydev, MII_BMCR, BMCR_PDOWN);
+	if (ret < 0)
+		return ret;
+
+	return bcm54xx_iddq_set(phydev, true);
+}
+
 static int bcm54xx_resume(struct phy_device *phydev)
 {
 	int ret;
 
+	ret = bcm54xx_iddq_set(phydev, false);
+	if (ret < 0)
+		return ret;
+
 	/* Writes to register other than BMCR would be ignored
 	 * unless we clear the PDOWN bit first
 	 */
@@ -408,6 +448,15 @@ static int bcm54xx_resume(struct phy_device *phydev)
 	 */
 	fsleep(40);
 
+	/* Issue a soft reset after clearing the power down bit
+	 * and before doing any other configuration.
+	 */
+	if (phydev->dev_flags & PHY_BRCM_IDDQ_SUSPEND) {
+		ret = genphy_soft_reset(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
 	return bcm54xx_config_init(phydev);
 }
 
@@ -772,6 +821,8 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_intr	= bcm_phy_config_intr,
 	.handle_interrupt = bcm_phy_handle_interrupt,
 	.link_change_notify	= bcm54xx_link_change_notify,
+	.suspend	= bcm54xx_suspend,
+	.resume		= bcm54xx_resume,
 }, {
 	.phy_id		= PHY_ID_BCM5461,
 	.phy_id_mask	= 0xfffffff0,
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 07e1dfadbbdf..b119d6819d6c 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -67,6 +67,7 @@
 #define PHY_BRCM_CLEAR_RGMII_MODE	0x00000004
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00000008
 #define PHY_BRCM_EN_MASTER_MODE		0x00000010
+#define PHY_BRCM_IDDQ_SUSPEND		0x000000220
 
 /* Broadcom BCM7xxx specific workarounds */
 #define PHY_BRCM_7XXX_REV(x)		(((x) >> 8) & 0xff)
@@ -84,6 +85,7 @@
 
 #define MII_BCM54XX_EXP_DATA	0x15	/* Expansion register data */
 #define MII_BCM54XX_EXP_SEL	0x17	/* Expansion register select */
+#define MII_BCM54XX_EXP_SEL_TOP	0x0d00	/* TOP_MISC expansion register select */
 #define MII_BCM54XX_EXP_SEL_SSD	0x0e00	/* Secondary SerDes select */
 #define MII_BCM54XX_EXP_SEL_ER	0x0f00	/* Expansion register select */
 #define MII_BCM54XX_EXP_SEL_ETC	0x0d00	/* Expansion register spare + 2k mem */
@@ -243,6 +245,12 @@
 #define MII_BCM54XX_EXP_EXP97			0x0f97
 #define  MII_BCM54XX_EXP_EXP97_MYST		0x0c0c
 
+/* Top-MISC expansion registers */
+#define BCM54XX_TOP_MISC_IDDQ_CTRL		(MII_BCM54XX_EXP_SEL_TOP + 0x06)
+#define BCM54XX_TOP_MISC_IDDQ_LP		(1 << 0)
+#define BCM54XX_TOP_MISC_IDDQ_SD		(1 << 2)
+#define BCM54XX_TOP_MISC_IDDQ_SR		(1 << 3)
+
 /*
  * BCM5482: Secondary SerDes registers
  */
-- 
cgit v1.2.3


From c86a2d9058c5a4a05d20ef89e699b7a6b2c89da6 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Fri, 17 Sep 2021 00:27:05 +0200
Subject: cpumask: Omit terminating null byte in
 cpumap_print_{list,bitmask}_to_buf

The changes in the patch series [1] introduced a terminating null byte
when reading from cpulist or cpumap sysfs files, for example:

  $ xxd /sys/devices/system/node/node0/cpulist
  00000000: 302d 310a 00                             0-1..

Before this change, the output looked as follows:

  $ xxd /sys/devices/system/node/node0/cpulist
  00000000: 302d 310a                                0-1.

Fix this regression by excluding the terminating null byte from the
returned length in cpumap_print_list_to_buf and
cpumap_print_bitmask_to_buf.

[1] https://lore.kernel.org/all/20210806110251.560-1-song.bao.hua@hisilicon.com/

Fixes: 1fae562983ca ("cpumask: introduce cpumap_print_list/bitmask_to_buf to support large bitmask and list")
Acked-by: Barry Song <song.bao.hua@hisilicon.com>
Acked-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Link: https://lore.kernel.org/r/20210916222705.13554-1-tklauser@distanz.ch
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/cpumask.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 5d4d07a9e1ed..1e7399fc69c0 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -996,14 +996,15 @@ cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
  * cpumask; Typically used by bin_attribute to export cpumask bitmask
  * ABI.
  *
- * Returns the length of how many bytes have been copied.
+ * Returns the length of how many bytes have been copied, excluding
+ * terminating '\0'.
  */
 static inline ssize_t
 cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
 		loff_t off, size_t count)
 {
 	return bitmap_print_bitmask_to_buf(buf, cpumask_bits(mask),
-				   nr_cpu_ids, off, count);
+				   nr_cpu_ids, off, count) - 1;
 }
 
 /**
@@ -1018,7 +1019,7 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
 		loff_t off, size_t count)
 {
 	return bitmap_print_list_to_buf(buf, cpumask_bits(mask),
-				   nr_cpu_ids, off, count);
+				   nr_cpu_ids, off, count) - 1;
 }
 
 #if NR_CPUS <= BITS_PER_LONG
-- 
cgit v1.2.3


From 06dc660e6eb8817c4c379d2ca290ae0b3f77c69f Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran <oohall@gmail.com>
Date: Tue, 14 Sep 2021 01:27:08 +1000
Subject: PCI: Rename pcibios_add_device() to pcibios_device_add()

The general convention for pcibios_* hooks is that they're named after the
corresponding pci_* function they provide a hook for. The exception is
pcibios_add_device() which provides a hook for pci_device_add().

Rename pcibios_add_device() to pcibios_device_add() so it matches
pci_device_add().

Also, remove the export of the microblaze version. The only caller must be
compiled as a built-in so there's no reason for the export.

Link: https://lore.kernel.org/r/20210913152709.48013-1-oohall@gmail.com
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Niklas Schnelle <schnelle@linux.ibm.com>	# s390
---
 arch/microblaze/pci/pci-common.c           | 3 +--
 arch/powerpc/kernel/pci-common.c           | 2 +-
 arch/powerpc/platforms/powernv/pci-sriov.c | 2 +-
 arch/s390/pci/pci.c                        | 2 +-
 arch/sparc/kernel/pci.c                    | 2 +-
 arch/x86/pci/common.c                      | 2 +-
 drivers/pci/pci.c                          | 4 ++--
 drivers/pci/probe.c                        | 4 ++--
 include/linux/pci.h                        | 2 +-
 9 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 557585f1be41..622a4867f9e9 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -587,13 +587,12 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_fixup_resources);
 
-int pcibios_add_device(struct pci_dev *dev)
+int pcibios_device_add(struct pci_dev *dev)
 {
 	dev->irq = of_irq_parse_and_map_pci(dev, 0, 0);
 
 	return 0;
 }
-EXPORT_SYMBOL(pcibios_add_device);
 
 /*
  * Reparent resource children of pr that conflict with res
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index c3573430919d..6749905932f4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1059,7 +1059,7 @@ void pcibios_bus_add_device(struct pci_dev *dev)
 		ppc_md.pcibios_bus_add_device(dev);
 }
 
-int pcibios_add_device(struct pci_dev *dev)
+int pcibios_device_add(struct pci_dev *dev)
 {
 	struct irq_domain *d;
 
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index 28aac933a439..486c2937b159 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -54,7 +54,7 @@
  * to "new_size", calculated above. Implementing this is a convoluted process
  * which requires several hooks in the PCI core:
  *
- * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov().
+ * 1. In pcibios_device_add() we call pnv_pci_ioda_fixup_iov().
  *
  *    At this point the device has been probed and the device's BARs are sized,
  *    but no resource allocations have been done. The SR-IOV BARs are sized
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index e7e6788d75a8..ded3321b7208 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -561,7 +561,7 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 	zdev->has_resources = 0;
 }
 
-int pcibios_add_device(struct pci_dev *pdev)
+int pcibios_device_add(struct pci_dev *pdev)
 {
 	struct zpci_dev *zdev = to_zpci(pdev);
 	struct resource *res;
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 9c2b720bfd20..31b0c1983286 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -1010,7 +1010,7 @@ void pcibios_set_master(struct pci_dev *dev)
 }
 
 #ifdef CONFIG_PCI_IOV
-int pcibios_add_device(struct pci_dev *dev)
+int pcibios_device_add(struct pci_dev *dev)
 {
 	struct pci_dev *pdev;
 
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3507f456fcd0..9e1e6b8d8876 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -632,7 +632,7 @@ static void set_dev_domain_options(struct pci_dev *pdev)
 		pdev->hotplug_user_indicators = 1;
 }
 
-int pcibios_add_device(struct pci_dev *dev)
+int pcibios_device_add(struct pci_dev *dev)
 {
 	struct pci_setup_rom *rom;
 	struct irq_domain *msidom;
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce2ab62b64cf..c63598c1cdd8 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2091,14 +2091,14 @@ void pcim_pin_device(struct pci_dev *pdev)
 EXPORT_SYMBOL(pcim_pin_device);
 
 /*
- * pcibios_add_device - provide arch specific hooks when adding device dev
+ * pcibios_device_add - provide arch specific hooks when adding device dev
  * @dev: the PCI device being added
  *
  * Permits the platform to provide architecture specific functionality when
  * devices are added. This is the default implementation. Architecture
  * implementations can override this.
  */
-int __weak pcibios_add_device(struct pci_dev *dev)
+int __weak pcibios_device_add(struct pci_dev *dev)
 {
 	return 0;
 }
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index d9fc02a71baa..2ba43b6adf31 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2450,7 +2450,7 @@ static struct irq_domain *pci_dev_msi_domain(struct pci_dev *dev)
 	struct irq_domain *d;
 
 	/*
-	 * If a domain has been set through the pcibios_add_device()
+	 * If a domain has been set through the pcibios_device_add()
 	 * callback, then this is the one (platform code knows best).
 	 */
 	d = dev_get_msi_domain(&dev->dev);
@@ -2518,7 +2518,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	list_add_tail(&dev->bus_list, &bus->devices);
 	up_write(&pci_bus_sem);
 
-	ret = pcibios_add_device(dev);
+	ret = pcibios_device_add(dev);
 	WARN_ON(ret < 0);
 
 	/* Set up MSI IRQ domain */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..7e0ce3a4d5a1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2126,7 +2126,7 @@ void pcibios_disable_device(struct pci_dev *dev);
 void pcibios_set_master(struct pci_dev *dev);
 int pcibios_set_pcie_reset_state(struct pci_dev *dev,
 				 enum pcie_reset_state state);
-int pcibios_add_device(struct pci_dev *dev);
+int pcibios_device_add(struct pci_dev *dev);
 void pcibios_release_device(struct pci_dev *dev);
 #ifdef CONFIG_PCI
 void pcibios_penalize_isa_irq(int irq, int active);
-- 
cgit v1.2.3


From d1c6e08e7503649e4a4f3f9e700e2c05300b6379 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 8 Sep 2021 22:11:37 -0700
Subject: libnvdimm/labels: Add uuid helpers

In preparation for CXL labels that move the uuid to a different offset
in the label, add nsl_{ref,get,validate}_uuid(). These helpers use the
proper uuid_t type. That type definition predated the libnvdimm
subsystem, so now is as a good a time as any to convert all the uuid
handling in the subsystem to uuid_t to match the helpers.

Note that the uuid fields in the label data and superblocks is not
replaced per Andy's expectation that uuid_t is a kernel internal type
not to appear in external ABI interfaces. So, in those case
{import,export}_uuid() is used to go between the 2 types.

Also note that this rework uncovered some unnecessary copies for label
comparisons, those are cleaned up with nsl_uuid_equal().

As for the whitespace changes, all new code is clang-format compliant.

Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/163116429748.2460985.15659993454313919977.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/btt.c            | 11 ++---
 drivers/nvdimm/btt_devs.c       | 14 ++++---
 drivers/nvdimm/core.c           | 40 ++----------------
 drivers/nvdimm/label.c          | 34 +++++++---------
 drivers/nvdimm/namespace_devs.c | 90 +++++++++++++++++++++++------------------
 drivers/nvdimm/nd-core.h        |  5 ++-
 drivers/nvdimm/nd.h             | 40 ++++++++++++++++--
 drivers/nvdimm/pfn_devs.c       |  2 +-
 include/linux/nd.h              |  4 +-
 9 files changed, 124 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 92dec4952297..52de60b7adee 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -973,7 +973,7 @@ static int btt_arena_write_layout(struct arena_info *arena)
 	u64 sum;
 	struct btt_sb *super;
 	struct nd_btt *nd_btt = arena->nd_btt;
-	const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+	const uuid_t *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
 
 	ret = btt_map_init(arena);
 	if (ret)
@@ -988,8 +988,8 @@ static int btt_arena_write_layout(struct arena_info *arena)
 		return -ENOMEM;
 
 	strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
-	memcpy(super->uuid, nd_btt->uuid, 16);
-	memcpy(super->parent_uuid, parent_uuid, 16);
+	export_uuid(super->uuid, nd_btt->uuid);
+	export_uuid(super->parent_uuid, parent_uuid);
 	super->flags = cpu_to_le32(arena->flags);
 	super->version_major = cpu_to_le16(arena->version_major);
 	super->version_minor = cpu_to_le16(arena->version_minor);
@@ -1575,7 +1575,8 @@ static void btt_blk_cleanup(struct btt *btt)
  * Pointer to a new struct btt on success, NULL on failure.
  */
 static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
-		u32 lbasize, u8 *uuid, struct nd_region *nd_region)
+			    u32 lbasize, uuid_t *uuid,
+			    struct nd_region *nd_region)
 {
 	int ret;
 	struct btt *btt;
@@ -1694,7 +1695,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
 	}
 	nd_region = to_nd_region(nd_btt->dev.parent);
 	btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
-			nd_region);
+		       nd_region);
 	if (!btt)
 		return -ENOMEM;
 	nd_btt->btt = btt;
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 05feb97e11ce..8b52e5144f08 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -180,8 +180,8 @@ bool is_nd_btt(struct device *dev)
 EXPORT_SYMBOL(is_nd_btt);
 
 static struct device *__nd_btt_create(struct nd_region *nd_region,
-		unsigned long lbasize, u8 *uuid,
-		struct nd_namespace_common *ndns)
+				      unsigned long lbasize, uuid_t *uuid,
+				      struct nd_namespace_common *ndns)
 {
 	struct nd_btt *nd_btt;
 	struct device *dev;
@@ -244,14 +244,16 @@ struct device *nd_btt_create(struct nd_region *nd_region)
  */
 bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
 {
-	const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+	const uuid_t *ns_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+	uuid_t parent_uuid;
 	u64 checksum;
 
 	if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
 		return false;
 
-	if (!guid_is_null((guid_t *)&super->parent_uuid))
-		if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
+	import_uuid(&parent_uuid, super->parent_uuid);
+	if (!uuid_is_null(&parent_uuid))
+		if (!uuid_equal(&parent_uuid, ns_uuid))
 			return false;
 
 	checksum = le64_to_cpu(super->checksum);
@@ -319,7 +321,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
 		return rc;
 
 	nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
-	nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
+	nd_btt->uuid = kmemdup(&btt_sb->uuid, sizeof(uuid_t), GFP_KERNEL);
 	if (!nd_btt->uuid)
 		return -ENOMEM;
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 7de592d7eff4..690152d62bf0 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -206,38 +206,6 @@ struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
 }
 EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
 
-static bool is_uuid_sep(char sep)
-{
-	if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
-		return true;
-	return false;
-}
-
-static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
-		size_t len)
-{
-	const char *str = buf;
-	u8 uuid[16];
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
-			dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n",
-					i, str - buf, str[0],
-					str + 1 - buf, str[1]);
-			return -EINVAL;
-		}
-
-		uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
-		str += 2;
-		if (is_uuid_sep(*str))
-			str++;
-	}
-
-	memcpy(uuid_out, uuid, sizeof(uuid));
-	return 0;
-}
-
 /**
  * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
  * @dev: container device for the uuid property
@@ -248,21 +216,21 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
  * (driver detached)
  * LOCKING: expects nd_device_lock() is held on entry
  */
-int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf,
 		size_t len)
 {
-	u8 uuid[16];
+	uuid_t uuid;
 	int rc;
 
 	if (dev->driver)
 		return -EBUSY;
 
-	rc = nd_uuid_parse(dev, uuid, buf, len);
+	rc = uuid_parse(buf, &uuid);
 	if (rc)
 		return rc;
 
 	kfree(*uuid_out);
-	*uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
+	*uuid_out = kmemdup(&uuid, sizeof(uuid), GFP_KERNEL);
 	if (!(*uuid_out))
 		return -ENOMEM;
 
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 7f473f9db300..e7fdb718ebf0 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -321,7 +321,8 @@ static bool preamble_index(struct nvdimm_drvdata *ndd, int idx,
 	return true;
 }
 
-char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
+char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid,
+		      u32 flags)
 {
 	if (!label_id || !uuid)
 		return NULL;
@@ -400,9 +401,9 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 		struct nvdimm *nvdimm = to_nvdimm(ndd->dev);
 		struct nd_namespace_label *nd_label;
 		struct nd_region *nd_region = NULL;
-		u8 label_uuid[NSLABEL_UUID_LEN];
 		struct nd_label_id label_id;
 		struct resource *res;
+		uuid_t label_uuid;
 		u32 flags;
 
 		nd_label = to_label(ndd, slot);
@@ -410,11 +411,11 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 		if (!slot_valid(ndd, nd_label, slot))
 			continue;
 
-		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+		nsl_get_uuid(ndd, nd_label, &label_uuid);
 		flags = nsl_get_flags(ndd, nd_label);
 		if (test_bit(NDD_NOBLK, &nvdimm->flags))
 			flags &= ~NSLABEL_FLAG_LOCAL;
-		nd_label_gen_id(&label_id, label_uuid, flags);
+		nd_label_gen_id(&label_id, &label_uuid, flags);
 		res = nvdimm_allocate_dpa(ndd, &label_id,
 					  nsl_get_dpa(ndd, nd_label),
 					  nsl_get_rawsize(ndd, nd_label));
@@ -851,7 +852,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 
 	nd_label = to_label(ndd, slot);
 	memset(nd_label, 0, sizeof_namespace_label(ndd));
-	memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
+	nsl_set_uuid(ndd, nd_label, nspm->uuid);
 	nsl_set_name(ndd, nd_label, nspm->alt_name);
 	nsl_set_flags(ndd, nd_label, flags);
 	nsl_set_nlabel(ndd, nd_label, nd_region->ndr_mappings);
@@ -878,9 +879,8 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	list_for_each_entry(label_ent, &nd_mapping->labels, list) {
 		if (!label_ent->label)
 			continue;
-		if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)
-				|| memcmp(nspm->uuid, label_ent->label->uuid,
-					NSLABEL_UUID_LEN) == 0)
+		if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags) ||
+		    nsl_uuid_equal(ndd, label_ent->label, nspm->uuid))
 			reap_victim(nd_mapping, label_ent);
 	}
 
@@ -1005,7 +1005,6 @@ static int __blk_label_update(struct nd_region *nd_region,
 	unsigned long *free, *victim_map = NULL;
 	struct resource *res, **old_res_list;
 	struct nd_label_id label_id;
-	u8 uuid[NSLABEL_UUID_LEN];
 	int min_dpa_idx = 0;
 	LIST_HEAD(list);
 	u32 nslot, slot;
@@ -1043,8 +1042,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 		/* mark unused labels for garbage collection */
 		for_each_clear_bit_le(slot, free, nslot) {
 			nd_label = to_label(ndd, slot);
-			memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
-			if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
+			if (!nsl_uuid_equal(ndd, nd_label, nsblk->uuid))
 				continue;
 			res = to_resource(ndd, nd_label);
 			if (res && is_old_resource(res, old_res_list,
@@ -1113,7 +1111,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 
 		nd_label = to_label(ndd, slot);
 		memset(nd_label, 0, sizeof_namespace_label(ndd));
-		memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
+		nsl_set_uuid(ndd, nd_label, nsblk->uuid);
 		nsl_set_name(ndd, nd_label, nsblk->alt_name);
 		nsl_set_flags(ndd, nd_label, NSLABEL_FLAG_LOCAL);
 
@@ -1161,8 +1159,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 		if (!nd_label)
 			continue;
 		nlabel++;
-		memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
-		if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
+		if (!nsl_uuid_equal(ndd, nd_label, nsblk->uuid))
 			continue;
 		nlabel--;
 		list_move(&label_ent->list, &list);
@@ -1192,8 +1189,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 	}
 	for_each_clear_bit_le(slot, free, nslot) {
 		nd_label = to_label(ndd, slot);
-		memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
-		if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
+		if (!nsl_uuid_equal(ndd, nd_label, nsblk->uuid))
 			continue;
 		res = to_resource(ndd, nd_label);
 		res->flags &= ~DPA_RESOURCE_ADJUSTED;
@@ -1273,12 +1269,11 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
 	return max(num_labels, old_num_labels);
 }
 
-static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
+static int del_labels(struct nd_mapping *nd_mapping, uuid_t *uuid)
 {
 	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
 	struct nd_label_ent *label_ent, *e;
 	struct nd_namespace_index *nsindex;
-	u8 label_uuid[NSLABEL_UUID_LEN];
 	unsigned long *free;
 	LIST_HEAD(list);
 	u32 nslot, slot;
@@ -1298,8 +1293,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 		if (!nd_label)
 			continue;
 		active++;
-		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
-		if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0)
+		if (!nsl_uuid_equal(ndd, nd_label, uuid))
 			continue;
 		active--;
 		slot = to_slot(ndd, nd_label);
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 4cec171c934d..1415d543c3e3 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -51,7 +51,7 @@ static bool is_namespace_io(const struct device *dev);
 
 static int is_uuid_busy(struct device *dev, void *data)
 {
-	u8 *uuid1 = data, *uuid2 = NULL;
+	uuid_t *uuid1 = data, *uuid2 = NULL;
 
 	if (is_namespace_pmem(dev)) {
 		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
@@ -71,7 +71,7 @@ static int is_uuid_busy(struct device *dev, void *data)
 		uuid2 = nd_pfn->uuid;
 	}
 
-	if (uuid2 && memcmp(uuid1, uuid2, NSLABEL_UUID_LEN) == 0)
+	if (uuid2 && uuid_equal(uuid1, uuid2))
 		return -EBUSY;
 
 	return 0;
@@ -89,7 +89,7 @@ static int is_namespace_uuid_busy(struct device *dev, void *data)
  * @dev: any device on a nvdimm_bus
  * @uuid: uuid to check
  */
-bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
+bool nd_is_uuid_unique(struct device *dev, uuid_t *uuid)
 {
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 
@@ -192,12 +192,10 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 }
 EXPORT_SYMBOL(nvdimm_namespace_disk_name);
 
-const u8 *nd_dev_to_uuid(struct device *dev)
+const uuid_t *nd_dev_to_uuid(struct device *dev)
 {
-	static const u8 null_uuid[16];
-
 	if (!dev)
-		return null_uuid;
+		return &uuid_null;
 
 	if (is_namespace_pmem(dev)) {
 		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
@@ -208,7 +206,7 @@ const u8 *nd_dev_to_uuid(struct device *dev)
 
 		return nsblk->uuid;
 	} else
-		return null_uuid;
+		return &uuid_null;
 }
 EXPORT_SYMBOL(nd_dev_to_uuid);
 
@@ -938,7 +936,8 @@ static void nd_namespace_pmem_set_resource(struct nd_region *nd_region,
 	res->end = res->start + size - 1;
 }
 
-static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where)
+static bool uuid_not_set(const uuid_t *uuid, struct device *dev,
+			 const char *where)
 {
 	if (!uuid) {
 		dev_dbg(dev, "%s: uuid not set\n", where);
@@ -957,7 +956,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 	struct nd_label_id label_id;
 	u32 flags = 0, remainder;
 	int rc, i, id = -1;
-	u8 *uuid = NULL;
+	uuid_t *uuid = NULL;
 
 	if (dev->driver || ndns->claim)
 		return -EBUSY;
@@ -1050,7 +1049,7 @@ static ssize_t size_store(struct device *dev,
 {
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	unsigned long long val;
-	u8 **uuid = NULL;
+	uuid_t **uuid = NULL;
 	int rc;
 
 	rc = kstrtoull(buf, 0, &val);
@@ -1147,7 +1146,7 @@ static ssize_t size_show(struct device *dev,
 }
 static DEVICE_ATTR(size, 0444, size_show, size_store);
 
-static u8 *namespace_to_uuid(struct device *dev)
+static uuid_t *namespace_to_uuid(struct device *dev)
 {
 	if (is_namespace_pmem(dev)) {
 		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
@@ -1161,10 +1160,10 @@ static u8 *namespace_to_uuid(struct device *dev)
 		return ERR_PTR(-ENXIO);
 }
 
-static ssize_t uuid_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
 {
-	u8 *uuid = namespace_to_uuid(dev);
+	uuid_t *uuid = namespace_to_uuid(dev);
 
 	if (IS_ERR(uuid))
 		return PTR_ERR(uuid);
@@ -1181,7 +1180,8 @@ static ssize_t uuid_show(struct device *dev,
  * @old_uuid: reference to the uuid storage location in the namespace object
  */
 static int namespace_update_uuid(struct nd_region *nd_region,
-		struct device *dev, u8 *new_uuid, u8 **old_uuid)
+				 struct device *dev, uuid_t *new_uuid,
+				 uuid_t **old_uuid)
 {
 	u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0;
 	struct nd_label_id old_label_id;
@@ -1231,10 +1231,12 @@ static int namespace_update_uuid(struct nd_region *nd_region,
 		list_for_each_entry(label_ent, &nd_mapping->labels, list) {
 			struct nd_namespace_label *nd_label = label_ent->label;
 			struct nd_label_id label_id;
+			uuid_t uuid;
 
 			if (!nd_label)
 				continue;
-			nd_label_gen_id(&label_id, nd_label->uuid,
+			nsl_get_uuid(ndd, nd_label, &uuid);
+			nd_label_gen_id(&label_id, &uuid,
 					nsl_get_flags(ndd, nd_label));
 			if (strcmp(old_label_id.id, label_id.id) == 0)
 				set_bit(ND_LABEL_REAP, &label_ent->flags);
@@ -1251,9 +1253,9 @@ static ssize_t uuid_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct nd_region *nd_region = to_nd_region(dev->parent);
-	u8 *uuid = NULL;
+	uuid_t *uuid = NULL;
+	uuid_t **ns_uuid;
 	ssize_t rc = 0;
-	u8 **ns_uuid;
 
 	if (is_namespace_pmem(dev)) {
 		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
@@ -1378,8 +1380,8 @@ static ssize_t dpa_extents_show(struct device *dev,
 {
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_label_id label_id;
+	uuid_t *uuid = NULL;
 	int count = 0, i;
-	u8 *uuid = NULL;
 	u32 flags = 0;
 
 	nvdimm_bus_lock(dev);
@@ -1831,8 +1833,8 @@ static struct device **create_namespace_io(struct nd_region *nd_region)
 	return devs;
 }
 
-static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
-		u64 cookie, u16 pos)
+static bool has_uuid_at_pos(struct nd_region *nd_region, const uuid_t *uuid,
+			    u64 cookie, u16 pos)
 {
 	struct nd_namespace_label *found = NULL;
 	int i;
@@ -1856,7 +1858,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 			if (!nsl_validate_isetcookie(ndd, nd_label, cookie))
 				continue;
 
-			if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
+			if (!nsl_uuid_equal(ndd, nd_label, uuid))
 				continue;
 
 			if (!nsl_validate_type_guid(ndd, nd_label,
@@ -1881,7 +1883,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 	return found != NULL;
 }
 
-static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
+static int select_pmem_id(struct nd_region *nd_region, const uuid_t *pmem_id)
 {
 	int i;
 
@@ -1900,7 +1902,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
 			nd_label = label_ent->label;
 			if (!nd_label)
 				continue;
-			if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
+			if (nsl_uuid_equal(ndd, nd_label, pmem_id))
 				break;
 			nd_label = NULL;
 		}
@@ -1923,7 +1925,8 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
 			/* pass */;
 		else {
 			dev_dbg(&nd_region->dev, "%s invalid label for %pUb\n",
-					dev_name(ndd->dev), nd_label->uuid);
+				dev_name(ndd->dev),
+				nsl_uuid_raw(ndd, nd_label));
 			return -EINVAL;
 		}
 
@@ -1953,6 +1956,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	resource_size_t size = 0;
 	struct resource *res;
 	struct device *dev;
+	uuid_t uuid;
 	int rc = 0;
 	u16 i;
 
@@ -1963,12 +1967,12 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 
 	if (!nsl_validate_isetcookie(ndd, nd_label, cookie)) {
 		dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
-				nd_label->uuid);
+			nsl_uuid_raw(ndd, nd_label));
 		if (!nsl_validate_isetcookie(ndd, nd_label, altcookie))
 			return ERR_PTR(-EAGAIN);
 
 		dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n",
-				nd_label->uuid);
+			nsl_uuid_raw(ndd, nd_label));
 	}
 
 	nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
@@ -1984,9 +1988,12 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	res->flags = IORESOURCE_MEM;
 
 	for (i = 0; i < nd_region->ndr_mappings; i++) {
-		if (has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
+		uuid_t uuid;
+
+		nsl_get_uuid(ndd, nd_label, &uuid);
+		if (has_uuid_at_pos(nd_region, &uuid, cookie, i))
 			continue;
-		if (has_uuid_at_pos(nd_region, nd_label->uuid, altcookie, i))
+		if (has_uuid_at_pos(nd_region, &uuid, altcookie, i))
 			continue;
 		break;
 	}
@@ -2000,7 +2007,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		 * find a dimm with two instances of the same uuid.
 		 */
 		dev_err(&nd_region->dev, "%s missing label for %pUb\n",
-				nvdimm_name(nvdimm), nd_label->uuid);
+			nvdimm_name(nvdimm), nsl_uuid_raw(ndd, nd_label));
 		rc = -EINVAL;
 		goto err;
 	}
@@ -2013,7 +2020,8 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	 * the dimm being enabled (i.e. nd_label_reserve_dpa()
 	 * succeeded).
 	 */
-	rc = select_pmem_id(nd_region, nd_label->uuid);
+	nsl_get_uuid(ndd, nd_label, &uuid);
+	rc = select_pmem_id(nd_region, &uuid);
 	if (rc)
 		goto err;
 
@@ -2039,8 +2047,8 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		WARN_ON(nspm->alt_name || nspm->uuid);
 		nspm->alt_name = kmemdup(nsl_ref_name(ndd, label0),
 					 NSLABEL_NAME_LEN, GFP_KERNEL);
-		nspm->uuid = kmemdup((void __force *) label0->uuid,
-				NSLABEL_UUID_LEN, GFP_KERNEL);
+		nsl_get_uuid(ndd, label0, &uuid);
+		nspm->uuid = kmemdup(&uuid, sizeof(uuid_t), GFP_KERNEL);
 		nspm->lbasize = nsl_get_lbasize(ndd, label0);
 		nspm->nsio.common.claim_class =
 			nsl_get_claim_class(ndd, label0);
@@ -2217,15 +2225,15 @@ static int add_namespace_resource(struct nd_region *nd_region,
 	int i;
 
 	for (i = 0; i < count; i++) {
-		u8 *uuid = namespace_to_uuid(devs[i]);
+		uuid_t *uuid = namespace_to_uuid(devs[i]);
 		struct resource *res;
 
-		if (IS_ERR_OR_NULL(uuid)) {
+		if (IS_ERR(uuid)) {
 			WARN_ON(1);
 			continue;
 		}
 
-		if (memcmp(uuid, nd_label->uuid, NSLABEL_UUID_LEN) != 0)
+		if (!nsl_uuid_equal(ndd, nd_label, uuid))
 			continue;
 		if (is_namespace_blk(devs[i])) {
 			res = nsblk_add_resource(nd_region, ndd,
@@ -2236,8 +2244,8 @@ static int add_namespace_resource(struct nd_region *nd_region,
 			nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count);
 		} else {
 			dev_err(&nd_region->dev,
-					"error: conflicting extents for uuid: %pUb\n",
-					nd_label->uuid);
+				"error: conflicting extents for uuid: %pUb\n",
+				uuid);
 			return -ENXIO;
 		}
 		break;
@@ -2257,6 +2265,7 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
 	char name[NSLABEL_NAME_LEN];
 	struct device *dev = NULL;
 	struct resource *res;
+	uuid_t uuid;
 
 	if (!nsl_validate_type_guid(ndd, nd_label, &nd_set->type_guid))
 		return ERR_PTR(-EAGAIN);
@@ -2271,7 +2280,8 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
 	dev->parent = &nd_region->dev;
 	nsblk->id = -1;
 	nsblk->lbasize = nsl_get_lbasize(ndd, nd_label);
-	nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, GFP_KERNEL);
+	nsl_get_uuid(ndd, nd_label, &uuid);
+	nsblk->uuid = kmemdup(&uuid, sizeof(uuid_t), GFP_KERNEL);
 	nsblk->common.claim_class = nsl_get_claim_class(ndd, nd_label);
 	if (!nsblk->uuid)
 		goto blk_err;
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 564faa36a3ca..a11850dd475d 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -126,8 +126,9 @@ void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
 void nd_synchronize(void);
 void __nd_device_register(struct device *dev);
 struct nd_label_id;
-char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
-bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
+char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid,
+		      u32 flags);
+bool nd_is_uuid_unique(struct device *dev, uuid_t *uuid);
 struct nd_region;
 struct nvdimm_drvdata;
 struct nd_mapping;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 5467ebbb4a6b..ec3c9aad7f50 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -177,6 +177,38 @@ static inline void nsl_set_lbasize(struct nvdimm_drvdata *ndd,
 	nd_label->lbasize = __cpu_to_le64(lbasize);
 }
 
+static inline const uuid_t *nsl_get_uuid(struct nvdimm_drvdata *ndd,
+					 struct nd_namespace_label *nd_label,
+					 uuid_t *uuid)
+{
+	import_uuid(uuid, nd_label->uuid);
+	return uuid;
+}
+
+static inline const uuid_t *nsl_set_uuid(struct nvdimm_drvdata *ndd,
+					 struct nd_namespace_label *nd_label,
+					 const uuid_t *uuid)
+{
+	export_uuid(nd_label->uuid, uuid);
+	return uuid;
+}
+
+static inline bool nsl_uuid_equal(struct nvdimm_drvdata *ndd,
+				  struct nd_namespace_label *nd_label,
+				  const uuid_t *uuid)
+{
+	uuid_t tmp;
+
+	import_uuid(&tmp, nd_label->uuid);
+	return uuid_equal(&tmp, uuid);
+}
+
+static inline const u8 *nsl_uuid_raw(struct nvdimm_drvdata *ndd,
+				     struct nd_namespace_label *nd_label)
+{
+	return nd_label->uuid;
+}
+
 bool nsl_validate_blk_isetcookie(struct nvdimm_drvdata *ndd,
 				 struct nd_namespace_label *nd_label,
 				 u64 isetcookie);
@@ -335,7 +367,7 @@ struct nd_btt {
 	struct btt *btt;
 	unsigned long lbasize;
 	u64 size;
-	u8 *uuid;
+	uuid_t *uuid;
 	int id;
 	int initial_offset;
 	u16 version_major;
@@ -350,7 +382,7 @@ enum nd_pfn_mode {
 
 struct nd_pfn {
 	int id;
-	u8 *uuid;
+	uuid_t *uuid;
 	struct device dev;
 	unsigned long align;
 	unsigned long npfns;
@@ -378,7 +410,7 @@ void wait_nvdimm_bus_probe_idle(struct device *dev);
 void nd_device_register(struct device *dev);
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
 void nd_device_notify(struct device *dev, enum nvdimm_event event);
-int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf,
 		size_t len);
 ssize_t nd_size_select_show(unsigned long current_size,
 		const unsigned long *supported, char *buf);
@@ -561,6 +593,6 @@ static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
 	return false;
 }
 resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
-const u8 *nd_dev_to_uuid(struct device *dev);
+const uuid_t *nd_dev_to_uuid(struct device *dev);
 bool pmem_should_map_pages(struct device *dev);
 #endif /* __ND_H__ */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index b499df630d4d..58eda16f5c53 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -452,7 +452,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
 	unsigned long align, start_pad;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	struct nd_namespace_common *ndns = nd_pfn->ndns;
-	const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
+	const uuid_t *parent_uuid = nd_dev_to_uuid(&ndns->dev);
 
 	if (!pfn_sb || !ndns)
 		return -ENODEV;
diff --git a/include/linux/nd.h b/include/linux/nd.h
index ee9ad76afbba..8a8c63edb1b2 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -88,7 +88,7 @@ struct nd_namespace_pmem {
 	struct nd_namespace_io nsio;
 	unsigned long lbasize;
 	char *alt_name;
-	u8 *uuid;
+	uuid_t *uuid;
 	int id;
 };
 
@@ -105,7 +105,7 @@ struct nd_namespace_pmem {
 struct nd_namespace_blk {
 	struct nd_namespace_common common;
 	char *alt_name;
-	u8 *uuid;
+	uuid_t *uuid;
 	int id;
 	unsigned long lbasize;
 	resource_size_t size;
-- 
cgit v1.2.3


From 8c75d585b931ac874fbe4ee5a8f1811d20c2817f Mon Sep 17 00:00:00 2001
From: Deepak Kumar Singh <deesin@codeaurora.org>
Date: Tue, 31 Aug 2021 20:00:27 +0530
Subject: soc: qcom: aoss: Expose send for generic usecase

Not all upcoming usecases will have an interface to allow the aoss
driver to hook onto. Expose the send api and create a get function to
enable drivers to send their own messages to aoss.

Signed-off-by: Chris Lew <clew@codeaurora.org>
Signed-off-by: Deepak Kumar Singh <deesin@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/1630420228-31075-2-git-send-email-deesin@codeaurora.org
---
 drivers/soc/qcom/qcom_aoss.c       | 54 +++++++++++++++++++++++++++++++++++++-
 include/linux/soc/qcom/qcom_aoss.h | 38 +++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/soc/qcom/qcom_aoss.h

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c
index 536c3e4114fb..bb336cc8df6f 100644
--- a/drivers/soc/qcom/qcom_aoss.c
+++ b/drivers/soc/qcom/qcom_aoss.c
@@ -8,10 +8,12 @@
 #include <linux/io.h>
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/thermal.h>
 #include <linux/slab.h>
+#include <linux/soc/qcom/qcom_aoss.h>
 
 #define QMP_DESC_MAGIC			0x0
 #define QMP_DESC_VERSION		0x4
@@ -223,11 +225,14 @@ static bool qmp_message_empty(struct qmp *qmp)
  *
  * Return: 0 on success, negative errno on failure
  */
-static int qmp_send(struct qmp *qmp, const void *data, size_t len)
+int qmp_send(struct qmp *qmp, const void *data, size_t len)
 {
 	long time_left;
 	int ret;
 
+	if (WARN_ON(IS_ERR_OR_NULL(qmp) || !data))
+		return -EINVAL;
+
 	if (WARN_ON(len + sizeof(u32) > qmp->size))
 		return -EINVAL;
 
@@ -261,6 +266,7 @@ static int qmp_send(struct qmp *qmp, const void *data, size_t len)
 
 	return ret;
 }
+EXPORT_SYMBOL(qmp_send);
 
 static int qmp_qdss_clk_prepare(struct clk_hw *hw)
 {
@@ -519,6 +525,51 @@ static void qmp_cooling_devices_remove(struct qmp *qmp)
 		thermal_cooling_device_unregister(qmp->cooling_devs[i].cdev);
 }
 
+/**
+ * qmp_get() - get a qmp handle from a device
+ * @dev: client device pointer
+ *
+ * Return: handle to qmp device on success, ERR_PTR() on failure
+ */
+struct qmp *qmp_get(struct device *dev)
+{
+	struct platform_device *pdev;
+	struct device_node *np;
+	struct qmp *qmp;
+
+	if (!dev || !dev->of_node)
+		return ERR_PTR(-EINVAL);
+
+	np = of_parse_phandle(dev->of_node, "qcom,qmp", 0);
+	if (!np)
+		return ERR_PTR(-ENODEV);
+
+	pdev = of_find_device_by_node(np);
+	of_node_put(np);
+	if (!pdev)
+		return ERR_PTR(-EINVAL);
+
+	qmp = platform_get_drvdata(pdev);
+
+	return qmp ? qmp : ERR_PTR(-EPROBE_DEFER);
+}
+EXPORT_SYMBOL(qmp_get);
+
+/**
+ * qmp_put() - release a qmp handle
+ * @qmp: qmp handle obtained from qmp_get()
+ */
+void qmp_put(struct qmp *qmp)
+{
+	/*
+	 * Match get_device() inside of_find_device_by_node() in
+	 * qmp_get()
+	 */
+	if (!IS_ERR_OR_NULL(qmp))
+		put_device(qmp->dev);
+}
+EXPORT_SYMBOL(qmp_put);
+
 static int qmp_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -615,6 +666,7 @@ static struct platform_driver qmp_driver = {
 	.driver = {
 		.name		= "qcom_aoss_qmp",
 		.of_match_table	= qmp_dt_match,
+		.suppress_bind_attrs = true,
 	},
 	.probe = qmp_probe,
 	.remove	= qmp_remove,
diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h
new file mode 100644
index 000000000000..3c2a82e606f8
--- /dev/null
+++ b/include/linux/soc/qcom/qcom_aoss.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __QCOM_AOSS_H__
+#define __QCOM_AOSS_H__
+
+#include <linux/err.h>
+#include <linux/device.h>
+
+struct qmp;
+
+#if IS_ENABLED(CONFIG_QCOM_AOSS_QMP)
+
+int qmp_send(struct qmp *qmp, const void *data, size_t len);
+struct qmp *qmp_get(struct device *dev);
+void qmp_put(struct qmp *qmp);
+
+#else
+
+static inline int qmp_send(struct qmp *qmp, const void *data, size_t len)
+{
+	return -ENODEV;
+}
+
+static inline struct qmp *qmp_get(struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void qmp_put(struct qmp *qmp)
+{
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From 20c36ce2164f1774b487d443ece99b754bc6ad43 Mon Sep 17 00:00:00 2001
From: Bixuan Cui <cuibixuan@huawei.com>
Date: Thu, 16 Sep 2021 10:52:03 +0800
Subject: irqdomain: Change the type of 'size' in __irq_domain_add() to be
 consistent

The 'size' is used in struct_size(domain, revmap, size) and its input
parameter type is 'size_t'(unsigned int).
Changing the size to 'unsigned int' to make the type consistent.

Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210916025203.44841-1-cuibixuan@huawei.com
---
 include/linux/irqdomain.h | 2 +-
 kernel/irq/irqdomain.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 23e4ee523576..9ee238ad29ce 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -251,7 +251,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
 }
 
 void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
 				    irq_hw_number_t hwirq_max, int direct_max,
 				    const struct irq_domain_ops *ops,
 				    void *host_data);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 62be16135e7c..bfa289ed57ab 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -136,7 +136,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
  * Allocates and initializes an irq_domain structure.
  * Returns pointer to IRQ domain, or NULL on failure.
  */
-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
+struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
 				    irq_hw_number_t hwirq_max, int direct_max,
 				    const struct irq_domain_ops *ops,
 				    void *host_data)
-- 
cgit v1.2.3


From a68de80f61f6af397bc06fb391ff2e571c9c4d80 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 1 Sep 2021 13:30:27 -0700
Subject: entry: rseq: Call rseq_handle_notify_resume() in
 tracehook_notify_resume()

Invoke rseq_handle_notify_resume() from tracehook_notify_resume() now
that the two function are always called back-to-back by architectures
that have rseq.  The rseq helper is stubbed out for architectures that
don't support rseq, i.e. this is a nop across the board.

Note, tracehook_notify_resume() is horribly named and arguably does not
belong in tracehook.h as literally every line of code in it has nothing
to do with tracing.  But, that's been true since commit a42c6ded827d
("move key_repace_session_keyring() into tracehook_notify_resume()")
first usurped tracehook_notify_resume() back in 2012.  Punt cleaning that
mess up to future patches.

No functional change intended.

Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210901203030.1292304-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm/kernel/signal.c     | 1 -
 arch/arm64/kernel/signal.c   | 4 +---
 arch/csky/kernel/signal.c    | 4 +---
 arch/mips/kernel/signal.c    | 4 +---
 arch/powerpc/kernel/signal.c | 4 +---
 include/linux/tracehook.h    | 2 ++
 kernel/entry/common.c        | 4 +---
 kernel/entry/kvm.c           | 4 +---
 8 files changed, 8 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index d0a800be0486..a41e27ace391 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -628,7 +628,6 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 				uprobe_notify_resume(regs);
 			} else {
 				tracehook_notify_resume(regs);
-				rseq_handle_notify_resume(NULL, regs);
 			}
 		}
 		local_irq_disable();
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 9fe70b12b34f..c287b9407f28 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -940,10 +940,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags)
 			if (thread_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
 				do_signal(regs);
 
-			if (thread_flags & _TIF_NOTIFY_RESUME) {
+			if (thread_flags & _TIF_NOTIFY_RESUME)
 				tracehook_notify_resume(regs);
-				rseq_handle_notify_resume(NULL, regs);
-			}
 
 			if (thread_flags & _TIF_FOREIGN_FPSTATE)
 				fpsimd_restore_current_state();
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index 312f046d452d..bc4238b9f709 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -260,8 +260,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
 		do_signal(regs);
 
-	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+	if (thread_info_flags & _TIF_NOTIFY_RESUME)
 		tracehook_notify_resume(regs);
-		rseq_handle_notify_resume(NULL, regs);
-	}
 }
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index f1e985109da0..c9b2a75563e1 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -906,10 +906,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
 	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
 		do_signal(regs);
 
-	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+	if (thread_info_flags & _TIF_NOTIFY_RESUME)
 		tracehook_notify_resume(regs);
-		rseq_handle_notify_resume(NULL, regs);
-	}
 
 	user_enter();
 }
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index e600764a926c..b93b87df499d 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -293,10 +293,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 		do_signal(current);
 	}
 
-	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+	if (thread_info_flags & _TIF_NOTIFY_RESUME)
 		tracehook_notify_resume(regs);
-		rseq_handle_notify_resume(NULL, regs);
-	}
 }
 
 static unsigned long get_tm_stackpointer(struct task_struct *tsk)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 3e80c4bc66f7..2564b7434b4d 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -197,6 +197,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 
 	mem_cgroup_handle_over_high();
 	blkcg_maybe_throttle_current();
+
+	rseq_handle_notify_resume(NULL, regs);
 }
 
 /*
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index bf16395b9e13..d5a61d565ad5 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -171,10 +171,8 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
 			handle_signal_work(regs, ti_work);
 
-		if (ti_work & _TIF_NOTIFY_RESUME) {
+		if (ti_work & _TIF_NOTIFY_RESUME)
 			tracehook_notify_resume(regs);
-			rseq_handle_notify_resume(NULL, regs);
-		}
 
 		/* Architecture specific TIF work */
 		arch_exit_to_user_mode_work(regs, ti_work);
diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c
index 049fd06b4c3d..49972ee99aff 100644
--- a/kernel/entry/kvm.c
+++ b/kernel/entry/kvm.c
@@ -19,10 +19,8 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)
 		if (ti_work & _TIF_NEED_RESCHED)
 			schedule();
 
-		if (ti_work & _TIF_NOTIFY_RESUME) {
+		if (ti_work & _TIF_NOTIFY_RESUME)
 			tracehook_notify_resume(NULL);
-			rseq_handle_notify_resume(NULL, NULL);
-		}
 
 		ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work);
 		if (ret)
-- 
cgit v1.2.3


From 4eeef2424153e79910d65248b5e1abf137d050e9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 10 Sep 2021 11:32:19 -0700
Subject: KVM: x86: Query vcpu->vcpu_idx directly and drop its accessor

Read vcpu->vcpu_idx directly instead of bouncing through the one-line
wrapper, kvm_vcpu_get_idx(), and drop the wrapper.  The wrapper is a
remnant of the original implementation and serves no purpose; remove it
before it gains more users.

Back when kvm_vcpu_get_idx() was added by commit 497d72d80a78 ("KVM: Add
kvm_vcpu_get_idx to get vcpu index in kvm->vcpus"), the implementation
was more than just a simple wrapper as vcpu->vcpu_idx did not exist and
retrieving the index meant walking over the vCPU array to find the given
vCPU.

When vcpu_idx was introduced by commit 8750e72a79dd ("KVM: remember
position in kvm->vcpus array"), the helper was left behind, likely to
avoid extra thrash (but even then there were only two users, the original
arm usage having been removed at some point in the past).

No functional change intended.

Suggested-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Message-Id: <20210910183220.2397812-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/s390/kvm/interrupt.c | 4 ++--
 arch/s390/kvm/kvm-s390.c  | 2 +-
 arch/s390/kvm/kvm-s390.h  | 2 +-
 arch/x86/kvm/hyperv.c     | 7 +++----
 arch/x86/kvm/hyperv.h     | 2 +-
 include/linux/kvm_host.h  | 5 -----
 6 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 16256e17a544..10722455fd02 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -419,13 +419,13 @@ static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
 	kvm_s390_set_cpuflags(vcpu, CPUSTAT_WAIT);
-	set_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+	set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
 }
 
 static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
 {
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_WAIT);
-	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
 }
 
 static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 752a0ffab9bf..6a6dd5e1daf6 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4066,7 +4066,7 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
 		kvm_s390_patch_guest_per_regs(vcpu);
 	}
 
-	clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
+	clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask);
 
 	vcpu->arch.sie_block->icptcode = 0;
 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index ecd741ee3276..52bc8fbaa60a 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -79,7 +79,7 @@ static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
 
 static inline int is_vcpu_idle(struct kvm_vcpu *vcpu)
 {
-	return test_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.idle_mask);
+	return test_bit(vcpu->vcpu_idx, vcpu->kvm->arch.idle_mask);
 }
 
 static inline int kvm_is_ucontrol(struct kvm *kvm)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 232a86a6faaf..d5124b520f76 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -939,7 +939,7 @@ static int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
 		stimer_init(&hv_vcpu->stimer[i], i);
 
-	hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+	hv_vcpu->vp_index = vcpu->vcpu_idx;
 
 	return 0;
 }
@@ -1444,7 +1444,6 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 	switch (msr) {
 	case HV_X64_MSR_VP_INDEX: {
 		struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
-		int vcpu_idx = kvm_vcpu_get_idx(vcpu);
 		u32 new_vp_index = (u32)data;
 
 		if (!host || new_vp_index >= KVM_MAX_VCPUS)
@@ -1459,9 +1458,9 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 		 * VP index is changing, adjust num_mismatched_vp_indexes if
 		 * it now matches or no longer matches vcpu_idx.
 		 */
-		if (hv_vcpu->vp_index == vcpu_idx)
+		if (hv_vcpu->vp_index == vcpu->vcpu_idx)
 			atomic_inc(&hv->num_mismatched_vp_indexes);
-		else if (new_vp_index == vcpu_idx)
+		else if (new_vp_index == vcpu->vcpu_idx)
 			atomic_dec(&hv->num_mismatched_vp_indexes);
 
 		hv_vcpu->vp_index = new_vp_index;
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 730da8537d05..ed1c4e546d04 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -83,7 +83,7 @@ static inline u32 kvm_hv_get_vpindex(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
-	return hv_vcpu ? hv_vcpu->vp_index : kvm_vcpu_get_idx(vcpu);
+	return hv_vcpu ? hv_vcpu->vp_index : vcpu->vcpu_idx;
 }
 
 int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 041ca7f15ea4..000ea73dd324 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -721,11 +721,6 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
 	return NULL;
 }
 
-static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
-{
-	return vcpu->vcpu_idx;
-}
-
 #define kvm_for_each_memslot(memslot, slots)				\
 	for (memslot = &slots->memslots[0];				\
 	     memslot < slots->memslots + slots->used_slots; memslot++)	\
-- 
cgit v1.2.3


From b468e688240b58ece498c430c377bb81b78a41f1 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 14 Sep 2021 11:11:20 +0200
Subject: tty: remove flags from struct tty_ldisc_ops

The last user was apparently removed by commit a352def21a64 (tty: Ldisc
revamp) in 2008. So remove the field completely, the only setter
(n_tty_inherit_ops) and also its only possible value
(LDISC_FLAG_DEFINED).

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20210914091134.17426-2-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c       | 1 -
 include/linux/tty_ldisc.h | 3 ---
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 0ec93f1a61f5..797af5d46cbe 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -2450,7 +2450,6 @@ void n_tty_inherit_ops(struct tty_ldisc_ops *ops)
 {
 	*ops = n_tty_ops;
 	ops->owner = NULL;
-	ops->flags = 0;
 }
 EXPORT_SYMBOL_GPL(n_tty_inherit_ops);
 
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index b1d812e902aa..1f37184eee63 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -180,7 +180,6 @@ extern int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
 struct tty_ldisc_ops {
 	char	*name;
 	int	num;
-	int	flags;
 
 	/*
 	 * The following routines are called from above.
@@ -220,8 +219,6 @@ struct tty_ldisc {
 	struct tty_struct *tty;
 };
 
-#define LDISC_FLAG_DEFINED	0x00000001
-
 #define MODULE_ALIAS_LDISC(ldisc) \
 	MODULE_ALIAS("tty-ldisc-" __stringify(ldisc))
 
-- 
cgit v1.2.3


From 7894193436b65f304ba790abe2532a7a3bbeb7c8 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 14 Sep 2021 11:11:21 +0200
Subject: tty: remove extern from functions in tty headers

After the recent headers cleanup, some function declarations still have
extern before them. It is superfluous (for function declarations), so
remove extern from those which still have it.

This unifies them with the rest of the files.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20210914091134.17426-3-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty.h        | 139 ++++++++++++++++++++++-----------------------
 include/linux/tty_driver.h |   8 +--
 include/linux/tty_flip.h   |  20 +++----
 include/linux/tty_ldisc.h  |  22 +++----
 4 files changed, 94 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index 168e57e40bbb..15453b0c4081 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -252,20 +252,20 @@ static inline bool tty_throttled(struct tty_struct *tty)
 }
 
 #ifdef CONFIG_TTY
-extern void tty_kref_put(struct tty_struct *tty);
-extern struct pid *tty_get_pgrp(struct tty_struct *tty);
-extern void tty_vhangup_self(void);
-extern void disassociate_ctty(int priv);
-extern dev_t tty_devnum(struct tty_struct *tty);
-extern void proc_clear_tty(struct task_struct *p);
-extern struct tty_struct *get_current_tty(void);
+void tty_kref_put(struct tty_struct *tty);
+struct pid *tty_get_pgrp(struct tty_struct *tty);
+void tty_vhangup_self(void);
+void disassociate_ctty(int priv);
+dev_t tty_devnum(struct tty_struct *tty);
+void proc_clear_tty(struct task_struct *p);
+struct tty_struct *get_current_tty(void);
 /* tty_io.c */
-extern int __init tty_init(void);
-extern const char *tty_name(const struct tty_struct *tty);
-extern struct tty_struct *tty_kopen_exclusive(dev_t device);
-extern struct tty_struct *tty_kopen_shared(dev_t device);
-extern void tty_kclose(struct tty_struct *tty);
-extern int tty_dev_name_to_number(const char *name, dev_t *number);
+int __init tty_init(void);
+const char *tty_name(const struct tty_struct *tty);
+struct tty_struct *tty_kopen_exclusive(dev_t device);
+struct tty_struct *tty_kopen_shared(dev_t device);
+void tty_kclose(struct tty_struct *tty);
+int tty_dev_name_to_number(const char *name, dev_t *number);
 #else
 static inline void tty_kref_put(struct tty_struct *tty)
 { }
@@ -296,7 +296,7 @@ static inline int tty_dev_name_to_number(const char *name, dev_t *number)
 
 extern struct ktermios tty_std_termios;
 
-extern int vcs_init(void);
+int vcs_init(void);
 
 extern struct class *tty_class;
 
@@ -316,34 +316,34 @@ static inline struct tty_struct *tty_kref_get(struct tty_struct *tty)
 	return tty;
 }
 
-extern const char *tty_driver_name(const struct tty_struct *tty);
-extern void tty_wait_until_sent(struct tty_struct *tty, long timeout);
-extern void stop_tty(struct tty_struct *tty);
-extern void start_tty(struct tty_struct *tty);
-extern void tty_write_message(struct tty_struct *tty, char *msg);
-extern int tty_send_xchar(struct tty_struct *tty, char ch);
-extern int tty_put_char(struct tty_struct *tty, unsigned char c);
-extern unsigned int tty_chars_in_buffer(struct tty_struct *tty);
-extern unsigned int tty_write_room(struct tty_struct *tty);
-extern void tty_driver_flush_buffer(struct tty_struct *tty);
-extern void tty_unthrottle(struct tty_struct *tty);
-extern int tty_throttle_safe(struct tty_struct *tty);
-extern int tty_unthrottle_safe(struct tty_struct *tty);
-extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
-extern int tty_get_icount(struct tty_struct *tty,
-			  struct serial_icounter_struct *icount);
-extern int is_current_pgrp_orphaned(void);
-extern void tty_hangup(struct tty_struct *tty);
-extern void tty_vhangup(struct tty_struct *tty);
-extern int tty_hung_up_p(struct file *filp);
-extern void do_SAK(struct tty_struct *tty);
-extern void __do_SAK(struct tty_struct *tty);
-extern void no_tty(void);
-extern speed_t tty_termios_baud_rate(struct ktermios *termios);
-extern void tty_termios_encode_baud_rate(struct ktermios *termios,
-						speed_t ibaud, speed_t obaud);
-extern void tty_encode_baud_rate(struct tty_struct *tty,
-						speed_t ibaud, speed_t obaud);
+const char *tty_driver_name(const struct tty_struct *tty);
+void tty_wait_until_sent(struct tty_struct *tty, long timeout);
+void stop_tty(struct tty_struct *tty);
+void start_tty(struct tty_struct *tty);
+void tty_write_message(struct tty_struct *tty, char *msg);
+int tty_send_xchar(struct tty_struct *tty, char ch);
+int tty_put_char(struct tty_struct *tty, unsigned char c);
+unsigned int tty_chars_in_buffer(struct tty_struct *tty);
+unsigned int tty_write_room(struct tty_struct *tty);
+void tty_driver_flush_buffer(struct tty_struct *tty);
+void tty_unthrottle(struct tty_struct *tty);
+int tty_throttle_safe(struct tty_struct *tty);
+int tty_unthrottle_safe(struct tty_struct *tty);
+int tty_do_resize(struct tty_struct *tty, struct winsize *ws);
+int tty_get_icount(struct tty_struct *tty,
+		struct serial_icounter_struct *icount);
+int is_current_pgrp_orphaned(void);
+void tty_hangup(struct tty_struct *tty);
+void tty_vhangup(struct tty_struct *tty);
+int tty_hung_up_p(struct file *filp);
+void do_SAK(struct tty_struct *tty);
+void __do_SAK(struct tty_struct *tty);
+void no_tty(void);
+speed_t tty_termios_baud_rate(struct ktermios *termios);
+void tty_termios_encode_baud_rate(struct ktermios *termios, speed_t ibaud,
+		speed_t obaud);
+void tty_encode_baud_rate(struct tty_struct *tty, speed_t ibaud,
+		speed_t obaud);
 
 /**
  *	tty_get_baud_rate	-	get tty bit rates
@@ -363,37 +363,37 @@ static inline speed_t tty_get_baud_rate(struct tty_struct *tty)
 unsigned char tty_get_char_size(unsigned int cflag);
 unsigned char tty_get_frame_size(unsigned int cflag);
 
-extern void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old);
-extern int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b);
-extern int tty_set_termios(struct tty_struct *tty, struct ktermios *kt);
+void tty_termios_copy_hw(struct ktermios *new, struct ktermios *old);
+int tty_termios_hw_change(const struct ktermios *a, const struct ktermios *b);
+int tty_set_termios(struct tty_struct *tty, struct ktermios *kt);
 
-extern void tty_wakeup(struct tty_struct *tty);
+void tty_wakeup(struct tty_struct *tty);
 
-extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
+int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
 			unsigned int cmd, unsigned long arg);
-extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg);
-extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx);
-extern void tty_release_struct(struct tty_struct *tty, int idx);
-extern void tty_init_termios(struct tty_struct *tty);
-extern void tty_save_termios(struct tty_struct *tty);
-extern int tty_standard_install(struct tty_driver *driver,
+int tty_perform_flush(struct tty_struct *tty, unsigned long arg);
+struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx);
+void tty_release_struct(struct tty_struct *tty, int idx);
+void tty_init_termios(struct tty_struct *tty);
+void tty_save_termios(struct tty_struct *tty);
+int tty_standard_install(struct tty_driver *driver,
 		struct tty_struct *tty);
 
 extern struct mutex tty_mutex;
 
 /* n_tty.c */
-extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops);
+void n_tty_inherit_ops(struct tty_ldisc_ops *ops);
 #ifdef CONFIG_TTY
-extern void __init n_tty_init(void);
+void __init n_tty_init(void);
 #else
 static inline void n_tty_init(void) { }
 #endif
 
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
-extern void tty_audit_exit(void);
-extern void tty_audit_fork(struct signal_struct *sig);
-extern int tty_audit_push(void);
+void tty_audit_exit(void);
+void tty_audit_fork(struct signal_struct *sig);
+int tty_audit_push(void);
 #else
 static inline void tty_audit_exit(void)
 {
@@ -408,24 +408,23 @@ static inline int tty_audit_push(void)
 #endif
 
 /* tty_ioctl.c */
-extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
-		       unsigned int cmd, unsigned long arg);
+int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
+		unsigned int cmd, unsigned long arg);
 
 /* vt.c */
 
-extern int vt_ioctl(struct tty_struct *tty,
-		    unsigned int cmd, unsigned long arg);
+int vt_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg);
 
-extern long vt_compat_ioctl(struct tty_struct *tty,
-		     unsigned int cmd, unsigned long arg);
+long vt_compat_ioctl(struct tty_struct *tty, unsigned int cmd,
+		unsigned long arg);
 
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
-extern void tty_lock(struct tty_struct *tty);
-extern int  tty_lock_interruptible(struct tty_struct *tty);
-extern void tty_unlock(struct tty_struct *tty);
-extern void tty_lock_slave(struct tty_struct *tty);
-extern void tty_unlock_slave(struct tty_struct *tty);
-extern void tty_set_lock_subclass(struct tty_struct *tty);
+void tty_lock(struct tty_struct *tty);
+int  tty_lock_interruptible(struct tty_struct *tty);
+void tty_unlock(struct tty_struct *tty);
+void tty_lock_slave(struct tty_struct *tty);
+void tty_unlock_slave(struct tty_struct *tty);
+void tty_set_lock_subclass(struct tty_struct *tty);
 
 #endif
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index c20431d8def8..29e1cf178afb 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -327,11 +327,11 @@ struct tty_driver {
 
 extern struct list_head tty_drivers;
 
-extern struct tty_driver *__tty_alloc_driver(unsigned int lines,
-		struct module *owner, unsigned long flags);
-extern struct tty_driver *tty_find_polling_driver(char *name, int *line);
+struct tty_driver *__tty_alloc_driver(unsigned int lines, struct module *owner,
+		unsigned long flags);
+struct tty_driver *tty_find_polling_driver(char *name, int *line);
 
-extern void tty_driver_kref_put(struct tty_driver *driver);
+void tty_driver_kref_put(struct tty_driver *driver);
 
 /* Use TTY_DRIVER_* flags below */
 #define tty_alloc_driver(lines, flags) \
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 32284992b31a..9916acb5de49 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -7,16 +7,16 @@
 
 struct tty_ldisc;
 
-extern int tty_buffer_set_limit(struct tty_port *port, int limit);
-extern unsigned int tty_buffer_space_avail(struct tty_port *port);
-extern int tty_buffer_request_room(struct tty_port *port, size_t size);
-extern int tty_insert_flip_string_flags(struct tty_port *port,
+int tty_buffer_set_limit(struct tty_port *port, int limit);
+unsigned int tty_buffer_space_avail(struct tty_port *port);
+int tty_buffer_request_room(struct tty_port *port, size_t size);
+int tty_insert_flip_string_flags(struct tty_port *port,
 		const unsigned char *chars, const char *flags, size_t size);
-extern int tty_insert_flip_string_fixed_flag(struct tty_port *port,
+int tty_insert_flip_string_fixed_flag(struct tty_port *port,
 		const unsigned char *chars, char flag, size_t size);
-extern int tty_prepare_flip_string(struct tty_port *port,
-		unsigned char **chars, size_t size);
-extern void tty_flip_buffer_push(struct tty_port *port);
+int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars,
+		size_t size);
+void tty_flip_buffer_push(struct tty_port *port);
 void tty_schedule_flip(struct tty_port *port);
 int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag);
 
@@ -45,7 +45,7 @@ static inline int tty_insert_flip_string(struct tty_port *port,
 int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p,
 		const char *f, int count);
 
-extern void tty_buffer_lock_exclusive(struct tty_port *port);
-extern void tty_buffer_unlock_exclusive(struct tty_port *port);
+void tty_buffer_lock_exclusive(struct tty_port *port);
+void tty_buffer_unlock_exclusive(struct tty_port *port);
 
 #endif /* _LINUX_TTY_FLIP_H */
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 1f37184eee63..4d1c128afbfa 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -146,7 +146,7 @@ struct ld_semaphore {
 #endif
 };
 
-extern void __init_ldsem(struct ld_semaphore *sem, const char *name,
+void __init_ldsem(struct ld_semaphore *sem, const char *name,
 			 struct lock_class_key *key);
 
 #define init_ldsem(sem)						\
@@ -157,18 +157,18 @@ do {								\
 } while (0)
 
 
-extern int ldsem_down_read(struct ld_semaphore *sem, long timeout);
-extern int ldsem_down_read_trylock(struct ld_semaphore *sem);
-extern int ldsem_down_write(struct ld_semaphore *sem, long timeout);
-extern int ldsem_down_write_trylock(struct ld_semaphore *sem);
-extern void ldsem_up_read(struct ld_semaphore *sem);
-extern void ldsem_up_write(struct ld_semaphore *sem);
+int ldsem_down_read(struct ld_semaphore *sem, long timeout);
+int ldsem_down_read_trylock(struct ld_semaphore *sem);
+int ldsem_down_write(struct ld_semaphore *sem, long timeout);
+int ldsem_down_write_trylock(struct ld_semaphore *sem);
+void ldsem_up_read(struct ld_semaphore *sem);
+void ldsem_up_write(struct ld_semaphore *sem);
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass,
-				  long timeout);
-extern int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
-				   long timeout);
+int ldsem_down_read_nested(struct ld_semaphore *sem, int subclass,
+		long timeout);
+int ldsem_down_write_nested(struct ld_semaphore *sem, int subclass,
+		long timeout);
 #else
 # define ldsem_down_read_nested(sem, subclass, timeout)		\
 		ldsem_down_read(sem, timeout)
-- 
cgit v1.2.3


From 28f194da4a2c4d431552025a4386edaffab181bd Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 14 Sep 2021 11:11:22 +0200
Subject: tty: make tty_ldisc_ops::hangup return void

The documentation says that the return value of tty_ldisc_ops::hangup
hook is ignored. And it really is, so there is no point for its return
type to be int. Switch it to void and all the hooks too.

Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Liam Girdwood <lgirdwood@gmail.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20210914091134.17426-4-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/serial/tty.rst | 2 +-
 drivers/input/serio/serport.c           | 3 +--
 drivers/net/can/slcan.c                 | 3 +--
 drivers/net/ppp/ppp_async.c             | 3 +--
 drivers/net/ppp/ppp_synctty.c           | 3 +--
 drivers/net/slip/slip.c                 | 3 +--
 include/linux/tty_ldisc.h               | 2 +-
 sound/soc/codecs/cx20442.c              | 3 +--
 sound/soc/ti/ams-delta.c                | 3 +--
 9 files changed, 9 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/serial/tty.rst b/Documentation/driver-api/serial/tty.rst
index dd972caacf3e..4b709f392713 100644
--- a/Documentation/driver-api/serial/tty.rst
+++ b/Documentation/driver-api/serial/tty.rst
@@ -58,7 +58,7 @@ close()			This is called on a terminal when the line
 hangup()		Called when the tty line is hung up.
 			The line discipline should cease I/O to the tty.
 			No further calls into the ldisc code will occur.
-			The return value is ignored. Can sleep.
+			Can sleep.
 
 read()			(optional) A process requests reading data from
 			the line. Multiple read calls may occur in parallel
diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c
index 7fbbe00e3553..17eb8f2aa48d 100644
--- a/drivers/input/serio/serport.c
+++ b/drivers/input/serio/serport.c
@@ -244,7 +244,7 @@ static int serport_ldisc_compat_ioctl(struct tty_struct *tty,
 }
 #endif
 
-static int serport_ldisc_hangup(struct tty_struct *tty)
+static void serport_ldisc_hangup(struct tty_struct *tty)
 {
 	struct serport *serport = (struct serport *) tty->disc_data;
 	unsigned long flags;
@@ -254,7 +254,6 @@ static int serport_ldisc_hangup(struct tty_struct *tty)
 	spin_unlock_irqrestore(&serport->lock, flags);
 
 	wake_up_interruptible(&serport->wait);
-	return 0;
 }
 
 static void serport_ldisc_write_wakeup(struct tty_struct * tty)
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index d42ec7d1bc14..012da4b8abe0 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -664,10 +664,9 @@ static void slcan_close(struct tty_struct *tty)
 	/* This will complete via sl_free_netdev */
 }
 
-static int slcan_hangup(struct tty_struct *tty)
+static void slcan_hangup(struct tty_struct *tty)
 {
 	slcan_close(tty);
-	return 0;
 }
 
 /* Perform I/O control on an active SLCAN channel. */
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 29a93d6bfe37..78ec1bcebc4f 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -247,10 +247,9 @@ ppp_asynctty_close(struct tty_struct *tty)
  * Wait for I/O to driver to complete and unregister PPP channel.
  * This is already done by the close routine, so just call that.
  */
-static int ppp_asynctty_hangup(struct tty_struct *tty)
+static void ppp_asynctty_hangup(struct tty_struct *tty)
 {
 	ppp_asynctty_close(tty);
-	return 0;
 }
 
 /*
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index af3e048695b6..c249db7c466a 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -245,10 +245,9 @@ ppp_sync_close(struct tty_struct *tty)
  * Wait for I/O to driver to complete and unregister PPP channel.
  * This is already done by the close routine, so just call that.
  */
-static int ppp_sync_hangup(struct tty_struct *tty)
+static void ppp_sync_hangup(struct tty_struct *tty)
 {
 	ppp_sync_close(tty);
-	return 0;
 }
 
 /*
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 5435b5689ce6..8be9d0c351b5 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -907,10 +907,9 @@ static void slip_close(struct tty_struct *tty)
 	/* This will complete via sl_free_netdev */
 }
 
-static int slip_hangup(struct tty_struct *tty)
+static void slip_hangup(struct tty_struct *tty)
 {
 	slip_close(tty);
-	return 0;
 }
  /************************************************************************
   *			STANDARD SLIP ENCAPSULATION		  	 *
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 4d1c128afbfa..b85d84fb5f49 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -199,7 +199,7 @@ struct tty_ldisc_ops {
 	void	(*set_termios)(struct tty_struct *tty, struct ktermios *old);
 	__poll_t (*poll)(struct tty_struct *, struct file *,
 			     struct poll_table_struct *);
-	int	(*hangup)(struct tty_struct *tty);
+	void	(*hangup)(struct tty_struct *tty);
 
 	/*
 	 * The following routines are called from below.
diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c
index 13258f3ca9aa..1af0bf5f1e2f 100644
--- a/sound/soc/codecs/cx20442.c
+++ b/sound/soc/codecs/cx20442.c
@@ -252,10 +252,9 @@ static void v253_close(struct tty_struct *tty)
 }
 
 /* Line discipline .hangup() */
-static int v253_hangup(struct tty_struct *tty)
+static void v253_hangup(struct tty_struct *tty)
 {
 	v253_close(tty);
-	return 0;
 }
 
 /* Line discipline .receive_buf() */
diff --git a/sound/soc/ti/ams-delta.c b/sound/soc/ti/ams-delta.c
index ecd24d412a9b..b1a32545babd 100644
--- a/sound/soc/ti/ams-delta.c
+++ b/sound/soc/ti/ams-delta.c
@@ -330,10 +330,9 @@ static void cx81801_close(struct tty_struct *tty)
 }
 
 /* Line discipline .hangup() */
-static int cx81801_hangup(struct tty_struct *tty)
+static void cx81801_hangup(struct tty_struct *tty)
 {
 	cx81801_close(tty);
-	return 0;
 }
 
 /* Line discipline .receive_buf() */
-- 
cgit v1.2.3


From dcc223e8b9bf3f987bcd3c327a6737022b39e35b Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 14 Sep 2021 11:11:23 +0200
Subject: tty: remove file from tty_mode_ioctl

The only user of 'file' parameter in tty_mode_ioctl is a BUG_ON check.
Provided it never crashed for anyone, it's an overkill to pass the
parameter to tty_mode_ioctl only for this check.

If we wanted to check 'file' there, we should handle it in more graceful
way anyway. Not by a BUG == crash.

Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Andreas Koensgen <ajk@comnets.uni-bremen.de>
Cc: Paul Mackerras <paulus@samba.org>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20210914091134.17426-5-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/slcan.c       | 2 +-
 drivers/net/hamradio/6pack.c  | 2 +-
 drivers/net/ppp/ppp_async.c   | 2 +-
 drivers/net/ppp/ppp_synctty.c | 2 +-
 drivers/net/slip/slip.c       | 2 +-
 drivers/tty/tty_ioctl.c       | 8 ++------
 include/linux/tty.h           | 3 +--
 7 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index 012da4b8abe0..9a4ebda30510 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -691,7 +691,7 @@ static int slcan_ioctl(struct tty_struct *tty, struct file *file,
 		return -EINVAL;
 
 	default:
-		return tty_mode_ioctl(tty, file, cmd, arg);
+		return tty_mode_ioctl(tty, cmd, arg);
 	}
 }
 
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 8fe8887d506a..05404f7a3204 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -732,7 +732,7 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file,
 			break;
 		}
 	default:
-		err = tty_mode_ioctl(tty, file, cmd, arg);
+		err = tty_mode_ioctl(tty, cmd, arg);
 	}
 
 	sp_put(sp);
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 78ec1bcebc4f..6492523fc234 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -322,7 +322,7 @@ ppp_asynctty_ioctl(struct tty_struct *tty, struct file *file,
 
 	default:
 		/* Try the various mode ioctls */
-		err = tty_mode_ioctl(tty, file, cmd, arg);
+		err = tty_mode_ioctl(tty, cmd, arg);
 	}
 
 	ap_put(ap);
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index c249db7c466a..ebbfea0e1140 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -314,7 +314,7 @@ ppp_synctty_ioctl(struct tty_struct *tty, struct file *file,
 		break;
 
 	default:
-		err = tty_mode_ioctl(tty, file, cmd, arg);
+		err = tty_mode_ioctl(tty, cmd, arg);
 		break;
 	}
 
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 8be9d0c351b5..9f3b4c1aa5ce 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -1173,7 +1173,7 @@ static int slip_ioctl(struct tty_struct *tty, struct file *file,
 	/* VSV changes end */
 #endif
 	default:
-		return tty_mode_ioctl(tty, file, cmd, arg);
+		return tty_mode_ioctl(tty, cmd, arg);
 	}
 }
 
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index 507a25d692bb..99a29d72d294 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -675,7 +675,6 @@ static int tty_change_softcar(struct tty_struct *tty, int arg)
 /**
  *	tty_mode_ioctl		-	mode related ioctls
  *	@tty: tty for the ioctl
- *	@file: file pointer for the tty
  *	@cmd: command
  *	@arg: ioctl argument
  *
@@ -684,16 +683,13 @@ static int tty_change_softcar(struct tty_struct *tty, int arg)
  *	consistent mode setting.
  */
 
-int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
-			unsigned int cmd, unsigned long arg)
+int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
 {
 	struct tty_struct *real_tty;
 	void __user *p = (void __user *)arg;
 	int ret = 0;
 	struct ktermios kterm;
 
-	BUG_ON(file == NULL);
-
 	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 	    tty->driver->subtype == PTY_TYPE_MASTER)
 		real_tty = tty->link;
@@ -904,7 +900,7 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 		return __tty_perform_flush(tty, arg);
 	default:
 		/* Try the mode commands */
-		return tty_mode_ioctl(tty, file, cmd, arg);
+		return tty_mode_ioctl(tty, cmd, arg);
 	}
 }
 EXPORT_SYMBOL(n_tty_ioctl_helper);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 15453b0c4081..5e73f577c2b4 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -369,8 +369,7 @@ int tty_set_termios(struct tty_struct *tty, struct ktermios *kt);
 
 void tty_wakeup(struct tty_struct *tty);
 
-int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
-			unsigned int cmd, unsigned long arg);
+int tty_mode_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg);
 int tty_perform_flush(struct tty_struct *tty, unsigned long arg);
 struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx);
 void tty_release_struct(struct tty_struct *tty, int idx);
-- 
cgit v1.2.3


From 7c783601a3bc22a54cce0fb650259a983c8cafba Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Tue, 14 Sep 2021 11:11:24 +0200
Subject: tty: remove file from n_tty_ioctl_helper

After the previous patch, there are no users of 'file' in
n_tty_ioctl_helper. So remove it also from there.

Cc: Marcel Holtmann <marcel@holtmann.org>
Cc: Johan Hedberg <johan.hedberg@gmail.com>
Cc: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Link: https://lore.kernel.org/r/20210914091134.17426-6-jslaby@suse.cz
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/hci_ldisc.c | 2 +-
 drivers/net/ppp/ppp_async.c   | 2 +-
 drivers/net/ppp/ppp_synctty.c | 2 +-
 drivers/tty/n_gsm.c           | 2 +-
 drivers/tty/n_hdlc.c          | 2 +-
 drivers/tty/n_tty.c           | 2 +-
 drivers/tty/tty_ioctl.c       | 4 ++--
 include/linux/tty.h           | 4 ++--
 net/nfc/nci/uart.c            | 2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 5ed2cfa7da1d..824d1c7e3e53 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -790,7 +790,7 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
 		break;
 
 	default:
-		err = n_tty_ioctl_helper(tty, file, cmd, arg);
+		err = n_tty_ioctl_helper(tty, cmd, arg);
 		break;
 	}
 
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 6492523fc234..f4429b93a9c8 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -310,7 +310,7 @@ ppp_asynctty_ioctl(struct tty_struct *tty, struct file *file,
 		/* flush our buffers and the serial port's buffer */
 		if (arg == TCIOFLUSH || arg == TCOFLUSH)
 			ppp_async_flush_output(ap);
-		err = n_tty_ioctl_helper(tty, file, cmd, arg);
+		err = n_tty_ioctl_helper(tty, cmd, arg);
 		break;
 
 	case FIONREAD:
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index ebbfea0e1140..b3a71b409a80 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -303,7 +303,7 @@ ppp_synctty_ioctl(struct tty_struct *tty, struct file *file,
 		/* flush our buffers and the serial port's buffer */
 		if (arg == TCIOFLUSH || arg == TCOFLUSH)
 			ppp_sync_flush_output(ap);
-		err = n_tty_ioctl_helper(tty, file, cmd, arg);
+		err = n_tty_ioctl_helper(tty, cmd, arg);
 		break;
 
 	case FIONREAD:
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 157b51ce9cc0..c98a04595084 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -2712,7 +2712,7 @@ static int gsmld_ioctl(struct tty_struct *tty, struct file *file,
 		base = mux_num_to_base(gsm);
 		return put_user(base + 1, (__u32 __user *)arg);
 	default:
-		return n_tty_ioctl_helper(tty, file, cmd, arg);
+		return n_tty_ioctl_helper(tty, cmd, arg);
 	}
 }
 
diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c
index 580a37b3fe1b..7e0884ecc74f 100644
--- a/drivers/tty/n_hdlc.c
+++ b/drivers/tty/n_hdlc.c
@@ -630,7 +630,7 @@ static int n_hdlc_tty_ioctl(struct tty_struct *tty, struct file *file,
 		fallthrough;	/* to default */
 
 	default:
-		error = n_tty_ioctl_helper(tty, file, cmd, arg);
+		error = n_tty_ioctl_helper(tty, cmd, arg);
 		break;
 	}
 	return error;
diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 797af5d46cbe..5be6d02dc690 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -2418,7 +2418,7 @@ static int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		up_write(&tty->termios_rwsem);
 		return put_user(retval, (unsigned int __user *) arg);
 	default:
-		return n_tty_ioctl_helper(tty, file, cmd, arg);
+		return n_tty_ioctl_helper(tty, cmd, arg);
 	}
 }
 
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index 99a29d72d294..63181925ec1a 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -854,8 +854,8 @@ int tty_perform_flush(struct tty_struct *tty, unsigned long arg)
 }
 EXPORT_SYMBOL_GPL(tty_perform_flush);
 
-int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
-		       unsigned int cmd, unsigned long arg)
+int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd,
+		unsigned long arg)
 {
 	int retval;
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 5e73f577c2b4..5dbd7c5afac7 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -407,8 +407,8 @@ static inline int tty_audit_push(void)
 #endif
 
 /* tty_ioctl.c */
-int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
-		unsigned int cmd, unsigned long arg);
+int n_tty_ioctl_helper(struct tty_struct *tty, unsigned int cmd,
+		unsigned long arg);
 
 /* vt.c */
 
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 502e7a3f8948..9bdd9a7d187e 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -349,7 +349,7 @@ static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
 			return -EBUSY;
 		break;
 	default:
-		err = n_tty_ioctl_helper(tty, file, cmd, arg);
+		err = n_tty_ioctl_helper(tty, cmd, arg);
 		break;
 	}
 
-- 
cgit v1.2.3


From 23c64d7618a7e76e46db99444d184ef75498fb71 Mon Sep 17 00:00:00 2001
From: Piyush Mehta <piyush.mehta@xilinx.com>
Date: Wed, 22 Sep 2021 19:23:17 +0530
Subject: firmware: zynqmp: Add MMIO read and write support for PS_MODE pin

Add Xilinx ZynqMP firmware MMIO APIs support to set and get PS_MODE
pins value and status. These APIs create an interface path between
mode pin controller driver and low-level API to access GPIO pins.

Signed-off-by: Piyush Mehta <piyush.mehta@xilinx.com>
Acked-by: Michal Simek <michal.simek@xilinx.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
---
 drivers/firmware/xilinx/zynqmp.c     | 46 ++++++++++++++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h | 14 +++++++++++
 2 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index a3cadbaf3cba..7feba125b883 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -28,6 +28,13 @@
 /* Max HashMap Order for PM API feature check (1<<7 = 128) */
 #define PM_API_FEATURE_CHECK_MAX_ORDER  7
 
+/* CRL registers and bitfields */
+#define CRL_APB_BASE			0xFF5E0000U
+/* BOOT_PIN_CTRL- Used to control the mode pins after boot */
+#define CRL_APB_BOOT_PIN_CTRL		(CRL_APB_BASE + (0x250U))
+/* BOOT_PIN_CTRL_MASK- out_val[11:8], out_en[3:0] */
+#define CRL_APB_BOOTPIN_CTRL_MASK	0xF0FU
+
 static bool feature_check_enabled;
 static DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER);
 
@@ -925,6 +932,45 @@ int zynqmp_pm_pinctrl_set_config(const u32 pin, const u32 param,
 }
 EXPORT_SYMBOL_GPL(zynqmp_pm_pinctrl_set_config);
 
+/**
+ * zynqmp_pm_bootmode_read() - PM Config API for read bootpin status
+ * @ps_mode: Returned output value of ps_mode
+ *
+ * This API function is to be used for notify the power management controller
+ * to read bootpin status.
+ *
+ * Return: status, either success or error+reason
+ */
+unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode)
+{
+	unsigned int ret;
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+
+	ret = zynqmp_pm_invoke_fn(PM_MMIO_READ, CRL_APB_BOOT_PIN_CTRL, 0,
+				  0, 0, ret_payload);
+
+	*ps_mode = ret_payload[1];
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_bootmode_read);
+
+/**
+ * zynqmp_pm_bootmode_write() - PM Config API for Configure bootpin
+ * @ps_mode: Value to be written to the bootpin ctrl register
+ *
+ * This API function is to be used for notify the power management controller
+ * to configure bootpin.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+int zynqmp_pm_bootmode_write(u32 ps_mode)
+{
+	return zynqmp_pm_invoke_fn(PM_MMIO_WRITE, CRL_APB_BOOT_PIN_CTRL,
+				   CRL_APB_BOOTPIN_CTRL_MASK, ps_mode, 0, NULL);
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_bootmode_write);
+
 /**
  * zynqmp_pm_init_finalize() - PM call to inform firmware that the caller
  *			       master has initialized its own power management
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 56b426fe020c..3917f89b2b3c 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -72,6 +72,8 @@ enum pm_api_id {
 	PM_SET_REQUIREMENT = 15,
 	PM_RESET_ASSERT = 17,
 	PM_RESET_GET_STATUS = 18,
+	PM_MMIO_WRITE = 19,
+	PM_MMIO_READ = 20,
 	PM_PM_INIT_FINALIZE = 21,
 	PM_FPGA_LOAD = 22,
 	PM_FPGA_GET_STATUS = 23,
@@ -390,6 +392,8 @@ int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type);
 int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset,
 			   const enum zynqmp_pm_reset_action assert_flag);
 int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status);
+unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode);
+int zynqmp_pm_bootmode_write(u32 ps_mode);
 int zynqmp_pm_init_finalize(void);
 int zynqmp_pm_set_suspend_mode(u32 mode);
 int zynqmp_pm_request_node(const u32 node, const u32 capabilities,
@@ -520,6 +524,16 @@ static inline int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset,
 	return -ENODEV;
 }
 
+static inline unsigned int zynqmp_pm_bootmode_read(u32 *ps_mode)
+{
+	return -ENODEV;
+}
+
+static inline int zynqmp_pm_bootmode_write(u32 ps_mode)
+{
+	return -ENODEV;
+}
+
 static inline int zynqmp_pm_init_finalize(void)
 {
 	return -ENODEV;
-- 
cgit v1.2.3


From 68a81bb2eebdeaabb04ae04c4d89c3bb5bec3b32 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 22 Sep 2021 16:57:03 +0300
Subject: net: dsa: sja1105: remove sp->dp

It looks like this field was never used since its introduction in commit
227d07a07ef1 ("net: dsa: sja1105: Add support for traffic through
standalone ports") remove it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 1 -
 include/linux/dsa/sja1105.h            | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 2f8cc6686c38..7ce69dc07800 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2965,7 +2965,6 @@ static int sja1105_setup_ports(struct sja1105_private *priv)
 			continue;
 
 		dp->priv = sp;
-		sp->dp = dp;
 		sp->data = tagger_data;
 		slave = dp->slave;
 		kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit);
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 171106202fe5..71b69ec52108 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -65,7 +65,6 @@ struct sja1105_port {
 	struct kthread_work xmit_work;
 	struct sk_buff_head xmit_queue;
 	struct sja1105_tagger_data *data;
-	struct dsa_port *dp;
 	bool hwts_tx_en;
 };
 
-- 
cgit v1.2.3


From 6d709cadfde68dbd12bef12fcced6222226dcb06 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 22 Sep 2021 17:37:25 +0300
Subject: net: dsa: move sja1110_process_meta_tstamp inside the tagging
 protocol driver

The problem is that DSA tagging protocols really must not depend on the
switch driver, because this creates a circular dependency at insmod
time, and the switch driver will effectively not load when the tagging
protocol driver is missing.

The code was structured in the way it was for a reason, though. The DSA
driver-facing API for PTP timestamping relies on the assumption that
two-step TX timestamps are provided by the hardware in an out-of-band
manner, typically by raising an interrupt and making that timestamp
available inside some sort of FIFO which is to be accessed over
SPI/MDIO/etc.

So the API puts .port_txtstamp into dsa_switch_ops, because it is
expected that the switch driver needs to save some state (like put the
skb into a queue until its TX timestamp arrives).

On SJA1110, TX timestamps are provided by the switch as Ethernet
packets, so this makes them be received and processed by the tagging
protocol driver. This in itself is great, because the timestamps are
full 64-bit and do not require reconstruction, and since Ethernet is the
fastest I/O method available to/from the switch, PTP timestamps arrive
very quickly, no matter how bottlenecked the SPI connection is, because
SPI interaction is not needed at all.

DSA's code structure and strict isolation between the tagging protocol
driver and the switch driver break the natural code organization.

When the tagging protocol driver receives a packet which is classified
as a metadata packet containing timestamps, it passes those timestamps
one by one to the switch driver, which then proceeds to compare them
based on the recorded timestamp ID that was generated in .port_txtstamp.

The communication between the tagging protocol and the switch driver is
done through a method exported by the switch driver, sja1110_process_meta_tstamp.
To satisfy build requirements, we force a dependency to build the
tagging protocol driver as a module when the switch driver is a module.
However, as explained in the first paragraph, that causes the circular
dependency.

To solve this, move the skb queue from struct sja1105_private :: struct
sja1105_ptp_data to struct sja1105_private :: struct sja1105_tagger_data.
The latter is a data structure for which hacks have already been put
into place to be able to create persistent storage per switch that is
accessible from the tagging protocol driver (see sja1105_setup_ports).

With the skb queue directly accessible from the tagging protocol driver,
we can now move sja1110_process_meta_tstamp into the tagging driver
itself, and avoid exporting a symbol.

Fixes: 566b18c8b752 ("net: dsa: sja1105: implement TX timestamping for SJA1110")
Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_ptp.c | 45 +++++------------------------------
 drivers/net/dsa/sja1105/sja1105_ptp.h | 19 ---------------
 include/linux/dsa/sja1105.h           | 29 +++++++++++-----------
 net/dsa/tag_sja1105.c                 | 43 +++++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 691f6dd7e669..54396992a919 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -64,6 +64,7 @@ enum sja1105_ptp_clk_mode {
 static int sja1105_change_rxtstamping(struct sja1105_private *priv,
 				      bool on)
 {
+	struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 	struct sja1105_general_params_entry *general_params;
 	struct sja1105_table *table;
@@ -79,7 +80,7 @@ static int sja1105_change_rxtstamping(struct sja1105_private *priv,
 		priv->tagger_data.stampable_skb = NULL;
 	}
 	ptp_cancel_worker_sync(ptp_data->clock);
-	skb_queue_purge(&ptp_data->skb_txtstamp_queue);
+	skb_queue_purge(&tagger_data->skb_txtstamp_queue);
 	skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
 
 	return sja1105_static_config_reload(priv, SJA1105_RX_HWTSTAMPING);
@@ -452,40 +453,6 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 	return priv->info->rxtstamp(ds, port, skb);
 }
 
-void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
-				 enum sja1110_meta_tstamp dir, u64 tstamp)
-{
-	struct sja1105_private *priv = ds->priv;
-	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
-	struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
-	struct skb_shared_hwtstamps shwt = {0};
-
-	/* We don't care about RX timestamps on the CPU port */
-	if (dir == SJA1110_META_TSTAMP_RX)
-		return;
-
-	spin_lock(&ptp_data->skb_txtstamp_queue.lock);
-
-	skb_queue_walk_safe(&ptp_data->skb_txtstamp_queue, skb, skb_tmp) {
-		if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
-			continue;
-
-		__skb_unlink(skb, &ptp_data->skb_txtstamp_queue);
-		skb_match = skb;
-
-		break;
-	}
-
-	spin_unlock(&ptp_data->skb_txtstamp_queue.lock);
-
-	if (WARN_ON(!skb_match))
-		return;
-
-	shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
-	skb_complete_tx_timestamp(skb_match, &shwt);
-}
-EXPORT_SYMBOL_GPL(sja1110_process_meta_tstamp);
-
 /* In addition to cloning the skb which is done by the common
  * sja1105_port_txtstamp, we need to generate a timestamp ID and save the
  * packet to the TX timestamping queue.
@@ -494,7 +461,6 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 {
 	struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
 	struct sja1105_private *priv = ds->priv;
-	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 	struct sja1105_port *sp = &priv->ports[port];
 	u8 ts_id;
 
@@ -510,7 +476,7 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 
 	spin_unlock(&sp->data->meta_lock);
 
-	skb_queue_tail(&ptp_data->skb_txtstamp_queue, clone);
+	skb_queue_tail(&sp->data->skb_txtstamp_queue, clone);
 }
 
 /* Called from dsa_skb_tx_timestamp. This callback is just to clone
@@ -953,7 +919,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds)
 	/* Only used on SJA1105 */
 	skb_queue_head_init(&ptp_data->skb_rxtstamp_queue);
 	/* Only used on SJA1110 */
-	skb_queue_head_init(&ptp_data->skb_txtstamp_queue);
+	skb_queue_head_init(&tagger_data->skb_txtstamp_queue);
 	spin_lock_init(&tagger_data->meta_lock);
 
 	ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev);
@@ -971,6 +937,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds)
 void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
 {
 	struct sja1105_private *priv = ds->priv;
+	struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 
 	if (IS_ERR_OR_NULL(ptp_data->clock))
@@ -978,7 +945,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
 
 	del_timer_sync(&ptp_data->extts_timer);
 	ptp_cancel_worker_sync(ptp_data->clock);
-	skb_queue_purge(&ptp_data->skb_txtstamp_queue);
+	skb_queue_purge(&tagger_data->skb_txtstamp_queue);
 	skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
 	ptp_clock_unregister(ptp_data->clock);
 	ptp_data->clock = NULL;
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 3c874bb4c17b..3ae6b9fdd492 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -8,21 +8,6 @@
 
 #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
 
-/* Timestamps are in units of 8 ns clock ticks (equivalent to
- * a fixed 125 MHz clock).
- */
-#define SJA1105_TICK_NS			8
-
-static inline s64 ns_to_sja1105_ticks(s64 ns)
-{
-	return ns / SJA1105_TICK_NS;
-}
-
-static inline s64 sja1105_ticks_to_ns(s64 ticks)
-{
-	return ticks * SJA1105_TICK_NS;
-}
-
 /* Calculate the first base_time in the future that satisfies this
  * relationship:
  *
@@ -77,10 +62,6 @@ struct sja1105_ptp_data {
 	struct timer_list extts_timer;
 	/* Used only on SJA1105 to reconstruct partial timestamps */
 	struct sk_buff_head skb_rxtstamp_queue;
-	/* Used on SJA1110 where meta frames are generated only for
-	 * 2-step TX timestamps
-	 */
-	struct sk_buff_head skb_txtstamp_queue;
 	struct ptp_clock_info caps;
 	struct ptp_clock *clock;
 	struct sja1105_ptp_cmd cmd;
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 71b69ec52108..9d5a1053b276 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -48,6 +48,10 @@ struct sja1105_tagger_data {
 	spinlock_t meta_lock;
 	unsigned long state;
 	u8 ts_id;
+	/* Used on SJA1110 where meta frames are generated only for
+	 * 2-step TX timestamps
+	 */
+	struct sk_buff_head skb_txtstamp_queue;
 };
 
 struct sja1105_skb_cb {
@@ -68,25 +72,20 @@ struct sja1105_port {
 	bool hwts_tx_en;
 };
 
-enum sja1110_meta_tstamp {
-	SJA1110_META_TSTAMP_TX = 0,
-	SJA1110_META_TSTAMP_RX = 1,
-};
-
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
-
-void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
-				 enum sja1110_meta_tstamp dir, u64 tstamp);
-
-#else
+/* Timestamps are in units of 8 ns clock ticks (equivalent to
+ * a fixed 125 MHz clock).
+ */
+#define SJA1105_TICK_NS			8
 
-static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
-					       u8 ts_id, enum sja1110_meta_tstamp dir,
-					       u64 tstamp)
+static inline s64 ns_to_sja1105_ticks(s64 ns)
 {
+	return ns / SJA1105_TICK_NS;
 }
 
-#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
+static inline s64 sja1105_ticks_to_ns(s64 ticks)
+{
+	return ticks * SJA1105_TICK_NS;
+}
 
 #if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
 
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index c054f48541c8..2edede9ddac9 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -4,6 +4,7 @@
 #include <linux/if_vlan.h>
 #include <linux/dsa/sja1105.h>
 #include <linux/dsa/8021q.h>
+#include <linux/skbuff.h>
 #include <linux/packing.h>
 #include "dsa_priv.h"
 
@@ -53,6 +54,11 @@
 #define SJA1110_TX_TRAILER_LEN			4
 #define SJA1110_MAX_PADDING_LEN			15
 
+enum sja1110_meta_tstamp {
+	SJA1110_META_TSTAMP_TX = 0,
+	SJA1110_META_TSTAMP_RX = 1,
+};
+
 /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
 static inline bool sja1105_is_link_local(const struct sk_buff *skb)
 {
@@ -520,6 +526,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 					      is_meta);
 }
 
+static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
+					u8 ts_id, enum sja1110_meta_tstamp dir,
+					u64 tstamp)
+{
+	struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct skb_shared_hwtstamps shwt = {0};
+	struct sja1105_port *sp = dp->priv;
+
+	if (!dsa_port_is_sja1105(dp))
+		return;
+
+	/* We don't care about RX timestamps on the CPU port */
+	if (dir == SJA1110_META_TSTAMP_RX)
+		return;
+
+	spin_lock(&sp->data->skb_txtstamp_queue.lock);
+
+	skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, skb, skb_tmp) {
+		if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
+			continue;
+
+		__skb_unlink(skb, &sp->data->skb_txtstamp_queue);
+		skb_match = skb;
+
+		break;
+	}
+
+	spin_unlock(&sp->data->skb_txtstamp_queue.lock);
+
+	if (WARN_ON(!skb_match))
+		return;
+
+	shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
+	skb_complete_tx_timestamp(skb_match, &shwt);
+}
+
 static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
 {
 	u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
-- 
cgit v1.2.3


From f5aef4241592765a868611509e5170bbe8c89ae7 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 22 Sep 2021 17:37:26 +0300
Subject: net: dsa: sja1105: break dependency between dsa_port_is_sja1105 and
 switch driver

It's nice to be able to test a tagging protocol with dsa_loop, but not
at the cost of losing the ability of building the tagging protocol and
switch driver as modules, because as things stand, there is a circular
dependency between the two. Tagging protocol drivers cannot depend on
switch drivers, that is a hard fact.

The reasoning behind the blamed patch was that accessing dp->priv should
first make sure that the structure behind that pointer is what we really
think it is.

Currently the "sja1105" and "sja1110" tagging protocols only operate
with the sja1105 switch driver, just like any other tagging protocol and
switch combination. The only way to mix and match them is by modifying
the code, and this applies to dsa_loop as well (by default that uses
DSA_TAG_PROTO_NONE). So while in principle there is an issue, in
practice there isn't one.

Until we extend dsa_loop to allow user space configuration, treat the
problem as a non-issue and just say that DSA ports found by tag_sja1105
are always sja1105 ports, which is in fact true. But keep the
dsa_port_is_sja1105 function so that it's easy to patch it during
testing, and rely on dead code elimination.

Fixes: 994d2cbb08ca ("net: dsa: tag_sja1105: be dsa_loop-safe")
Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c |  3 +--
 include/linux/dsa/sja1105.h            | 15 +--------------
 net/dsa/Kconfig                        |  1 -
 3 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 7ce69dc07800..d5f8166b73b1 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3116,7 +3116,7 @@ static void sja1105_teardown(struct dsa_switch *ds)
 	sja1105_static_config_free(&priv->static_config);
 }
 
-const struct dsa_switch_ops sja1105_switch_ops = {
+static const struct dsa_switch_ops sja1105_switch_ops = {
 	.get_tag_protocol	= sja1105_get_tag_protocol,
 	.setup			= sja1105_setup,
 	.teardown		= sja1105_teardown,
@@ -3165,7 +3165,6 @@ const struct dsa_switch_ops sja1105_switch_ops = {
 	.port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload,
 	.port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload,
 };
-EXPORT_SYMBOL_GPL(sja1105_switch_ops);
 
 static const struct of_device_id sja1105_dt_ids[];
 
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 9d5a1053b276..e6c78be40bde 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -87,22 +87,9 @@ static inline s64 sja1105_ticks_to_ns(s64 ticks)
 	return ticks * SJA1105_TICK_NS;
 }
 
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
-
-extern const struct dsa_switch_ops sja1105_switch_ops;
-
-static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
-{
-	return dp->ds->ops == &sja1105_switch_ops;
-}
-
-#else
-
 static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
 {
-	return false;
+	return true;
 }
 
-#endif
-
 #endif /* _NET_DSA_SJA1105_H */
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 548285539752..bca1b5d66df2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -138,7 +138,6 @@ config NET_DSA_TAG_LAN9303
 
 config NET_DSA_TAG_SJA1105
 	tristate "Tag driver for NXP SJA1105 switches"
-	depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
 	select PACKING
 	help
 	  Say Y or M if you want to enable support for tagging frames with the
-- 
cgit v1.2.3


From 6bc6db000295332bae2c1e8815d7450b72923d23 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@linux.alibaba.com>
Date: Sat, 18 Sep 2021 08:56:29 +0800
Subject: KVM: Remove tlbs_dirty

There is no user of tlbs_dirty.

Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210918005636.3675-4-jiangshanlai@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h |  1 -
 virt/kvm/kvm_main.c      | 10 ++--------
 2 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 000ea73dd324..0f18df7fe874 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -608,7 +608,6 @@ struct kvm {
 	unsigned long mmu_notifier_range_start;
 	unsigned long mmu_notifier_range_end;
 #endif
-	long tlbs_dirty;
 	struct list_head devices;
 	u64 manual_dirty_log_protect;
 	struct dentry *debugfs_dentry;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e95e7a9e4d53..7851f3a1b5f7 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -326,13 +326,8 @@ EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
 #ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
 void kvm_flush_remote_tlbs(struct kvm *kvm)
 {
-	/*
-	 * Read tlbs_dirty before setting KVM_REQ_TLB_FLUSH in
-	 * kvm_make_all_cpus_request.
-	 */
-	long dirty_count = smp_load_acquire(&kvm->tlbs_dirty);
-
 	++kvm->stat.generic.remote_tlb_flush_requests;
+
 	/*
 	 * We want to publish modifications to the page tables before reading
 	 * mode. Pairs with a memory barrier in arch-specific code.
@@ -347,7 +342,6 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
 	if (!kvm_arch_flush_remote_tlb(kvm)
 	    || kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
 		++kvm->stat.generic.remote_tlb_flush;
-	cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
 #endif
@@ -552,7 +546,7 @@ static __always_inline int __kvm_handle_hva_range(struct kvm *kvm,
 		}
 	}
 
-	if (range->flush_on_ret && (ret || kvm->tlbs_dirty))
+	if (range->flush_on_ret && ret)
 		kvm_flush_remote_tlbs(kvm);
 
 	if (locked)
-- 
cgit v1.2.3


From d50497c4a05e73e76874fd0d4942036375a7ec0f Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Wed, 15 Sep 2021 18:46:27 -0700
Subject: platform/chrome: cros_ec_proto: Fix check_features ret val

The kerneldoc for cros_ec_check_features() states that it returns 1 or 0
depedending on whether a feature is supported or not, but it instead
returns a negative error number in one case, and a non-1 bitmask in
other cases.

Since all call-sites only check for a 1 or 0 return value, update
the function to return boolean values.

Signed-off-by: Prashant Malani <pmalani@chromium.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Link: https://lore.kernel.org/r/20210916014632.2662612-1-pmalani@chromium.org
---
 drivers/platform/chrome/cros_ec_proto.c     | 12 +++++++-----
 include/linux/platform_data/cros_ec_proto.h |  2 +-
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index a7404d69b2d3..a34cf58c5ef7 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -808,9 +808,9 @@ EXPORT_SYMBOL(cros_ec_get_host_event);
  *
  * Call this function to test whether the ChromeOS EC supports a feature.
  *
- * Return: 1 if supported, 0 if not
+ * Return: true if supported, false if not (or if an error was encountered).
  */
-int cros_ec_check_features(struct cros_ec_dev *ec, int feature)
+bool cros_ec_check_features(struct cros_ec_dev *ec, int feature)
 {
 	struct cros_ec_command *msg;
 	int ret;
@@ -818,8 +818,10 @@ int cros_ec_check_features(struct cros_ec_dev *ec, int feature)
 	if (ec->features[0] == -1U && ec->features[1] == -1U) {
 		/* features bitmap not read yet */
 		msg = kzalloc(sizeof(*msg) + sizeof(ec->features), GFP_KERNEL);
-		if (!msg)
-			return -ENOMEM;
+		if (!msg) {
+			dev_err(ec->dev, "failed to allocate memory to get EC features\n");
+			return false;
+		}
 
 		msg->command = EC_CMD_GET_FEATURES + ec->cmd_offset;
 		msg->insize = sizeof(ec->features);
@@ -839,7 +841,7 @@ int cros_ec_check_features(struct cros_ec_dev *ec, int feature)
 		kfree(msg);
 	}
 
-	return ec->features[feature / 32] & EC_FEATURE_MASK_0(feature);
+	return !!(ec->features[feature / 32] & EC_FEATURE_MASK_0(feature));
 }
 EXPORT_SYMBOL_GPL(cros_ec_check_features);
 
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 02599687770c..55844ece0b32 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -227,7 +227,7 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev,
 
 u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
 
-int cros_ec_check_features(struct cros_ec_dev *ec, int feature);
+bool cros_ec_check_features(struct cros_ec_dev *ec, int feature);
 
 int cros_ec_get_sensor_count(struct cros_ec_dev *ec);
 
-- 
cgit v1.2.3


From 5501765a02a6c324f78581e6bb8209d054fe13ae Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 15 Sep 2021 10:09:38 -0700
Subject: driver core: fw_devlink: Add support for
 FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD

If a parent device is also a supplier to a child device, fw_devlink=on by
design delays the probe() of the child device until the probe() of the
parent finishes successfully.

However, some drivers of such parent devices (where parent is also a
supplier) expect the child device to finish probing successfully as soon as
they are added using device_add() and before the probe() of the parent
device has completed successfully. One example of such a case is discussed
in the link mentioned below.

Add a flag to make fw_devlink=on not enforce these supplier-consumer
relationships, so these drivers can continue working.

Link: https://lore.kernel.org/netdev/CAGETcx_uj0V4DChME-gy5HGKTYnxLBX=TH2rag29f_p=UcG+Tg@mail.gmail.com/
Fixes: ea718c699055 ("Revert "Revert "driver core: Set fw_devlink=on by default""")
Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20210915170940.617415-3-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 19 +++++++++++++++++++
 include/linux/fwnode.h | 11 ++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 316df6027093..21d4cb5d3767 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1722,6 +1722,25 @@ static int fw_devlink_create_devlink(struct device *con,
 	struct device *sup_dev;
 	int ret = 0;
 
+	/*
+	 * In some cases, a device P might also be a supplier to its child node
+	 * C. However, this would defer the probe of C until the probe of P
+	 * completes successfully. This is perfectly fine in the device driver
+	 * model. device_add() doesn't guarantee probe completion of the device
+	 * by the time it returns.
+	 *
+	 * However, there are a few drivers that assume C will finish probing
+	 * as soon as it's added and before P finishes probing. So, we provide
+	 * a flag to let fw_devlink know not to delay the probe of C until the
+	 * probe of P completes successfully.
+	 *
+	 * When such a flag is set, we can't create device links where P is the
+	 * supplier of C as that would delay the probe of C.
+	 */
+	if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD &&
+	    fwnode_is_ancestor_of(sup_handle, con->fwnode))
+		return -EINVAL;
+
 	sup_dev = get_dev_from_fwnode(sup_handle);
 	if (sup_dev) {
 		/*
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 59828516ebaf..9f4ad719bfe3 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -22,10 +22,15 @@ struct device;
  * LINKS_ADDED:	The fwnode has already be parsed to add fwnode links.
  * NOT_DEVICE:	The fwnode will never be populated as a struct device.
  * INITIALIZED: The hardware corresponding to fwnode has been initialized.
+ * NEEDS_CHILD_BOUND_ON_ADD: For this fwnode/device to probe successfully, its
+ *			     driver needs its child devices to be bound with
+ *			     their respective drivers as soon as they are
+ *			     added.
  */
-#define FWNODE_FLAG_LINKS_ADDED		BIT(0)
-#define FWNODE_FLAG_NOT_DEVICE		BIT(1)
-#define FWNODE_FLAG_INITIALIZED		BIT(2)
+#define FWNODE_FLAG_LINKS_ADDED			BIT(0)
+#define FWNODE_FLAG_NOT_DEVICE			BIT(1)
+#define FWNODE_FLAG_INITIALIZED			BIT(2)
+#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD	BIT(3)
 
 struct fwnode_handle {
 	struct fwnode_handle *secondary;
-- 
cgit v1.2.3


From 3fd445a4d49fce594eecc90b9bbcf85cd223154d Mon Sep 17 00:00:00 2001
From: Len Baker <len.baker@gmx.com>
Date: Sat, 4 Sep 2021 11:22:17 +0200
Subject: brcmfmac: Replace zero-length array with flexible array member

There is a regular need in the kernel to provide a way to declare
having a dynamically sized set of trailing elements in a structure.
Kernel code should always use "flexible array members"[1] for these
cases. The older style of one-element or zero-length arrays should
no longer be used[2].

Also, make use of the struct_size() helper in devm_kzalloc().

[1] https://en.wikipedia.org/wiki/Flexible_array_member
[2] https://www.kernel.org/doc/html/v5.14/process/deprecated.html#zero-length-and-one-element-arrays

Signed-off-by: Len Baker <len.baker@gmx.com>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210904092217.2848-1-len.baker@gmx.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 2 +-
 include/linux/platform_data/brcmfmac.h                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
index 2f7bc3a70c65..513c7e6421b2 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
@@ -29,7 +29,7 @@ static int brcmf_of_get_country_codes(struct device *dev,
 		return (count == -EINVAL) ? 0 : count;
 	}
 
-	cc = devm_kzalloc(dev, sizeof(*cc) + count * sizeof(*cce), GFP_KERNEL);
+	cc = devm_kzalloc(dev, struct_size(cc, table, count), GFP_KERNEL);
 	if (!cc)
 		return -ENOMEM;
 
diff --git a/include/linux/platform_data/brcmfmac.h b/include/linux/platform_data/brcmfmac.h
index 1d30bf278231..2b5676ff35be 100644
--- a/include/linux/platform_data/brcmfmac.h
+++ b/include/linux/platform_data/brcmfmac.h
@@ -125,7 +125,7 @@ struct brcmfmac_pd_cc_entry {
  */
 struct brcmfmac_pd_cc {
 	int				table_size;
-	struct brcmfmac_pd_cc_entry	table[0];
+	struct brcmfmac_pd_cc_entry	table[];
 };
 
 /**
-- 
cgit v1.2.3


From ae98f40d32cd0ee6fc222e765734ffa497a0a95e Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 23 Sep 2021 13:57:32 -0700
Subject: net: phy: broadcom: Fix PHY_BRCM_IDDQ_SUSPEND definition

An extraneous number was added during the inclusion of that change,
correct that such that we use a single bit as is expected by the PHY
driver.

Reported-by: Justin Chen <justinpopo6@gmail.com>
Fixes: d6da08ed1425 ("net: phy: broadcom: Add IDDQ-SR mode")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/brcmphy.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index b119d6819d6c..27d9b6683f0e 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -67,7 +67,7 @@
 #define PHY_BRCM_CLEAR_RGMII_MODE	0x00000004
 #define PHY_BRCM_DIS_TXCRXC_NOENRGY	0x00000008
 #define PHY_BRCM_EN_MASTER_MODE		0x00000010
-#define PHY_BRCM_IDDQ_SUSPEND		0x000000220
+#define PHY_BRCM_IDDQ_SUSPEND		0x00000020
 
 /* Broadcom BCM7xxx specific workarounds */
 #define PHY_BRCM_7XXX_REV(x)		(((x) >> 8) & 0xff)
-- 
cgit v1.2.3


From 243418e3925d5b5b0657ae54c322d43035e97eed Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Fri, 24 Sep 2021 15:43:47 -0700
Subject: mm: fs: invalidate bh_lrus for only cold path

The kernel test robot reported the regression of fio.write_iops[1] with
commit 8cc621d2f45d ("mm: fs: invalidate BH LRU during page migration").

Since lru_add_drain is called frequently, invalidate bh_lrus there could
increase bh_lrus cache miss ratio, which needs more IO in the end.

This patch moves the bh_lrus invalidation from the hot path( e.g.,
zap_page_range, pagevec_release) to cold path(i.e., lru_add_drain_all,
lru_cache_disable).

Zhengjun Xing confirmed
 "I test the patch, the regression reduced to -2.9%"

[1] https://lore.kernel.org/lkml/20210520083144.GD14190@xsang-OptiPlex-9020/
[2] 8cc621d2f45d, mm: fs: invalidate BH LRU during page migration

Link: https://lkml.kernel.org/r/20210907212347.1977686-1-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: kernel test robot <oliver.sang@intel.com>
Reviewed-by: Chris Goldsworthy <cgoldswo@codeaurora.org>
Tested-by: "Xing, Zhengjun" <zhengjun.xing@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/buffer.c                 |  8 ++++++--
 include/linux/buffer_head.h |  4 ++--
 mm/swap.c                   | 19 ++++++++++++++++---
 3 files changed, 24 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/buffer.c b/fs/buffer.c
index ab7573d72dd7..c615387aedca 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1425,12 +1425,16 @@ void invalidate_bh_lrus(void)
 }
 EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
 
-void invalidate_bh_lrus_cpu(int cpu)
+/*
+ * It's called from workqueue context so we need a bh_lru_lock to close
+ * the race with preemption/irq.
+ */
+void invalidate_bh_lrus_cpu(void)
 {
 	struct bh_lru *b;
 
 	bh_lru_lock();
-	b = per_cpu_ptr(&bh_lrus, cpu);
+	b = this_cpu_ptr(&bh_lrus);
 	__invalidate_bh_lrus(b);
 	bh_lru_unlock();
 }
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6486d3c19463..36f33685c8c0 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -194,7 +194,7 @@ void __breadahead_gfp(struct block_device *, sector_t block, unsigned int size,
 struct buffer_head *__bread_gfp(struct block_device *,
 				sector_t block, unsigned size, gfp_t gfp);
 void invalidate_bh_lrus(void);
-void invalidate_bh_lrus_cpu(int cpu);
+void invalidate_bh_lrus_cpu(void);
 bool has_bh_in_lru(int cpu, void *dummy);
 struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
 void free_buffer_head(struct buffer_head * bh);
@@ -408,7 +408,7 @@ static inline int inode_has_buffers(struct inode *inode) { return 0; }
 static inline void invalidate_inode_buffers(struct inode *inode) {}
 static inline int remove_inode_buffers(struct inode *inode) { return 1; }
 static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
-static inline void invalidate_bh_lrus_cpu(int cpu) {}
+static inline void invalidate_bh_lrus_cpu(void) {}
 static inline bool has_bh_in_lru(int cpu, void *dummy) { return false; }
 #define buffer_heads_over_limit 0
 
diff --git a/mm/swap.c b/mm/swap.c
index 897200d27dd0..af3cad4e5378 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -620,7 +620,6 @@ void lru_add_drain_cpu(int cpu)
 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
 
 	activate_page_drain(cpu);
-	invalidate_bh_lrus_cpu(cpu);
 }
 
 /**
@@ -703,6 +702,20 @@ void lru_add_drain(void)
 	local_unlock(&lru_pvecs.lock);
 }
 
+/*
+ * It's called from per-cpu workqueue context in SMP case so
+ * lru_add_drain_cpu and invalidate_bh_lrus_cpu should run on
+ * the same cpu. It shouldn't be a problem in !SMP case since
+ * the core is only one and the locks will disable preemption.
+ */
+static void lru_add_and_bh_lrus_drain(void)
+{
+	local_lock(&lru_pvecs.lock);
+	lru_add_drain_cpu(smp_processor_id());
+	local_unlock(&lru_pvecs.lock);
+	invalidate_bh_lrus_cpu();
+}
+
 void lru_add_drain_cpu_zone(struct zone *zone)
 {
 	local_lock(&lru_pvecs.lock);
@@ -717,7 +730,7 @@ static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 
 static void lru_add_drain_per_cpu(struct work_struct *dummy)
 {
-	lru_add_drain();
+	lru_add_and_bh_lrus_drain();
 }
 
 /*
@@ -858,7 +871,7 @@ void lru_cache_disable(void)
 	 */
 	__lru_add_drain_all(true);
 #else
-	lru_add_drain();
+	lru_add_and_bh_lrus_drain();
 #endif
 }
 
-- 
cgit v1.2.3


From 57ed7b4303a1c4d1885019fef03e6a5af2e8468a Mon Sep 17 00:00:00 2001
From: Weizhao Ouyang <o451686892@gmail.com>
Date: Fri, 24 Sep 2021 15:43:53 -0700
Subject: mm/debug: sync up latest migrate_reason to migrate_reason_names

Sync up MR_DEMOTION to migrate_reason_names and add a synch prompt.

Link: https://lkml.kernel.org/r/20210921064553.293905-3-o451686892@gmail.com
Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim")
Signed-off-by: Weizhao Ouyang <o451686892@gmail.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Mina Almasry <almasrymina@google.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Wei Xu <weixugc@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h | 6 +++++-
 mm/debug.c              | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 326250996b4e..c8077e936691 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -19,6 +19,11 @@ struct migration_target_control;
  */
 #define MIGRATEPAGE_SUCCESS		0
 
+/*
+ * Keep sync with:
+ * - macro MIGRATE_REASON in include/trace/events/migrate.h
+ * - migrate_reason_names[MR_TYPES] in mm/debug.c
+ */
 enum migrate_reason {
 	MR_COMPACTION,
 	MR_MEMORY_FAILURE,
@@ -32,7 +37,6 @@ enum migrate_reason {
 	MR_TYPES
 };
 
-/* In mm/debug.c; also keep sync with include/trace/events/migrate.h */
 extern const char *migrate_reason_names[MR_TYPES];
 
 #ifdef CONFIG_MIGRATION
diff --git a/mm/debug.c b/mm/debug.c
index e61037cded98..fae0f81ad831 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -26,6 +26,7 @@ const char *migrate_reason_names[MR_TYPES] = {
 	"numa_misplaced",
 	"contig_range",
 	"longterm_pin",
+	"demotion",
 };
 
 const struct trace_print_flags pageflag_names[] = {
-- 
cgit v1.2.3


From e7f18c22e6bea258ffd65185fdab66d1e63dd5bd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 19 Aug 2021 13:42:43 -0700
Subject: stddef: Fix kerndoc for sizeof_field() and offsetofend()

Adjust the comment styles so these are correctly identified as valid
kern-doc.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/stddef.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 998a4ba28eba..8553b33143d1 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -20,7 +20,7 @@ enum {
 #endif
 
 /**
- * sizeof_field(TYPE, MEMBER)
+ * sizeof_field() - Report the size of a struct field in bytes
  *
  * @TYPE: The structure containing the field of interest
  * @MEMBER: The field to return the size of
@@ -28,7 +28,7 @@ enum {
 #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
 
 /**
- * offsetofend(TYPE, MEMBER)
+ * offsetofend() - Report the offset of a struct field within the struct
  *
  * @TYPE: The type of the structure
  * @MEMBER: The member within the structure to get the end offset of
-- 
cgit v1.2.3


From 50d7bd38c3aafc4749e05e8d7fcb616979143602 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 17 May 2021 20:01:15 -0700
Subject: stddef: Introduce struct_group() helper macro

Kernel code has a regular need to describe groups of members within a
structure usually when they need to be copied or initialized separately
from the rest of the surrounding structure. The generally accepted design
pattern in C is to use a named sub-struct:

	struct foo {
		int one;
		struct {
			int two;
			int three, four;
		} thing;
		int five;
	};

This would allow for traditional references and sizing:

	memcpy(&dst.thing, &src.thing, sizeof(dst.thing));

However, doing this would mean that referencing struct members enclosed
by such named structs would always require including the sub-struct name
in identifiers:

	do_something(dst.thing.three);

This has tended to be quite inflexible, especially when such groupings
need to be added to established code which causes huge naming churn.
Three workarounds exist in the kernel for this problem, and each have
other negative properties.

To avoid the naming churn, there is a design pattern of adding macro
aliases for the named struct:

	#define f_three thing.three

This ends up polluting the global namespace, and makes it difficult to
search for identifiers.

Another common work-around in kernel code avoids the pollution by avoiding
the named struct entirely, instead identifying the group's boundaries using
either a pair of empty anonymous structs of a pair of zero-element arrays:

	struct foo {
		int one;
		struct { } start;
		int two;
		int three, four;
		struct { } finish;
		int five;
	};

	struct foo {
		int one;
		int start[0];
		int two;
		int three, four;
		int finish[0];
		int five;
	};

This allows code to avoid needing to use a sub-struct named for member
references within the surrounding structure, but loses the benefits of
being able to actually use such a struct, making it rather fragile. Using
these requires open-coded calculation of sizes and offsets. The efforts
made to avoid common mistakes include lots of comments, or adding various
BUILD_BUG_ON()s. Such code is left with no way for the compiler to reason
about the boundaries (e.g. the "start" object looks like it's 0 bytes
in length), making bounds checking depend on open-coded calculations:

	if (length > offsetof(struct foo, finish) -
		     offsetof(struct foo, start))
		return -EINVAL;
	memcpy(&dst.start, &src.start, offsetof(struct foo, finish) -
				       offsetof(struct foo, start));

However, the vast majority of places in the kernel that operate on
groups of members do so without any identification of the grouping,
relying either on comments or implicit knowledge of the struct contents,
which is even harder for the compiler to reason about, and results in
even more fragile manual sizing, usually depending on member locations
outside of the region (e.g. to copy "two" and "three", use the start of
"four" to find the size):

	BUILD_BUG_ON((offsetof(struct foo, four) <
		      offsetof(struct foo, two)) ||
		     (offsetof(struct foo, four) <
		      offsetof(struct foo, three));
	if (length > offsetof(struct foo, four) -
		     offsetof(struct foo, two))
		return -EINVAL;
	memcpy(&dst.two, &src.two, length);

In order to have a regular programmatic way to describe a struct
region that can be used for references and sizing, can be examined for
bounds checking, avoids forcing the use of intermediate identifiers,
and avoids polluting the global namespace, introduce the struct_group()
macro. This macro wraps the member declarations to create an anonymous
union of an anonymous struct (no intermediate name) and a named struct
(for references and sizing):

	struct foo {
		int one;
		struct_group(thing,
			int two;
			int three, four;
		);
		int five;
	};

	if (length > sizeof(src.thing))
		return -EINVAL;
	memcpy(&dst.thing, &src.thing, length);
	do_something(dst.three);

There are some rare cases where the resulting struct_group() needs
attributes added, so struct_group_attr() is also introduced to allow
for specifying struct attributes (e.g. __align(x) or __packed).
Additionally, there are places where such declarations would like to
have the struct be tagged, so struct_group_tagged() is added.

Given there is a need for a handful of UAPI uses too, the underlying
__struct_group() macro has been defined in UAPI so it can be used there
too.

To avoid confusing scripts/kernel-doc, hide the macro from its struct
parsing.

Co-developed-by: Keith Packard <keithp@keithp.com>
Signed-off-by: Keith Packard <keithp@keithp.com>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/lkml/20210728023217.GC35706@embeddedor
Enhanced-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Link: https://lore.kernel.org/lkml/41183a98-bdb9-4ad6-7eab-5a7292a6df84@rasmusvillemoes.dk
Enhanced-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/lkml/1d9a2e6df2a9a35b2cdd50a9a68cac5991e7e5f0.camel@intel.com
Enhanced-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://lore.kernel.org/lkml/YQKa76A6XuFqgM03@phenom.ffwll.local
Acked-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/stddef.h      | 48 +++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/stddef.h | 21 ++++++++++++++++++++
 scripts/kernel-doc          |  7 +++++++
 3 files changed, 76 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 8553b33143d1..8b103a53b000 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -36,4 +36,52 @@ enum {
 #define offsetofend(TYPE, MEMBER) \
 	(offsetof(TYPE, MEMBER)	+ sizeof_field(TYPE, MEMBER))
 
+/**
+ * struct_group() - Wrap a set of declarations in a mirrored struct
+ *
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical
+ * layout and size: one anonymous and one named. The former can be
+ * used normally without sub-struct naming, and the latter can be
+ * used to reason about the start, end, and size of the group of
+ * struct members.
+ */
+#define struct_group(NAME, MEMBERS...)	\
+	__struct_group(/* no tag */, NAME, /* no attrs */, MEMBERS)
+
+/**
+ * struct_group_attr() - Create a struct_group() with trailing attributes
+ *
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @ATTRS: Any struct attributes to apply
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical
+ * layout and size: one anonymous and one named. The former can be
+ * used normally without sub-struct naming, and the latter can be
+ * used to reason about the start, end, and size of the group of
+ * struct members. Includes structure attributes argument.
+ */
+#define struct_group_attr(NAME, ATTRS, MEMBERS...) \
+	__struct_group(/* no tag */, NAME, ATTRS, MEMBERS)
+
+/**
+ * struct_group_tagged() - Create a struct_group with a reusable tag
+ *
+ * @TAG: The tag name for the named sub-struct
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical
+ * layout and size: one anonymous and one named. The former can be
+ * used normally without sub-struct naming, and the latter can be
+ * used to reason about the start, end, and size of the group of
+ * struct members. Includes struct tag argument for the named copy,
+ * so the specified layout can be reused later.
+ */
+#define struct_group_tagged(TAG, NAME, MEMBERS...) \
+	__struct_group(TAG, NAME, /* no attrs */, MEMBERS)
+
 #endif
diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index ee8220f8dcf5..610204f7c275 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -4,3 +4,24 @@
 #ifndef __always_inline
 #define __always_inline inline
 #endif
+
+/**
+ * __struct_group() - Create a mirrored named and anonyomous struct
+ *
+ * @TAG: The tag name for the named sub-struct (usually empty)
+ * @NAME: The identifier name of the mirrored sub-struct
+ * @ATTRS: Any struct attributes (usually empty)
+ * @MEMBERS: The member declarations for the mirrored structs
+ *
+ * Used to create an anonymous union of two structs with identical layout
+ * and size: one anonymous and one named. The former's members can be used
+ * normally without sub-struct naming, and the latter can be used to
+ * reason about the start, end, and size of the group of struct members.
+ * The named struct can also be explicitly tagged for layer reuse, as well
+ * as both having struct attributes appended.
+ */
+#define __struct_group(TAG, NAME, ATTRS, MEMBERS...) \
+	union { \
+		struct { MEMBERS } ATTRS; \
+		struct TAG { MEMBERS } ATTRS NAME; \
+	}
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index cfcb60737957..38aa799a776c 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1245,6 +1245,13 @@ sub dump_struct($$) {
 	$members =~ s/\s*CRYPTO_MINALIGN_ATTR/ /gos;
 	$members =~ s/\s*____cacheline_aligned_in_smp/ /gos;
 	$members =~ s/\s*____cacheline_aligned/ /gos;
+	# unwrap struct_group():
+	# - first eat non-declaration parameters and rewrite for final match
+	# - then remove macro, outer parens, and trailing semicolon
+	$members =~ s/\bstruct_group\s*\(([^,]*,)/STRUCT_GROUP(/gos;
+	$members =~ s/\bstruct_group_(attr|tagged)\s*\(([^,]*,){2}/STRUCT_GROUP(/gos;
+	$members =~ s/\b__struct_group\s*\(([^,]*,){3}/STRUCT_GROUP(/gos;
+	$members =~ s/\bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;/$2/gos;
 
 	my $args = qr{([^,)]+)};
 	# replace DECLARE_BITMAP
-- 
cgit v1.2.3


From c80d92fbb67b2c80b8eeb8759ee79d676eb33520 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Jun 2021 22:48:05 -0700
Subject: compiler_types.h: Remove __compiletime_object_size()

Since all compilers support __builtin_object_size(), and there is only
one user of __compiletime_object_size, remove it to avoid the needless
indirection. This lets Clang reason about check_copy_size() correctly.

Link: https://github.com/ClangBuiltLinux/linux/issues/1179
Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marco Elver <elver@google.com>
Cc: Arvind Sankar <nivedita@alum.mit.edu>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Sami Tolvanen <samitolvanen@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Gabriel Krisman Bertazi <krisman@collabora.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler-gcc.h   | 2 --
 include/linux/compiler_types.h | 5 -----
 include/linux/thread_info.h    | 2 +-
 3 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index bd2b881c6b63..9957085b8148 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -41,8 +41,6 @@
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
-
 #if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index b6ff83a714ca..05ceb2e92b0e 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -290,11 +290,6 @@ struct ftrace_likely_data {
 	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
 	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
-/* Compile time object size, -1 for unknown */
-#ifndef __compiletime_object_size
-# define __compiletime_object_size(obj) -1
-#endif
-
 #ifdef __OPTIMIZE__
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 0999f6317978..ad0c4e041030 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -203,7 +203,7 @@ static inline void copy_overflow(int size, unsigned long count)
 static __always_inline __must_check bool
 check_copy_size(const void *addr, size_t bytes, bool is_source)
 {
-	int sz = __compiletime_object_size(addr);
+	int sz = __builtin_object_size(addr, 0);
 	if (unlikely(sz >= 0 && sz < bytes)) {
 		if (!__builtin_constant_p(bytes))
 			copy_overflow(sz, bytes);
-- 
cgit v1.2.3


From c430f60036af44079170ff71a461b9d7cf5ee431 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 14 Apr 2021 15:45:39 -0700
Subject: fortify: Move remaining fortify helpers into fortify-string.h

When commit a28a6e860c6c ("string.h: move fortified functions definitions
in a dedicated header.") moved the fortify-specific code, some helpers
were left behind. Move the remaining fortify-specific helpers into
fortify-string.h so they're together where they're used. This requires
that any FORTIFY helper function prototypes be conditionally built to
avoid "no prototype" warnings. Additionally removes unused helpers.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Daniel Axtens <dja@axtens.net>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Acked-by: Francis Laniel <laniel_francis@privacyrequired.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/fortify-string.h | 7 +++++++
 include/linux/string.h         | 9 ---------
 lib/string_helpers.c           | 2 ++
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index c1be37437e77..7e67d02764db 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -2,6 +2,13 @@
 #ifndef _LINUX_FORTIFY_STRING_H_
 #define _LINUX_FORTIFY_STRING_H_
 
+#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
+#define __RENAME(x) __asm__(#x)
+
+void fortify_panic(const char *name) __noreturn __cold;
+void __read_overflow(void) __compiletime_error("detected read beyond size of object (1st parameter)");
+void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)");
+void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)");
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
diff --git a/include/linux/string.h b/include/linux/string.h
index 5e96d656be7a..ac1c769a5a80 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -249,15 +249,6 @@ static inline const char *kbasename(const char *path)
 	return tail ? tail + 1 : path;
 }
 
-#define __FORTIFY_INLINE extern __always_inline __attribute__((gnu_inline))
-#define __RENAME(x) __asm__(#x)
-
-void fortify_panic(const char *name) __noreturn __cold;
-void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter");
-void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter");
-void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter");
-void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
-
 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
 #include <linux/fortify-string.h>
 #endif
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index bde13612c25d..faa9d8e4e2c5 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -883,9 +883,11 @@ char *strreplace(char *s, char old, char new)
 }
 EXPORT_SYMBOL(strreplace);
 
+#ifdef CONFIG_FORTIFY_SOURCE
 void fortify_panic(const char *name)
 {
 	pr_emerg("detected buffer overflow in %s\n", name);
 	BUG();
 }
 EXPORT_SYMBOL(fortify_panic);
+#endif /* CONFIG_FORTIFY_SOURCE */
-- 
cgit v1.2.3


From 072af0c638dc8a5c7db2edc4dddbd6d44bee3bdb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 2 Aug 2021 10:25:01 -0700
Subject: fortify: Fix dropped strcpy() compile-time write overflow check

The implementation for intra-object overflow in str*-family functions
accidentally dropped compile-time write overflow checking in strcpy(),
leaving it entirely to run-time. Add back the intended check.

Fixes: 6a39e62abbaf ("lib: string.h: detect intra-object overflow in fortified string functions")
Cc: Daniel Axtens <dja@axtens.net>
Cc: Francis Laniel <laniel_francis@privacyrequired.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
---
 include/linux/fortify-string.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 7e67d02764db..68bc5978d916 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -287,7 +287,10 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q)
 	if (p_size == (size_t)-1 && q_size == (size_t)-1)
 		return __underlying_strcpy(p, q);
 	size = strlen(q) + 1;
-	/* test here to use the more stringent object size */
+	/* Compile-time check for const size overflow. */
+	if (__builtin_constant_p(size) && p_size < size)
+		__write_overflow();
+	/* Run-time check for dynamic size overflow. */
 	if (p_size < size)
 		fortify_panic(__func__);
 	memcpy(p, q, size);
-- 
cgit v1.2.3


From 369cd2165d7beac1db144b40811baa2c6b7d8c54 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Aug 2021 14:20:14 -0700
Subject: fortify: Prepare to improve strnlen() and strlen() warnings

In order to have strlen() use fortified strnlen() internally, swap their
positions in the source. Doing this as part of later changes makes
review difficult, so reoroder it here; no code changes.

Cc: Francis Laniel <laniel_francis@privacyrequired.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
---
 include/linux/fortify-string.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 68bc5978d916..a3cb1d9aacce 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -56,6 +56,17 @@ __FORTIFY_INLINE char *strcat(char *p, const char *q)
 	return p;
 }
 
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+	size_t p_size = __builtin_object_size(p, 1);
+	__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+
+	if (p_size <= ret && maxlen != ret)
+		fortify_panic(__func__);
+	return ret;
+}
+
 __FORTIFY_INLINE __kernel_size_t strlen(const char *p)
 {
 	__kernel_size_t ret;
@@ -71,17 +82,6 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p)
 	return ret;
 }
 
-extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
-__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
-{
-	size_t p_size = __builtin_object_size(p, 1);
-	__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
-
-	if (p_size <= ret && maxlen != ret)
-		fortify_panic(__func__);
-	return ret;
-}
-
 /* defined after fortified strlen to reuse it */
 extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
 __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
-- 
cgit v1.2.3


From 3009f891bb9f328945ebd5b71e12df7e2467f3dd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 2 Aug 2021 22:51:31 -0700
Subject: fortify: Allow strlen() and strnlen() to pass compile-time known
 lengths

Under CONFIG_FORTIFY_SOURCE, it is possible for the compiler to perform
strlen() and strnlen() at compile-time when the string size is known.
This is required to support compile-time overflow checking in strlcpy().

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/fortify-string.h | 49 ++++++++++++++++++++++++++++++++----------
 1 file changed, 38 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index a3cb1d9aacce..fdb0a74c9ca2 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -10,6 +10,20 @@ void __read_overflow(void) __compiletime_error("detected read beyond size of obj
 void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)");
 void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)");
 
+#define __compiletime_strlen(p)				\
+({							\
+	unsigned char *__p = (unsigned char *)(p);      \
+	size_t ret = (size_t)-1;			\
+	size_t p_size = __builtin_object_size(p, 1);	\
+	if (p_size != (size_t)-1) {			\
+		size_t p_len = p_size - 1;		\
+		if (__builtin_constant_p(__p[p_len]) &&	\
+		    __p[p_len] == '\0')			\
+			ret = __builtin_strlen(__p);	\
+	}						\
+	ret;						\
+})
+
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
 extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
@@ -60,21 +74,31 @@ extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(st
 __FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
 {
 	size_t p_size = __builtin_object_size(p, 1);
-	__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+	size_t p_len = __compiletime_strlen(p);
+	size_t ret;
+
+	/* We can take compile-time actions when maxlen is const. */
+	if (__builtin_constant_p(maxlen) && p_len != (size_t)-1) {
+		/* If p is const, we can use its compile-time-known len. */
+		if (maxlen >= p_size)
+			return p_len;
+	}
 
+	/* Do not check characters beyond the end of p. */
+	ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
 	if (p_size <= ret && maxlen != ret)
 		fortify_panic(__func__);
 	return ret;
 }
 
+/* defined after fortified strnlen to reuse it. */
 __FORTIFY_INLINE __kernel_size_t strlen(const char *p)
 {
 	__kernel_size_t ret;
 	size_t p_size = __builtin_object_size(p, 1);
 
-	/* Work around gcc excess stack consumption issue */
-	if (p_size == (size_t)-1 ||
-		(__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
+	/* Give up if we don't know how large p is. */
+	if (p_size == (size_t)-1)
 		return __underlying_strlen(p);
 	ret = strnlen(p, p_size);
 	if (p_size <= ret)
@@ -86,24 +110,27 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p)
 extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
 __FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
 {
-	size_t ret;
 	size_t p_size = __builtin_object_size(p, 1);
 	size_t q_size = __builtin_object_size(q, 1);
+	size_t q_len;	/* Full count of source string length. */
+	size_t len;	/* Count of characters going into destination. */
 
 	if (p_size == (size_t)-1 && q_size == (size_t)-1)
 		return __real_strlcpy(p, q, size);
-	ret = strlen(q);
-	if (size) {
-		size_t len = (ret >= size) ? size - 1 : ret;
-
-		if (__builtin_constant_p(len) && len >= p_size)
+	q_len = strlen(q);
+	len = (q_len >= size) ? size - 1 : q_len;
+	if (__builtin_constant_p(size) && __builtin_constant_p(q_len) && size) {
+		/* Write size is always larger than destination. */
+		if (len >= p_size)
 			__write_overflow();
+	}
+	if (size) {
 		if (len >= p_size)
 			fortify_panic(__func__);
 		__underlying_memcpy(p, q, len);
 		p[len] = '\0';
 	}
-	return ret;
+	return q_len;
 }
 
 /* defined after fortified strnlen to reuse it */
-- 
cgit v1.2.3


From 405fca8a946168e71c04b82cc80727c3ea686e08 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Fri, 24 Sep 2021 06:00:47 -0400
Subject: ieee80211: add power type definition for 6 GHz

6 GHz regulatory domains introduces different modes for 6 GHz AP
operations: Low Power Indoor (LPI), Standard Power (SP) and Very
Low Power (VLP). 6 GHz STAs could be operated as either Regular or
Subordinate clients. Define the flags for power type
of AP and STATION mode.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Link: https://lore.kernel.org/r/20210924100052.32029-2-wgong@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 38 ++++++++++++++++++++++++++++++++++++++
 include/net/mac80211.h    |  2 ++
 2 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 694264503119..420dea9aa648 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1988,6 +1988,44 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 			      int mcs, bool ext_nss_bw_capable,
 			      unsigned int max_vht_nss);
 
+/**
+ * enum ieee80211_ap_reg_power - regulatory power for a Access Point
+ *
+ * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode
+ * @IEEE80211_REG_LPI: Indoor Access Point
+ * @IEEE80211_REG_SP: Standard power Access Point
+ * @IEEE80211_REG_VLP: Very low power Access Point
+ * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal
+ * @IEEE80211_REG_AP_POWER_MAX: maximum value
+ */
+enum ieee80211_ap_reg_power {
+	IEEE80211_REG_UNSET_AP,
+	IEEE80211_REG_LPI_AP,
+	IEEE80211_REG_SP_AP,
+	IEEE80211_REG_VLP_AP,
+	IEEE80211_REG_AP_POWER_AFTER_LAST,
+	IEEE80211_REG_AP_POWER_MAX =
+		IEEE80211_REG_AP_POWER_AFTER_LAST - 1,
+};
+
+/**
+ * enum ieee80211_client_reg_power - regulatory power for a client
+ *
+ * @IEEE80211_REG_UNSET_CLIENT: Client has no regulatory power mode
+ * @IEEE80211_REG_DEFAULT_CLIENT: Default Client
+ * @IEEE80211_REG_SUBORDINATE_CLIENT: Subordinate Client
+ * @IEEE80211_REG_CLIENT_POWER_AFTER_LAST: internal
+ * @IEEE80211_REG_CLIENT_POWER_MAX: maximum value
+ */
+enum ieee80211_client_reg_power {
+	IEEE80211_REG_UNSET_CLIENT,
+	IEEE80211_REG_DEFAULT_CLIENT,
+	IEEE80211_REG_SUBORDINATE_CLIENT,
+	IEEE80211_REG_CLIENT_POWER_AFTER_LAST,
+	IEEE80211_REG_CLIENT_POWER_MAX =
+		IEEE80211_REG_CLIENT_POWER_AFTER_LAST - 1,
+};
+
 /* 802.11ax HE MAC capabilities */
 #define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
 #define IEEE80211_HE_MAC_CAP0_TWT_REQ				0x02
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index af0fc13cea34..8923a9fc4126 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -632,6 +632,7 @@ struct ieee80211_fils_discovery {
  * @s1g: BSS is S1G BSS (affects Association Request format).
  * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed
  *	to driver when rate control is offloaded to firmware.
+ * @power_type: power type of BSS for 6 GHz
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -702,6 +703,7 @@ struct ieee80211_bss_conf {
 	u32 unsol_bcast_probe_resp_interval;
 	bool s1g;
 	struct cfg80211_bitrate_mask beacon_tx_rate;
+	enum ieee80211_ap_reg_power power_type;
 };
 
 /**
-- 
cgit v1.2.3


From 930dfa563155179861470b2aba880eac2ae30bfb Mon Sep 17 00:00:00 2001
From: Min Li <min.li.xe@renesas.com>
Date: Fri, 24 Sep 2021 15:01:32 -0400
Subject: ptp: clockmatrix: use rsmu driver to access i2c/spi bus

rsmu (Renesas Synchronization Management Unit ) driver is located in
drivers/mfd and responsible for creating multiple devices including
clockmatrix phc, which will then use the exposed regmap and mutex
handle to access i2c/spi bus.

Signed-off-by: Min Li <min.li.xe@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/idt8a340_reg.h       | 783 ---------------------------------------
 drivers/ptp/ptp_clockmatrix.c    | 772 ++++++++++++++++++++------------------
 drivers/ptp/ptp_clockmatrix.h    | 117 +-----
 include/linux/mfd/idt8a340_reg.h |  31 +-
 4 files changed, 461 insertions(+), 1242 deletions(-)
 delete mode 100644 drivers/ptp/idt8a340_reg.h

(limited to 'include/linux')

diff --git a/drivers/ptp/idt8a340_reg.h b/drivers/ptp/idt8a340_reg.h
deleted file mode 100644
index 1c5210187110..000000000000
--- a/drivers/ptp/idt8a340_reg.h
+++ /dev/null
@@ -1,783 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/* idt8a340_reg.h
- *
- * Originally generated by regen.tcl on Thu Feb 14 19:23:44 PST 2019
- * https://github.com/richardcochran/regen
- *
- * Hand modified to include some HW registers.
- * Based on 5.2.0, Family Programming Guide (Sept 30, 2020)
- */
-#ifndef HAVE_IDT8A340_REG
-#define HAVE_IDT8A340_REG
-
-#define PAGE_ADDR_BASE                    0x0000
-#define PAGE_ADDR                         0x00fc
-
-#define HW_REVISION                       0x8180
-#define REV_ID                            0x007a
-
-#define HW_DPLL_0                         (0x8a00)
-#define HW_DPLL_1                         (0x8b00)
-#define HW_DPLL_2                         (0x8c00)
-#define HW_DPLL_3                         (0x8d00)
-#define HW_DPLL_4                         (0x8e00)
-#define HW_DPLL_5                         (0x8f00)
-#define HW_DPLL_6                         (0x9000)
-#define HW_DPLL_7                         (0x9100)
-
-#define HW_DPLL_TOD_SW_TRIG_ADDR__0       (0x080)
-#define HW_DPLL_TOD_CTRL_1                (0x089)
-#define HW_DPLL_TOD_CTRL_2                (0x08A)
-#define HW_DPLL_TOD_OVR__0                (0x098)
-#define HW_DPLL_TOD_OUT_0__0              (0x0B0)
-
-#define HW_Q0_Q1_CH_SYNC_CTRL_0           (0xa740)
-#define HW_Q0_Q1_CH_SYNC_CTRL_1           (0xa741)
-#define HW_Q2_Q3_CH_SYNC_CTRL_0           (0xa742)
-#define HW_Q2_Q3_CH_SYNC_CTRL_1           (0xa743)
-#define HW_Q4_Q5_CH_SYNC_CTRL_0           (0xa744)
-#define HW_Q4_Q5_CH_SYNC_CTRL_1           (0xa745)
-#define HW_Q6_Q7_CH_SYNC_CTRL_0           (0xa746)
-#define HW_Q6_Q7_CH_SYNC_CTRL_1           (0xa747)
-#define HW_Q8_CH_SYNC_CTRL_0              (0xa748)
-#define HW_Q8_CH_SYNC_CTRL_1              (0xa749)
-#define HW_Q9_CH_SYNC_CTRL_0              (0xa74a)
-#define HW_Q9_CH_SYNC_CTRL_1              (0xa74b)
-#define HW_Q10_CH_SYNC_CTRL_0             (0xa74c)
-#define HW_Q10_CH_SYNC_CTRL_1             (0xa74d)
-#define HW_Q11_CH_SYNC_CTRL_0             (0xa74e)
-#define HW_Q11_CH_SYNC_CTRL_1             (0xa74f)
-
-#define SYNC_SOURCE_DPLL0_TOD_PPS	0x14
-#define SYNC_SOURCE_DPLL1_TOD_PPS	0x15
-#define SYNC_SOURCE_DPLL2_TOD_PPS	0x16
-#define SYNC_SOURCE_DPLL3_TOD_PPS	0x17
-
-#define SYNCTRL1_MASTER_SYNC_RST	BIT(7)
-#define SYNCTRL1_MASTER_SYNC_TRIG	BIT(5)
-#define SYNCTRL1_TOD_SYNC_TRIG		BIT(4)
-#define SYNCTRL1_FBDIV_FRAME_SYNC_TRIG	BIT(3)
-#define SYNCTRL1_FBDIV_SYNC_TRIG	BIT(2)
-#define SYNCTRL1_Q1_DIV_SYNC_TRIG	BIT(1)
-#define SYNCTRL1_Q0_DIV_SYNC_TRIG	BIT(0)
-
-#define HW_Q8_CTRL_SPARE  (0xa7d4)
-#define HW_Q11_CTRL_SPARE (0xa7ec)
-
-/**
- * Select FOD5 as sync_trigger for Q8 divider.
- * Transition from logic zero to one
- * sets trigger to sync Q8 divider.
- *
- * Unused when FOD4 is driving Q8 divider (normal operation).
- */
-#define Q9_TO_Q8_SYNC_TRIG  BIT(1)
-
-/**
- * Enable FOD5 as driver for clock and sync for Q8 divider.
- * Enable fanout buffer for FOD5.
- *
- * Unused when FOD4 is driving Q8 divider (normal operation).
- */
-#define Q9_TO_Q8_FANOUT_AND_CLOCK_SYNC_ENABLE_MASK  (BIT(0) | BIT(2))
-
-/**
- * Select FOD6 as sync_trigger for Q11 divider.
- * Transition from logic zero to one
- * sets trigger to sync Q11 divider.
- *
- * Unused when FOD7 is driving Q11 divider (normal operation).
- */
-#define Q10_TO_Q11_SYNC_TRIG  BIT(1)
-
-/**
- * Enable FOD6 as driver for clock and sync for Q11 divider.
- * Enable fanout buffer for FOD6.
- *
- * Unused when FOD7 is driving Q11 divider (normal operation).
- */
-#define Q10_TO_Q11_FANOUT_AND_CLOCK_SYNC_ENABLE_MASK  (BIT(0) | BIT(2))
-
-#define RESET_CTRL                        0xc000
-#define SM_RESET                          0x0012
-#define SM_RESET_V520                     0x0013
-#define SM_RESET_CMD                      0x5A
-
-#define GENERAL_STATUS                    0xc014
-#define BOOT_STATUS                       0x0000
-#define HW_REV_ID                         0x000A
-#define BOND_ID                           0x000B
-#define HW_CSR_ID                         0x000C
-#define HW_IRQ_ID                         0x000E
-
-#define MAJ_REL                           0x0010
-#define MIN_REL                           0x0011
-#define HOTFIX_REL                        0x0012
-
-#define PIPELINE_ID                       0x0014
-#define BUILD_ID                          0x0018
-
-#define JTAG_DEVICE_ID                    0x001c
-#define PRODUCT_ID                        0x001e
-
-#define OTP_SCSR_CONFIG_SELECT            0x0022
-
-#define STATUS                            0xc03c
-#define DPLL_SYS_STATUS                   0x0020
-#define DPLL_SYS_APLL_STATUS              0x0021
-#define USER_GPIO0_TO_7_STATUS            0x008a
-#define USER_GPIO8_TO_15_STATUS           0x008b
-
-#define GPIO_USER_CONTROL                 0xc160
-#define GPIO0_TO_7_OUT                    0x0000
-#define GPIO8_TO_15_OUT                   0x0001
-#define GPIO0_TO_7_OUT_V520               0x0002
-#define GPIO8_TO_15_OUT_V520              0x0003
-
-#define STICKY_STATUS_CLEAR               0xc164
-
-#define GPIO_TOD_NOTIFICATION_CLEAR       0xc16c
-
-#define ALERT_CFG                         0xc188
-
-#define SYS_DPLL_XO                       0xc194
-
-#define SYS_APLL                          0xc19c
-
-#define INPUT_0                           0xc1b0
-
-#define INPUT_1                           0xc1c0
-
-#define INPUT_2                           0xc1d0
-
-#define INPUT_3                           0xc200
-
-#define INPUT_4                           0xc210
-
-#define INPUT_5                           0xc220
-
-#define INPUT_6                           0xc230
-
-#define INPUT_7                           0xc240
-
-#define INPUT_8                           0xc250
-
-#define INPUT_9                           0xc260
-
-#define INPUT_10                          0xc280
-
-#define INPUT_11                          0xc290
-
-#define INPUT_12                          0xc2a0
-
-#define INPUT_13                          0xc2b0
-
-#define INPUT_14                          0xc2c0
-
-#define INPUT_15                          0xc2d0
-
-#define REF_MON_0                         0xc2e0
-
-#define REF_MON_1                         0xc2ec
-
-#define REF_MON_2                         0xc300
-
-#define REF_MON_3                         0xc30c
-
-#define REF_MON_4                         0xc318
-
-#define REF_MON_5                         0xc324
-
-#define REF_MON_6                         0xc330
-
-#define REF_MON_7                         0xc33c
-
-#define REF_MON_8                         0xc348
-
-#define REF_MON_9                         0xc354
-
-#define REF_MON_10                        0xc360
-
-#define REF_MON_11                        0xc36c
-
-#define REF_MON_12                        0xc380
-
-#define REF_MON_13                        0xc38c
-
-#define REF_MON_14                        0xc398
-
-#define REF_MON_15                        0xc3a4
-
-#define DPLL_0                            0xc3b0
-#define DPLL_CTRL_REG_0                   0x0002
-#define DPLL_CTRL_REG_1                   0x0003
-#define DPLL_CTRL_REG_2                   0x0004
-#define DPLL_TOD_SYNC_CFG                 0x0031
-#define DPLL_COMBO_SLAVE_CFG_0            0x0032
-#define DPLL_COMBO_SLAVE_CFG_1            0x0033
-#define DPLL_SLAVE_REF_CFG                0x0034
-#define DPLL_REF_MODE                     0x0035
-#define DPLL_PHASE_MEASUREMENT_CFG        0x0036
-#define DPLL_MODE                         0x0037
-#define DPLL_MODE_V520                    0x003B
-
-#define DPLL_1                            0xc400
-
-#define DPLL_2                            0xc438
-#define DPLL_2_V520                       0xc43c
-
-#define DPLL_3                            0xc480
-
-#define DPLL_4                            0xc4b8
-#define DPLL_4_V520                       0xc4bc
-
-#define DPLL_5                            0xc500
-
-#define DPLL_6                            0xc538
-#define DPLL_6_V520                       0xc53c
-
-#define DPLL_7                            0xc580
-
-#define SYS_DPLL                          0xc5b8
-#define SYS_DPLL_V520                     0xc5bc
-
-#define DPLL_CTRL_0                       0xc600
-#define DPLL_CTRL_DPLL_MANU_REF_CFG       0x0001
-#define DPLL_CTRL_COMBO_MASTER_CFG        0x003a
-
-#define DPLL_CTRL_1                       0xc63c
-
-#define DPLL_CTRL_2                       0xc680
-
-#define DPLL_CTRL_3                       0xc6bc
-
-#define DPLL_CTRL_4                       0xc700
-
-#define DPLL_CTRL_5                       0xc73c
-
-#define DPLL_CTRL_6                       0xc780
-
-#define DPLL_CTRL_7                       0xc7bc
-
-#define SYS_DPLL_CTRL                     0xc800
-
-#define DPLL_PHASE_0                      0xc818
-
-/* Signed 42-bit FFO in units of 2^(-53) */
-#define DPLL_WR_PHASE                     0x0000
-
-#define DPLL_PHASE_1                      0xc81c
-
-#define DPLL_PHASE_2                      0xc820
-
-#define DPLL_PHASE_3                      0xc824
-
-#define DPLL_PHASE_4                      0xc828
-
-#define DPLL_PHASE_5                      0xc82c
-
-#define DPLL_PHASE_6                      0xc830
-
-#define DPLL_PHASE_7                      0xc834
-
-#define DPLL_FREQ_0                       0xc838
-
-/* Signed 42-bit FFO in units of 2^(-53) */
-#define DPLL_WR_FREQ                      0x0000
-
-#define DPLL_FREQ_1                       0xc840
-
-#define DPLL_FREQ_2                       0xc848
-
-#define DPLL_FREQ_3                       0xc850
-
-#define DPLL_FREQ_4                       0xc858
-
-#define DPLL_FREQ_5                       0xc860
-
-#define DPLL_FREQ_6                       0xc868
-
-#define DPLL_FREQ_7                       0xc870
-
-#define DPLL_PHASE_PULL_IN_0              0xc880
-#define PULL_IN_OFFSET                    0x0000 /* Signed 32 bit */
-#define PULL_IN_SLOPE_LIMIT               0x0004 /* Unsigned 24 bit */
-#define PULL_IN_CTRL                      0x0007
-
-#define DPLL_PHASE_PULL_IN_1              0xc888
-
-#define DPLL_PHASE_PULL_IN_2              0xc890
-
-#define DPLL_PHASE_PULL_IN_3              0xc898
-
-#define DPLL_PHASE_PULL_IN_4              0xc8a0
-
-#define DPLL_PHASE_PULL_IN_5              0xc8a8
-
-#define DPLL_PHASE_PULL_IN_6              0xc8b0
-
-#define DPLL_PHASE_PULL_IN_7              0xc8b8
-
-#define GPIO_CFG                          0xc8c0
-#define GPIO_CFG_GBL                      0x0000
-
-#define GPIO_0                            0xc8c2
-#define GPIO_DCO_INC_DEC                  0x0000
-#define GPIO_OUT_CTRL_0                   0x0001
-#define GPIO_OUT_CTRL_1                   0x0002
-#define GPIO_TOD_TRIG                     0x0003
-#define GPIO_DPLL_INDICATOR               0x0004
-#define GPIO_LOS_INDICATOR                0x0005
-#define GPIO_REF_INPUT_DSQ_0              0x0006
-#define GPIO_REF_INPUT_DSQ_1              0x0007
-#define GPIO_REF_INPUT_DSQ_2              0x0008
-#define GPIO_REF_INPUT_DSQ_3              0x0009
-#define GPIO_MAN_CLK_SEL_0                0x000a
-#define GPIO_MAN_CLK_SEL_1                0x000b
-#define GPIO_MAN_CLK_SEL_2                0x000c
-#define GPIO_SLAVE                        0x000d
-#define GPIO_ALERT_OUT_CFG                0x000e
-#define GPIO_TOD_NOTIFICATION_CFG         0x000f
-#define GPIO_CTRL                         0x0010
-#define GPIO_CTRL_V520                    0x0011
-
-#define GPIO_1                            0xc8d4
-
-#define GPIO_2                            0xc8e6
-
-#define GPIO_3                            0xc900
-
-#define GPIO_4                            0xc912
-
-#define GPIO_5                            0xc924
-
-#define GPIO_6                            0xc936
-
-#define GPIO_7                            0xc948
-
-#define GPIO_8                            0xc95a
-
-#define GPIO_9                            0xc980
-
-#define GPIO_10                           0xc992
-
-#define GPIO_11                           0xc9a4
-
-#define GPIO_12                           0xc9b6
-
-#define GPIO_13                           0xc9c8
-
-#define GPIO_14                           0xc9da
-
-#define GPIO_15                           0xca00
-
-#define OUT_DIV_MUX                       0xca12
-
-#define OUTPUT_0                          0xca14
-#define OUTPUT_0_V520                     0xca20
-/* FOD frequency output divider value */
-#define OUT_DIV                           0x0000
-#define OUT_DUTY_CYCLE_HIGH               0x0004
-#define OUT_CTRL_0                        0x0008
-#define OUT_CTRL_1                        0x0009
-/* Phase adjustment in FOD cycles */
-#define OUT_PHASE_ADJ                     0x000c
-
-#define OUTPUT_1                          0xca24
-#define OUTPUT_1_V520                     0xca30
-
-#define OUTPUT_2                          0xca34
-#define OUTPUT_2_V520                     0xca40
-
-#define OUTPUT_3                          0xca44
-#define OUTPUT_3_V520                     0xca50
-
-#define OUTPUT_4                          0xca54
-#define OUTPUT_4_V520                     0xca60
-
-#define OUTPUT_5                          0xca64
-#define OUTPUT_5_V520                     0xca80
-
-#define OUTPUT_6                          0xca80
-#define OUTPUT_6_V520                     0xca90
-
-#define OUTPUT_7                          0xca90
-#define OUTPUT_7_V520                     0xcaa0
-
-#define OUTPUT_8                          0xcaa0
-#define OUTPUT_8_V520                     0xcab0
-
-#define OUTPUT_9                          0xcab0
-#define OUTPUT_9_V520                     0xcac0
-
-#define OUTPUT_10                         0xcac0
-#define OUTPUT_10_V520                     0xcad0
-
-#define OUTPUT_11                         0xcad0
-#define OUTPUT_11_V520                    0xcae0
-
-#define SERIAL                            0xcae0
-#define SERIAL_V520                       0xcaf0
-
-#define PWM_ENCODER_0                     0xcb00
-
-#define PWM_ENCODER_1                     0xcb08
-
-#define PWM_ENCODER_2                     0xcb10
-
-#define PWM_ENCODER_3                     0xcb18
-
-#define PWM_ENCODER_4                     0xcb20
-
-#define PWM_ENCODER_5                     0xcb28
-
-#define PWM_ENCODER_6                     0xcb30
-
-#define PWM_ENCODER_7                     0xcb38
-
-#define PWM_DECODER_0                     0xcb40
-
-#define PWM_DECODER_1                     0xcb48
-#define PWM_DECODER_1_V520                0xcb4a
-
-#define PWM_DECODER_2                     0xcb50
-#define PWM_DECODER_2_V520                0xcb54
-
-#define PWM_DECODER_3                     0xcb58
-#define PWM_DECODER_3_V520                0xcb5e
-
-#define PWM_DECODER_4                     0xcb60
-#define PWM_DECODER_4_V520                0xcb68
-
-#define PWM_DECODER_5                     0xcb68
-#define PWM_DECODER_5_V520                0xcb80
-
-#define PWM_DECODER_6                     0xcb70
-#define PWM_DECODER_6_V520                0xcb8a
-
-#define PWM_DECODER_7                     0xcb80
-#define PWM_DECODER_7_V520                0xcb94
-
-#define PWM_DECODER_8                     0xcb88
-#define PWM_DECODER_8_V520                0xcb9e
-
-#define PWM_DECODER_9                     0xcb90
-#define PWM_DECODER_9_V520                0xcba8
-
-#define PWM_DECODER_10                    0xcb98
-#define PWM_DECODER_10_V520               0xcbb2
-
-#define PWM_DECODER_11                    0xcba0
-#define PWM_DECODER_11_V520               0xcbbc
-
-#define PWM_DECODER_12                    0xcba8
-#define PWM_DECODER_12_V520               0xcbc6
-
-#define PWM_DECODER_13                    0xcbb0
-#define PWM_DECODER_13_V520               0xcbd0
-
-#define PWM_DECODER_14                    0xcbb8
-#define PWM_DECODER_14_V520               0xcbda
-
-#define PWM_DECODER_15                    0xcbc0
-#define PWM_DECODER_15_V520               0xcbe4
-
-#define PWM_USER_DATA                     0xcbc8
-#define PWM_USER_DATA_V520                0xcbf0
-
-#define TOD_0                             0xcbcc
-#define TOD_0_V520                        0xcc00
-
-/* Enable TOD counter, output channel sync and even-PPS mode */
-#define TOD_CFG                           0x0000
-#define TOD_CFG_V520                      0x0001
-
-#define TOD_1                             0xcbce
-#define TOD_1_V520                        0xcc02
-
-#define TOD_2                             0xcbd0
-#define TOD_2_V520                        0xcc04
-
-#define TOD_3                             0xcbd2
-#define TOD_3_V520                        0xcc06
-
-
-#define TOD_WRITE_0                       0xcc00
-#define TOD_WRITE_0_V520                  0xcc10
-/* 8-bit subns, 32-bit ns, 48-bit seconds */
-#define TOD_WRITE                         0x0000
-/* Counter increments after TOD write is completed */
-#define TOD_WRITE_COUNTER                 0x000c
-/* TOD write trigger configuration */
-#define TOD_WRITE_SELECT_CFG_0            0x000d
-/* TOD write trigger selection */
-#define TOD_WRITE_CMD                     0x000f
-
-#define TOD_WRITE_1                       0xcc10
-#define TOD_WRITE_1_V520                  0xcc20
-
-#define TOD_WRITE_2                       0xcc20
-#define TOD_WRITE_2_V520                  0xcc30
-
-#define TOD_WRITE_3                       0xcc30
-#define TOD_WRITE_3_V520                  0xcc40
-
-#define TOD_READ_PRIMARY_0                0xcc40
-#define TOD_READ_PRIMARY_0_V520           0xcc50
-/* 8-bit subns, 32-bit ns, 48-bit seconds */
-#define TOD_READ_PRIMARY                  0x0000
-/* Counter increments after TOD write is completed */
-#define TOD_READ_PRIMARY_COUNTER          0x000b
-/* Read trigger configuration */
-#define TOD_READ_PRIMARY_SEL_CFG_0        0x000c
-/* Read trigger selection */
-#define TOD_READ_PRIMARY_CMD              0x000e
-#define TOD_READ_PRIMARY_CMD_V520         0x000f
-
-#define TOD_READ_PRIMARY_1                0xcc50
-#define TOD_READ_PRIMARY_1_V520           0xcc60
-
-#define TOD_READ_PRIMARY_2                0xcc60
-#define TOD_READ_PRIMARY_2_V520           0xcc80
-
-#define TOD_READ_PRIMARY_3                0xcc80
-#define TOD_READ_PRIMARY_3_V520           0xcc90
-
-#define TOD_READ_SECONDARY_0              0xcc90
-#define TOD_READ_SECONDARY_0_V520         0xcca0
-
-#define TOD_READ_SECONDARY_1              0xcca0
-#define TOD_READ_SECONDARY_1_V520         0xccb0
-
-#define TOD_READ_SECONDARY_2              0xccb0
-#define TOD_READ_SECONDARY_2_V520         0xccc0
-
-#define TOD_READ_SECONDARY_3              0xccc0
-#define TOD_READ_SECONDARY_3_V520         0xccd0
-
-#define OUTPUT_TDC_CFG                    0xccd0
-#define OUTPUT_TDC_CFG_V520               0xcce0
-
-#define OUTPUT_TDC_0                      0xcd00
-
-#define OUTPUT_TDC_1                      0xcd08
-
-#define OUTPUT_TDC_2                      0xcd10
-
-#define OUTPUT_TDC_3                      0xcd18
-
-#define INPUT_TDC                         0xcd20
-
-#define SCRATCH                           0xcf50
-#define SCRATCH_V520                      0xcf4c
-
-#define EEPROM                            0xcf68
-#define EEPROM_V520                       0xcf64
-
-#define OTP                               0xcf70
-
-#define BYTE                              0xcf80
-
-/* Bit definitions for the MAJ_REL register */
-#define MAJOR_SHIFT                       (1)
-#define MAJOR_MASK                        (0x7f)
-#define PR_BUILD                          BIT(0)
-
-/* Bit definitions for the USER_GPIO0_TO_7_STATUS register */
-#define GPIO0_LEVEL                       BIT(0)
-#define GPIO1_LEVEL                       BIT(1)
-#define GPIO2_LEVEL                       BIT(2)
-#define GPIO3_LEVEL                       BIT(3)
-#define GPIO4_LEVEL                       BIT(4)
-#define GPIO5_LEVEL                       BIT(5)
-#define GPIO6_LEVEL                       BIT(6)
-#define GPIO7_LEVEL                       BIT(7)
-
-/* Bit definitions for the USER_GPIO8_TO_15_STATUS register */
-#define GPIO8_LEVEL                       BIT(0)
-#define GPIO9_LEVEL                       BIT(1)
-#define GPIO10_LEVEL                      BIT(2)
-#define GPIO11_LEVEL                      BIT(3)
-#define GPIO12_LEVEL                      BIT(4)
-#define GPIO13_LEVEL                      BIT(5)
-#define GPIO14_LEVEL                      BIT(6)
-#define GPIO15_LEVEL                      BIT(7)
-
-/* Bit definitions for the GPIO0_TO_7_OUT register */
-#define GPIO0_DRIVE_LEVEL                 BIT(0)
-#define GPIO1_DRIVE_LEVEL                 BIT(1)
-#define GPIO2_DRIVE_LEVEL                 BIT(2)
-#define GPIO3_DRIVE_LEVEL                 BIT(3)
-#define GPIO4_DRIVE_LEVEL                 BIT(4)
-#define GPIO5_DRIVE_LEVEL                 BIT(5)
-#define GPIO6_DRIVE_LEVEL                 BIT(6)
-#define GPIO7_DRIVE_LEVEL                 BIT(7)
-
-/* Bit definitions for the GPIO8_TO_15_OUT register */
-#define GPIO8_DRIVE_LEVEL                 BIT(0)
-#define GPIO9_DRIVE_LEVEL                 BIT(1)
-#define GPIO10_DRIVE_LEVEL                BIT(2)
-#define GPIO11_DRIVE_LEVEL                BIT(3)
-#define GPIO12_DRIVE_LEVEL                BIT(4)
-#define GPIO13_DRIVE_LEVEL                BIT(5)
-#define GPIO14_DRIVE_LEVEL                BIT(6)
-#define GPIO15_DRIVE_LEVEL                BIT(7)
-
-/* Bit definitions for the DPLL_TOD_SYNC_CFG register */
-#define TOD_SYNC_SOURCE_SHIFT             (1)
-#define TOD_SYNC_SOURCE_MASK              (0x3)
-#define TOD_SYNC_EN                       BIT(0)
-
-/* Bit definitions for the DPLL_MODE register */
-#define WRITE_TIMER_MODE                  BIT(6)
-#define PLL_MODE_SHIFT                    (3)
-#define PLL_MODE_MASK                     (0x7)
-#define STATE_MODE_SHIFT                  (0)
-#define STATE_MODE_MASK                   (0x7)
-
-/* Bit definitions for the DPLL_MANU_REF_CFG register */
-#define MANUAL_REFERENCE_SHIFT            (0)
-#define MANUAL_REFERENCE_MASK             (0x1f)
-
-/* Bit definitions for the GPIO_CFG_GBL register */
-#define SUPPLY_MODE_SHIFT                 (0)
-#define SUPPLY_MODE_MASK                  (0x3)
-
-/* Bit definitions for the GPIO_DCO_INC_DEC register */
-#define INCDEC_DPLL_INDEX_SHIFT           (0)
-#define INCDEC_DPLL_INDEX_MASK            (0x7)
-
-/* Bit definitions for the GPIO_OUT_CTRL_0 register */
-#define CTRL_OUT_0                        BIT(0)
-#define CTRL_OUT_1                        BIT(1)
-#define CTRL_OUT_2                        BIT(2)
-#define CTRL_OUT_3                        BIT(3)
-#define CTRL_OUT_4                        BIT(4)
-#define CTRL_OUT_5                        BIT(5)
-#define CTRL_OUT_6                        BIT(6)
-#define CTRL_OUT_7                        BIT(7)
-
-/* Bit definitions for the GPIO_OUT_CTRL_1 register */
-#define CTRL_OUT_8                        BIT(0)
-#define CTRL_OUT_9                        BIT(1)
-#define CTRL_OUT_10                       BIT(2)
-#define CTRL_OUT_11                       BIT(3)
-#define CTRL_OUT_12                       BIT(4)
-#define CTRL_OUT_13                       BIT(5)
-#define CTRL_OUT_14                       BIT(6)
-#define CTRL_OUT_15                       BIT(7)
-
-/* Bit definitions for the GPIO_TOD_TRIG register */
-#define TOD_TRIG_0                        BIT(0)
-#define TOD_TRIG_1                        BIT(1)
-#define TOD_TRIG_2                        BIT(2)
-#define TOD_TRIG_3                        BIT(3)
-
-/* Bit definitions for the GPIO_DPLL_INDICATOR register */
-#define IND_DPLL_INDEX_SHIFT              (0)
-#define IND_DPLL_INDEX_MASK               (0x7)
-
-/* Bit definitions for the GPIO_LOS_INDICATOR register */
-#define REFMON_INDEX_SHIFT                (0)
-#define REFMON_INDEX_MASK                 (0xf)
-/* Active level of LOS indicator, 0=low 1=high */
-#define ACTIVE_LEVEL                      BIT(4)
-
-/* Bit definitions for the GPIO_REF_INPUT_DSQ_0 register */
-#define DSQ_INP_0                         BIT(0)
-#define DSQ_INP_1                         BIT(1)
-#define DSQ_INP_2                         BIT(2)
-#define DSQ_INP_3                         BIT(3)
-#define DSQ_INP_4                         BIT(4)
-#define DSQ_INP_5                         BIT(5)
-#define DSQ_INP_6                         BIT(6)
-#define DSQ_INP_7                         BIT(7)
-
-/* Bit definitions for the GPIO_REF_INPUT_DSQ_1 register */
-#define DSQ_INP_8                         BIT(0)
-#define DSQ_INP_9                         BIT(1)
-#define DSQ_INP_10                        BIT(2)
-#define DSQ_INP_11                        BIT(3)
-#define DSQ_INP_12                        BIT(4)
-#define DSQ_INP_13                        BIT(5)
-#define DSQ_INP_14                        BIT(6)
-#define DSQ_INP_15                        BIT(7)
-
-/* Bit definitions for the GPIO_REF_INPUT_DSQ_2 register */
-#define DSQ_DPLL_0                        BIT(0)
-#define DSQ_DPLL_1                        BIT(1)
-#define DSQ_DPLL_2                        BIT(2)
-#define DSQ_DPLL_3                        BIT(3)
-#define DSQ_DPLL_4                        BIT(4)
-#define DSQ_DPLL_5                        BIT(5)
-#define DSQ_DPLL_6                        BIT(6)
-#define DSQ_DPLL_7                        BIT(7)
-
-/* Bit definitions for the GPIO_REF_INPUT_DSQ_3 register */
-#define DSQ_DPLL_SYS                      BIT(0)
-#define GPIO_DSQ_LEVEL                    BIT(1)
-
-/* Bit definitions for the GPIO_TOD_NOTIFICATION_CFG register */
-#define DPLL_TOD_SHIFT                    (0)
-#define DPLL_TOD_MASK                     (0x3)
-#define TOD_READ_SECONDARY                BIT(2)
-#define GPIO_ASSERT_LEVEL                 BIT(3)
-
-/* Bit definitions for the GPIO_CTRL register */
-#define GPIO_FUNCTION_EN                  BIT(0)
-#define GPIO_CMOS_OD_MODE                 BIT(1)
-#define GPIO_CONTROL_DIR                  BIT(2)
-#define GPIO_PU_PD_MODE                   BIT(3)
-#define GPIO_FUNCTION_SHIFT               (4)
-#define GPIO_FUNCTION_MASK                (0xf)
-
-/* Bit definitions for the OUT_CTRL_1 register */
-#define OUT_SYNC_DISABLE                  BIT(7)
-#define SQUELCH_VALUE                     BIT(6)
-#define SQUELCH_DISABLE                   BIT(5)
-#define PAD_VDDO_SHIFT                    (2)
-#define PAD_VDDO_MASK                     (0x7)
-#define PAD_CMOSDRV_SHIFT                 (0)
-#define PAD_CMOSDRV_MASK                  (0x3)
-
-/* Bit definitions for the TOD_CFG register */
-#define TOD_EVEN_PPS_MODE                 BIT(2)
-#define TOD_OUT_SYNC_ENABLE               BIT(1)
-#define TOD_ENABLE                        BIT(0)
-
-/* Bit definitions for the TOD_WRITE_SELECT_CFG_0 register */
-#define WR_PWM_DECODER_INDEX_SHIFT        (4)
-#define WR_PWM_DECODER_INDEX_MASK         (0xf)
-#define WR_REF_INDEX_SHIFT                (0)
-#define WR_REF_INDEX_MASK                 (0xf)
-
-/* Bit definitions for the TOD_WRITE_CMD register */
-#define TOD_WRITE_SELECTION_SHIFT         (0)
-#define TOD_WRITE_SELECTION_MASK          (0xf)
-/* 4.8.7 */
-#define TOD_WRITE_TYPE_SHIFT              (4)
-#define TOD_WRITE_TYPE_MASK               (0x3)
-
-/* Bit definitions for the TOD_READ_PRIMARY_SEL_CFG_0 register */
-#define RD_PWM_DECODER_INDEX_SHIFT        (4)
-#define RD_PWM_DECODER_INDEX_MASK         (0xf)
-#define RD_REF_INDEX_SHIFT                (0)
-#define RD_REF_INDEX_MASK                 (0xf)
-
-/* Bit definitions for the TOD_READ_PRIMARY_CMD register */
-#define TOD_READ_TRIGGER_MODE             BIT(4)
-#define TOD_READ_TRIGGER_SHIFT            (0)
-#define TOD_READ_TRIGGER_MASK             (0xf)
-
-/* Bit definitions for the DPLL_CTRL_COMBO_MASTER_CFG register */
-#define COMBO_MASTER_HOLD                 BIT(0)
-
-/* Bit definitions for DPLL_SYS_STATUS register */
-#define DPLL_SYS_STATE_MASK               (0xf)
-
-/* Bit definitions for SYS_APLL_STATUS register */
-#define SYS_APLL_LOSS_LOCK_LIVE_MASK       BIT(0)
-#define SYS_APLL_LOSS_LOCK_LIVE_LOCKED     0
-#define SYS_APLL_LOSS_LOCK_LIVE_UNLOCKED   1
-
-#endif
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index 1a2e3c2e4328..6bc5791a7ec5 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -6,7 +6,7 @@
  * Copyright (C) 2019 Integrated Device Technology, Inc., a Renesas Company.
  */
 #include <linux/firmware.h>
-#include <linux/i2c.h>
+#include <linux/platform_device.h>
 #include <linux/module.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/delay.h>
@@ -14,6 +14,10 @@
 #include <linux/kernel.h>
 #include <linux/timekeeping.h>
 #include <linux/string.h>
+#include <linux/of.h>
+#include <linux/mfd/rsmu.h>
+#include <linux/mfd/idt8a340_reg.h>
+#include <asm/unaligned.h>
 
 #include "ptp_private.h"
 #include "ptp_clockmatrix.h"
@@ -32,9 +36,28 @@ static char *firmware;
 module_param(firmware, charp, 0);
 
 #define SETTIME_CORRECTION (0)
+#define EXTTS_PERIOD_MS (95)
 
 static int _idtcm_adjfine(struct idtcm_channel *channel, long scaled_ppm);
 
+static inline int idtcm_read(struct idtcm *idtcm,
+			     u16 module,
+			     u16 regaddr,
+			     u8 *buf,
+			     u16 count)
+{
+	return regmap_bulk_read(idtcm->regmap, module + regaddr, buf, count);
+}
+
+static inline int idtcm_write(struct idtcm *idtcm,
+			      u16 module,
+			      u16 regaddr,
+			      u8 *buf,
+			      u16 count)
+{
+	return regmap_bulk_write(idtcm->regmap, module + regaddr, buf, count);
+}
+
 static int contains_full_configuration(struct idtcm *idtcm,
 				       const struct firmware *fw)
 {
@@ -173,134 +196,6 @@ static enum fw_version idtcm_fw_version(const char *version)
 	return ver;
 }
 
-static int idtcm_xfer_read(struct idtcm *idtcm,
-			   u8 regaddr,
-			   u8 *buf,
-			   u16 count)
-{
-	struct i2c_client *client = idtcm->client;
-	struct i2c_msg msg[2];
-	int cnt;
-
-	msg[0].addr = client->addr;
-	msg[0].flags = 0;
-	msg[0].len = 1;
-	msg[0].buf = &regaddr;
-
-	msg[1].addr = client->addr;
-	msg[1].flags = I2C_M_RD;
-	msg[1].len = count;
-	msg[1].buf = buf;
-
-	cnt = i2c_transfer(client->adapter, msg, 2);
-
-	if (cnt < 0) {
-		dev_err(&client->dev,
-			"i2c_transfer failed at %d in %s, at addr: %04x!",
-			__LINE__, __func__, regaddr);
-		return cnt;
-	} else if (cnt != 2) {
-		dev_err(&client->dev,
-			"i2c_transfer sent only %d of %d messages", cnt, 2);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static int idtcm_xfer_write(struct idtcm *idtcm,
-			    u8 regaddr,
-			    u8 *buf,
-			    u16 count)
-{
-	struct i2c_client *client = idtcm->client;
-	/* we add 1 byte for device register */
-	u8 msg[IDTCM_MAX_WRITE_COUNT + 1];
-	int cnt;
-
-	if (count > IDTCM_MAX_WRITE_COUNT)
-		return -EINVAL;
-
-	msg[0] = regaddr;
-	memcpy(&msg[1], buf, count);
-
-	cnt = i2c_master_send(client, msg, count + 1);
-
-	if (cnt < 0) {
-		dev_err(&client->dev,
-			"i2c_master_send failed at %d in %s, at addr: %04x!",
-			__LINE__, __func__, regaddr);
-		return cnt;
-	}
-
-	return 0;
-}
-
-static int idtcm_page_offset(struct idtcm *idtcm, u8 val)
-{
-	u8 buf[4];
-	int err;
-
-	if (idtcm->page_offset == val)
-		return 0;
-
-	buf[0] = 0x0;
-	buf[1] = val;
-	buf[2] = 0x10;
-	buf[3] = 0x20;
-
-	err = idtcm_xfer_write(idtcm, PAGE_ADDR, buf, sizeof(buf));
-	if (err) {
-		idtcm->page_offset = 0xff;
-		dev_err(&idtcm->client->dev, "failed to set page offset");
-	} else {
-		idtcm->page_offset = val;
-	}
-
-	return err;
-}
-
-static int _idtcm_rdwr(struct idtcm *idtcm,
-		       u16 regaddr,
-		       u8 *buf,
-		       u16 count,
-		       bool write)
-{
-	u8 hi;
-	u8 lo;
-	int err;
-
-	hi = (regaddr >> 8) & 0xff;
-	lo = regaddr & 0xff;
-
-	err = idtcm_page_offset(idtcm, hi);
-	if (err)
-		return err;
-
-	if (write)
-		return idtcm_xfer_write(idtcm, lo, buf, count);
-
-	return idtcm_xfer_read(idtcm, lo, buf, count);
-}
-
-static int idtcm_read(struct idtcm *idtcm,
-		      u16 module,
-		      u16 regaddr,
-		      u8 *buf,
-		      u16 count)
-{
-	return _idtcm_rdwr(idtcm, module + regaddr, buf, count, false);
-}
-
-static int idtcm_write(struct idtcm *idtcm,
-		       u16 module,
-		       u16 regaddr,
-		       u8 *buf,
-		       u16 count)
-{
-	return _idtcm_rdwr(idtcm, module + regaddr, buf, count, true);
-}
-
 static int clear_boot_status(struct idtcm *idtcm)
 {
 	u8 buf[4] = {0};
@@ -339,11 +234,82 @@ static int wait_for_boot_status_ready(struct idtcm *idtcm)
 
 	} while (i);
 
-	dev_warn(&idtcm->client->dev, "%s timed out", __func__);
+	dev_warn(idtcm->dev, "%s timed out", __func__);
 
 	return -EBUSY;
 }
 
+static int _idtcm_set_scsr_read_trig(struct idtcm_channel *channel,
+				     enum scsr_read_trig_sel trig, u8 ref)
+{
+	struct idtcm *idtcm = channel->idtcm;
+	u16 tod_read_cmd = IDTCM_FW_REG(idtcm->fw_ver, V520, TOD_READ_PRIMARY_CMD);
+	u8 val;
+	int err;
+
+	if (trig == SCSR_TOD_READ_TRIG_SEL_REFCLK) {
+		err = idtcm_read(idtcm, channel->tod_read_primary,
+				 TOD_READ_PRIMARY_SEL_CFG_0, &val, sizeof(val));
+		if (err)
+			return err;
+
+		val &= ~(WR_REF_INDEX_MASK << WR_REF_INDEX_SHIFT);
+		val |= (ref << WR_REF_INDEX_SHIFT);
+
+		err = idtcm_write(idtcm, channel->tod_read_primary,
+				  TOD_READ_PRIMARY_SEL_CFG_0, &val, sizeof(val));
+		if (err)
+			return err;
+	}
+
+	err = idtcm_read(idtcm, channel->tod_read_primary,
+			 tod_read_cmd, &val, sizeof(val));
+	if (err)
+		return err;
+
+	val &= ~(TOD_READ_TRIGGER_MASK << TOD_READ_TRIGGER_SHIFT);
+	val |= (trig << TOD_READ_TRIGGER_SHIFT);
+	val &= ~TOD_READ_TRIGGER_MODE; /* single shot */
+
+	err = idtcm_write(idtcm, channel->tod_read_primary,
+			  tod_read_cmd, &val, sizeof(val));
+	return err;
+}
+
+static int idtcm_enable_extts(struct idtcm_channel *channel, u8 todn, u8 ref,
+			      bool enable)
+{
+	struct idtcm *idtcm = channel->idtcm;
+	u8 old_mask = idtcm->extts_mask;
+	u8 mask = 1 << todn;
+	int err = 0;
+
+	if (todn >= MAX_TOD)
+		return -EINVAL;
+
+	if (enable) {
+		if (ref > 0xF) /* E_REF_CLK15 */
+			return -EINVAL;
+		if (idtcm->extts_mask & mask)
+			return 0;
+		err = _idtcm_set_scsr_read_trig(&idtcm->channel[todn],
+						SCSR_TOD_READ_TRIG_SEL_REFCLK,
+						ref);
+		if (err == 0) {
+			idtcm->extts_mask |= mask;
+			idtcm->event_channel[todn] = channel;
+			idtcm->channel[todn].refn = ref;
+		}
+	} else
+		idtcm->extts_mask &= ~mask;
+
+	if (old_mask == 0 && idtcm->extts_mask)
+		schedule_delayed_work(&idtcm->extts_work,
+				      msecs_to_jiffies(EXTTS_PERIOD_MS));
+
+	return err;
+}
+
 static int read_sys_apll_status(struct idtcm *idtcm, u8 *status)
 {
 	return idtcm_read(idtcm, STATUS, DPLL_SYS_APLL_STATUS, status,
@@ -380,7 +346,7 @@ static int wait_for_sys_apll_dpll_lock(struct idtcm *idtcm)
 		} else if (dpll == DPLL_STATE_FREERUN ||
 			   dpll == DPLL_STATE_HOLDOVER ||
 			   dpll == DPLL_STATE_OPEN_LOOP) {
-			dev_warn(&idtcm->client->dev,
+			dev_warn(idtcm->dev,
 				"No wait state: DPLL_SYS_STATE %d", dpll);
 			return -EPERM;
 		}
@@ -388,7 +354,7 @@ static int wait_for_sys_apll_dpll_lock(struct idtcm *idtcm)
 		msleep(LOCK_POLL_INTERVAL_MS);
 	} while (time_is_after_jiffies(timeout));
 
-	dev_warn(&idtcm->client->dev,
+	dev_warn(idtcm->dev,
 		 "%d ms lock timeout: SYS APLL Loss Lock %d  SYS DPLL state %d",
 		 LOCK_TIMEOUT_MS, apll, dpll);
 
@@ -398,39 +364,27 @@ static int wait_for_sys_apll_dpll_lock(struct idtcm *idtcm)
 static void wait_for_chip_ready(struct idtcm *idtcm)
 {
 	if (wait_for_boot_status_ready(idtcm))
-		dev_warn(&idtcm->client->dev, "BOOT_STATUS != 0xA0");
+		dev_warn(idtcm->dev, "BOOT_STATUS != 0xA0");
 
 	if (wait_for_sys_apll_dpll_lock(idtcm))
-		dev_warn(&idtcm->client->dev,
+		dev_warn(idtcm->dev,
 			 "Continuing while SYS APLL/DPLL is not locked");
 }
 
 static int _idtcm_gettime(struct idtcm_channel *channel,
-			  struct timespec64 *ts)
+			  struct timespec64 *ts, u8 timeout)
 {
 	struct idtcm *idtcm = channel->idtcm;
 	u16 tod_read_cmd = IDTCM_FW_REG(idtcm->fw_ver, V520, TOD_READ_PRIMARY_CMD);
 	u8 buf[TOD_BYTE_COUNT];
-	u8 timeout = 10;
 	u8 trigger;
 	int err;
 
-	err = idtcm_read(idtcm, channel->tod_read_primary,
-			 tod_read_cmd, &trigger, sizeof(trigger));
-	if (err)
-		return err;
-
-	trigger &= ~(TOD_READ_TRIGGER_MASK << TOD_READ_TRIGGER_SHIFT);
-	trigger |= (1 << TOD_READ_TRIGGER_SHIFT);
-	trigger &= ~TOD_READ_TRIGGER_MODE; /* single shot */
-
-	err = idtcm_write(idtcm, channel->tod_read_primary,
-			  tod_read_cmd, &trigger, sizeof(trigger));
-	if (err)
-		return err;
-
 	/* wait trigger to be 0 */
-	while (trigger & TOD_READ_TRIGGER_MASK) {
+	do {
+		if (timeout-- == 0)
+			return -EIO;
+
 		if (idtcm->calculate_overhead_flag)
 			idtcm->start_time = ktime_get_raw();
 
@@ -439,10 +393,7 @@ static int _idtcm_gettime(struct idtcm_channel *channel,
 				 sizeof(trigger));
 		if (err)
 			return err;
-
-		if (--timeout == 0)
-			return -EIO;
-	}
+	} while (trigger & TOD_READ_TRIGGER_MASK);
 
 	err = idtcm_read(idtcm, channel->tod_read_primary,
 			 TOD_READ_PRIMARY, buf, sizeof(buf));
@@ -454,6 +405,79 @@ static int _idtcm_gettime(struct idtcm_channel *channel,
 	return err;
 }
 
+static int idtcm_extts_check_channel(struct idtcm *idtcm, u8 todn)
+{
+	struct idtcm_channel *ptp_channel, *extts_channel;
+	struct ptp_clock_event event;
+	struct timespec64 ts;
+	u32 dco_delay = 0;
+	int err;
+
+	extts_channel = &idtcm->channel[todn];
+	ptp_channel = idtcm->event_channel[todn];
+	if (extts_channel == ptp_channel)
+		dco_delay = ptp_channel->dco_delay;
+
+	err = _idtcm_gettime(extts_channel, &ts, 1);
+	if (err == 0) {
+		event.type = PTP_CLOCK_EXTTS;
+		event.index = todn;
+		event.timestamp = timespec64_to_ns(&ts) - dco_delay;
+		ptp_clock_event(ptp_channel->ptp_clock, &event);
+	}
+	return err;
+}
+
+static u8 idtcm_enable_extts_mask(struct idtcm_channel *channel,
+				    u8 extts_mask, bool enable)
+{
+	struct idtcm *idtcm = channel->idtcm;
+	int i, err;
+
+	for (i = 0; i < MAX_TOD; i++) {
+		u8 mask = 1 << i;
+		u8 refn = idtcm->channel[i].refn;
+
+		if (extts_mask & mask) {
+			/* check extts before disabling it */
+			if (enable == false) {
+				err = idtcm_extts_check_channel(idtcm, i);
+				/* trigger happened so we won't re-enable it */
+				if (err == 0)
+					extts_mask &= ~mask;
+			}
+			(void)idtcm_enable_extts(channel, i, refn, enable);
+		}
+	}
+
+	return extts_mask;
+}
+
+static int _idtcm_gettime_immediate(struct idtcm_channel *channel,
+				    struct timespec64 *ts)
+{
+	struct idtcm *idtcm = channel->idtcm;
+	u8 extts_mask = 0;
+	int err;
+
+	/* Disable extts */
+	if (idtcm->extts_mask) {
+		extts_mask = idtcm_enable_extts_mask(channel, idtcm->extts_mask,
+						     false);
+	}
+
+	err = _idtcm_set_scsr_read_trig(channel,
+					SCSR_TOD_READ_TRIG_SEL_IMMEDIATE, 0);
+	if (err == 0)
+		err = _idtcm_gettime(channel, ts, 10);
+
+	/* Re-enable extts */
+	if (extts_mask)
+		idtcm_enable_extts_mask(channel, extts_mask, true);
+
+	return err;
+}
+
 static int _sync_pll_output(struct idtcm *idtcm,
 			    u8 pll,
 			    u8 sync_src,
@@ -777,7 +801,7 @@ static int _idtcm_set_dpll_scsr_tod(struct idtcm_channel *channel,
 			break;
 
 		if (++count > 20) {
-			dev_err(&idtcm->client->dev,
+			dev_err(idtcm->dev,
 				"Timed out waiting for the write counter");
 			return -EIO;
 		}
@@ -842,7 +866,7 @@ static int _idtcm_settime_deprecated(struct idtcm_channel *channel,
 
 	err = _idtcm_set_dpll_hw_tod(channel, ts, HW_TOD_WR_TRIG_SEL_MSB);
 	if (err) {
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"%s: Set HW ToD failed", __func__);
 		return err;
 	}
@@ -1001,7 +1025,7 @@ static int _idtcm_adjtime_deprecated(struct idtcm_channel *channel, s64 delta)
 		if (err)
 			return err;
 
-		err = _idtcm_gettime(channel, &ts);
+		err = _idtcm_gettime_immediate(channel, &ts);
 		if (err)
 			return err;
 
@@ -1035,14 +1059,14 @@ static int idtcm_state_machine_reset(struct idtcm *idtcm)
 			read_boot_status(idtcm, &status);
 
 			if (status == 0xA0) {
-				dev_dbg(&idtcm->client->dev,
+				dev_dbg(idtcm->dev,
 					"SM_RESET completed in %d ms", i * 100);
 				break;
 			}
 		}
 
 		if (!status)
-			dev_err(&idtcm->client->dev,
+			dev_err(idtcm->dev,
 				"Timed out waiting for CM_RESET to complete");
 	}
 
@@ -1139,12 +1163,12 @@ static int set_pll_output_mask(struct idtcm *idtcm, u16 addr, u8 val)
 static int set_tod_ptp_pll(struct idtcm *idtcm, u8 index, u8 pll)
 {
 	if (index >= MAX_TOD) {
-		dev_err(&idtcm->client->dev, "ToD%d not supported", index);
+		dev_err(idtcm->dev, "ToD%d not supported", index);
 		return -EINVAL;
 	}
 
 	if (pll >= MAX_PLL) {
-		dev_err(&idtcm->client->dev, "Pll%d not supported", pll);
+		dev_err(idtcm->dev, "Pll%d not supported", pll);
 		return -EINVAL;
 	}
 
@@ -1162,7 +1186,7 @@ static int check_and_set_masks(struct idtcm *idtcm,
 	switch (regaddr) {
 	case TOD_MASK_ADDR:
 		if ((val & 0xf0) || !(val & 0x0f)) {
-			dev_err(&idtcm->client->dev, "Invalid TOD mask 0x%02x", val);
+			dev_err(idtcm->dev, "Invalid TOD mask 0x%02x", val);
 			err = -EINVAL;
 		} else {
 			idtcm->tod_mask = val;
@@ -1193,13 +1217,13 @@ static void display_pll_and_masks(struct idtcm *idtcm)
 	u8 i;
 	u8 mask;
 
-	dev_dbg(&idtcm->client->dev, "tod_mask = 0x%02x", idtcm->tod_mask);
+	dev_dbg(idtcm->dev, "tod_mask = 0x%02x", idtcm->tod_mask);
 
 	for (i = 0; i < MAX_TOD; i++) {
 		mask = 1 << i;
 
 		if (mask & idtcm->tod_mask)
-			dev_dbg(&idtcm->client->dev,
+			dev_dbg(idtcm->dev,
 				"TOD%d pll = %d    output_mask = 0x%04x",
 				i, idtcm->channel[i].pll,
 				idtcm->channel[i].output_mask);
@@ -1222,16 +1246,16 @@ static int idtcm_load_firmware(struct idtcm *idtcm,
 	if (firmware) /* module parameter */
 		snprintf(fname, sizeof(fname), "%s", firmware);
 
-	dev_info(&idtcm->client->dev, "firmware '%s'", fname);
+	dev_info(idtcm->dev, "requesting firmware '%s'", fname);
 
 	err = request_firmware(&fw, fname, dev);
 	if (err) {
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"Failed at line %d in %s!", __LINE__, __func__);
 		return err;
 	}
 
-	dev_dbg(&idtcm->client->dev, "firmware size %zu bytes", fw->size);
+	dev_dbg(idtcm->dev, "firmware size %zu bytes", fw->size);
 
 	rec = (struct idtcm_fwrc *) fw->data;
 
@@ -1240,7 +1264,7 @@ static int idtcm_load_firmware(struct idtcm *idtcm,
 
 	for (len = fw->size; len > 0; len -= sizeof(*rec)) {
 		if (rec->reserved) {
-			dev_err(&idtcm->client->dev,
+			dev_err(idtcm->dev,
 				"bad firmware, reserved field non-zero");
 			err = -EINVAL;
 		} else {
@@ -1291,7 +1315,7 @@ static int idtcm_output_enable(struct idtcm_channel *channel,
 	base = get_output_base_addr(idtcm->fw_ver, outn);
 
 	if (!(base > 0)) {
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"%s - Unsupported out%d", __func__, outn);
 		return base;
 	}
@@ -1333,8 +1357,8 @@ static int idtcm_output_mask_enable(struct idtcm_channel *channel,
 }
 
 static int idtcm_perout_enable(struct idtcm_channel *channel,
-			       bool enable,
-			       struct ptp_perout_request *perout)
+			       struct ptp_perout_request *perout,
+			       bool enable)
 {
 	struct idtcm *idtcm = channel->idtcm;
 	unsigned int flags = perout->flags;
@@ -1347,7 +1371,7 @@ static int idtcm_perout_enable(struct idtcm_channel *channel,
 		err = idtcm_output_enable(channel, enable, perout->index);
 
 	if (err) {
-		dev_err(&idtcm->client->dev, "Unable to set output enable");
+		dev_err(idtcm->dev, "Unable to set output enable");
 		return err;
 	}
 
@@ -1448,7 +1472,7 @@ static int configure_dpll_mode_write_frequency(struct idtcm_channel *channel)
 	err = idtcm_set_pll_mode(channel, PLL_MODE_WRITE_FREQUENCY);
 
 	if (err)
-		dev_err(&idtcm->client->dev, "Failed to set pll mode to write frequency");
+		dev_err(idtcm->dev, "Failed to set pll mode to write frequency");
 	else
 		channel->mode = PTP_PLL_MODE_WRITE_FREQUENCY;
 
@@ -1463,7 +1487,7 @@ static int configure_dpll_mode_write_phase(struct idtcm_channel *channel)
 	err = idtcm_set_pll_mode(channel, PLL_MODE_WRITE_PHASE);
 
 	if (err)
-		dev_err(&idtcm->client->dev, "Failed to set pll mode to write phase");
+		dev_err(idtcm->dev, "Failed to set pll mode to write phase");
 	else
 		channel->mode = PTP_PLL_MODE_WRITE_PHASE;
 
@@ -1478,7 +1502,7 @@ static int configure_manual_reference_write_frequency(struct idtcm_channel *chan
 	err = idtcm_set_manual_reference(channel, MANU_REF_WRITE_FREQUENCY);
 
 	if (err)
-		dev_err(&idtcm->client->dev, "Failed to set manual reference to write frequency");
+		dev_err(idtcm->dev, "Failed to set manual reference to write frequency");
 	else
 		channel->mode = PTP_PLL_MODE_WRITE_FREQUENCY;
 
@@ -1493,7 +1517,7 @@ static int configure_manual_reference_write_phase(struct idtcm_channel *channel)
 	err = idtcm_set_manual_reference(channel, MANU_REF_WRITE_PHASE);
 
 	if (err)
-		dev_err(&idtcm->client->dev, "Failed to set manual reference to write phase");
+		dev_err(idtcm->dev, "Failed to set manual reference to write phase");
 	else
 		channel->mode = PTP_PLL_MODE_WRITE_PHASE;
 
@@ -1518,11 +1542,11 @@ static long idtcm_work_handler(struct ptp_clock_info *ptp)
 	struct idtcm_channel *channel = container_of(ptp, struct idtcm_channel, caps);
 	struct idtcm *idtcm = channel->idtcm;
 
-	mutex_lock(&idtcm->reg_lock);
+	mutex_lock(idtcm->lock);
 
 	(void)idtcm_stop_phase_pull_in(channel);
 
-	mutex_unlock(&idtcm->reg_lock);
+	mutex_unlock(idtcm->lock);
 
 	/* Return a negative value here to not reschedule */
 	return -1;
@@ -1533,8 +1557,8 @@ static s32 phase_pull_in_scaled_ppm(s32 current_ppm, s32 phase_pull_in_ppb)
 	/* ppb = scaled_ppm * 125 / 2^13 */
 	/* scaled_ppm = ppb * 2^13 / 125 */
 
-	s64 max_scaled_ppm = (PHASE_PULL_IN_MAX_PPB << 13) / 125;
-	s64 scaled_ppm = (phase_pull_in_ppb << 13) / 125;
+	s64 max_scaled_ppm = div_s64((s64)PHASE_PULL_IN_MAX_PPB << 13, 125);
+	s64 scaled_ppm = div_s64((s64)phase_pull_in_ppb << 13, 125);
 
 	current_ppm += scaled_ppm;
 
@@ -1607,7 +1631,7 @@ static int initialize_operating_mode_with_manual_reference(struct idtcm_channel
 		channel->mode = PTP_PLL_MODE_WRITE_FREQUENCY;
 		break;
 	default:
-		dev_warn(&idtcm->client->dev,
+		dev_warn(idtcm->dev,
 			 "Unsupported MANUAL_REFERENCE: 0x%02x", ref);
 	}
 
@@ -1633,7 +1657,7 @@ static int initialize_operating_mode_with_pll_mode(struct idtcm_channel *channel
 		channel->mode = PTP_PLL_MODE_WRITE_FREQUENCY;
 		break;
 	default:
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"Unsupported PLL_MODE: 0x%02x", mode);
 		err = -EINVAL;
 	}
@@ -1652,14 +1676,14 @@ static int initialize_dco_operating_mode(struct idtcm_channel *channel)
 
 	err = idtcm_get_pll_mode(channel, &mode);
 	if (err) {
-		dev_err(&idtcm->client->dev, "Unable to read pll mode!");
+		dev_err(idtcm->dev, "Unable to read pll mode!");
 		return err;
 	}
 
 	if (mode == PLL_MODE_PLL) {
 		err = idtcm_get_manual_reference(channel, &ref);
 		if (err) {
-			dev_err(&idtcm->client->dev, "Unable to read manual reference!");
+			dev_err(idtcm->dev, "Unable to read manual reference!");
 			return err;
 		}
 		err = initialize_operating_mode_with_manual_reference(channel, ref);
@@ -1775,15 +1799,14 @@ static int idtcm_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 	struct idtcm *idtcm = channel->idtcm;
 	int err;
 
-	mutex_lock(&idtcm->reg_lock);
+	mutex_lock(idtcm->lock);
+	err = _idtcm_gettime_immediate(channel, ts);
+	mutex_unlock(idtcm->lock);
 
-	err = _idtcm_gettime(channel, ts);
 	if (err)
-		dev_err(&idtcm->client->dev, "Failed at line %d in %s!",
+		dev_err(idtcm->dev, "Failed at line %d in %s!",
 			__LINE__, __func__);
 
-	mutex_unlock(&idtcm->reg_lock);
-
 	return err;
 }
 
@@ -1794,15 +1817,14 @@ static int idtcm_settime_deprecated(struct ptp_clock_info *ptp,
 	struct idtcm *idtcm = channel->idtcm;
 	int err;
 
-	mutex_lock(&idtcm->reg_lock);
-
+	mutex_lock(idtcm->lock);
 	err = _idtcm_settime_deprecated(channel, ts);
+	mutex_unlock(idtcm->lock);
+
 	if (err)
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"Failed at line %d in %s!", __LINE__, __func__);
 
-	mutex_unlock(&idtcm->reg_lock);
-
 	return err;
 }
 
@@ -1813,15 +1835,14 @@ static int idtcm_settime(struct ptp_clock_info *ptp,
 	struct idtcm *idtcm = channel->idtcm;
 	int err;
 
-	mutex_lock(&idtcm->reg_lock);
-
+	mutex_lock(idtcm->lock);
 	err = _idtcm_settime(channel, ts, SCSR_TOD_WR_TYPE_SEL_ABSOLUTE);
+	mutex_unlock(idtcm->lock);
+
 	if (err)
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"Failed at line %d in %s!", __LINE__, __func__);
 
-	mutex_unlock(&idtcm->reg_lock);
-
 	return err;
 }
 
@@ -1831,15 +1852,14 @@ static int idtcm_adjtime_deprecated(struct ptp_clock_info *ptp, s64 delta)
 	struct idtcm *idtcm = channel->idtcm;
 	int err;
 
-	mutex_lock(&idtcm->reg_lock);
-
+	mutex_lock(idtcm->lock);
 	err = _idtcm_adjtime_deprecated(channel, delta);
+	mutex_unlock(idtcm->lock);
+
 	if (err)
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"Failed at line %d in %s!", __LINE__, __func__);
 
-	mutex_unlock(&idtcm->reg_lock);
-
 	return err;
 }
 
@@ -1854,13 +1874,10 @@ static int idtcm_adjtime(struct ptp_clock_info *ptp, s64 delta)
 	if (channel->phase_pull_in == true)
 		return 0;
 
-	mutex_lock(&idtcm->reg_lock);
+	mutex_lock(idtcm->lock);
 
 	if (abs(delta) < PHASE_PULL_IN_THRESHOLD_NS) {
 		err = channel->do_phase_pull_in(channel, delta, 0);
-		if (err)
-			dev_err(&idtcm->client->dev,
-				"Failed at line %d in %s!", __LINE__, __func__);
 	} else {
 		if (delta >= 0) {
 			ts = ns_to_timespec64(delta);
@@ -1870,11 +1887,13 @@ static int idtcm_adjtime(struct ptp_clock_info *ptp, s64 delta)
 			type = SCSR_TOD_WR_TYPE_SEL_DELTA_MINUS;
 		}
 		err = _idtcm_settime(channel, &ts, type);
-		if (err)
-			dev_err(&idtcm->client->dev,
-				"Failed at line %d in %s!", __LINE__, __func__);
 	}
-	mutex_unlock(&idtcm->reg_lock);
+
+	mutex_unlock(idtcm->lock);
+
+	if (err)
+		dev_err(idtcm->dev,
+			"Failed at line %d in %s!", __LINE__, __func__);
 
 	return err;
 }
@@ -1885,15 +1904,14 @@ static int idtcm_adjphase(struct ptp_clock_info *ptp, s32 delta)
 	struct idtcm *idtcm = channel->idtcm;
 	int err;
 
-	mutex_lock(&idtcm->reg_lock);
-
+	mutex_lock(idtcm->lock);
 	err = _idtcm_adjphase(channel, delta);
+	mutex_unlock(idtcm->lock);
+
 	if (err)
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"Failed at line %d in %s!", __LINE__, __func__);
 
-	mutex_unlock(&idtcm->reg_lock);
-
 	return err;
 }
 
@@ -1909,13 +1927,14 @@ static int idtcm_adjfine(struct ptp_clock_info *ptp,  long scaled_ppm)
 	if (scaled_ppm == channel->current_freq_scaled_ppm)
 		return 0;
 
-	mutex_lock(&idtcm->reg_lock);
-
+	mutex_lock(idtcm->lock);
 	err = _idtcm_adjfine(channel, scaled_ppm);
+	mutex_unlock(idtcm->lock);
 
-	mutex_unlock(&idtcm->reg_lock);
-
-	if (!err)
+	if (err)
+		dev_err(idtcm->dev,
+			"Failed at line %d in %s!", __LINE__, __func__);
+	else
 		channel->current_freq_scaled_ppm = scaled_ppm;
 
 	return err;
@@ -1924,35 +1943,38 @@ static int idtcm_adjfine(struct ptp_clock_info *ptp,  long scaled_ppm)
 static int idtcm_enable(struct ptp_clock_info *ptp,
 			struct ptp_clock_request *rq, int on)
 {
-	int err;
 	struct idtcm_channel *channel = container_of(ptp, struct idtcm_channel, caps);
+	struct idtcm *idtcm = channel->idtcm;
+	int err = -EOPNOTSUPP;
+
+	mutex_lock(idtcm->lock);
 
 	switch (rq->type) {
 	case PTP_CLK_REQ_PEROUT:
-		if (!on) {
-			err = idtcm_perout_enable(channel, false, &rq->perout);
-			if (err)
-				dev_err(&channel->idtcm->client->dev,
-					"Failed at line %d in %s!",
-					__LINE__, __func__);
-			return err;
-		}
-
+		if (!on)
+			err = idtcm_perout_enable(channel, &rq->perout, false);
 		/* Only accept a 1-PPS aligned to the second. */
-		if (rq->perout.start.nsec || rq->perout.period.sec != 1 ||
-		    rq->perout.period.nsec)
-			return -ERANGE;
-
-		err = idtcm_perout_enable(channel, true, &rq->perout);
-		if (err)
-			dev_err(&channel->idtcm->client->dev,
-				"Failed at line %d in %s!", __LINE__, __func__);
-		return err;
+		else if (rq->perout.start.nsec || rq->perout.period.sec != 1 ||
+			 rq->perout.period.nsec)
+			err = -ERANGE;
+		else
+			err = idtcm_perout_enable(channel, &rq->perout, true);
+		break;
+	case PTP_CLK_REQ_EXTTS:
+		err = idtcm_enable_extts(channel, rq->extts.index,
+					 rq->extts.rsv[0], on);
+		break;
 	default:
 		break;
 	}
 
-	return -EOPNOTSUPP;
+	mutex_unlock(idtcm->lock);
+
+	if (err)
+		dev_err(channel->idtcm->dev,
+			"Failed in %s with err %d!", __func__, err);
+
+	return err;
 }
 
 static int idtcm_enable_tod(struct idtcm_channel *channel)
@@ -2013,7 +2035,7 @@ static void idtcm_set_version_info(struct idtcm *idtcm)
 
 	idtcm->fw_ver = idtcm_fw_version(idtcm->version);
 
-	dev_info(&idtcm->client->dev,
+	dev_info(idtcm->dev,
 		 "%d.%d.%d, Id: 0x%04x  HW Rev: %d  OTP Config Select: %d",
 		 major, minor, hotfix,
 		 product_id, hw_rev_id, config_select);
@@ -2023,6 +2045,7 @@ static const struct ptp_clock_info idtcm_caps = {
 	.owner		= THIS_MODULE,
 	.max_adj	= 244000,
 	.n_per_out	= 12,
+	.n_ext_ts	= MAX_TOD,
 	.adjphase	= &idtcm_adjphase,
 	.adjfine	= &idtcm_adjfine,
 	.adjtime	= &idtcm_adjtime,
@@ -2036,6 +2059,7 @@ static const struct ptp_clock_info idtcm_caps_deprecated = {
 	.owner		= THIS_MODULE,
 	.max_adj	= 244000,
 	.n_per_out	= 12,
+	.n_ext_ts	= MAX_TOD,
 	.adjphase	= &idtcm_adjphase,
 	.adjfine	= &idtcm_adjfine,
 	.adjtime	= &idtcm_adjtime_deprecated,
@@ -2122,24 +2146,46 @@ static int configure_channel_pll(struct idtcm_channel *channel)
 	return err;
 }
 
-static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
+/*
+ * Compensate for the PTP DCO input-to-output delay.
+ * This delay is 18 FOD cycles.
+ */
+static u32 idtcm_get_dco_delay(struct idtcm_channel *channel)
 {
-	enum fw_version fw_ver = idtcm->fw_ver;
-	struct idtcm_channel *channel;
+	struct idtcm *idtcm = channel->idtcm;
+	u8 mbuf[8] = {0};
+	u8 nbuf[2] = {0};
+	u32 fodFreq;
 	int err;
+	u64 m;
+	u16 n;
 
-	if (!(index < MAX_TOD))
-		return -EINVAL;
+	err = idtcm_read(idtcm, channel->dpll_ctrl_n,
+			 DPLL_CTRL_DPLL_FOD_FREQ, mbuf, 6);
+	if (err)
+		return 0;
 
-	channel = &idtcm->channel[index];
+	err = idtcm_read(idtcm, channel->dpll_ctrl_n,
+			 DPLL_CTRL_DPLL_FOD_FREQ + 6, nbuf, 2);
+	if (err)
+		return 0;
 
-	channel->idtcm = idtcm;
-	channel->current_freq_scaled_ppm = 0;
+	m = get_unaligned_le64(mbuf);
+	n = get_unaligned_le16(nbuf);
 
-	/* Set pll addresses */
-	err = configure_channel_pll(channel);
-	if (err)
-		return err;
+	if (n == 0)
+		n = 1;
+
+	fodFreq = (u32)div_u64(m, n);
+	if (fodFreq >= 500000000)
+		return 18 * (u32)div_u64(NSEC_PER_SEC, fodFreq);
+
+	return 0;
+}
+
+static int configure_channel_tod(struct idtcm_channel *channel, u32 index)
+{
+	enum fw_version fw_ver = channel->idtcm->fw_ver;
 
 	/* Set tod addresses */
 	switch (index) {
@@ -2171,6 +2217,32 @@ static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
 		return -EINVAL;
 	}
 
+	return 0;
+}
+
+static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
+{
+	struct idtcm_channel *channel;
+	int err;
+
+	if (!(index < MAX_TOD))
+		return -EINVAL;
+
+	channel = &idtcm->channel[index];
+
+	channel->idtcm = idtcm;
+	channel->current_freq_scaled_ppm = 0;
+
+	/* Set pll addresses */
+	err = configure_channel_pll(channel);
+	if (err)
+		return err;
+
+	/* Set tod addresses */
+	err = configure_channel_tod(channel, index);
+	if (err)
+		return err;
+
 	if (idtcm->fw_ver < V487)
 		channel->caps = idtcm_caps_deprecated;
 	else
@@ -2185,11 +2257,13 @@ static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
 
 	err = idtcm_enable_tod(channel);
 	if (err) {
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"Failed at line %d in %s!", __LINE__, __func__);
 		return err;
 	}
 
+	channel->dco_delay = idtcm_get_dco_delay(channel);
+
 	channel->ptp_clock = ptp_clock_register(&channel->caps, NULL);
 
 	if (IS_ERR(channel->ptp_clock)) {
@@ -2201,12 +2275,59 @@ static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
 	if (!channel->ptp_clock)
 		return -ENOTSUPP;
 
-	dev_info(&idtcm->client->dev, "PLL%d registered as ptp%d",
+	dev_info(idtcm->dev, "PLL%d registered as ptp%d",
 		 index, channel->ptp_clock->index);
 
 	return 0;
 }
 
+static int idtcm_enable_extts_channel(struct idtcm *idtcm, u32 index)
+{
+	struct idtcm_channel *channel;
+	int err;
+
+	if (!(index < MAX_TOD))
+		return -EINVAL;
+
+	channel = &idtcm->channel[index];
+	channel->idtcm = idtcm;
+
+	/* Set tod addresses */
+	err = configure_channel_tod(channel, index);
+	if (err)
+		return err;
+
+	channel->idtcm = idtcm;
+
+	return 0;
+}
+
+static void idtcm_extts_check(struct work_struct *work)
+{
+	struct idtcm *idtcm = container_of(work, struct idtcm, extts_work.work);
+	int err, i;
+
+	if (idtcm->extts_mask == 0)
+		return;
+
+	mutex_lock(idtcm->lock);
+	for (i = 0; i < MAX_TOD; i++) {
+		u8 mask = 1 << i;
+
+		if (idtcm->extts_mask & mask) {
+			err = idtcm_extts_check_channel(idtcm, i);
+			/* trigger clears itself, so clear the mask */
+			if (err == 0)
+				idtcm->extts_mask &= ~mask;
+		}
+	}
+
+	if (idtcm->extts_mask)
+		schedule_delayed_work(&idtcm->extts_work,
+				      msecs_to_jiffies(EXTTS_PERIOD_MS));
+	mutex_unlock(idtcm->lock);
+}
+
 static void ptp_clock_unregister_all(struct idtcm *idtcm)
 {
 	u8 i;
@@ -2222,6 +2343,7 @@ static void ptp_clock_unregister_all(struct idtcm *idtcm)
 static void set_default_masks(struct idtcm *idtcm)
 {
 	idtcm->tod_mask = DEFAULT_TOD_MASK;
+	idtcm->extts_mask = 0;
 
 	idtcm->channel[0].pll = DEFAULT_TOD0_PTP_PLL;
 	idtcm->channel[1].pll = DEFAULT_TOD1_PTP_PLL;
@@ -2234,158 +2356,86 @@ static void set_default_masks(struct idtcm *idtcm)
 	idtcm->channel[3].output_mask = DEFAULT_OUTPUT_MASK_PLL3;
 }
 
-static int idtcm_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int idtcm_probe(struct platform_device *pdev)
 {
+	struct rsmu_ddata *ddata = dev_get_drvdata(pdev->dev.parent);
 	struct idtcm *idtcm;
 	int err;
 	u8 i;
 
-	/* Unused for now */
-	(void)id;
-
-	idtcm = devm_kzalloc(&client->dev, sizeof(struct idtcm), GFP_KERNEL);
+	idtcm = devm_kzalloc(&pdev->dev, sizeof(struct idtcm), GFP_KERNEL);
 
 	if (!idtcm)
 		return -ENOMEM;
 
-	idtcm->client = client;
-	idtcm->page_offset = 0xff;
+	idtcm->dev = &pdev->dev;
+	idtcm->mfd = pdev->dev.parent;
+	idtcm->lock = &ddata->lock;
+	idtcm->regmap = ddata->regmap;
 	idtcm->calculate_overhead_flag = 0;
 
+	INIT_DELAYED_WORK(&idtcm->extts_work, idtcm_extts_check);
+
 	set_default_masks(idtcm);
 
-	mutex_init(&idtcm->reg_lock);
-	mutex_lock(&idtcm->reg_lock);
+	mutex_lock(idtcm->lock);
 
 	idtcm_set_version_info(idtcm);
 
-	err = idtcm_load_firmware(idtcm, &client->dev);
+	err = idtcm_load_firmware(idtcm, &pdev->dev);
+
 	if (err)
-		dev_warn(&idtcm->client->dev, "loading firmware failed with %d", err);
+		dev_warn(idtcm->dev, "loading firmware failed with %d", err);
 
 	wait_for_chip_ready(idtcm);
 
 	if (idtcm->tod_mask) {
 		for (i = 0; i < MAX_TOD; i++) {
-			if (idtcm->tod_mask & (1 << i)) {
+			if (idtcm->tod_mask & (1 << i))
 				err = idtcm_enable_channel(idtcm, i);
-				if (err) {
-					dev_err(&idtcm->client->dev,
-						"idtcm_enable_channel %d failed!", i);
-					break;
-				}
+			else
+				err = idtcm_enable_extts_channel(idtcm, i);
+			if (err) {
+				dev_err(idtcm->dev,
+					"idtcm_enable_channel %d failed!", i);
+				break;
 			}
 		}
 	} else {
-		dev_err(&idtcm->client->dev,
+		dev_err(idtcm->dev,
 			"no PLLs flagged as PHCs, nothing to do");
 		err = -ENODEV;
 	}
 
-	mutex_unlock(&idtcm->reg_lock);
+	mutex_unlock(idtcm->lock);
 
 	if (err) {
 		ptp_clock_unregister_all(idtcm);
 		return err;
 	}
 
-	i2c_set_clientdata(client, idtcm);
+	platform_set_drvdata(pdev, idtcm);
 
 	return 0;
 }
 
-static int idtcm_remove(struct i2c_client *client)
+static int idtcm_remove(struct platform_device *pdev)
 {
-	struct idtcm *idtcm = i2c_get_clientdata(client);
+	struct idtcm *idtcm = platform_get_drvdata(pdev);
 
 	ptp_clock_unregister_all(idtcm);
 
-	mutex_destroy(&idtcm->reg_lock);
+	cancel_delayed_work_sync(&idtcm->extts_work);
 
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id idtcm_dt_id[] = {
-	{ .compatible = "idt,8a34000" },
-	{ .compatible = "idt,8a34001" },
-	{ .compatible = "idt,8a34002" },
-	{ .compatible = "idt,8a34003" },
-	{ .compatible = "idt,8a34004" },
-	{ .compatible = "idt,8a34005" },
-	{ .compatible = "idt,8a34006" },
-	{ .compatible = "idt,8a34007" },
-	{ .compatible = "idt,8a34008" },
-	{ .compatible = "idt,8a34009" },
-	{ .compatible = "idt,8a34010" },
-	{ .compatible = "idt,8a34011" },
-	{ .compatible = "idt,8a34012" },
-	{ .compatible = "idt,8a34013" },
-	{ .compatible = "idt,8a34014" },
-	{ .compatible = "idt,8a34015" },
-	{ .compatible = "idt,8a34016" },
-	{ .compatible = "idt,8a34017" },
-	{ .compatible = "idt,8a34018" },
-	{ .compatible = "idt,8a34019" },
-	{ .compatible = "idt,8a34040" },
-	{ .compatible = "idt,8a34041" },
-	{ .compatible = "idt,8a34042" },
-	{ .compatible = "idt,8a34043" },
-	{ .compatible = "idt,8a34044" },
-	{ .compatible = "idt,8a34045" },
-	{ .compatible = "idt,8a34046" },
-	{ .compatible = "idt,8a34047" },
-	{ .compatible = "idt,8a34048" },
-	{ .compatible = "idt,8a34049" },
-	{},
-};
-MODULE_DEVICE_TABLE(of, idtcm_dt_id);
-#endif
-
-static const struct i2c_device_id idtcm_i2c_id[] = {
-	{ "8a34000" },
-	{ "8a34001" },
-	{ "8a34002" },
-	{ "8a34003" },
-	{ "8a34004" },
-	{ "8a34005" },
-	{ "8a34006" },
-	{ "8a34007" },
-	{ "8a34008" },
-	{ "8a34009" },
-	{ "8a34010" },
-	{ "8a34011" },
-	{ "8a34012" },
-	{ "8a34013" },
-	{ "8a34014" },
-	{ "8a34015" },
-	{ "8a34016" },
-	{ "8a34017" },
-	{ "8a34018" },
-	{ "8a34019" },
-	{ "8a34040" },
-	{ "8a34041" },
-	{ "8a34042" },
-	{ "8a34043" },
-	{ "8a34044" },
-	{ "8a34045" },
-	{ "8a34046" },
-	{ "8a34047" },
-	{ "8a34048" },
-	{ "8a34049" },
-	{},
-};
-MODULE_DEVICE_TABLE(i2c, idtcm_i2c_id);
-
-static struct i2c_driver idtcm_driver = {
+static struct platform_driver idtcm_driver = {
 	.driver = {
-		.of_match_table	= of_match_ptr(idtcm_dt_id),
-		.name		= "idtcm",
+		.name = "8a3400x-phc",
 	},
-	.probe		= idtcm_probe,
-	.remove		= idtcm_remove,
-	.id_table	= idtcm_i2c_id,
+	.probe = idtcm_probe,
+	.remove	= idtcm_remove,
 };
 
-module_i2c_driver(idtcm_driver);
+module_platform_driver(idtcm_driver);
diff --git a/drivers/ptp/ptp_clockmatrix.h b/drivers/ptp/ptp_clockmatrix.h
index 833e5907c351..0f3059ae1fff 100644
--- a/drivers/ptp/ptp_clockmatrix.h
+++ b/drivers/ptp/ptp_clockmatrix.h
@@ -9,8 +9,8 @@
 #define PTP_IDTCLOCKMATRIX_H
 
 #include <linux/ktime.h>
-
-#include "idt8a340_reg.h"
+#include <linux/mfd/idt8a340_reg.h>
+#include <linux/regmap.h>
 
 #define FW_FILENAME	"idtcm.bin"
 #define MAX_TOD		(4)
@@ -44,7 +44,6 @@
 #define DEFAULT_TOD2_PTP_PLL		(2)
 #define DEFAULT_TOD3_PTP_PLL		(3)
 
-#define POST_SM_RESET_DELAY_MS			(3000)
 #define PHASE_PULL_IN_THRESHOLD_NS_DEPRECATED	(150000)
 #define PHASE_PULL_IN_THRESHOLD_NS		(15000)
 #define TOD_WRITE_OVERHEAD_COUNT_MAX		(2)
@@ -64,6 +63,11 @@
  * Return register address based on passed in firmware version
  */
 #define IDTCM_FW_REG(FW, VER, REG)	(((FW) < (VER)) ? (REG) : (REG##_##VER))
+enum fw_version {
+	V_DEFAULT = 0,
+	V487 = 1,
+	V520 = 2,
+};
 
 /* PTP PLL Mode */
 enum ptp_pll_mode {
@@ -74,94 +78,6 @@ enum ptp_pll_mode {
 	PTP_PLL_MODE_MAX = PTP_PLL_MODE_UNSUPPORTED,
 };
 
-/* Values of DPLL_N.DPLL_MODE.PLL_MODE */
-enum pll_mode {
-	PLL_MODE_MIN = 0,
-	PLL_MODE_PLL = PLL_MODE_MIN,
-	PLL_MODE_WRITE_PHASE = 1,
-	PLL_MODE_WRITE_FREQUENCY = 2,
-	PLL_MODE_GPIO_INC_DEC = 3,
-	PLL_MODE_SYNTHESIS = 4,
-	PLL_MODE_PHASE_MEASUREMENT = 5,
-	PLL_MODE_DISABLED = 6,
-	PLL_MODE_MAX = PLL_MODE_DISABLED,
-};
-
-/* Values of DPLL_CTRL_n.DPLL_MANU_REF_CFG.MANUAL_REFERENCE */
-enum manual_reference {
-	MANU_REF_MIN = 0,
-	MANU_REF_CLK0 = MANU_REF_MIN,
-	MANU_REF_CLK1,
-	MANU_REF_CLK2,
-	MANU_REF_CLK3,
-	MANU_REF_CLK4,
-	MANU_REF_CLK5,
-	MANU_REF_CLK6,
-	MANU_REF_CLK7,
-	MANU_REF_CLK8,
-	MANU_REF_CLK9,
-	MANU_REF_CLK10,
-	MANU_REF_CLK11,
-	MANU_REF_CLK12,
-	MANU_REF_CLK13,
-	MANU_REF_CLK14,
-	MANU_REF_CLK15,
-	MANU_REF_WRITE_PHASE,
-	MANU_REF_WRITE_FREQUENCY,
-	MANU_REF_XO_DPLL,
-	MANU_REF_MAX = MANU_REF_XO_DPLL,
-};
-
-enum hw_tod_write_trig_sel {
-	HW_TOD_WR_TRIG_SEL_MIN = 0,
-	HW_TOD_WR_TRIG_SEL_MSB = HW_TOD_WR_TRIG_SEL_MIN,
-	HW_TOD_WR_TRIG_SEL_RESERVED = 1,
-	HW_TOD_WR_TRIG_SEL_TOD_PPS = 2,
-	HW_TOD_WR_TRIG_SEL_IRIGB_PPS = 3,
-	HW_TOD_WR_TRIG_SEL_PWM_PPS = 4,
-	HW_TOD_WR_TRIG_SEL_GPIO = 5,
-	HW_TOD_WR_TRIG_SEL_FOD_SYNC = 6,
-	WR_TRIG_SEL_MAX = HW_TOD_WR_TRIG_SEL_FOD_SYNC,
-};
-
-/* 4.8.7 only */
-enum scsr_tod_write_trig_sel {
-	SCSR_TOD_WR_TRIG_SEL_DISABLE = 0,
-	SCSR_TOD_WR_TRIG_SEL_IMMEDIATE = 1,
-	SCSR_TOD_WR_TRIG_SEL_REFCLK = 2,
-	SCSR_TOD_WR_TRIG_SEL_PWMPPS = 3,
-	SCSR_TOD_WR_TRIG_SEL_TODPPS = 4,
-	SCSR_TOD_WR_TRIG_SEL_SYNCFOD = 5,
-	SCSR_TOD_WR_TRIG_SEL_GPIO = 6,
-	SCSR_TOD_WR_TRIG_SEL_MAX = SCSR_TOD_WR_TRIG_SEL_GPIO,
-};
-
-/* 4.8.7 only */
-enum scsr_tod_write_type_sel {
-	SCSR_TOD_WR_TYPE_SEL_ABSOLUTE = 0,
-	SCSR_TOD_WR_TYPE_SEL_DELTA_PLUS = 1,
-	SCSR_TOD_WR_TYPE_SEL_DELTA_MINUS = 2,
-	SCSR_TOD_WR_TYPE_SEL_MAX = SCSR_TOD_WR_TYPE_SEL_DELTA_MINUS,
-};
-
-/* Values STATUS.DPLL_SYS_STATUS.DPLL_SYS_STATE */
-enum dpll_state {
-	DPLL_STATE_MIN = 0,
-	DPLL_STATE_FREERUN = DPLL_STATE_MIN,
-	DPLL_STATE_LOCKACQ = 1,
-	DPLL_STATE_LOCKREC = 2,
-	DPLL_STATE_LOCKED = 3,
-	DPLL_STATE_HOLDOVER = 4,
-	DPLL_STATE_OPEN_LOOP = 5,
-	DPLL_STATE_MAX = DPLL_STATE_OPEN_LOOP,
-};
-
-enum fw_version {
-	V_DEFAULT = 0,
-	V487 = 1,
-	V520 = 2,
-};
-
 struct idtcm;
 
 struct idtcm_channel {
@@ -185,25 +101,32 @@ struct idtcm_channel {
 						    s32 offset_ns, u32 max_ffo_ppb);
 	s32			current_freq_scaled_ppm;
 	bool			phase_pull_in;
+	u32			dco_delay;
+	/* last input trigger for extts */
+	u8			refn;
 	u8			pll;
 	u16			output_mask;
 };
 
 struct idtcm {
 	struct idtcm_channel	channel[MAX_TOD];
-	struct i2c_client	*client;
-	u8			page_offset;
+	struct device		*dev;
 	u8			tod_mask;
 	char			version[16];
 	enum fw_version		fw_ver;
-
+	/* Polls for external time stamps */
+	u8			extts_mask;
+	struct delayed_work	extts_work;
+	/* Remember the ptp channel to report extts */
+	struct idtcm_channel	*event_channel[MAX_TOD];
+	/* Mutex to protect operations from being interrupted */
+	struct mutex		*lock;
+	struct device		*mfd;
+	struct regmap		*regmap;
 	/* Overhead calculation for adjtime */
 	u8			calculate_overhead_flag;
 	s64			tod_write_overhead_ns;
 	ktime_t			start_time;
-
-	/* Protects I2C read/modify/write registers from concurrent access */
-	struct mutex		reg_lock;
 };
 
 struct idtcm_fwrc {
diff --git a/include/linux/mfd/idt8a340_reg.h b/include/linux/mfd/idt8a340_reg.h
index 92d763230bdf..a18c1539a152 100644
--- a/include/linux/mfd/idt8a340_reg.h
+++ b/include/linux/mfd/idt8a340_reg.h
@@ -506,6 +506,10 @@
 #define STATE_MODE_SHIFT                  (0)
 #define STATE_MODE_MASK                   (0x7)
 
+/* Bit definitions for the DPLL_MANU_REF_CFG register */
+#define MANUAL_REFERENCE_SHIFT            (0)
+#define MANUAL_REFERENCE_MASK             (0x1f)
+
 /* Bit definitions for the GPIO_CFG_GBL register */
 #define SUPPLY_MODE_SHIFT                 (0)
 #define SUPPLY_MODE_MASK                  (0x3)
@@ -654,7 +658,7 @@
 /* Values of DPLL_N.DPLL_MODE.PLL_MODE */
 enum pll_mode {
 	PLL_MODE_MIN = 0,
-	PLL_MODE_NORMAL = PLL_MODE_MIN,
+	PLL_MODE_PLL = PLL_MODE_MIN,
 	PLL_MODE_WRITE_PHASE = 1,
 	PLL_MODE_WRITE_FREQUENCY = 2,
 	PLL_MODE_GPIO_INC_DEC = 3,
@@ -664,6 +668,31 @@ enum pll_mode {
 	PLL_MODE_MAX = PLL_MODE_DISABLED,
 };
 
+/* Values of DPLL_CTRL_n.DPLL_MANU_REF_CFG.MANUAL_REFERENCE */
+enum manual_reference {
+	MANU_REF_MIN = 0,
+	MANU_REF_CLK0 = MANU_REF_MIN,
+	MANU_REF_CLK1,
+	MANU_REF_CLK2,
+	MANU_REF_CLK3,
+	MANU_REF_CLK4,
+	MANU_REF_CLK5,
+	MANU_REF_CLK6,
+	MANU_REF_CLK7,
+	MANU_REF_CLK8,
+	MANU_REF_CLK9,
+	MANU_REF_CLK10,
+	MANU_REF_CLK11,
+	MANU_REF_CLK12,
+	MANU_REF_CLK13,
+	MANU_REF_CLK14,
+	MANU_REF_CLK15,
+	MANU_REF_WRITE_PHASE,
+	MANU_REF_WRITE_FREQUENCY,
+	MANU_REF_XO_DPLL,
+	MANU_REF_MAX = MANU_REF_XO_DPLL,
+};
+
 enum hw_tod_write_trig_sel {
 	HW_TOD_WR_TRIG_SEL_MIN = 0,
 	HW_TOD_WR_TRIG_SEL_MSB = HW_TOD_WR_TRIG_SEL_MIN,
-- 
cgit v1.2.3


From f6bc909e7673c30abcbdb329e7d0aa2e83c103d7 Mon Sep 17 00:00:00 2001
From: Simon Trimmer <simont@opensource.cirrus.com>
Date: Mon, 13 Sep 2021 17:00:57 +0100
Subject: firmware: cs_dsp: add driver to support firmware loading on Cirrus
 Logic DSPs

wm_adsp originally provided firmware loading on some audio DSP and was
implemented as an ASoC codec driver. However, the firmware loading now
covers a wider range of DSP cores and peripherals containing them,
beyond just audio. So it needs to be available to non-audio drivers. All
the core firmware loading support has been moved into a new driver
cs_dsp, leaving only the ASoC-specific parts in wm_adsp.

Signed-off-by: Simon Trimmer <simont@opensource.cirrus.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20210913160057.103842-17-simont@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 MAINTAINERS                            |   11 +
 drivers/firmware/Kconfig               |    1 +
 drivers/firmware/Makefile              |    1 +
 drivers/firmware/cirrus/Kconfig        |    5 +
 drivers/firmware/cirrus/Makefile       |    3 +
 drivers/firmware/cirrus/cs_dsp.c       | 3109 ++++++++++++++++++++++++++++++++
 include/linux/firmware/cirrus/cs_dsp.h |  242 +++
 include/linux/firmware/cirrus/wmfw.h   |  202 +++
 sound/soc/codecs/Kconfig               |    1 +
 sound/soc/codecs/wm_adsp.c             | 3036 +------------------------------
 sound/soc/codecs/wm_adsp.h             |  132 +-
 sound/soc/codecs/wmfw.h                |  202 ---
 12 files changed, 3674 insertions(+), 3271 deletions(-)
 create mode 100644 drivers/firmware/cirrus/Kconfig
 create mode 100644 drivers/firmware/cirrus/Makefile
 create mode 100644 drivers/firmware/cirrus/cs_dsp.c
 create mode 100644 include/linux/firmware/cirrus/cs_dsp.h
 create mode 100644 include/linux/firmware/cirrus/wmfw.h
 delete mode 100644 sound/soc/codecs/wmfw.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index f555bfad3ca0..dd3ed0f809a3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4445,6 +4445,17 @@ L:	patches@opensource.cirrus.com
 S:	Maintained
 F:	sound/soc/codecs/cs*
 
+CIRRUS LOGIC DSP FIRMWARE DRIVER
+M:	Simon Trimmer <simont@opensource.cirrus.com>
+M:	Charles Keepax <ckeepax@opensource.cirrus.com>
+M:	Richard Fitzgerald <rf@opensource.cirrus.com>
+L:	patches@opensource.cirrus.com
+S:	Supported
+W:	https://github.com/CirrusLogic/linux-drivers/wiki
+T:	git https://github.com/CirrusLogic/linux-drivers.git
+F:	drivers/firmware/cirrus/*
+F:	include/linux/firmware/cirrus/*
+
 CIRRUS LOGIC EP93XX ETHERNET DRIVER
 M:	Hartley Sweeten <hsweeten@visionengravers.com>
 L:	netdev@vger.kernel.org
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 220a58cf0a44..fd2a5f68acab 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -298,6 +298,7 @@ config TURRIS_MOX_RWTM
 
 source "drivers/firmware/arm_ffa/Kconfig"
 source "drivers/firmware/broadcom/Kconfig"
+source "drivers/firmware/cirrus/Kconfig"
 source "drivers/firmware/google/Kconfig"
 source "drivers/firmware/efi/Kconfig"
 source "drivers/firmware/imx/Kconfig"
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index 5ced0673d94b..4e58cb474a68 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -28,6 +28,7 @@ obj-$(CONFIG_TURRIS_MOX_RWTM)	+= turris-mox-rwtm.o
 obj-y				+= arm_ffa/
 obj-y				+= arm_scmi/
 obj-y				+= broadcom/
+obj-y				+= cirrus/
 obj-y				+= meson/
 obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
 obj-$(CONFIG_EFI)		+= efi/
diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig
new file mode 100644
index 000000000000..f9503cb481d2
--- /dev/null
+++ b/drivers/firmware/cirrus/Kconfig
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CS_DSP
+	tristate
+	default n
diff --git a/drivers/firmware/cirrus/Makefile b/drivers/firmware/cirrus/Makefile
new file mode 100644
index 000000000000..f074e2638c9c
--- /dev/null
+++ b/drivers/firmware/cirrus/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+obj-$(CONFIG_CS_DSP)		+= cs_dsp.o
diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c
new file mode 100644
index 000000000000..948dd8382686
--- /dev/null
+++ b/drivers/firmware/cirrus/cs_dsp.c
@@ -0,0 +1,3109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cs_dsp.c  --  Cirrus Logic DSP firmware support
+ *
+ * Based on sound/soc/codecs/wm_adsp.c
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ * Copyright (C) 2015-2021 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/firmware.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/workqueue.h>
+
+#include <linux/firmware/cirrus/cs_dsp.h>
+#include <linux/firmware/cirrus/wmfw.h>
+
+#define cs_dsp_err(_dsp, fmt, ...) \
+	dev_err(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
+#define cs_dsp_warn(_dsp, fmt, ...) \
+	dev_warn(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
+#define cs_dsp_info(_dsp, fmt, ...) \
+	dev_info(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
+#define cs_dsp_dbg(_dsp, fmt, ...) \
+	dev_dbg(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
+
+#define ADSP1_CONTROL_1                   0x00
+#define ADSP1_CONTROL_2                   0x02
+#define ADSP1_CONTROL_3                   0x03
+#define ADSP1_CONTROL_4                   0x04
+#define ADSP1_CONTROL_5                   0x06
+#define ADSP1_CONTROL_6                   0x07
+#define ADSP1_CONTROL_7                   0x08
+#define ADSP1_CONTROL_8                   0x09
+#define ADSP1_CONTROL_9                   0x0A
+#define ADSP1_CONTROL_10                  0x0B
+#define ADSP1_CONTROL_11                  0x0C
+#define ADSP1_CONTROL_12                  0x0D
+#define ADSP1_CONTROL_13                  0x0F
+#define ADSP1_CONTROL_14                  0x10
+#define ADSP1_CONTROL_15                  0x11
+#define ADSP1_CONTROL_16                  0x12
+#define ADSP1_CONTROL_17                  0x13
+#define ADSP1_CONTROL_18                  0x14
+#define ADSP1_CONTROL_19                  0x16
+#define ADSP1_CONTROL_20                  0x17
+#define ADSP1_CONTROL_21                  0x18
+#define ADSP1_CONTROL_22                  0x1A
+#define ADSP1_CONTROL_23                  0x1B
+#define ADSP1_CONTROL_24                  0x1C
+#define ADSP1_CONTROL_25                  0x1E
+#define ADSP1_CONTROL_26                  0x20
+#define ADSP1_CONTROL_27                  0x21
+#define ADSP1_CONTROL_28                  0x22
+#define ADSP1_CONTROL_29                  0x23
+#define ADSP1_CONTROL_30                  0x24
+#define ADSP1_CONTROL_31                  0x26
+
+/*
+ * ADSP1 Control 19
+ */
+#define ADSP1_WDMA_BUFFER_LENGTH_MASK     0x00FF  /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */
+#define ADSP1_WDMA_BUFFER_LENGTH_SHIFT         0  /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */
+#define ADSP1_WDMA_BUFFER_LENGTH_WIDTH         8  /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */
+
+/*
+ * ADSP1 Control 30
+ */
+#define ADSP1_DBG_CLK_ENA                 0x0008  /* DSP1_DBG_CLK_ENA */
+#define ADSP1_DBG_CLK_ENA_MASK            0x0008  /* DSP1_DBG_CLK_ENA */
+#define ADSP1_DBG_CLK_ENA_SHIFT                3  /* DSP1_DBG_CLK_ENA */
+#define ADSP1_DBG_CLK_ENA_WIDTH                1  /* DSP1_DBG_CLK_ENA */
+#define ADSP1_SYS_ENA                     0x0004  /* DSP1_SYS_ENA */
+#define ADSP1_SYS_ENA_MASK                0x0004  /* DSP1_SYS_ENA */
+#define ADSP1_SYS_ENA_SHIFT                    2  /* DSP1_SYS_ENA */
+#define ADSP1_SYS_ENA_WIDTH                    1  /* DSP1_SYS_ENA */
+#define ADSP1_CORE_ENA                    0x0002  /* DSP1_CORE_ENA */
+#define ADSP1_CORE_ENA_MASK               0x0002  /* DSP1_CORE_ENA */
+#define ADSP1_CORE_ENA_SHIFT                   1  /* DSP1_CORE_ENA */
+#define ADSP1_CORE_ENA_WIDTH                   1  /* DSP1_CORE_ENA */
+#define ADSP1_START                       0x0001  /* DSP1_START */
+#define ADSP1_START_MASK                  0x0001  /* DSP1_START */
+#define ADSP1_START_SHIFT                      0  /* DSP1_START */
+#define ADSP1_START_WIDTH                      1  /* DSP1_START */
+
+/*
+ * ADSP1 Control 31
+ */
+#define ADSP1_CLK_SEL_MASK                0x0007  /* CLK_SEL_ENA */
+#define ADSP1_CLK_SEL_SHIFT                    0  /* CLK_SEL_ENA */
+#define ADSP1_CLK_SEL_WIDTH                    3  /* CLK_SEL_ENA */
+
+#define ADSP2_CONTROL                     0x0
+#define ADSP2_CLOCKING                    0x1
+#define ADSP2V2_CLOCKING                  0x2
+#define ADSP2_STATUS1                     0x4
+#define ADSP2_WDMA_CONFIG_1               0x30
+#define ADSP2_WDMA_CONFIG_2               0x31
+#define ADSP2V2_WDMA_CONFIG_2             0x32
+#define ADSP2_RDMA_CONFIG_1               0x34
+
+#define ADSP2_SCRATCH0                    0x40
+#define ADSP2_SCRATCH1                    0x41
+#define ADSP2_SCRATCH2                    0x42
+#define ADSP2_SCRATCH3                    0x43
+
+#define ADSP2V2_SCRATCH0_1                0x40
+#define ADSP2V2_SCRATCH2_3                0x42
+
+/*
+ * ADSP2 Control
+ */
+#define ADSP2_MEM_ENA                     0x0010  /* DSP1_MEM_ENA */
+#define ADSP2_MEM_ENA_MASK                0x0010  /* DSP1_MEM_ENA */
+#define ADSP2_MEM_ENA_SHIFT                    4  /* DSP1_MEM_ENA */
+#define ADSP2_MEM_ENA_WIDTH                    1  /* DSP1_MEM_ENA */
+#define ADSP2_SYS_ENA                     0x0004  /* DSP1_SYS_ENA */
+#define ADSP2_SYS_ENA_MASK                0x0004  /* DSP1_SYS_ENA */
+#define ADSP2_SYS_ENA_SHIFT                    2  /* DSP1_SYS_ENA */
+#define ADSP2_SYS_ENA_WIDTH                    1  /* DSP1_SYS_ENA */
+#define ADSP2_CORE_ENA                    0x0002  /* DSP1_CORE_ENA */
+#define ADSP2_CORE_ENA_MASK               0x0002  /* DSP1_CORE_ENA */
+#define ADSP2_CORE_ENA_SHIFT                   1  /* DSP1_CORE_ENA */
+#define ADSP2_CORE_ENA_WIDTH                   1  /* DSP1_CORE_ENA */
+#define ADSP2_START                       0x0001  /* DSP1_START */
+#define ADSP2_START_MASK                  0x0001  /* DSP1_START */
+#define ADSP2_START_SHIFT                      0  /* DSP1_START */
+#define ADSP2_START_WIDTH                      1  /* DSP1_START */
+
+/*
+ * ADSP2 clocking
+ */
+#define ADSP2_CLK_SEL_MASK                0x0007  /* CLK_SEL_ENA */
+#define ADSP2_CLK_SEL_SHIFT                    0  /* CLK_SEL_ENA */
+#define ADSP2_CLK_SEL_WIDTH                    3  /* CLK_SEL_ENA */
+
+/*
+ * ADSP2V2 clocking
+ */
+#define ADSP2V2_CLK_SEL_MASK             0x70000  /* CLK_SEL_ENA */
+#define ADSP2V2_CLK_SEL_SHIFT                 16  /* CLK_SEL_ENA */
+#define ADSP2V2_CLK_SEL_WIDTH                  3  /* CLK_SEL_ENA */
+
+#define ADSP2V2_RATE_MASK                 0x7800  /* DSP_RATE */
+#define ADSP2V2_RATE_SHIFT                    11  /* DSP_RATE */
+#define ADSP2V2_RATE_WIDTH                     4  /* DSP_RATE */
+
+/*
+ * ADSP2 Status 1
+ */
+#define ADSP2_RAM_RDY                     0x0001
+#define ADSP2_RAM_RDY_MASK                0x0001
+#define ADSP2_RAM_RDY_SHIFT                    0
+#define ADSP2_RAM_RDY_WIDTH                    1
+
+/*
+ * ADSP2 Lock support
+ */
+#define ADSP2_LOCK_CODE_0                    0x5555
+#define ADSP2_LOCK_CODE_1                    0xAAAA
+
+#define ADSP2_WATCHDOG                       0x0A
+#define ADSP2_BUS_ERR_ADDR                   0x52
+#define ADSP2_REGION_LOCK_STATUS             0x64
+#define ADSP2_LOCK_REGION_1_LOCK_REGION_0    0x66
+#define ADSP2_LOCK_REGION_3_LOCK_REGION_2    0x68
+#define ADSP2_LOCK_REGION_5_LOCK_REGION_4    0x6A
+#define ADSP2_LOCK_REGION_7_LOCK_REGION_6    0x6C
+#define ADSP2_LOCK_REGION_9_LOCK_REGION_8    0x6E
+#define ADSP2_LOCK_REGION_CTRL               0x7A
+#define ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR    0x7C
+
+#define ADSP2_REGION_LOCK_ERR_MASK           0x8000
+#define ADSP2_ADDR_ERR_MASK                  0x4000
+#define ADSP2_WDT_TIMEOUT_STS_MASK           0x2000
+#define ADSP2_CTRL_ERR_PAUSE_ENA             0x0002
+#define ADSP2_CTRL_ERR_EINT                  0x0001
+
+#define ADSP2_BUS_ERR_ADDR_MASK              0x00FFFFFF
+#define ADSP2_XMEM_ERR_ADDR_MASK             0x0000FFFF
+#define ADSP2_PMEM_ERR_ADDR_MASK             0x7FFF0000
+#define ADSP2_PMEM_ERR_ADDR_SHIFT            16
+#define ADSP2_WDT_ENA_MASK                   0xFFFFFFFD
+
+#define ADSP2_LOCK_REGION_SHIFT              16
+
+/*
+ * Event control messages
+ */
+#define CS_DSP_FW_EVENT_SHUTDOWN             0x000001
+
+/*
+ * HALO system info
+ */
+#define HALO_AHBM_WINDOW_DEBUG_0             0x02040
+#define HALO_AHBM_WINDOW_DEBUG_1             0x02044
+
+/*
+ * HALO core
+ */
+#define HALO_SCRATCH1                        0x005c0
+#define HALO_SCRATCH2                        0x005c8
+#define HALO_SCRATCH3                        0x005d0
+#define HALO_SCRATCH4                        0x005d8
+#define HALO_CCM_CORE_CONTROL                0x41000
+#define HALO_CORE_SOFT_RESET                 0x00010
+#define HALO_WDT_CONTROL                     0x47000
+
+/*
+ * HALO MPU banks
+ */
+#define HALO_MPU_XMEM_ACCESS_0               0x43000
+#define HALO_MPU_YMEM_ACCESS_0               0x43004
+#define HALO_MPU_WINDOW_ACCESS_0             0x43008
+#define HALO_MPU_XREG_ACCESS_0               0x4300C
+#define HALO_MPU_YREG_ACCESS_0               0x43014
+#define HALO_MPU_XMEM_ACCESS_1               0x43018
+#define HALO_MPU_YMEM_ACCESS_1               0x4301C
+#define HALO_MPU_WINDOW_ACCESS_1             0x43020
+#define HALO_MPU_XREG_ACCESS_1               0x43024
+#define HALO_MPU_YREG_ACCESS_1               0x4302C
+#define HALO_MPU_XMEM_ACCESS_2               0x43030
+#define HALO_MPU_YMEM_ACCESS_2               0x43034
+#define HALO_MPU_WINDOW_ACCESS_2             0x43038
+#define HALO_MPU_XREG_ACCESS_2               0x4303C
+#define HALO_MPU_YREG_ACCESS_2               0x43044
+#define HALO_MPU_XMEM_ACCESS_3               0x43048
+#define HALO_MPU_YMEM_ACCESS_3               0x4304C
+#define HALO_MPU_WINDOW_ACCESS_3             0x43050
+#define HALO_MPU_XREG_ACCESS_3               0x43054
+#define HALO_MPU_YREG_ACCESS_3               0x4305C
+#define HALO_MPU_XM_VIO_ADDR                 0x43100
+#define HALO_MPU_XM_VIO_STATUS               0x43104
+#define HALO_MPU_YM_VIO_ADDR                 0x43108
+#define HALO_MPU_YM_VIO_STATUS               0x4310C
+#define HALO_MPU_PM_VIO_ADDR                 0x43110
+#define HALO_MPU_PM_VIO_STATUS               0x43114
+#define HALO_MPU_LOCK_CONFIG                 0x43140
+
+/*
+ * HALO_AHBM_WINDOW_DEBUG_1
+ */
+#define HALO_AHBM_CORE_ERR_ADDR_MASK         0x0fffff00
+#define HALO_AHBM_CORE_ERR_ADDR_SHIFT                 8
+#define HALO_AHBM_FLAGS_ERR_MASK             0x000000ff
+
+/*
+ * HALO_CCM_CORE_CONTROL
+ */
+#define HALO_CORE_RESET                     0x00000200
+#define HALO_CORE_EN                        0x00000001
+
+/*
+ * HALO_CORE_SOFT_RESET
+ */
+#define HALO_CORE_SOFT_RESET_MASK           0x00000001
+
+/*
+ * HALO_WDT_CONTROL
+ */
+#define HALO_WDT_EN_MASK                    0x00000001
+
+/*
+ * HALO_MPU_?M_VIO_STATUS
+ */
+#define HALO_MPU_VIO_STS_MASK               0x007e0000
+#define HALO_MPU_VIO_STS_SHIFT                      17
+#define HALO_MPU_VIO_ERR_WR_MASK            0x00008000
+#define HALO_MPU_VIO_ERR_SRC_MASK           0x00007fff
+#define HALO_MPU_VIO_ERR_SRC_SHIFT                   0
+
+struct cs_dsp_ops {
+	bool (*validate_version)(struct cs_dsp *dsp, unsigned int version);
+	unsigned int (*parse_sizes)(struct cs_dsp *dsp,
+				    const char * const file,
+				    unsigned int pos,
+				    const struct firmware *firmware);
+	int (*setup_algs)(struct cs_dsp *dsp);
+	unsigned int (*region_to_reg)(struct cs_dsp_region const *mem,
+				      unsigned int offset);
+
+	void (*show_fw_status)(struct cs_dsp *dsp);
+	void (*stop_watchdog)(struct cs_dsp *dsp);
+
+	int (*enable_memory)(struct cs_dsp *dsp);
+	void (*disable_memory)(struct cs_dsp *dsp);
+	int (*lock_memory)(struct cs_dsp *dsp, unsigned int lock_regions);
+
+	int (*enable_core)(struct cs_dsp *dsp);
+	void (*disable_core)(struct cs_dsp *dsp);
+
+	int (*start_core)(struct cs_dsp *dsp);
+	void (*stop_core)(struct cs_dsp *dsp);
+};
+
+static const struct cs_dsp_ops cs_dsp_adsp1_ops;
+static const struct cs_dsp_ops cs_dsp_adsp2_ops[];
+static const struct cs_dsp_ops cs_dsp_halo_ops;
+
+struct cs_dsp_buf {
+	struct list_head list;
+	void *buf;
+};
+
+static struct cs_dsp_buf *cs_dsp_buf_alloc(const void *src, size_t len,
+					   struct list_head *list)
+{
+	struct cs_dsp_buf *buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+
+	if (buf == NULL)
+		return NULL;
+
+	buf->buf = vmalloc(len);
+	if (!buf->buf) {
+		kfree(buf);
+		return NULL;
+	}
+	memcpy(buf->buf, src, len);
+
+	if (list)
+		list_add_tail(&buf->list, list);
+
+	return buf;
+}
+
+static void cs_dsp_buf_free(struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct cs_dsp_buf *buf = list_first_entry(list,
+							  struct cs_dsp_buf,
+							  list);
+		list_del(&buf->list);
+		vfree(buf->buf);
+		kfree(buf);
+	}
+}
+
+/**
+ * cs_dsp_mem_region_name() - Return a name string for a memory type
+ * @type: the memory type to match
+ *
+ * Return: A const string identifying the memory region.
+ */
+const char *cs_dsp_mem_region_name(unsigned int type)
+{
+	switch (type) {
+	case WMFW_ADSP1_PM:
+		return "PM";
+	case WMFW_HALO_PM_PACKED:
+		return "PM_PACKED";
+	case WMFW_ADSP1_DM:
+		return "DM";
+	case WMFW_ADSP2_XM:
+		return "XM";
+	case WMFW_HALO_XM_PACKED:
+		return "XM_PACKED";
+	case WMFW_ADSP2_YM:
+		return "YM";
+	case WMFW_HALO_YM_PACKED:
+		return "YM_PACKED";
+	case WMFW_ADSP1_ZM:
+		return "ZM";
+	default:
+		return NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(cs_dsp_mem_region_name);
+
+#ifdef CONFIG_DEBUG_FS
+static void cs_dsp_debugfs_save_wmfwname(struct cs_dsp *dsp, const char *s)
+{
+	char *tmp = kasprintf(GFP_KERNEL, "%s\n", s);
+
+	kfree(dsp->wmfw_file_name);
+	dsp->wmfw_file_name = tmp;
+}
+
+static void cs_dsp_debugfs_save_binname(struct cs_dsp *dsp, const char *s)
+{
+	char *tmp = kasprintf(GFP_KERNEL, "%s\n", s);
+
+	kfree(dsp->bin_file_name);
+	dsp->bin_file_name = tmp;
+}
+
+static void cs_dsp_debugfs_clear(struct cs_dsp *dsp)
+{
+	kfree(dsp->wmfw_file_name);
+	kfree(dsp->bin_file_name);
+	dsp->wmfw_file_name = NULL;
+	dsp->bin_file_name = NULL;
+}
+
+static ssize_t cs_dsp_debugfs_wmfw_read(struct file *file,
+					char __user *user_buf,
+					size_t count, loff_t *ppos)
+{
+	struct cs_dsp *dsp = file->private_data;
+	ssize_t ret;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	if (!dsp->wmfw_file_name || !dsp->booted)
+		ret = 0;
+	else
+		ret = simple_read_from_buffer(user_buf, count, ppos,
+					      dsp->wmfw_file_name,
+					      strlen(dsp->wmfw_file_name));
+
+	mutex_unlock(&dsp->pwr_lock);
+	return ret;
+}
+
+static ssize_t cs_dsp_debugfs_bin_read(struct file *file,
+				       char __user *user_buf,
+				       size_t count, loff_t *ppos)
+{
+	struct cs_dsp *dsp = file->private_data;
+	ssize_t ret;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	if (!dsp->bin_file_name || !dsp->booted)
+		ret = 0;
+	else
+		ret = simple_read_from_buffer(user_buf, count, ppos,
+					      dsp->bin_file_name,
+					      strlen(dsp->bin_file_name));
+
+	mutex_unlock(&dsp->pwr_lock);
+	return ret;
+}
+
+static const struct {
+	const char *name;
+	const struct file_operations fops;
+} cs_dsp_debugfs_fops[] = {
+	{
+		.name = "wmfw_file_name",
+		.fops = {
+			.open = simple_open,
+			.read = cs_dsp_debugfs_wmfw_read,
+		},
+	},
+	{
+		.name = "bin_file_name",
+		.fops = {
+			.open = simple_open,
+			.read = cs_dsp_debugfs_bin_read,
+		},
+	},
+};
+
+/**
+ * cs_dsp_init_debugfs() - Create and populate DSP representation in debugfs
+ * @dsp: pointer to DSP structure
+ * @debugfs_root: pointer to debugfs directory in which to create this DSP
+ *                representation
+ */
+void cs_dsp_init_debugfs(struct cs_dsp *dsp, struct dentry *debugfs_root)
+{
+	struct dentry *root = NULL;
+	int i;
+
+	root = debugfs_create_dir(dsp->name, debugfs_root);
+
+	debugfs_create_bool("booted", 0444, root, &dsp->booted);
+	debugfs_create_bool("running", 0444, root, &dsp->running);
+	debugfs_create_x32("fw_id", 0444, root, &dsp->fw_id);
+	debugfs_create_x32("fw_version", 0444, root, &dsp->fw_id_version);
+
+	for (i = 0; i < ARRAY_SIZE(cs_dsp_debugfs_fops); ++i)
+		debugfs_create_file(cs_dsp_debugfs_fops[i].name, 0444, root,
+				    dsp, &cs_dsp_debugfs_fops[i].fops);
+
+	dsp->debugfs_root = root;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_init_debugfs);
+
+/**
+ * cs_dsp_cleanup_debugfs() - Removes DSP representation from debugfs
+ * @dsp: pointer to DSP structure
+ */
+void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp)
+{
+	cs_dsp_debugfs_clear(dsp);
+	debugfs_remove_recursive(dsp->debugfs_root);
+	dsp->debugfs_root = NULL;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_cleanup_debugfs);
+#else
+void cs_dsp_init_debugfs(struct cs_dsp *dsp, struct dentry *debugfs_root)
+{
+}
+EXPORT_SYMBOL_GPL(cs_dsp_init_debugfs);
+
+void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp)
+{
+}
+EXPORT_SYMBOL_GPL(cs_dsp_cleanup_debugfs);
+
+static inline void cs_dsp_debugfs_save_wmfwname(struct cs_dsp *dsp,
+						const char *s)
+{
+}
+
+static inline void cs_dsp_debugfs_save_binname(struct cs_dsp *dsp,
+					       const char *s)
+{
+}
+
+static inline void cs_dsp_debugfs_clear(struct cs_dsp *dsp)
+{
+}
+#endif
+
+static const struct cs_dsp_region *cs_dsp_find_region(struct cs_dsp *dsp,
+						      int type)
+{
+	int i;
+
+	for (i = 0; i < dsp->num_mems; i++)
+		if (dsp->mem[i].type == type)
+			return &dsp->mem[i];
+
+	return NULL;
+}
+
+static unsigned int cs_dsp_region_to_reg(struct cs_dsp_region const *mem,
+					 unsigned int offset)
+{
+	switch (mem->type) {
+	case WMFW_ADSP1_PM:
+		return mem->base + (offset * 3);
+	case WMFW_ADSP1_DM:
+	case WMFW_ADSP2_XM:
+	case WMFW_ADSP2_YM:
+	case WMFW_ADSP1_ZM:
+		return mem->base + (offset * 2);
+	default:
+		WARN(1, "Unknown memory region type");
+		return offset;
+	}
+}
+
+static unsigned int cs_dsp_halo_region_to_reg(struct cs_dsp_region const *mem,
+					      unsigned int offset)
+{
+	switch (mem->type) {
+	case WMFW_ADSP2_XM:
+	case WMFW_ADSP2_YM:
+		return mem->base + (offset * 4);
+	case WMFW_HALO_XM_PACKED:
+	case WMFW_HALO_YM_PACKED:
+		return (mem->base + (offset * 3)) & ~0x3;
+	case WMFW_HALO_PM_PACKED:
+		return mem->base + (offset * 5);
+	default:
+		WARN(1, "Unknown memory region type");
+		return offset;
+	}
+}
+
+static void cs_dsp_read_fw_status(struct cs_dsp *dsp,
+				  int noffs, unsigned int *offs)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < noffs; ++i) {
+		ret = regmap_read(dsp->regmap, dsp->base + offs[i], &offs[i]);
+		if (ret) {
+			cs_dsp_err(dsp, "Failed to read SCRATCH%u: %d\n", i, ret);
+			return;
+		}
+	}
+}
+
+static void cs_dsp_adsp2_show_fw_status(struct cs_dsp *dsp)
+{
+	unsigned int offs[] = {
+		ADSP2_SCRATCH0, ADSP2_SCRATCH1, ADSP2_SCRATCH2, ADSP2_SCRATCH3,
+	};
+
+	cs_dsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs);
+
+	cs_dsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
+		   offs[0], offs[1], offs[2], offs[3]);
+}
+
+static void cs_dsp_adsp2v2_show_fw_status(struct cs_dsp *dsp)
+{
+	unsigned int offs[] = { ADSP2V2_SCRATCH0_1, ADSP2V2_SCRATCH2_3 };
+
+	cs_dsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs);
+
+	cs_dsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
+		   offs[0] & 0xFFFF, offs[0] >> 16,
+		   offs[1] & 0xFFFF, offs[1] >> 16);
+}
+
+static void cs_dsp_halo_show_fw_status(struct cs_dsp *dsp)
+{
+	unsigned int offs[] = {
+		HALO_SCRATCH1, HALO_SCRATCH2, HALO_SCRATCH3, HALO_SCRATCH4,
+	};
+
+	cs_dsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs);
+
+	cs_dsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
+		   offs[0], offs[1], offs[2], offs[3]);
+}
+
+static int cs_dsp_coeff_base_reg(struct cs_dsp_coeff_ctl *ctl, unsigned int *reg)
+{
+	const struct cs_dsp_alg_region *alg_region = &ctl->alg_region;
+	struct cs_dsp *dsp = ctl->dsp;
+	const struct cs_dsp_region *mem;
+
+	mem = cs_dsp_find_region(dsp, alg_region->type);
+	if (!mem) {
+		cs_dsp_err(dsp, "No base for region %x\n",
+			   alg_region->type);
+		return -EINVAL;
+	}
+
+	*reg = dsp->ops->region_to_reg(mem, ctl->alg_region.base + ctl->offset);
+
+	return 0;
+}
+
+/**
+ * cs_dsp_coeff_write_acked_control() - Sends event_id to the acked control
+ * @ctl: pointer to acked coefficient control
+ * @event_id: the value to write to the given acked control
+ *
+ * Once the value has been written to the control the function shall block
+ * until the running firmware acknowledges the write or timeout is exceeded.
+ *
+ * Must be called with pwr_lock held.
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl, unsigned int event_id)
+{
+	struct cs_dsp *dsp = ctl->dsp;
+	__be32 val = cpu_to_be32(event_id);
+	unsigned int reg;
+	int i, ret;
+
+	if (!dsp->running)
+		return -EPERM;
+
+	ret = cs_dsp_coeff_base_reg(ctl, &reg);
+	if (ret)
+		return ret;
+
+	cs_dsp_dbg(dsp, "Sending 0x%x to acked control alg 0x%x %s:0x%x\n",
+		   event_id, ctl->alg_region.alg,
+		   cs_dsp_mem_region_name(ctl->alg_region.type), ctl->offset);
+
+	ret = regmap_raw_write(dsp->regmap, reg, &val, sizeof(val));
+	if (ret) {
+		cs_dsp_err(dsp, "Failed to write %x: %d\n", reg, ret);
+		return ret;
+	}
+
+	/*
+	 * Poll for ack, we initially poll at ~1ms intervals for firmwares
+	 * that respond quickly, then go to ~10ms polls. A firmware is unlikely
+	 * to ack instantly so we do the first 1ms delay before reading the
+	 * control to avoid a pointless bus transaction
+	 */
+	for (i = 0; i < CS_DSP_ACKED_CTL_TIMEOUT_MS;) {
+		switch (i) {
+		case 0 ... CS_DSP_ACKED_CTL_N_QUICKPOLLS - 1:
+			usleep_range(1000, 2000);
+			i++;
+			break;
+		default:
+			usleep_range(10000, 20000);
+			i += 10;
+			break;
+		}
+
+		ret = regmap_raw_read(dsp->regmap, reg, &val, sizeof(val));
+		if (ret) {
+			cs_dsp_err(dsp, "Failed to read %x: %d\n", reg, ret);
+			return ret;
+		}
+
+		if (val == 0) {
+			cs_dsp_dbg(dsp, "Acked control ACKED at poll %u\n", i);
+			return 0;
+		}
+	}
+
+	cs_dsp_warn(dsp, "Acked control @0x%x alg:0x%x %s:0x%x timed out\n",
+		    reg, ctl->alg_region.alg,
+		    cs_dsp_mem_region_name(ctl->alg_region.type),
+		    ctl->offset);
+
+	return -ETIMEDOUT;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_coeff_write_acked_control);
+
+static int cs_dsp_coeff_write_ctrl_raw(struct cs_dsp_coeff_ctl *ctl,
+				       const void *buf, size_t len)
+{
+	struct cs_dsp *dsp = ctl->dsp;
+	void *scratch;
+	int ret;
+	unsigned int reg;
+
+	ret = cs_dsp_coeff_base_reg(ctl, &reg);
+	if (ret)
+		return ret;
+
+	scratch = kmemdup(buf, len, GFP_KERNEL | GFP_DMA);
+	if (!scratch)
+		return -ENOMEM;
+
+	ret = regmap_raw_write(dsp->regmap, reg, scratch,
+			       len);
+	if (ret) {
+		cs_dsp_err(dsp, "Failed to write %zu bytes to %x: %d\n",
+			   len, reg, ret);
+		kfree(scratch);
+		return ret;
+	}
+	cs_dsp_dbg(dsp, "Wrote %zu bytes to %x\n", len, reg);
+
+	kfree(scratch);
+
+	return 0;
+}
+
+/**
+ * cs_dsp_coeff_write_ctrl() - Writes the given buffer to the given coefficient control
+ * @ctl: pointer to coefficient control
+ * @buf: the buffer to write to the given control
+ * @len: the length of the buffer
+ *
+ * Must be called with pwr_lock held.
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, const void *buf, size_t len)
+{
+	int ret = 0;
+
+	if (ctl->flags & WMFW_CTL_FLAG_VOLATILE)
+		ret = -EPERM;
+	else if (buf != ctl->cache)
+		memcpy(ctl->cache, buf, len);
+
+	ctl->set = 1;
+	if (ctl->enabled && ctl->dsp->running)
+		ret = cs_dsp_coeff_write_ctrl_raw(ctl, buf, len);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_coeff_write_ctrl);
+
+static int cs_dsp_coeff_read_ctrl_raw(struct cs_dsp_coeff_ctl *ctl, void *buf, size_t len)
+{
+	struct cs_dsp *dsp = ctl->dsp;
+	void *scratch;
+	int ret;
+	unsigned int reg;
+
+	ret = cs_dsp_coeff_base_reg(ctl, &reg);
+	if (ret)
+		return ret;
+
+	scratch = kmalloc(len, GFP_KERNEL | GFP_DMA);
+	if (!scratch)
+		return -ENOMEM;
+
+	ret = regmap_raw_read(dsp->regmap, reg, scratch, len);
+	if (ret) {
+		cs_dsp_err(dsp, "Failed to read %zu bytes from %x: %d\n",
+			   len, reg, ret);
+		kfree(scratch);
+		return ret;
+	}
+	cs_dsp_dbg(dsp, "Read %zu bytes from %x\n", len, reg);
+
+	memcpy(buf, scratch, len);
+	kfree(scratch);
+
+	return 0;
+}
+
+/**
+ * cs_dsp_coeff_read_ctrl() - Reads the given coefficient control into the given buffer
+ * @ctl: pointer to coefficient control
+ * @buf: the buffer to store to the given control
+ * @len: the length of the buffer
+ *
+ * Must be called with pwr_lock held.
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, void *buf, size_t len)
+{
+	int ret = 0;
+
+	if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) {
+		if (ctl->enabled && ctl->dsp->running)
+			return cs_dsp_coeff_read_ctrl_raw(ctl, buf, len);
+		else
+			return -EPERM;
+	} else {
+		if (!ctl->flags && ctl->enabled && ctl->dsp->running)
+			ret = cs_dsp_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len);
+
+		if (buf != ctl->cache)
+			memcpy(buf, ctl->cache, len);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_coeff_read_ctrl);
+
+static int cs_dsp_coeff_init_control_caches(struct cs_dsp *dsp)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+	int ret;
+
+	list_for_each_entry(ctl, &dsp->ctl_list, list) {
+		if (!ctl->enabled || ctl->set)
+			continue;
+		if (ctl->flags & WMFW_CTL_FLAG_VOLATILE)
+			continue;
+
+		/*
+		 * For readable controls populate the cache from the DSP memory.
+		 * For non-readable controls the cache was zero-filled when
+		 * created so we don't need to do anything.
+		 */
+		if (!ctl->flags || (ctl->flags & WMFW_CTL_FLAG_READABLE)) {
+			ret = cs_dsp_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int cs_dsp_coeff_sync_controls(struct cs_dsp *dsp)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+	int ret;
+
+	list_for_each_entry(ctl, &dsp->ctl_list, list) {
+		if (!ctl->enabled)
+			continue;
+		if (ctl->set && !(ctl->flags & WMFW_CTL_FLAG_VOLATILE)) {
+			ret = cs_dsp_coeff_write_ctrl_raw(ctl, ctl->cache,
+							  ctl->len);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void cs_dsp_signal_event_controls(struct cs_dsp *dsp,
+					 unsigned int event)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+	int ret;
+
+	list_for_each_entry(ctl, &dsp->ctl_list, list) {
+		if (ctl->type != WMFW_CTL_TYPE_HOSTEVENT)
+			continue;
+
+		if (!ctl->enabled)
+			continue;
+
+		ret = cs_dsp_coeff_write_acked_control(ctl, event);
+		if (ret)
+			cs_dsp_warn(dsp,
+				    "Failed to send 0x%x event to alg 0x%x (%d)\n",
+				    event, ctl->alg_region.alg, ret);
+	}
+}
+
+static void cs_dsp_free_ctl_blk(struct cs_dsp_coeff_ctl *ctl)
+{
+	kfree(ctl->cache);
+	kfree(ctl->subname);
+	kfree(ctl);
+}
+
+static int cs_dsp_create_control(struct cs_dsp *dsp,
+				 const struct cs_dsp_alg_region *alg_region,
+				 unsigned int offset, unsigned int len,
+				 const char *subname, unsigned int subname_len,
+				 unsigned int flags, unsigned int type)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+	int ret;
+
+	list_for_each_entry(ctl, &dsp->ctl_list, list) {
+		if (ctl->fw_name == dsp->fw_name &&
+		    ctl->alg_region.alg == alg_region->alg &&
+		    ctl->alg_region.type == alg_region->type) {
+			if ((!subname && !ctl->subname) ||
+			    (subname && !strncmp(ctl->subname, subname, ctl->subname_len))) {
+				if (!ctl->enabled)
+					ctl->enabled = 1;
+				return 0;
+			}
+		}
+	}
+
+	ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
+	if (!ctl)
+		return -ENOMEM;
+
+	ctl->fw_name = dsp->fw_name;
+	ctl->alg_region = *alg_region;
+	if (subname && dsp->fw_ver >= 2) {
+		ctl->subname_len = subname_len;
+		ctl->subname = kmemdup(subname,
+				       strlen(subname) + 1, GFP_KERNEL);
+		if (!ctl->subname) {
+			ret = -ENOMEM;
+			goto err_ctl;
+		}
+	}
+	ctl->enabled = 1;
+	ctl->set = 0;
+	ctl->dsp = dsp;
+
+	ctl->flags = flags;
+	ctl->type = type;
+	ctl->offset = offset;
+	ctl->len = len;
+	ctl->cache = kzalloc(ctl->len, GFP_KERNEL);
+	if (!ctl->cache) {
+		ret = -ENOMEM;
+		goto err_ctl_subname;
+	}
+
+	list_add(&ctl->list, &dsp->ctl_list);
+
+	if (dsp->client_ops->control_add) {
+		ret = dsp->client_ops->control_add(ctl);
+		if (ret)
+			goto err_list_del;
+	}
+
+	return 0;
+
+err_list_del:
+	list_del(&ctl->list);
+	kfree(ctl->cache);
+err_ctl_subname:
+	kfree(ctl->subname);
+err_ctl:
+	kfree(ctl);
+
+	return ret;
+}
+
+struct cs_dsp_coeff_parsed_alg {
+	int id;
+	const u8 *name;
+	int name_len;
+	int ncoeff;
+};
+
+struct cs_dsp_coeff_parsed_coeff {
+	int offset;
+	int mem_type;
+	const u8 *name;
+	int name_len;
+	unsigned int ctl_type;
+	int flags;
+	int len;
+};
+
+static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str)
+{
+	int length;
+
+	switch (bytes) {
+	case 1:
+		length = **pos;
+		break;
+	case 2:
+		length = le16_to_cpu(*((__le16 *)*pos));
+		break;
+	default:
+		return 0;
+	}
+
+	if (str)
+		*str = *pos + bytes;
+
+	*pos += ((length + bytes) + 3) & ~0x03;
+
+	return length;
+}
+
+static int cs_dsp_coeff_parse_int(int bytes, const u8 **pos)
+{
+	int val = 0;
+
+	switch (bytes) {
+	case 2:
+		val = le16_to_cpu(*((__le16 *)*pos));
+		break;
+	case 4:
+		val = le32_to_cpu(*((__le32 *)*pos));
+		break;
+	default:
+		break;
+	}
+
+	*pos += bytes;
+
+	return val;
+}
+
+static inline void cs_dsp_coeff_parse_alg(struct cs_dsp *dsp, const u8 **data,
+					  struct cs_dsp_coeff_parsed_alg *blk)
+{
+	const struct wmfw_adsp_alg_data *raw;
+
+	switch (dsp->fw_ver) {
+	case 0:
+	case 1:
+		raw = (const struct wmfw_adsp_alg_data *)*data;
+		*data = raw->data;
+
+		blk->id = le32_to_cpu(raw->id);
+		blk->name = raw->name;
+		blk->name_len = strlen(raw->name);
+		blk->ncoeff = le32_to_cpu(raw->ncoeff);
+		break;
+	default:
+		blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), data);
+		blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), data,
+							  &blk->name);
+		cs_dsp_coeff_parse_string(sizeof(u16), data, NULL);
+		blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), data);
+		break;
+	}
+
+	cs_dsp_dbg(dsp, "Algorithm ID: %#x\n", blk->id);
+	cs_dsp_dbg(dsp, "Algorithm name: %.*s\n", blk->name_len, blk->name);
+	cs_dsp_dbg(dsp, "# of coefficient descriptors: %#x\n", blk->ncoeff);
+}
+
+static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data,
+					    struct cs_dsp_coeff_parsed_coeff *blk)
+{
+	const struct wmfw_adsp_coeff_data *raw;
+	const u8 *tmp;
+	int length;
+
+	switch (dsp->fw_ver) {
+	case 0:
+	case 1:
+		raw = (const struct wmfw_adsp_coeff_data *)*data;
+		*data = *data + sizeof(raw->hdr) + le32_to_cpu(raw->hdr.size);
+
+		blk->offset = le16_to_cpu(raw->hdr.offset);
+		blk->mem_type = le16_to_cpu(raw->hdr.type);
+		blk->name = raw->name;
+		blk->name_len = strlen(raw->name);
+		blk->ctl_type = le16_to_cpu(raw->ctl_type);
+		blk->flags = le16_to_cpu(raw->flags);
+		blk->len = le32_to_cpu(raw->len);
+		break;
+	default:
+		tmp = *data;
+		blk->offset = cs_dsp_coeff_parse_int(sizeof(raw->hdr.offset), &tmp);
+		blk->mem_type = cs_dsp_coeff_parse_int(sizeof(raw->hdr.type), &tmp);
+		length = cs_dsp_coeff_parse_int(sizeof(raw->hdr.size), &tmp);
+		blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp,
+							  &blk->name);
+		cs_dsp_coeff_parse_string(sizeof(u8), &tmp, NULL);
+		cs_dsp_coeff_parse_string(sizeof(u16), &tmp, NULL);
+		blk->ctl_type = cs_dsp_coeff_parse_int(sizeof(raw->ctl_type), &tmp);
+		blk->flags = cs_dsp_coeff_parse_int(sizeof(raw->flags), &tmp);
+		blk->len = cs_dsp_coeff_parse_int(sizeof(raw->len), &tmp);
+
+		*data = *data + sizeof(raw->hdr) + length;
+		break;
+	}
+
+	cs_dsp_dbg(dsp, "\tCoefficient type: %#x\n", blk->mem_type);
+	cs_dsp_dbg(dsp, "\tCoefficient offset: %#x\n", blk->offset);
+	cs_dsp_dbg(dsp, "\tCoefficient name: %.*s\n", blk->name_len, blk->name);
+	cs_dsp_dbg(dsp, "\tCoefficient flags: %#x\n", blk->flags);
+	cs_dsp_dbg(dsp, "\tALSA control type: %#x\n", blk->ctl_type);
+	cs_dsp_dbg(dsp, "\tALSA control len: %#x\n", blk->len);
+}
+
+static int cs_dsp_check_coeff_flags(struct cs_dsp *dsp,
+				    const struct cs_dsp_coeff_parsed_coeff *coeff_blk,
+				    unsigned int f_required,
+				    unsigned int f_illegal)
+{
+	if ((coeff_blk->flags & f_illegal) ||
+	    ((coeff_blk->flags & f_required) != f_required)) {
+		cs_dsp_err(dsp, "Illegal flags 0x%x for control type 0x%x\n",
+			   coeff_blk->flags, coeff_blk->ctl_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cs_dsp_parse_coeff(struct cs_dsp *dsp,
+			      const struct wmfw_region *region)
+{
+	struct cs_dsp_alg_region alg_region = {};
+	struct cs_dsp_coeff_parsed_alg alg_blk;
+	struct cs_dsp_coeff_parsed_coeff coeff_blk;
+	const u8 *data = region->data;
+	int i, ret;
+
+	cs_dsp_coeff_parse_alg(dsp, &data, &alg_blk);
+	for (i = 0; i < alg_blk.ncoeff; i++) {
+		cs_dsp_coeff_parse_coeff(dsp, &data, &coeff_blk);
+
+		switch (coeff_blk.ctl_type) {
+		case WMFW_CTL_TYPE_BYTES:
+			break;
+		case WMFW_CTL_TYPE_ACKED:
+			if (coeff_blk.flags & WMFW_CTL_FLAG_SYS)
+				continue;	/* ignore */
+
+			ret = cs_dsp_check_coeff_flags(dsp, &coeff_blk,
+						       WMFW_CTL_FLAG_VOLATILE |
+						       WMFW_CTL_FLAG_WRITEABLE |
+						       WMFW_CTL_FLAG_READABLE,
+						       0);
+			if (ret)
+				return -EINVAL;
+			break;
+		case WMFW_CTL_TYPE_HOSTEVENT:
+			ret = cs_dsp_check_coeff_flags(dsp, &coeff_blk,
+						       WMFW_CTL_FLAG_SYS |
+						       WMFW_CTL_FLAG_VOLATILE |
+						       WMFW_CTL_FLAG_WRITEABLE |
+						       WMFW_CTL_FLAG_READABLE,
+						       0);
+			if (ret)
+				return -EINVAL;
+			break;
+		case WMFW_CTL_TYPE_HOST_BUFFER:
+			ret = cs_dsp_check_coeff_flags(dsp, &coeff_blk,
+						       WMFW_CTL_FLAG_SYS |
+						       WMFW_CTL_FLAG_VOLATILE |
+						       WMFW_CTL_FLAG_READABLE,
+						       0);
+			if (ret)
+				return -EINVAL;
+			break;
+		default:
+			cs_dsp_err(dsp, "Unknown control type: %d\n",
+				   coeff_blk.ctl_type);
+			return -EINVAL;
+		}
+
+		alg_region.type = coeff_blk.mem_type;
+		alg_region.alg = alg_blk.id;
+
+		ret = cs_dsp_create_control(dsp, &alg_region,
+					    coeff_blk.offset,
+					    coeff_blk.len,
+					    coeff_blk.name,
+					    coeff_blk.name_len,
+					    coeff_blk.flags,
+					    coeff_blk.ctl_type);
+		if (ret < 0)
+			cs_dsp_err(dsp, "Failed to create control: %.*s, %d\n",
+				   coeff_blk.name_len, coeff_blk.name, ret);
+	}
+
+	return 0;
+}
+
+static unsigned int cs_dsp_adsp1_parse_sizes(struct cs_dsp *dsp,
+					     const char * const file,
+					     unsigned int pos,
+					     const struct firmware *firmware)
+{
+	const struct wmfw_adsp1_sizes *adsp1_sizes;
+
+	adsp1_sizes = (void *)&firmware->data[pos];
+
+	cs_dsp_dbg(dsp, "%s: %d DM, %d PM, %d ZM\n", file,
+		   le32_to_cpu(adsp1_sizes->dm), le32_to_cpu(adsp1_sizes->pm),
+		   le32_to_cpu(adsp1_sizes->zm));
+
+	return pos + sizeof(*adsp1_sizes);
+}
+
+static unsigned int cs_dsp_adsp2_parse_sizes(struct cs_dsp *dsp,
+					     const char * const file,
+					     unsigned int pos,
+					     const struct firmware *firmware)
+{
+	const struct wmfw_adsp2_sizes *adsp2_sizes;
+
+	adsp2_sizes = (void *)&firmware->data[pos];
+
+	cs_dsp_dbg(dsp, "%s: %d XM, %d YM %d PM, %d ZM\n", file,
+		   le32_to_cpu(adsp2_sizes->xm), le32_to_cpu(adsp2_sizes->ym),
+		   le32_to_cpu(adsp2_sizes->pm), le32_to_cpu(adsp2_sizes->zm));
+
+	return pos + sizeof(*adsp2_sizes);
+}
+
+static bool cs_dsp_validate_version(struct cs_dsp *dsp, unsigned int version)
+{
+	switch (version) {
+	case 0:
+		cs_dsp_warn(dsp, "Deprecated file format %d\n", version);
+		return true;
+	case 1:
+	case 2:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool cs_dsp_halo_validate_version(struct cs_dsp *dsp, unsigned int version)
+{
+	switch (version) {
+	case 3:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware,
+		       const char *file)
+{
+	LIST_HEAD(buf_list);
+	struct regmap *regmap = dsp->regmap;
+	unsigned int pos = 0;
+	const struct wmfw_header *header;
+	const struct wmfw_adsp1_sizes *adsp1_sizes;
+	const struct wmfw_footer *footer;
+	const struct wmfw_region *region;
+	const struct cs_dsp_region *mem;
+	const char *region_name;
+	char *text = NULL;
+	struct cs_dsp_buf *buf;
+	unsigned int reg;
+	int regions = 0;
+	int ret, offset, type;
+
+	ret = -EINVAL;
+
+	pos = sizeof(*header) + sizeof(*adsp1_sizes) + sizeof(*footer);
+	if (pos >= firmware->size) {
+		cs_dsp_err(dsp, "%s: file too short, %zu bytes\n",
+			   file, firmware->size);
+		goto out_fw;
+	}
+
+	header = (void *)&firmware->data[0];
+
+	if (memcmp(&header->magic[0], "WMFW", 4) != 0) {
+		cs_dsp_err(dsp, "%s: invalid magic\n", file);
+		goto out_fw;
+	}
+
+	if (!dsp->ops->validate_version(dsp, header->ver)) {
+		cs_dsp_err(dsp, "%s: unknown file format %d\n",
+			   file, header->ver);
+		goto out_fw;
+	}
+
+	cs_dsp_info(dsp, "Firmware version: %d\n", header->ver);
+	dsp->fw_ver = header->ver;
+
+	if (header->core != dsp->type) {
+		cs_dsp_err(dsp, "%s: invalid core %d != %d\n",
+			   file, header->core, dsp->type);
+		goto out_fw;
+	}
+
+	pos = sizeof(*header);
+	pos = dsp->ops->parse_sizes(dsp, file, pos, firmware);
+
+	footer = (void *)&firmware->data[pos];
+	pos += sizeof(*footer);
+
+	if (le32_to_cpu(header->len) != pos) {
+		cs_dsp_err(dsp, "%s: unexpected header length %d\n",
+			   file, le32_to_cpu(header->len));
+		goto out_fw;
+	}
+
+	cs_dsp_dbg(dsp, "%s: timestamp %llu\n", file,
+		   le64_to_cpu(footer->timestamp));
+
+	while (pos < firmware->size &&
+	       sizeof(*region) < firmware->size - pos) {
+		region = (void *)&(firmware->data[pos]);
+		region_name = "Unknown";
+		reg = 0;
+		text = NULL;
+		offset = le32_to_cpu(region->offset) & 0xffffff;
+		type = be32_to_cpu(region->type) & 0xff;
+
+		switch (type) {
+		case WMFW_NAME_TEXT:
+			region_name = "Firmware name";
+			text = kzalloc(le32_to_cpu(region->len) + 1,
+				       GFP_KERNEL);
+			break;
+		case WMFW_ALGORITHM_DATA:
+			region_name = "Algorithm";
+			ret = cs_dsp_parse_coeff(dsp, region);
+			if (ret != 0)
+				goto out_fw;
+			break;
+		case WMFW_INFO_TEXT:
+			region_name = "Information";
+			text = kzalloc(le32_to_cpu(region->len) + 1,
+				       GFP_KERNEL);
+			break;
+		case WMFW_ABSOLUTE:
+			region_name = "Absolute";
+			reg = offset;
+			break;
+		case WMFW_ADSP1_PM:
+		case WMFW_ADSP1_DM:
+		case WMFW_ADSP2_XM:
+		case WMFW_ADSP2_YM:
+		case WMFW_ADSP1_ZM:
+		case WMFW_HALO_PM_PACKED:
+		case WMFW_HALO_XM_PACKED:
+		case WMFW_HALO_YM_PACKED:
+			mem = cs_dsp_find_region(dsp, type);
+			if (!mem) {
+				cs_dsp_err(dsp, "No region of type: %x\n", type);
+				ret = -EINVAL;
+				goto out_fw;
+			}
+
+			region_name = cs_dsp_mem_region_name(type);
+			reg = dsp->ops->region_to_reg(mem, offset);
+			break;
+		default:
+			cs_dsp_warn(dsp,
+				    "%s.%d: Unknown region type %x at %d(%x)\n",
+				    file, regions, type, pos, pos);
+			break;
+		}
+
+		cs_dsp_dbg(dsp, "%s.%d: %d bytes at %d in %s\n", file,
+			   regions, le32_to_cpu(region->len), offset,
+			   region_name);
+
+		if (le32_to_cpu(region->len) >
+		    firmware->size - pos - sizeof(*region)) {
+			cs_dsp_err(dsp,
+				   "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+				   file, regions, region_name,
+				   le32_to_cpu(region->len), firmware->size);
+			ret = -EINVAL;
+			goto out_fw;
+		}
+
+		if (text) {
+			memcpy(text, region->data, le32_to_cpu(region->len));
+			cs_dsp_info(dsp, "%s: %s\n", file, text);
+			kfree(text);
+			text = NULL;
+		}
+
+		if (reg) {
+			buf = cs_dsp_buf_alloc(region->data,
+					       le32_to_cpu(region->len),
+					       &buf_list);
+			if (!buf) {
+				cs_dsp_err(dsp, "Out of memory\n");
+				ret = -ENOMEM;
+				goto out_fw;
+			}
+
+			ret = regmap_raw_write_async(regmap, reg, buf->buf,
+						     le32_to_cpu(region->len));
+			if (ret != 0) {
+				cs_dsp_err(dsp,
+					   "%s.%d: Failed to write %d bytes at %d in %s: %d\n",
+					   file, regions,
+					   le32_to_cpu(region->len), offset,
+					   region_name, ret);
+				goto out_fw;
+			}
+		}
+
+		pos += le32_to_cpu(region->len) + sizeof(*region);
+		regions++;
+	}
+
+	ret = regmap_async_complete(regmap);
+	if (ret != 0) {
+		cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret);
+		goto out_fw;
+	}
+
+	if (pos > firmware->size)
+		cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n",
+			    file, regions, pos - firmware->size);
+
+	cs_dsp_debugfs_save_wmfwname(dsp, file);
+
+out_fw:
+	regmap_async_complete(regmap);
+	cs_dsp_buf_free(&buf_list);
+	kfree(text);
+
+	return ret;
+}
+
+/**
+ * cs_dsp_get_ctl() - Finds a matching coefficient control
+ * @dsp: pointer to DSP structure
+ * @name: pointer to string to match with a control's subname
+ * @type: the algorithm type to match
+ * @alg: the algorithm id to match
+ *
+ * Find cs_dsp_coeff_ctl with input name as its subname
+ *
+ * Return: pointer to the control on success, NULL if not found
+ */
+struct cs_dsp_coeff_ctl *cs_dsp_get_ctl(struct cs_dsp *dsp, const char *name, int type,
+					unsigned int alg)
+{
+	struct cs_dsp_coeff_ctl *pos, *rslt = NULL;
+
+	list_for_each_entry(pos, &dsp->ctl_list, list) {
+		if (!pos->subname)
+			continue;
+		if (strncmp(pos->subname, name, pos->subname_len) == 0 &&
+		    pos->fw_name == dsp->fw_name &&
+		    pos->alg_region.alg == alg &&
+		    pos->alg_region.type == type) {
+			rslt = pos;
+			break;
+		}
+	}
+
+	return rslt;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_get_ctl);
+
+static void cs_dsp_ctl_fixup_base(struct cs_dsp *dsp,
+				  const struct cs_dsp_alg_region *alg_region)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+
+	list_for_each_entry(ctl, &dsp->ctl_list, list) {
+		if (ctl->fw_name == dsp->fw_name &&
+		    alg_region->alg == ctl->alg_region.alg &&
+		    alg_region->type == ctl->alg_region.type) {
+			ctl->alg_region.base = alg_region->base;
+		}
+	}
+}
+
+static void *cs_dsp_read_algs(struct cs_dsp *dsp, size_t n_algs,
+			      const struct cs_dsp_region *mem,
+			      unsigned int pos, unsigned int len)
+{
+	void *alg;
+	unsigned int reg;
+	int ret;
+	__be32 val;
+
+	if (n_algs == 0) {
+		cs_dsp_err(dsp, "No algorithms\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (n_algs > 1024) {
+		cs_dsp_err(dsp, "Algorithm count %zx excessive\n", n_algs);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Read the terminator first to validate the length */
+	reg = dsp->ops->region_to_reg(mem, pos + len);
+
+	ret = regmap_raw_read(dsp->regmap, reg, &val, sizeof(val));
+	if (ret != 0) {
+		cs_dsp_err(dsp, "Failed to read algorithm list end: %d\n",
+			   ret);
+		return ERR_PTR(ret);
+	}
+
+	if (be32_to_cpu(val) != 0xbedead)
+		cs_dsp_warn(dsp, "Algorithm list end %x 0x%x != 0xbedead\n",
+			    reg, be32_to_cpu(val));
+
+	/* Convert length from DSP words to bytes */
+	len *= sizeof(u32);
+
+	alg = kzalloc(len, GFP_KERNEL | GFP_DMA);
+	if (!alg)
+		return ERR_PTR(-ENOMEM);
+
+	reg = dsp->ops->region_to_reg(mem, pos);
+
+	ret = regmap_raw_read(dsp->regmap, reg, alg, len);
+	if (ret != 0) {
+		cs_dsp_err(dsp, "Failed to read algorithm list: %d\n", ret);
+		kfree(alg);
+		return ERR_PTR(ret);
+	}
+
+	return alg;
+}
+
+/**
+ * cs_dsp_find_alg_region() - Finds a matching algorithm region
+ * @dsp: pointer to DSP structure
+ * @type: the algorithm type to match
+ * @id: the algorithm id to match
+ *
+ * Return: Pointer to matching algorithm region, or NULL if not found.
+ */
+struct cs_dsp_alg_region *cs_dsp_find_alg_region(struct cs_dsp *dsp,
+						 int type, unsigned int id)
+{
+	struct cs_dsp_alg_region *alg_region;
+
+	list_for_each_entry(alg_region, &dsp->alg_regions, list) {
+		if (id == alg_region->alg && type == alg_region->type)
+			return alg_region;
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_find_alg_region);
+
+static struct cs_dsp_alg_region *cs_dsp_create_region(struct cs_dsp *dsp,
+						      int type, __be32 id,
+						      __be32 base)
+{
+	struct cs_dsp_alg_region *alg_region;
+
+	alg_region = kzalloc(sizeof(*alg_region), GFP_KERNEL);
+	if (!alg_region)
+		return ERR_PTR(-ENOMEM);
+
+	alg_region->type = type;
+	alg_region->alg = be32_to_cpu(id);
+	alg_region->base = be32_to_cpu(base);
+
+	list_add_tail(&alg_region->list, &dsp->alg_regions);
+
+	if (dsp->fw_ver > 0)
+		cs_dsp_ctl_fixup_base(dsp, alg_region);
+
+	return alg_region;
+}
+
+static void cs_dsp_free_alg_regions(struct cs_dsp *dsp)
+{
+	struct cs_dsp_alg_region *alg_region;
+
+	while (!list_empty(&dsp->alg_regions)) {
+		alg_region = list_first_entry(&dsp->alg_regions,
+					      struct cs_dsp_alg_region,
+					      list);
+		list_del(&alg_region->list);
+		kfree(alg_region);
+	}
+}
+
+static void cs_dsp_parse_wmfw_id_header(struct cs_dsp *dsp,
+					struct wmfw_id_hdr *fw, int nalgs)
+{
+	dsp->fw_id = be32_to_cpu(fw->id);
+	dsp->fw_id_version = be32_to_cpu(fw->ver);
+
+	cs_dsp_info(dsp, "Firmware: %x v%d.%d.%d, %d algorithms\n",
+		    dsp->fw_id, (dsp->fw_id_version & 0xff0000) >> 16,
+		    (dsp->fw_id_version & 0xff00) >> 8, dsp->fw_id_version & 0xff,
+		    nalgs);
+}
+
+static void cs_dsp_parse_wmfw_v3_id_header(struct cs_dsp *dsp,
+					   struct wmfw_v3_id_hdr *fw, int nalgs)
+{
+	dsp->fw_id = be32_to_cpu(fw->id);
+	dsp->fw_id_version = be32_to_cpu(fw->ver);
+	dsp->fw_vendor_id = be32_to_cpu(fw->vendor_id);
+
+	cs_dsp_info(dsp, "Firmware: %x vendor: 0x%x v%d.%d.%d, %d algorithms\n",
+		    dsp->fw_id, dsp->fw_vendor_id,
+		    (dsp->fw_id_version & 0xff0000) >> 16,
+		    (dsp->fw_id_version & 0xff00) >> 8, dsp->fw_id_version & 0xff,
+		    nalgs);
+}
+
+static int cs_dsp_create_regions(struct cs_dsp *dsp, __be32 id, int nregions,
+				 const int *type, __be32 *base)
+{
+	struct cs_dsp_alg_region *alg_region;
+	int i;
+
+	for (i = 0; i < nregions; i++) {
+		alg_region = cs_dsp_create_region(dsp, type[i], id, base[i]);
+		if (IS_ERR(alg_region))
+			return PTR_ERR(alg_region);
+	}
+
+	return 0;
+}
+
+static int cs_dsp_adsp1_setup_algs(struct cs_dsp *dsp)
+{
+	struct wmfw_adsp1_id_hdr adsp1_id;
+	struct wmfw_adsp1_alg_hdr *adsp1_alg;
+	struct cs_dsp_alg_region *alg_region;
+	const struct cs_dsp_region *mem;
+	unsigned int pos, len;
+	size_t n_algs;
+	int i, ret;
+
+	mem = cs_dsp_find_region(dsp, WMFW_ADSP1_DM);
+	if (WARN_ON(!mem))
+		return -EINVAL;
+
+	ret = regmap_raw_read(dsp->regmap, mem->base, &adsp1_id,
+			      sizeof(adsp1_id));
+	if (ret != 0) {
+		cs_dsp_err(dsp, "Failed to read algorithm info: %d\n",
+			   ret);
+		return ret;
+	}
+
+	n_algs = be32_to_cpu(adsp1_id.n_algs);
+
+	cs_dsp_parse_wmfw_id_header(dsp, &adsp1_id.fw, n_algs);
+
+	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_ZM,
+					  adsp1_id.fw.id, adsp1_id.zm);
+	if (IS_ERR(alg_region))
+		return PTR_ERR(alg_region);
+
+	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_DM,
+					  adsp1_id.fw.id, adsp1_id.dm);
+	if (IS_ERR(alg_region))
+		return PTR_ERR(alg_region);
+
+	/* Calculate offset and length in DSP words */
+	pos = sizeof(adsp1_id) / sizeof(u32);
+	len = (sizeof(*adsp1_alg) * n_algs) / sizeof(u32);
+
+	adsp1_alg = cs_dsp_read_algs(dsp, n_algs, mem, pos, len);
+	if (IS_ERR(adsp1_alg))
+		return PTR_ERR(adsp1_alg);
+
+	for (i = 0; i < n_algs; i++) {
+		cs_dsp_info(dsp, "%d: ID %x v%d.%d.%d DM@%x ZM@%x\n",
+			    i, be32_to_cpu(adsp1_alg[i].alg.id),
+			    (be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff0000) >> 16,
+			    (be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff00) >> 8,
+			    be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff,
+			    be32_to_cpu(adsp1_alg[i].dm),
+			    be32_to_cpu(adsp1_alg[i].zm));
+
+		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_DM,
+						  adsp1_alg[i].alg.id,
+						  adsp1_alg[i].dm);
+		if (IS_ERR(alg_region)) {
+			ret = PTR_ERR(alg_region);
+			goto out;
+		}
+		if (dsp->fw_ver == 0) {
+			if (i + 1 < n_algs) {
+				len = be32_to_cpu(adsp1_alg[i + 1].dm);
+				len -= be32_to_cpu(adsp1_alg[i].dm);
+				len *= 4;
+				cs_dsp_create_control(dsp, alg_region, 0,
+						      len, NULL, 0, 0,
+						      WMFW_CTL_TYPE_BYTES);
+			} else {
+				cs_dsp_warn(dsp, "Missing length info for region DM with ID %x\n",
+					    be32_to_cpu(adsp1_alg[i].alg.id));
+			}
+		}
+
+		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_ZM,
+						  adsp1_alg[i].alg.id,
+						  adsp1_alg[i].zm);
+		if (IS_ERR(alg_region)) {
+			ret = PTR_ERR(alg_region);
+			goto out;
+		}
+		if (dsp->fw_ver == 0) {
+			if (i + 1 < n_algs) {
+				len = be32_to_cpu(adsp1_alg[i + 1].zm);
+				len -= be32_to_cpu(adsp1_alg[i].zm);
+				len *= 4;
+				cs_dsp_create_control(dsp, alg_region, 0,
+						      len, NULL, 0, 0,
+						      WMFW_CTL_TYPE_BYTES);
+			} else {
+				cs_dsp_warn(dsp, "Missing length info for region ZM with ID %x\n",
+					    be32_to_cpu(adsp1_alg[i].alg.id));
+			}
+		}
+	}
+
+out:
+	kfree(adsp1_alg);
+	return ret;
+}
+
+static int cs_dsp_adsp2_setup_algs(struct cs_dsp *dsp)
+{
+	struct wmfw_adsp2_id_hdr adsp2_id;
+	struct wmfw_adsp2_alg_hdr *adsp2_alg;
+	struct cs_dsp_alg_region *alg_region;
+	const struct cs_dsp_region *mem;
+	unsigned int pos, len;
+	size_t n_algs;
+	int i, ret;
+
+	mem = cs_dsp_find_region(dsp, WMFW_ADSP2_XM);
+	if (WARN_ON(!mem))
+		return -EINVAL;
+
+	ret = regmap_raw_read(dsp->regmap, mem->base, &adsp2_id,
+			      sizeof(adsp2_id));
+	if (ret != 0) {
+		cs_dsp_err(dsp, "Failed to read algorithm info: %d\n",
+			   ret);
+		return ret;
+	}
+
+	n_algs = be32_to_cpu(adsp2_id.n_algs);
+
+	cs_dsp_parse_wmfw_id_header(dsp, &adsp2_id.fw, n_algs);
+
+	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_XM,
+					  adsp2_id.fw.id, adsp2_id.xm);
+	if (IS_ERR(alg_region))
+		return PTR_ERR(alg_region);
+
+	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_YM,
+					  adsp2_id.fw.id, adsp2_id.ym);
+	if (IS_ERR(alg_region))
+		return PTR_ERR(alg_region);
+
+	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_ZM,
+					  adsp2_id.fw.id, adsp2_id.zm);
+	if (IS_ERR(alg_region))
+		return PTR_ERR(alg_region);
+
+	/* Calculate offset and length in DSP words */
+	pos = sizeof(adsp2_id) / sizeof(u32);
+	len = (sizeof(*adsp2_alg) * n_algs) / sizeof(u32);
+
+	adsp2_alg = cs_dsp_read_algs(dsp, n_algs, mem, pos, len);
+	if (IS_ERR(adsp2_alg))
+		return PTR_ERR(adsp2_alg);
+
+	for (i = 0; i < n_algs; i++) {
+		cs_dsp_info(dsp,
+			    "%d: ID %x v%d.%d.%d XM@%x YM@%x ZM@%x\n",
+			    i, be32_to_cpu(adsp2_alg[i].alg.id),
+			    (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff0000) >> 16,
+			    (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff00) >> 8,
+			    be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff,
+			    be32_to_cpu(adsp2_alg[i].xm),
+			    be32_to_cpu(adsp2_alg[i].ym),
+			    be32_to_cpu(adsp2_alg[i].zm));
+
+		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_XM,
+						  adsp2_alg[i].alg.id,
+						  adsp2_alg[i].xm);
+		if (IS_ERR(alg_region)) {
+			ret = PTR_ERR(alg_region);
+			goto out;
+		}
+		if (dsp->fw_ver == 0) {
+			if (i + 1 < n_algs) {
+				len = be32_to_cpu(adsp2_alg[i + 1].xm);
+				len -= be32_to_cpu(adsp2_alg[i].xm);
+				len *= 4;
+				cs_dsp_create_control(dsp, alg_region, 0,
+						      len, NULL, 0, 0,
+						      WMFW_CTL_TYPE_BYTES);
+			} else {
+				cs_dsp_warn(dsp, "Missing length info for region XM with ID %x\n",
+					    be32_to_cpu(adsp2_alg[i].alg.id));
+			}
+		}
+
+		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_YM,
+						  adsp2_alg[i].alg.id,
+						  adsp2_alg[i].ym);
+		if (IS_ERR(alg_region)) {
+			ret = PTR_ERR(alg_region);
+			goto out;
+		}
+		if (dsp->fw_ver == 0) {
+			if (i + 1 < n_algs) {
+				len = be32_to_cpu(adsp2_alg[i + 1].ym);
+				len -= be32_to_cpu(adsp2_alg[i].ym);
+				len *= 4;
+				cs_dsp_create_control(dsp, alg_region, 0,
+						      len, NULL, 0, 0,
+						      WMFW_CTL_TYPE_BYTES);
+			} else {
+				cs_dsp_warn(dsp, "Missing length info for region YM with ID %x\n",
+					    be32_to_cpu(adsp2_alg[i].alg.id));
+			}
+		}
+
+		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_ZM,
+						  adsp2_alg[i].alg.id,
+						  adsp2_alg[i].zm);
+		if (IS_ERR(alg_region)) {
+			ret = PTR_ERR(alg_region);
+			goto out;
+		}
+		if (dsp->fw_ver == 0) {
+			if (i + 1 < n_algs) {
+				len = be32_to_cpu(adsp2_alg[i + 1].zm);
+				len -= be32_to_cpu(adsp2_alg[i].zm);
+				len *= 4;
+				cs_dsp_create_control(dsp, alg_region, 0,
+						      len, NULL, 0, 0,
+						      WMFW_CTL_TYPE_BYTES);
+			} else {
+				cs_dsp_warn(dsp, "Missing length info for region ZM with ID %x\n",
+					    be32_to_cpu(adsp2_alg[i].alg.id));
+			}
+		}
+	}
+
+out:
+	kfree(adsp2_alg);
+	return ret;
+}
+
+static int cs_dsp_halo_create_regions(struct cs_dsp *dsp, __be32 id,
+				      __be32 xm_base, __be32 ym_base)
+{
+	static const int types[] = {
+		WMFW_ADSP2_XM, WMFW_HALO_XM_PACKED,
+		WMFW_ADSP2_YM, WMFW_HALO_YM_PACKED
+	};
+	__be32 bases[] = { xm_base, xm_base, ym_base, ym_base };
+
+	return cs_dsp_create_regions(dsp, id, ARRAY_SIZE(types), types, bases);
+}
+
+static int cs_dsp_halo_setup_algs(struct cs_dsp *dsp)
+{
+	struct wmfw_halo_id_hdr halo_id;
+	struct wmfw_halo_alg_hdr *halo_alg;
+	const struct cs_dsp_region *mem;
+	unsigned int pos, len;
+	size_t n_algs;
+	int i, ret;
+
+	mem = cs_dsp_find_region(dsp, WMFW_ADSP2_XM);
+	if (WARN_ON(!mem))
+		return -EINVAL;
+
+	ret = regmap_raw_read(dsp->regmap, mem->base, &halo_id,
+			      sizeof(halo_id));
+	if (ret != 0) {
+		cs_dsp_err(dsp, "Failed to read algorithm info: %d\n",
+			   ret);
+		return ret;
+	}
+
+	n_algs = be32_to_cpu(halo_id.n_algs);
+
+	cs_dsp_parse_wmfw_v3_id_header(dsp, &halo_id.fw, n_algs);
+
+	ret = cs_dsp_halo_create_regions(dsp, halo_id.fw.id,
+					 halo_id.xm_base, halo_id.ym_base);
+	if (ret)
+		return ret;
+
+	/* Calculate offset and length in DSP words */
+	pos = sizeof(halo_id) / sizeof(u32);
+	len = (sizeof(*halo_alg) * n_algs) / sizeof(u32);
+
+	halo_alg = cs_dsp_read_algs(dsp, n_algs, mem, pos, len);
+	if (IS_ERR(halo_alg))
+		return PTR_ERR(halo_alg);
+
+	for (i = 0; i < n_algs; i++) {
+		cs_dsp_info(dsp,
+			    "%d: ID %x v%d.%d.%d XM@%x YM@%x\n",
+			    i, be32_to_cpu(halo_alg[i].alg.id),
+			    (be32_to_cpu(halo_alg[i].alg.ver) & 0xff0000) >> 16,
+			    (be32_to_cpu(halo_alg[i].alg.ver) & 0xff00) >> 8,
+			    be32_to_cpu(halo_alg[i].alg.ver) & 0xff,
+			    be32_to_cpu(halo_alg[i].xm_base),
+			    be32_to_cpu(halo_alg[i].ym_base));
+
+		ret = cs_dsp_halo_create_regions(dsp, halo_alg[i].alg.id,
+						 halo_alg[i].xm_base,
+						 halo_alg[i].ym_base);
+		if (ret)
+			goto out;
+	}
+
+out:
+	kfree(halo_alg);
+	return ret;
+}
+
+static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware,
+			     const char *file)
+{
+	LIST_HEAD(buf_list);
+	struct regmap *regmap = dsp->regmap;
+	struct wmfw_coeff_hdr *hdr;
+	struct wmfw_coeff_item *blk;
+	const struct cs_dsp_region *mem;
+	struct cs_dsp_alg_region *alg_region;
+	const char *region_name;
+	int ret, pos, blocks, type, offset, reg;
+	struct cs_dsp_buf *buf;
+
+	if (!firmware)
+		return 0;
+
+	ret = -EINVAL;
+
+	if (sizeof(*hdr) >= firmware->size) {
+		cs_dsp_err(dsp, "%s: coefficient file too short, %zu bytes\n",
+			   file, firmware->size);
+		goto out_fw;
+	}
+
+	hdr = (void *)&firmware->data[0];
+	if (memcmp(hdr->magic, "WMDR", 4) != 0) {
+		cs_dsp_err(dsp, "%s: invalid coefficient magic\n", file);
+		goto out_fw;
+	}
+
+	switch (be32_to_cpu(hdr->rev) & 0xff) {
+	case 1:
+		break;
+	default:
+		cs_dsp_err(dsp, "%s: Unsupported coefficient file format %d\n",
+			   file, be32_to_cpu(hdr->rev) & 0xff);
+		ret = -EINVAL;
+		goto out_fw;
+	}
+
+	cs_dsp_dbg(dsp, "%s: v%d.%d.%d\n", file,
+		   (le32_to_cpu(hdr->ver) >> 16) & 0xff,
+		   (le32_to_cpu(hdr->ver) >>  8) & 0xff,
+		   le32_to_cpu(hdr->ver) & 0xff);
+
+	pos = le32_to_cpu(hdr->len);
+
+	blocks = 0;
+	while (pos < firmware->size &&
+	       sizeof(*blk) < firmware->size - pos) {
+		blk = (void *)(&firmware->data[pos]);
+
+		type = le16_to_cpu(blk->type);
+		offset = le16_to_cpu(blk->offset);
+
+		cs_dsp_dbg(dsp, "%s.%d: %x v%d.%d.%d\n",
+			   file, blocks, le32_to_cpu(blk->id),
+			   (le32_to_cpu(blk->ver) >> 16) & 0xff,
+			   (le32_to_cpu(blk->ver) >>  8) & 0xff,
+			   le32_to_cpu(blk->ver) & 0xff);
+		cs_dsp_dbg(dsp, "%s.%d: %d bytes at 0x%x in %x\n",
+			   file, blocks, le32_to_cpu(blk->len), offset, type);
+
+		reg = 0;
+		region_name = "Unknown";
+		switch (type) {
+		case (WMFW_NAME_TEXT << 8):
+		case (WMFW_INFO_TEXT << 8):
+		case (WMFW_METADATA << 8):
+			break;
+		case (WMFW_ABSOLUTE << 8):
+			/*
+			 * Old files may use this for global
+			 * coefficients.
+			 */
+			if (le32_to_cpu(blk->id) == dsp->fw_id &&
+			    offset == 0) {
+				region_name = "global coefficients";
+				mem = cs_dsp_find_region(dsp, type);
+				if (!mem) {
+					cs_dsp_err(dsp, "No ZM\n");
+					break;
+				}
+				reg = dsp->ops->region_to_reg(mem, 0);
+
+			} else {
+				region_name = "register";
+				reg = offset;
+			}
+			break;
+
+		case WMFW_ADSP1_DM:
+		case WMFW_ADSP1_ZM:
+		case WMFW_ADSP2_XM:
+		case WMFW_ADSP2_YM:
+		case WMFW_HALO_XM_PACKED:
+		case WMFW_HALO_YM_PACKED:
+		case WMFW_HALO_PM_PACKED:
+			cs_dsp_dbg(dsp, "%s.%d: %d bytes in %x for %x\n",
+				   file, blocks, le32_to_cpu(blk->len),
+				   type, le32_to_cpu(blk->id));
+
+			mem = cs_dsp_find_region(dsp, type);
+			if (!mem) {
+				cs_dsp_err(dsp, "No base for region %x\n", type);
+				break;
+			}
+
+			alg_region = cs_dsp_find_alg_region(dsp, type,
+							    le32_to_cpu(blk->id));
+			if (alg_region) {
+				reg = alg_region->base;
+				reg = dsp->ops->region_to_reg(mem, reg);
+				reg += offset;
+			} else {
+				cs_dsp_err(dsp, "No %x for algorithm %x\n",
+					   type, le32_to_cpu(blk->id));
+			}
+			break;
+
+		default:
+			cs_dsp_err(dsp, "%s.%d: Unknown region type %x at %d\n",
+				   file, blocks, type, pos);
+			break;
+		}
+
+		if (reg) {
+			if (le32_to_cpu(blk->len) >
+			    firmware->size - pos - sizeof(*blk)) {
+				cs_dsp_err(dsp,
+					   "%s.%d: %s region len %d bytes exceeds file length %zu\n",
+					   file, blocks, region_name,
+					   le32_to_cpu(blk->len),
+					   firmware->size);
+				ret = -EINVAL;
+				goto out_fw;
+			}
+
+			buf = cs_dsp_buf_alloc(blk->data,
+					       le32_to_cpu(blk->len),
+					       &buf_list);
+			if (!buf) {
+				cs_dsp_err(dsp, "Out of memory\n");
+				ret = -ENOMEM;
+				goto out_fw;
+			}
+
+			cs_dsp_dbg(dsp, "%s.%d: Writing %d bytes at %x\n",
+				   file, blocks, le32_to_cpu(blk->len),
+				   reg);
+			ret = regmap_raw_write_async(regmap, reg, buf->buf,
+						     le32_to_cpu(blk->len));
+			if (ret != 0) {
+				cs_dsp_err(dsp,
+					   "%s.%d: Failed to write to %x in %s: %d\n",
+					   file, blocks, reg, region_name, ret);
+			}
+		}
+
+		pos += (le32_to_cpu(blk->len) + sizeof(*blk) + 3) & ~0x03;
+		blocks++;
+	}
+
+	ret = regmap_async_complete(regmap);
+	if (ret != 0)
+		cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret);
+
+	if (pos > firmware->size)
+		cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n",
+			    file, blocks, pos - firmware->size);
+
+	cs_dsp_debugfs_save_binname(dsp, file);
+
+out_fw:
+	regmap_async_complete(regmap);
+	cs_dsp_buf_free(&buf_list);
+	return ret;
+}
+
+static int cs_dsp_create_name(struct cs_dsp *dsp)
+{
+	if (!dsp->name) {
+		dsp->name = devm_kasprintf(dsp->dev, GFP_KERNEL, "DSP%d",
+					   dsp->num);
+		if (!dsp->name)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int cs_dsp_common_init(struct cs_dsp *dsp)
+{
+	int ret;
+
+	ret = cs_dsp_create_name(dsp);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&dsp->alg_regions);
+	INIT_LIST_HEAD(&dsp->ctl_list);
+
+	mutex_init(&dsp->pwr_lock);
+
+	return 0;
+}
+
+/**
+ * cs_dsp_adsp1_init() - Initialise a cs_dsp structure representing a ADSP1 device
+ * @dsp: pointer to DSP structure
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_adsp1_init(struct cs_dsp *dsp)
+{
+	dsp->ops = &cs_dsp_adsp1_ops;
+
+	return cs_dsp_common_init(dsp);
+}
+EXPORT_SYMBOL_GPL(cs_dsp_adsp1_init);
+
+/**
+ * cs_dsp_adsp1_power_up() - Load and start the named firmware
+ * @dsp: pointer to DSP structure
+ * @wmfw_firmware: the firmware to be sent
+ * @wmfw_filename: file name of firmware to be sent
+ * @coeff_firmware: the coefficient data to be sent
+ * @coeff_filename: file name of coefficient to data be sent
+ * @fw_name: the user-friendly firmware name
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_adsp1_power_up(struct cs_dsp *dsp,
+			  const struct firmware *wmfw_firmware, char *wmfw_filename,
+			  const struct firmware *coeff_firmware, char *coeff_filename,
+			  const char *fw_name)
+{
+	unsigned int val;
+	int ret;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	dsp->fw_name = fw_name;
+
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
+			   ADSP1_SYS_ENA, ADSP1_SYS_ENA);
+
+	/*
+	 * For simplicity set the DSP clock rate to be the
+	 * SYSCLK rate rather than making it configurable.
+	 */
+	if (dsp->sysclk_reg) {
+		ret = regmap_read(dsp->regmap, dsp->sysclk_reg, &val);
+		if (ret != 0) {
+			cs_dsp_err(dsp, "Failed to read SYSCLK state: %d\n", ret);
+			goto err_mutex;
+		}
+
+		val = (val & dsp->sysclk_mask) >> dsp->sysclk_shift;
+
+		ret = regmap_update_bits(dsp->regmap,
+					 dsp->base + ADSP1_CONTROL_31,
+					 ADSP1_CLK_SEL_MASK, val);
+		if (ret != 0) {
+			cs_dsp_err(dsp, "Failed to set clock rate: %d\n", ret);
+			goto err_mutex;
+		}
+	}
+
+	ret = cs_dsp_load(dsp, wmfw_firmware, wmfw_filename);
+	if (ret != 0)
+		goto err_ena;
+
+	ret = cs_dsp_adsp1_setup_algs(dsp);
+	if (ret != 0)
+		goto err_ena;
+
+	ret = cs_dsp_load_coeff(dsp, coeff_firmware, coeff_filename);
+	if (ret != 0)
+		goto err_ena;
+
+	/* Initialize caches for enabled and unset controls */
+	ret = cs_dsp_coeff_init_control_caches(dsp);
+	if (ret != 0)
+		goto err_ena;
+
+	/* Sync set controls */
+	ret = cs_dsp_coeff_sync_controls(dsp);
+	if (ret != 0)
+		goto err_ena;
+
+	dsp->booted = true;
+
+	/* Start the core running */
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
+			   ADSP1_CORE_ENA | ADSP1_START,
+			   ADSP1_CORE_ENA | ADSP1_START);
+
+	dsp->running = true;
+
+	mutex_unlock(&dsp->pwr_lock);
+
+	return 0;
+
+err_ena:
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
+			   ADSP1_SYS_ENA, 0);
+err_mutex:
+	mutex_unlock(&dsp->pwr_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_adsp1_power_up);
+
+/**
+ * cs_dsp_adsp1_power_down() - Halts the DSP
+ * @dsp: pointer to DSP structure
+ */
+void cs_dsp_adsp1_power_down(struct cs_dsp *dsp)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	dsp->running = false;
+	dsp->booted = false;
+
+	/* Halt the core */
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
+			   ADSP1_CORE_ENA | ADSP1_START, 0);
+
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_19,
+			   ADSP1_WDMA_BUFFER_LENGTH_MASK, 0);
+
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
+			   ADSP1_SYS_ENA, 0);
+
+	list_for_each_entry(ctl, &dsp->ctl_list, list)
+		ctl->enabled = 0;
+
+	cs_dsp_free_alg_regions(dsp);
+
+	mutex_unlock(&dsp->pwr_lock);
+}
+EXPORT_SYMBOL_GPL(cs_dsp_adsp1_power_down);
+
+static int cs_dsp_adsp2v2_enable_core(struct cs_dsp *dsp)
+{
+	unsigned int val;
+	int ret, count;
+
+	/* Wait for the RAM to start, should be near instantaneous */
+	for (count = 0; count < 10; ++count) {
+		ret = regmap_read(dsp->regmap, dsp->base + ADSP2_STATUS1, &val);
+		if (ret != 0)
+			return ret;
+
+		if (val & ADSP2_RAM_RDY)
+			break;
+
+		usleep_range(250, 500);
+	}
+
+	if (!(val & ADSP2_RAM_RDY)) {
+		cs_dsp_err(dsp, "Failed to start DSP RAM\n");
+		return -EBUSY;
+	}
+
+	cs_dsp_dbg(dsp, "RAM ready after %d polls\n", count);
+
+	return 0;
+}
+
+static int cs_dsp_adsp2_enable_core(struct cs_dsp *dsp)
+{
+	int ret;
+
+	ret = regmap_update_bits_async(dsp->regmap, dsp->base + ADSP2_CONTROL,
+				       ADSP2_SYS_ENA, ADSP2_SYS_ENA);
+	if (ret != 0)
+		return ret;
+
+	return cs_dsp_adsp2v2_enable_core(dsp);
+}
+
+static int cs_dsp_adsp2_lock(struct cs_dsp *dsp, unsigned int lock_regions)
+{
+	struct regmap *regmap = dsp->regmap;
+	unsigned int code0, code1, lock_reg;
+
+	if (!(lock_regions & CS_ADSP2_REGION_ALL))
+		return 0;
+
+	lock_regions &= CS_ADSP2_REGION_ALL;
+	lock_reg = dsp->base + ADSP2_LOCK_REGION_1_LOCK_REGION_0;
+
+	while (lock_regions) {
+		code0 = code1 = 0;
+		if (lock_regions & BIT(0)) {
+			code0 = ADSP2_LOCK_CODE_0;
+			code1 = ADSP2_LOCK_CODE_1;
+		}
+		if (lock_regions & BIT(1)) {
+			code0 |= ADSP2_LOCK_CODE_0 << ADSP2_LOCK_REGION_SHIFT;
+			code1 |= ADSP2_LOCK_CODE_1 << ADSP2_LOCK_REGION_SHIFT;
+		}
+		regmap_write(regmap, lock_reg, code0);
+		regmap_write(regmap, lock_reg, code1);
+		lock_regions >>= 2;
+		lock_reg += 2;
+	}
+
+	return 0;
+}
+
+static int cs_dsp_adsp2_enable_memory(struct cs_dsp *dsp)
+{
+	return regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
+				  ADSP2_MEM_ENA, ADSP2_MEM_ENA);
+}
+
+static void cs_dsp_adsp2_disable_memory(struct cs_dsp *dsp)
+{
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
+			   ADSP2_MEM_ENA, 0);
+}
+
+static void cs_dsp_adsp2_disable_core(struct cs_dsp *dsp)
+{
+	regmap_write(dsp->regmap, dsp->base + ADSP2_RDMA_CONFIG_1, 0);
+	regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_1, 0);
+	regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_2, 0);
+
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
+			   ADSP2_SYS_ENA, 0);
+}
+
+static void cs_dsp_adsp2v2_disable_core(struct cs_dsp *dsp)
+{
+	regmap_write(dsp->regmap, dsp->base + ADSP2_RDMA_CONFIG_1, 0);
+	regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_1, 0);
+	regmap_write(dsp->regmap, dsp->base + ADSP2V2_WDMA_CONFIG_2, 0);
+}
+
+static int cs_dsp_halo_configure_mpu(struct cs_dsp *dsp, unsigned int lock_regions)
+{
+	struct reg_sequence config[] = {
+		{ dsp->base + HALO_MPU_LOCK_CONFIG,     0x5555 },
+		{ dsp->base + HALO_MPU_LOCK_CONFIG,     0xAAAA },
+		{ dsp->base + HALO_MPU_XMEM_ACCESS_0,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_YMEM_ACCESS_0,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_WINDOW_ACCESS_0, lock_regions },
+		{ dsp->base + HALO_MPU_XREG_ACCESS_0,   lock_regions },
+		{ dsp->base + HALO_MPU_YREG_ACCESS_0,   lock_regions },
+		{ dsp->base + HALO_MPU_XMEM_ACCESS_1,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_YMEM_ACCESS_1,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_WINDOW_ACCESS_1, lock_regions },
+		{ dsp->base + HALO_MPU_XREG_ACCESS_1,   lock_regions },
+		{ dsp->base + HALO_MPU_YREG_ACCESS_1,   lock_regions },
+		{ dsp->base + HALO_MPU_XMEM_ACCESS_2,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_YMEM_ACCESS_2,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_WINDOW_ACCESS_2, lock_regions },
+		{ dsp->base + HALO_MPU_XREG_ACCESS_2,   lock_regions },
+		{ dsp->base + HALO_MPU_YREG_ACCESS_2,   lock_regions },
+		{ dsp->base + HALO_MPU_XMEM_ACCESS_3,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_YMEM_ACCESS_3,   0xFFFFFFFF },
+		{ dsp->base + HALO_MPU_WINDOW_ACCESS_3, lock_regions },
+		{ dsp->base + HALO_MPU_XREG_ACCESS_3,   lock_regions },
+		{ dsp->base + HALO_MPU_YREG_ACCESS_3,   lock_regions },
+		{ dsp->base + HALO_MPU_LOCK_CONFIG,     0 },
+	};
+
+	return regmap_multi_reg_write(dsp->regmap, config, ARRAY_SIZE(config));
+}
+
+/**
+ * cs_dsp_set_dspclk() - Applies the given frequency to the given cs_dsp
+ * @dsp: pointer to DSP structure
+ * @freq: clock rate to set
+ *
+ * This is only for use on ADSP2 cores.
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_set_dspclk(struct cs_dsp *dsp, unsigned int freq)
+{
+	int ret;
+
+	ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CLOCKING,
+				 ADSP2_CLK_SEL_MASK,
+				 freq << ADSP2_CLK_SEL_SHIFT);
+	if (ret)
+		cs_dsp_err(dsp, "Failed to set clock rate: %d\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_set_dspclk);
+
+static void cs_dsp_stop_watchdog(struct cs_dsp *dsp)
+{
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_WATCHDOG,
+			   ADSP2_WDT_ENA_MASK, 0);
+}
+
+static void cs_dsp_halo_stop_watchdog(struct cs_dsp *dsp)
+{
+	regmap_update_bits(dsp->regmap, dsp->base + HALO_WDT_CONTROL,
+			   HALO_WDT_EN_MASK, 0);
+}
+
+/**
+ * cs_dsp_power_up() - Downloads firmware to the DSP
+ * @dsp: pointer to DSP structure
+ * @wmfw_firmware: the firmware to be sent
+ * @wmfw_filename: file name of firmware to be sent
+ * @coeff_firmware: the coefficient data to be sent
+ * @coeff_filename: file name of coefficient to data be sent
+ * @fw_name: the user-friendly firmware name
+ *
+ * This function is used on ADSP2 and Halo DSP cores, it powers-up the DSP core
+ * and downloads the firmware but does not start the firmware running. The
+ * cs_dsp booted flag will be set once completed and if the core has a low-power
+ * memory retention mode it will be put into this state after the firmware is
+ * downloaded.
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_power_up(struct cs_dsp *dsp,
+		    const struct firmware *wmfw_firmware, char *wmfw_filename,
+		    const struct firmware *coeff_firmware, char *coeff_filename,
+		    const char *fw_name)
+{
+	int ret;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	dsp->fw_name = fw_name;
+
+	if (dsp->ops->enable_memory) {
+		ret = dsp->ops->enable_memory(dsp);
+		if (ret != 0)
+			goto err_mutex;
+	}
+
+	if (dsp->ops->enable_core) {
+		ret = dsp->ops->enable_core(dsp);
+		if (ret != 0)
+			goto err_mem;
+	}
+
+	ret = cs_dsp_load(dsp, wmfw_firmware, wmfw_filename);
+	if (ret != 0)
+		goto err_ena;
+
+	ret = dsp->ops->setup_algs(dsp);
+	if (ret != 0)
+		goto err_ena;
+
+	ret = cs_dsp_load_coeff(dsp, coeff_firmware, coeff_filename);
+	if (ret != 0)
+		goto err_ena;
+
+	/* Initialize caches for enabled and unset controls */
+	ret = cs_dsp_coeff_init_control_caches(dsp);
+	if (ret != 0)
+		goto err_ena;
+
+	if (dsp->ops->disable_core)
+		dsp->ops->disable_core(dsp);
+
+	dsp->booted = true;
+
+	mutex_unlock(&dsp->pwr_lock);
+
+	return 0;
+err_ena:
+	if (dsp->ops->disable_core)
+		dsp->ops->disable_core(dsp);
+err_mem:
+	if (dsp->ops->disable_memory)
+		dsp->ops->disable_memory(dsp);
+err_mutex:
+	mutex_unlock(&dsp->pwr_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_power_up);
+
+/**
+ * cs_dsp_power_down() - Powers-down the DSP
+ * @dsp: pointer to DSP structure
+ *
+ * cs_dsp_stop() must have been called before this function. The core will be
+ * fully powered down and so the memory will not be retained.
+ */
+void cs_dsp_power_down(struct cs_dsp *dsp)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	cs_dsp_debugfs_clear(dsp);
+
+	dsp->fw_id = 0;
+	dsp->fw_id_version = 0;
+
+	dsp->booted = false;
+
+	if (dsp->ops->disable_memory)
+		dsp->ops->disable_memory(dsp);
+
+	list_for_each_entry(ctl, &dsp->ctl_list, list)
+		ctl->enabled = 0;
+
+	cs_dsp_free_alg_regions(dsp);
+
+	mutex_unlock(&dsp->pwr_lock);
+
+	cs_dsp_dbg(dsp, "Shutdown complete\n");
+}
+EXPORT_SYMBOL_GPL(cs_dsp_power_down);
+
+static int cs_dsp_adsp2_start_core(struct cs_dsp *dsp)
+{
+	return regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
+				  ADSP2_CORE_ENA | ADSP2_START,
+				  ADSP2_CORE_ENA | ADSP2_START);
+}
+
+static void cs_dsp_adsp2_stop_core(struct cs_dsp *dsp)
+{
+	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
+			   ADSP2_CORE_ENA | ADSP2_START, 0);
+}
+
+/**
+ * cs_dsp_run() - Starts the firmware running
+ * @dsp: pointer to DSP structure
+ *
+ * cs_dsp_power_up() must have previously been called successfully.
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_run(struct cs_dsp *dsp)
+{
+	int ret;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	if (!dsp->booted) {
+		ret = -EIO;
+		goto err;
+	}
+
+	if (dsp->ops->enable_core) {
+		ret = dsp->ops->enable_core(dsp);
+		if (ret != 0)
+			goto err;
+	}
+
+	/* Sync set controls */
+	ret = cs_dsp_coeff_sync_controls(dsp);
+	if (ret != 0)
+		goto err;
+
+	if (dsp->ops->lock_memory) {
+		ret = dsp->ops->lock_memory(dsp, dsp->lock_regions);
+		if (ret != 0) {
+			cs_dsp_err(dsp, "Error configuring MPU: %d\n", ret);
+			goto err;
+		}
+	}
+
+	if (dsp->ops->start_core) {
+		ret = dsp->ops->start_core(dsp);
+		if (ret != 0)
+			goto err;
+	}
+
+	dsp->running = true;
+
+	if (dsp->client_ops->post_run) {
+		ret = dsp->client_ops->post_run(dsp);
+		if (ret)
+			goto err;
+	}
+
+	mutex_unlock(&dsp->pwr_lock);
+
+	return 0;
+
+err:
+	if (dsp->ops->stop_core)
+		dsp->ops->stop_core(dsp);
+	if (dsp->ops->disable_core)
+		dsp->ops->disable_core(dsp);
+	mutex_unlock(&dsp->pwr_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_run);
+
+/**
+ * cs_dsp_stop() - Stops the firmware
+ * @dsp: pointer to DSP structure
+ *
+ * Memory will not be disabled so firmware will remain loaded.
+ */
+void cs_dsp_stop(struct cs_dsp *dsp)
+{
+	/* Tell the firmware to cleanup */
+	cs_dsp_signal_event_controls(dsp, CS_DSP_FW_EVENT_SHUTDOWN);
+
+	if (dsp->ops->stop_watchdog)
+		dsp->ops->stop_watchdog(dsp);
+
+	/* Log firmware state, it can be useful for analysis */
+	if (dsp->ops->show_fw_status)
+		dsp->ops->show_fw_status(dsp);
+
+	mutex_lock(&dsp->pwr_lock);
+
+	dsp->running = false;
+
+	if (dsp->ops->stop_core)
+		dsp->ops->stop_core(dsp);
+	if (dsp->ops->disable_core)
+		dsp->ops->disable_core(dsp);
+
+	if (dsp->client_ops->post_stop)
+		dsp->client_ops->post_stop(dsp);
+
+	mutex_unlock(&dsp->pwr_lock);
+
+	cs_dsp_dbg(dsp, "Execution stopped\n");
+}
+EXPORT_SYMBOL_GPL(cs_dsp_stop);
+
+static int cs_dsp_halo_start_core(struct cs_dsp *dsp)
+{
+	return regmap_update_bits(dsp->regmap,
+				  dsp->base + HALO_CCM_CORE_CONTROL,
+				  HALO_CORE_RESET | HALO_CORE_EN,
+				  HALO_CORE_RESET | HALO_CORE_EN);
+}
+
+static void cs_dsp_halo_stop_core(struct cs_dsp *dsp)
+{
+	regmap_update_bits(dsp->regmap, dsp->base + HALO_CCM_CORE_CONTROL,
+			   HALO_CORE_EN, 0);
+
+	/* reset halo core with CORE_SOFT_RESET */
+	regmap_update_bits(dsp->regmap, dsp->base + HALO_CORE_SOFT_RESET,
+			   HALO_CORE_SOFT_RESET_MASK, 1);
+}
+
+/**
+ * cs_dsp_adsp2_init() - Initialise a cs_dsp structure representing a ADSP2 core
+ * @dsp: pointer to DSP structure
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_adsp2_init(struct cs_dsp *dsp)
+{
+	int ret;
+
+	switch (dsp->rev) {
+	case 0:
+		/*
+		 * Disable the DSP memory by default when in reset for a small
+		 * power saving.
+		 */
+		ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
+					 ADSP2_MEM_ENA, 0);
+		if (ret) {
+			cs_dsp_err(dsp,
+				   "Failed to clear memory retention: %d\n", ret);
+			return ret;
+		}
+
+		dsp->ops = &cs_dsp_adsp2_ops[0];
+		break;
+	case 1:
+		dsp->ops = &cs_dsp_adsp2_ops[1];
+		break;
+	default:
+		dsp->ops = &cs_dsp_adsp2_ops[2];
+		break;
+	}
+
+	return cs_dsp_common_init(dsp);
+}
+EXPORT_SYMBOL_GPL(cs_dsp_adsp2_init);
+
+/**
+ * cs_dsp_halo_init() - Initialise a cs_dsp structure representing a HALO Core DSP
+ * @dsp: pointer to DSP structure
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_halo_init(struct cs_dsp *dsp)
+{
+	dsp->ops = &cs_dsp_halo_ops;
+
+	return cs_dsp_common_init(dsp);
+}
+EXPORT_SYMBOL_GPL(cs_dsp_halo_init);
+
+/**
+ * cs_dsp_remove() - Clean a cs_dsp before deletion
+ * @dsp: pointer to DSP structure
+ */
+void cs_dsp_remove(struct cs_dsp *dsp)
+{
+	struct cs_dsp_coeff_ctl *ctl;
+
+	while (!list_empty(&dsp->ctl_list)) {
+		ctl = list_first_entry(&dsp->ctl_list, struct cs_dsp_coeff_ctl, list);
+
+		if (dsp->client_ops->control_remove)
+			dsp->client_ops->control_remove(ctl);
+
+		list_del(&ctl->list);
+		cs_dsp_free_ctl_blk(ctl);
+	}
+}
+EXPORT_SYMBOL_GPL(cs_dsp_remove);
+
+/**
+ * cs_dsp_read_raw_data_block() - Reads a block of data from DSP memory
+ * @dsp: pointer to DSP structure
+ * @mem_type: the type of DSP memory containing the data to be read
+ * @mem_addr: the address of the data within the memory region
+ * @num_words: the length of the data to read
+ * @data: a buffer to store the fetched data
+ *
+ * If this is used to read unpacked 24-bit memory, each 24-bit DSP word will
+ * occupy 32-bits in data (MSbyte will be 0). This padding can be removed using
+ * cs_dsp_remove_padding()
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_read_raw_data_block(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr,
+			       unsigned int num_words, __be32 *data)
+{
+	struct cs_dsp_region const *mem = cs_dsp_find_region(dsp, mem_type);
+	unsigned int reg;
+	int ret;
+
+	if (!mem)
+		return -EINVAL;
+
+	reg = dsp->ops->region_to_reg(mem, mem_addr);
+
+	ret = regmap_raw_read(dsp->regmap, reg, data,
+			      sizeof(*data) * num_words);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_read_raw_data_block);
+
+/**
+ * cs_dsp_read_data_word() - Reads a word from DSP memory
+ * @dsp: pointer to DSP structure
+ * @mem_type: the type of DSP memory containing the data to be read
+ * @mem_addr: the address of the data within the memory region
+ * @data: a buffer to store the fetched data
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_read_data_word(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr, u32 *data)
+{
+	__be32 raw;
+	int ret;
+
+	ret = cs_dsp_read_raw_data_block(dsp, mem_type, mem_addr, 1, &raw);
+	if (ret < 0)
+		return ret;
+
+	*data = be32_to_cpu(raw) & 0x00ffffffu;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cs_dsp_read_data_word);
+
+/**
+ * cs_dsp_write_data_word() - Writes a word to DSP memory
+ * @dsp: pointer to DSP structure
+ * @mem_type: the type of DSP memory containing the data to be written
+ * @mem_addr: the address of the data within the memory region
+ * @data: the data to be written
+ *
+ * Return: Zero for success, a negative number on error.
+ */
+int cs_dsp_write_data_word(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr, u32 data)
+{
+	struct cs_dsp_region const *mem = cs_dsp_find_region(dsp, mem_type);
+	__be32 val = cpu_to_be32(data & 0x00ffffffu);
+	unsigned int reg;
+
+	if (!mem)
+		return -EINVAL;
+
+	reg = dsp->ops->region_to_reg(mem, mem_addr);
+
+	return regmap_raw_write(dsp->regmap, reg, &val, sizeof(val));
+}
+EXPORT_SYMBOL_GPL(cs_dsp_write_data_word);
+
+/**
+ * cs_dsp_remove_padding() - Convert unpacked words to packed bytes
+ * @buf: buffer containing DSP words read from DSP memory
+ * @nwords: number of words to convert
+ *
+ * DSP words from the register map have pad bytes and the data bytes
+ * are in swapped order. This swaps to the native endian order and
+ * strips the pad bytes.
+ */
+void cs_dsp_remove_padding(u32 *buf, int nwords)
+{
+	const __be32 *pack_in = (__be32 *)buf;
+	u8 *pack_out = (u8 *)buf;
+	int i;
+
+	for (i = 0; i < nwords; i++) {
+		u32 word = be32_to_cpu(*pack_in++);
+		*pack_out++ = (u8)word;
+		*pack_out++ = (u8)(word >> 8);
+		*pack_out++ = (u8)(word >> 16);
+	}
+}
+EXPORT_SYMBOL_GPL(cs_dsp_remove_padding);
+
+/**
+ * cs_dsp_adsp2_bus_error() - Handle a DSP bus error interrupt
+ * @dsp: pointer to DSP structure
+ *
+ * The firmware and DSP state will be logged for future analysis.
+ */
+void cs_dsp_adsp2_bus_error(struct cs_dsp *dsp)
+{
+	unsigned int val;
+	struct regmap *regmap = dsp->regmap;
+	int ret = 0;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val);
+	if (ret) {
+		cs_dsp_err(dsp,
+			   "Failed to read Region Lock Ctrl register: %d\n", ret);
+		goto error;
+	}
+
+	if (val & ADSP2_WDT_TIMEOUT_STS_MASK) {
+		cs_dsp_err(dsp, "watchdog timeout error\n");
+		dsp->ops->stop_watchdog(dsp);
+		if (dsp->client_ops->watchdog_expired)
+			dsp->client_ops->watchdog_expired(dsp);
+	}
+
+	if (val & (ADSP2_ADDR_ERR_MASK | ADSP2_REGION_LOCK_ERR_MASK)) {
+		if (val & ADSP2_ADDR_ERR_MASK)
+			cs_dsp_err(dsp, "bus error: address error\n");
+		else
+			cs_dsp_err(dsp, "bus error: region lock error\n");
+
+		ret = regmap_read(regmap, dsp->base + ADSP2_BUS_ERR_ADDR, &val);
+		if (ret) {
+			cs_dsp_err(dsp,
+				   "Failed to read Bus Err Addr register: %d\n",
+				   ret);
+			goto error;
+		}
+
+		cs_dsp_err(dsp, "bus error address = 0x%x\n",
+			   val & ADSP2_BUS_ERR_ADDR_MASK);
+
+		ret = regmap_read(regmap,
+				  dsp->base + ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR,
+				  &val);
+		if (ret) {
+			cs_dsp_err(dsp,
+				   "Failed to read Pmem Xmem Err Addr register: %d\n",
+				   ret);
+			goto error;
+		}
+
+		cs_dsp_err(dsp, "xmem error address = 0x%x\n",
+			   val & ADSP2_XMEM_ERR_ADDR_MASK);
+		cs_dsp_err(dsp, "pmem error address = 0x%x\n",
+			   (val & ADSP2_PMEM_ERR_ADDR_MASK) >>
+			   ADSP2_PMEM_ERR_ADDR_SHIFT);
+	}
+
+	regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL,
+			   ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT);
+
+error:
+	mutex_unlock(&dsp->pwr_lock);
+}
+EXPORT_SYMBOL_GPL(cs_dsp_adsp2_bus_error);
+
+/**
+ * cs_dsp_halo_bus_error() - Handle a DSP bus error interrupt
+ * @dsp: pointer to DSP structure
+ *
+ * The firmware and DSP state will be logged for future analysis.
+ */
+void cs_dsp_halo_bus_error(struct cs_dsp *dsp)
+{
+	struct regmap *regmap = dsp->regmap;
+	unsigned int fault[6];
+	struct reg_sequence clear[] = {
+		{ dsp->base + HALO_MPU_XM_VIO_STATUS,     0x0 },
+		{ dsp->base + HALO_MPU_YM_VIO_STATUS,     0x0 },
+		{ dsp->base + HALO_MPU_PM_VIO_STATUS,     0x0 },
+	};
+	int ret;
+
+	mutex_lock(&dsp->pwr_lock);
+
+	ret = regmap_read(regmap, dsp->base_sysinfo + HALO_AHBM_WINDOW_DEBUG_1,
+			  fault);
+	if (ret) {
+		cs_dsp_warn(dsp, "Failed to read AHB DEBUG_1: %d\n", ret);
+		goto exit_unlock;
+	}
+
+	cs_dsp_warn(dsp, "AHB: STATUS: 0x%x ADDR: 0x%x\n",
+		    *fault & HALO_AHBM_FLAGS_ERR_MASK,
+		    (*fault & HALO_AHBM_CORE_ERR_ADDR_MASK) >>
+		    HALO_AHBM_CORE_ERR_ADDR_SHIFT);
+
+	ret = regmap_read(regmap, dsp->base_sysinfo + HALO_AHBM_WINDOW_DEBUG_0,
+			  fault);
+	if (ret) {
+		cs_dsp_warn(dsp, "Failed to read AHB DEBUG_0: %d\n", ret);
+		goto exit_unlock;
+	}
+
+	cs_dsp_warn(dsp, "AHB: SYS_ADDR: 0x%x\n", *fault);
+
+	ret = regmap_bulk_read(regmap, dsp->base + HALO_MPU_XM_VIO_ADDR,
+			       fault, ARRAY_SIZE(fault));
+	if (ret) {
+		cs_dsp_warn(dsp, "Failed to read MPU fault info: %d\n", ret);
+		goto exit_unlock;
+	}
+
+	cs_dsp_warn(dsp, "XM: STATUS:0x%x ADDR:0x%x\n", fault[1], fault[0]);
+	cs_dsp_warn(dsp, "YM: STATUS:0x%x ADDR:0x%x\n", fault[3], fault[2]);
+	cs_dsp_warn(dsp, "PM: STATUS:0x%x ADDR:0x%x\n", fault[5], fault[4]);
+
+	ret = regmap_multi_reg_write(dsp->regmap, clear, ARRAY_SIZE(clear));
+	if (ret)
+		cs_dsp_warn(dsp, "Failed to clear MPU status: %d\n", ret);
+
+exit_unlock:
+	mutex_unlock(&dsp->pwr_lock);
+}
+EXPORT_SYMBOL_GPL(cs_dsp_halo_bus_error);
+
+/**
+ * cs_dsp_halo_wdt_expire() - Handle DSP watchdog expiry
+ * @dsp: pointer to DSP structure
+ *
+ * This is logged for future analysis.
+ */
+void cs_dsp_halo_wdt_expire(struct cs_dsp *dsp)
+{
+	mutex_lock(&dsp->pwr_lock);
+
+	cs_dsp_warn(dsp, "WDT Expiry Fault\n");
+
+	dsp->ops->stop_watchdog(dsp);
+	if (dsp->client_ops->watchdog_expired)
+		dsp->client_ops->watchdog_expired(dsp);
+
+	mutex_unlock(&dsp->pwr_lock);
+}
+EXPORT_SYMBOL_GPL(cs_dsp_halo_wdt_expire);
+
+static const struct cs_dsp_ops cs_dsp_adsp1_ops = {
+	.validate_version = cs_dsp_validate_version,
+	.parse_sizes = cs_dsp_adsp1_parse_sizes,
+	.region_to_reg = cs_dsp_region_to_reg,
+};
+
+static const struct cs_dsp_ops cs_dsp_adsp2_ops[] = {
+	{
+		.parse_sizes = cs_dsp_adsp2_parse_sizes,
+		.validate_version = cs_dsp_validate_version,
+		.setup_algs = cs_dsp_adsp2_setup_algs,
+		.region_to_reg = cs_dsp_region_to_reg,
+
+		.show_fw_status = cs_dsp_adsp2_show_fw_status,
+
+		.enable_memory = cs_dsp_adsp2_enable_memory,
+		.disable_memory = cs_dsp_adsp2_disable_memory,
+
+		.enable_core = cs_dsp_adsp2_enable_core,
+		.disable_core = cs_dsp_adsp2_disable_core,
+
+		.start_core = cs_dsp_adsp2_start_core,
+		.stop_core = cs_dsp_adsp2_stop_core,
+
+	},
+	{
+		.parse_sizes = cs_dsp_adsp2_parse_sizes,
+		.validate_version = cs_dsp_validate_version,
+		.setup_algs = cs_dsp_adsp2_setup_algs,
+		.region_to_reg = cs_dsp_region_to_reg,
+
+		.show_fw_status = cs_dsp_adsp2v2_show_fw_status,
+
+		.enable_memory = cs_dsp_adsp2_enable_memory,
+		.disable_memory = cs_dsp_adsp2_disable_memory,
+		.lock_memory = cs_dsp_adsp2_lock,
+
+		.enable_core = cs_dsp_adsp2v2_enable_core,
+		.disable_core = cs_dsp_adsp2v2_disable_core,
+
+		.start_core = cs_dsp_adsp2_start_core,
+		.stop_core = cs_dsp_adsp2_stop_core,
+	},
+	{
+		.parse_sizes = cs_dsp_adsp2_parse_sizes,
+		.validate_version = cs_dsp_validate_version,
+		.setup_algs = cs_dsp_adsp2_setup_algs,
+		.region_to_reg = cs_dsp_region_to_reg,
+
+		.show_fw_status = cs_dsp_adsp2v2_show_fw_status,
+		.stop_watchdog = cs_dsp_stop_watchdog,
+
+		.enable_memory = cs_dsp_adsp2_enable_memory,
+		.disable_memory = cs_dsp_adsp2_disable_memory,
+		.lock_memory = cs_dsp_adsp2_lock,
+
+		.enable_core = cs_dsp_adsp2v2_enable_core,
+		.disable_core = cs_dsp_adsp2v2_disable_core,
+
+		.start_core = cs_dsp_adsp2_start_core,
+		.stop_core = cs_dsp_adsp2_stop_core,
+	},
+};
+
+static const struct cs_dsp_ops cs_dsp_halo_ops = {
+	.parse_sizes = cs_dsp_adsp2_parse_sizes,
+	.validate_version = cs_dsp_halo_validate_version,
+	.setup_algs = cs_dsp_halo_setup_algs,
+	.region_to_reg = cs_dsp_halo_region_to_reg,
+
+	.show_fw_status = cs_dsp_halo_show_fw_status,
+	.stop_watchdog = cs_dsp_halo_stop_watchdog,
+
+	.lock_memory = cs_dsp_halo_configure_mpu,
+
+	.start_core = cs_dsp_halo_start_core,
+	.stop_core = cs_dsp_halo_stop_core,
+};
+
+MODULE_DESCRIPTION("Cirrus Logic DSP Support");
+MODULE_AUTHOR("Simon Trimmer <simont@opensource.cirrus.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/firmware/cirrus/cs_dsp.h b/include/linux/firmware/cirrus/cs_dsp.h
new file mode 100644
index 000000000000..9ad9eaaaa552
--- /dev/null
+++ b/include/linux/firmware/cirrus/cs_dsp.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * cs_dsp.h  --  Cirrus Logic DSP firmware support
+ *
+ * Based on sound/soc/codecs/wm_adsp.h
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ * Copyright (C) 2015-2021 Cirrus Logic, Inc. and
+ *                         Cirrus Logic International Semiconductor Ltd.
+ */
+#ifndef __CS_DSP_H
+#define __CS_DSP_H
+
+#define CS_ADSP2_REGION_0 BIT(0)
+#define CS_ADSP2_REGION_1 BIT(1)
+#define CS_ADSP2_REGION_2 BIT(2)
+#define CS_ADSP2_REGION_3 BIT(3)
+#define CS_ADSP2_REGION_4 BIT(4)
+#define CS_ADSP2_REGION_5 BIT(5)
+#define CS_ADSP2_REGION_6 BIT(6)
+#define CS_ADSP2_REGION_7 BIT(7)
+#define CS_ADSP2_REGION_8 BIT(8)
+#define CS_ADSP2_REGION_9 BIT(9)
+#define CS_ADSP2_REGION_1_9 (CS_ADSP2_REGION_1 | \
+		CS_ADSP2_REGION_2 | CS_ADSP2_REGION_3 | \
+		CS_ADSP2_REGION_4 | CS_ADSP2_REGION_5 | \
+		CS_ADSP2_REGION_6 | CS_ADSP2_REGION_7 | \
+		CS_ADSP2_REGION_8 | CS_ADSP2_REGION_9)
+#define CS_ADSP2_REGION_ALL (CS_ADSP2_REGION_0 | CS_ADSP2_REGION_1_9)
+
+#define CS_DSP_DATA_WORD_SIZE                3
+
+#define CS_DSP_ACKED_CTL_TIMEOUT_MS          100
+#define CS_DSP_ACKED_CTL_N_QUICKPOLLS        10
+#define CS_DSP_ACKED_CTL_MIN_VALUE           0
+#define CS_DSP_ACKED_CTL_MAX_VALUE           0xFFFFFF
+
+/**
+ * struct cs_dsp_region - Describes a logical memory region in DSP address space
+ * @type:	Memory region type
+ * @base:	Address of region
+ */
+struct cs_dsp_region {
+	int type;
+	unsigned int base;
+};
+
+/**
+ * struct cs_dsp_alg_region - Describes a logical algorithm region in DSP address space
+ * @list:	List node for internal use
+ * @alg:	Algorithm id
+ * @type:	Memory region type
+ * @base:	Address of region
+ */
+struct cs_dsp_alg_region {
+	struct list_head list;
+	unsigned int alg;
+	int type;
+	unsigned int base;
+};
+
+/**
+ * struct cs_dsp_coeff_ctl - Describes a coefficient control
+ * @fw_name:		Name of the firmware
+ * @subname:		Name of the control parsed from the WMFW
+ * @subname_len:	Length of subname
+ * @alg_region:		Logical region associated with this control
+ * @dsp:		DSP instance associated with this control
+ * @enabled:		Flag indicating whether control is enabled
+ * @list:		List node for internal use
+ * @cache:		Cached value of the control
+ * @offset:		Offset of control within alg_region
+ * @len:		Length of the cached value
+ * @set:		Flag indicating the value has been written by the user
+ * @flags:		Bitfield of WMFW_CTL_FLAG_ control flags defined in wmfw.h
+ * @type:		One of the WMFW_CTL_TYPE_ control types defined in wmfw.h
+ * @priv:		For use by the client
+ */
+struct cs_dsp_coeff_ctl {
+	const char *fw_name;
+	/* Subname is needed to match with firmware */
+	const char *subname;
+	unsigned int subname_len;
+	struct cs_dsp_alg_region alg_region;
+	struct cs_dsp *dsp;
+	unsigned int enabled:1;
+	struct list_head list;
+	void *cache;
+	unsigned int offset;
+	size_t len;
+	unsigned int set:1;
+	unsigned int flags;
+	unsigned int type;
+
+	void *priv;
+};
+
+struct cs_dsp_ops;
+struct cs_dsp_client_ops;
+
+/**
+ * struct cs_dsp - Configuration and state of a Cirrus Logic DSP
+ * @name:		The name of the DSP instance
+ * @rev:		Revision of the DSP
+ * @num:		DSP instance number
+ * @type:		Type of DSP
+ * @dev:		Driver model representation of the device
+ * @regmap:		Register map of the device
+ * @ops:		Function pointers for internal callbacks
+ * @client_ops:		Function pointers for client callbacks
+ * @base:		Address of the DSP registers
+ * @base_sysinfo:	Address of the sysinfo register (Halo only)
+ * @sysclk_reg:		Address of the sysclk register (ADSP1 only)
+ * @sysclk_mask:	Mask of frequency bits within sysclk register (ADSP1 only)
+ * @sysclk_shift:	Shift of frequency bits within sysclk register (ADSP1 only)
+ * @alg_regions:	List of currently loaded algorithm regions
+ * @fw_file_name:	Filename of the current firmware
+ * @fw_name:		Name of the current firmware
+ * @fw_id:		ID of the current firmware, obtained from the wmfw
+ * @fw_id_version:	Version of the firmware, obtained from the wmfw
+ * @fw_vendor_id:	Vendor of the firmware, obtained from the wmfw
+ * @mem:		DSP memory region descriptions
+ * @num_mems:		Number of memory regions in this DSP
+ * @fw_ver:		Version of the wmfw file format
+ * @booted:		Flag indicating DSP has been configured
+ * @running:		Flag indicating DSP is executing firmware
+ * @ctl_list:		Controls defined within the loaded DSP firmware
+ * @lock_regions:	Enable MPU traps on specified memory regions
+ * @pwr_lock:		Lock used to serialize accesses
+ * @debugfs_root:	Debugfs directory for this DSP instance
+ * @wmfw_file_name:	Filename of the currently loaded firmware
+ * @bin_file_name:	Filename of the currently loaded coefficients
+ */
+struct cs_dsp {
+	const char *name;
+	int rev;
+	int num;
+	int type;
+	struct device *dev;
+	struct regmap *regmap;
+
+	const struct cs_dsp_ops *ops;
+	const struct cs_dsp_client_ops *client_ops;
+
+	unsigned int base;
+	unsigned int base_sysinfo;
+	unsigned int sysclk_reg;
+	unsigned int sysclk_mask;
+	unsigned int sysclk_shift;
+
+	struct list_head alg_regions;
+
+	const char *fw_name;
+	unsigned int fw_id;
+	unsigned int fw_id_version;
+	unsigned int fw_vendor_id;
+
+	const struct cs_dsp_region *mem;
+	int num_mems;
+
+	int fw_ver;
+
+	bool booted;
+	bool running;
+
+	struct list_head ctl_list;
+
+	struct mutex pwr_lock;
+
+	unsigned int lock_regions;
+
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_root;
+	char *wmfw_file_name;
+	char *bin_file_name;
+#endif
+};
+
+/**
+ * struct cs_dsp_client_ops - client callbacks
+ * @control_add:	Called under the pwr_lock when a control is created
+ * @control_remove:	Called under the pwr_lock when a control is destroyed
+ * @post_run:		Called under the pwr_lock by cs_dsp_run()
+ * @post_stop:		Called under the pwr_lock by cs_dsp_stop()
+ * @watchdog_expired:	Called when a watchdog expiry is detected
+ *
+ * These callbacks give the cs_dsp client an opportunity to respond to events
+ * or to perform actions atomically.
+ */
+struct cs_dsp_client_ops {
+	int (*control_add)(struct cs_dsp_coeff_ctl *ctl);
+	void (*control_remove)(struct cs_dsp_coeff_ctl *ctl);
+	int (*post_run)(struct cs_dsp *dsp);
+	void (*post_stop)(struct cs_dsp *dsp);
+	void (*watchdog_expired)(struct cs_dsp *dsp);
+};
+
+int cs_dsp_adsp1_init(struct cs_dsp *dsp);
+int cs_dsp_adsp2_init(struct cs_dsp *dsp);
+int cs_dsp_halo_init(struct cs_dsp *dsp);
+
+int cs_dsp_adsp1_power_up(struct cs_dsp *dsp,
+			  const struct firmware *wmfw_firmware, char *wmfw_filename,
+			  const struct firmware *coeff_firmware, char *coeff_filename,
+			  const char *fw_name);
+void cs_dsp_adsp1_power_down(struct cs_dsp *dsp);
+int cs_dsp_power_up(struct cs_dsp *dsp,
+		    const struct firmware *wmfw_firmware, char *wmfw_filename,
+		    const struct firmware *coeff_firmware, char *coeff_filename,
+		    const char *fw_name);
+void cs_dsp_power_down(struct cs_dsp *dsp);
+int cs_dsp_run(struct cs_dsp *dsp);
+void cs_dsp_stop(struct cs_dsp *dsp);
+
+void cs_dsp_remove(struct cs_dsp *dsp);
+
+int cs_dsp_set_dspclk(struct cs_dsp *dsp, unsigned int freq);
+void cs_dsp_adsp2_bus_error(struct cs_dsp *dsp);
+void cs_dsp_halo_bus_error(struct cs_dsp *dsp);
+void cs_dsp_halo_wdt_expire(struct cs_dsp *dsp);
+
+void cs_dsp_init_debugfs(struct cs_dsp *dsp, struct dentry *debugfs_root);
+void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp);
+
+int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl, unsigned int event_id);
+int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, const void *buf, size_t len);
+int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, void *buf, size_t len);
+struct cs_dsp_coeff_ctl *cs_dsp_get_ctl(struct cs_dsp *dsp, const char *name, int type,
+					unsigned int alg);
+
+int cs_dsp_read_raw_data_block(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr,
+			       unsigned int num_words, __be32 *data);
+int cs_dsp_read_data_word(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr, u32 *data);
+int cs_dsp_write_data_word(struct cs_dsp *dsp, int mem_type, unsigned int mem_addr, u32 data);
+void cs_dsp_remove_padding(u32 *buf, int nwords);
+
+struct cs_dsp_alg_region *cs_dsp_find_alg_region(struct cs_dsp *dsp,
+						 int type, unsigned int id);
+
+const char *cs_dsp_mem_region_name(unsigned int type);
+
+#endif
diff --git a/include/linux/firmware/cirrus/wmfw.h b/include/linux/firmware/cirrus/wmfw.h
new file mode 100644
index 000000000000..a19bf7c6fc8b
--- /dev/null
+++ b/include/linux/firmware/cirrus/wmfw.h
@@ -0,0 +1,202 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * wmfw.h - Wolfson firmware format information
+ *
+ * Copyright 2012 Wolfson Microelectronics plc
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ */
+
+#ifndef __WMFW_H
+#define __WMFW_H
+
+#include <linux/types.h>
+
+#define WMFW_MAX_ALG_NAME         256
+#define WMFW_MAX_ALG_DESCR_NAME   256
+
+#define WMFW_MAX_COEFF_NAME       256
+#define WMFW_MAX_COEFF_DESCR_NAME 256
+
+#define WMFW_CTL_FLAG_SYS         0x8000
+#define WMFW_CTL_FLAG_VOLATILE    0x0004
+#define WMFW_CTL_FLAG_WRITEABLE   0x0002
+#define WMFW_CTL_FLAG_READABLE    0x0001
+
+#define WMFW_CTL_TYPE_BYTES       0x0004 /* byte control */
+
+/* Non-ALSA coefficient types start at 0x1000 */
+#define WMFW_CTL_TYPE_ACKED       0x1000 /* acked control */
+#define WMFW_CTL_TYPE_HOSTEVENT   0x1001 /* event control */
+#define WMFW_CTL_TYPE_HOST_BUFFER 0x1002 /* host buffer pointer */
+
+struct wmfw_header {
+	char magic[4];
+	__le32 len;
+	__le16 rev;
+	u8 core;
+	u8 ver;
+} __packed;
+
+struct wmfw_footer {
+	__le64 timestamp;
+	__le32 checksum;
+} __packed;
+
+struct wmfw_adsp1_sizes {
+	__le32 dm;
+	__le32 pm;
+	__le32 zm;
+} __packed;
+
+struct wmfw_adsp2_sizes {
+	__le32 xm;
+	__le32 ym;
+	__le32 pm;
+	__le32 zm;
+} __packed;
+
+struct wmfw_region {
+	union {
+		__be32 type;
+		__le32 offset;
+	};
+	__le32 len;
+	u8 data[];
+} __packed;
+
+struct wmfw_id_hdr {
+	__be32 core_id;
+	__be32 core_rev;
+	__be32 id;
+	__be32 ver;
+} __packed;
+
+struct wmfw_v3_id_hdr {
+	__be32 core_id;
+	__be32 block_rev;
+	__be32 vendor_id;
+	__be32 id;
+	__be32 ver;
+} __packed;
+
+struct wmfw_adsp1_id_hdr {
+	struct wmfw_id_hdr fw;
+	__be32 zm;
+	__be32 dm;
+	__be32 n_algs;
+} __packed;
+
+struct wmfw_adsp2_id_hdr {
+	struct wmfw_id_hdr fw;
+	__be32 zm;
+	__be32 xm;
+	__be32 ym;
+	__be32 n_algs;
+} __packed;
+
+struct wmfw_halo_id_hdr {
+	struct wmfw_v3_id_hdr fw;
+	__be32 xm_base;
+	__be32 xm_size;
+	__be32 ym_base;
+	__be32 ym_size;
+	__be32 n_algs;
+} __packed;
+
+struct wmfw_alg_hdr {
+	__be32 id;
+	__be32 ver;
+} __packed;
+
+struct wmfw_adsp1_alg_hdr {
+	struct wmfw_alg_hdr alg;
+	__be32 zm;
+	__be32 dm;
+} __packed;
+
+struct wmfw_adsp2_alg_hdr {
+	struct wmfw_alg_hdr alg;
+	__be32 zm;
+	__be32 xm;
+	__be32 ym;
+} __packed;
+
+struct wmfw_halo_alg_hdr {
+	struct wmfw_alg_hdr alg;
+	__be32 xm_base;
+	__be32 xm_size;
+	__be32 ym_base;
+	__be32 ym_size;
+} __packed;
+
+struct wmfw_adsp_alg_data {
+	__le32 id;
+	u8 name[WMFW_MAX_ALG_NAME];
+	u8 descr[WMFW_MAX_ALG_DESCR_NAME];
+	__le32 ncoeff;
+	u8 data[];
+} __packed;
+
+struct wmfw_adsp_coeff_data {
+	struct {
+		__le16 offset;
+		__le16 type;
+		__le32 size;
+	} hdr;
+	u8 name[WMFW_MAX_COEFF_NAME];
+	u8 descr[WMFW_MAX_COEFF_DESCR_NAME];
+	__le16 ctl_type;
+	__le16 flags;
+	__le32 len;
+	u8 data[];
+} __packed;
+
+struct wmfw_coeff_hdr {
+	u8 magic[4];
+	__le32 len;
+	union {
+		__be32 rev;
+		__le32 ver;
+	};
+	union {
+		__be32 core;
+		__le32 core_ver;
+	};
+	u8 data[];
+} __packed;
+
+struct wmfw_coeff_item {
+	__le16 offset;
+	__le16 type;
+	__le32 id;
+	__le32 ver;
+	__le32 sr;
+	__le32 len;
+	u8 data[];
+} __packed;
+
+#define WMFW_ADSP1 1
+#define WMFW_ADSP2 2
+#define WMFW_HALO 4
+
+#define WMFW_ABSOLUTE         0xf0
+#define WMFW_ALGORITHM_DATA   0xf2
+#define WMFW_METADATA         0xfc
+#define WMFW_NAME_TEXT        0xfe
+#define WMFW_INFO_TEXT        0xff
+
+#define WMFW_ADSP1_PM 2
+#define WMFW_ADSP1_DM 3
+#define WMFW_ADSP1_ZM 4
+
+#define WMFW_ADSP2_PM 2
+#define WMFW_ADSP2_ZM 4
+#define WMFW_ADSP2_XM 5
+#define WMFW_ADSP2_YM 6
+
+#define WMFW_HALO_PM_PACKED 0x10
+#define WMFW_HALO_XM_PACKED 0x11
+#define WMFW_HALO_YM_PACKED 0x12
+
+#endif
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index ab7ac5e0bd68..deda5ee02ebb 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -333,6 +333,7 @@ config SND_SOC_WM_HUBS
 
 config SND_SOC_WM_ADSP
 	tristate
+	select CS_DSP
 	select SND_SOC_COMPRESS
 	default y if SND_SOC_MADERA=y
 	default y if SND_SOC_CS47L24=y
diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c
index 6c5d55b3b311..f17c749c24c3 100644
--- a/sound/soc/codecs/wm_adsp.c
+++ b/sound/soc/codecs/wm_adsp.c
@@ -19,7 +19,6 @@
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
-#include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
 #include <sound/core.h>
@@ -44,15 +43,6 @@
 #define adsp_dbg(_dsp, fmt, ...) \
 	dev_dbg(_dsp->cs_dsp.dev, "%s: " fmt, _dsp->cs_dsp.name, ##__VA_ARGS__)
 
-#define cs_dsp_err(_dsp, fmt, ...) \
-	dev_err(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
-#define cs_dsp_warn(_dsp, fmt, ...) \
-	dev_warn(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
-#define cs_dsp_info(_dsp, fmt, ...) \
-	dev_info(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
-#define cs_dsp_dbg(_dsp, fmt, ...) \
-	dev_dbg(_dsp->dev, "%s: " fmt, _dsp->name, ##__VA_ARGS__)
-
 #define compr_err(_obj, fmt, ...) \
 	adsp_err(_obj->dsp, "%s: " fmt, _obj->name ? _obj->name : "legacy", \
 		 ##__VA_ARGS__)
@@ -60,305 +50,11 @@
 	adsp_dbg(_obj->dsp, "%s: " fmt, _obj->name ? _obj->name : "legacy", \
 		 ##__VA_ARGS__)
 
-#define ADSP1_CONTROL_1                   0x00
-#define ADSP1_CONTROL_2                   0x02
-#define ADSP1_CONTROL_3                   0x03
-#define ADSP1_CONTROL_4                   0x04
-#define ADSP1_CONTROL_5                   0x06
-#define ADSP1_CONTROL_6                   0x07
-#define ADSP1_CONTROL_7                   0x08
-#define ADSP1_CONTROL_8                   0x09
-#define ADSP1_CONTROL_9                   0x0A
-#define ADSP1_CONTROL_10                  0x0B
-#define ADSP1_CONTROL_11                  0x0C
-#define ADSP1_CONTROL_12                  0x0D
-#define ADSP1_CONTROL_13                  0x0F
-#define ADSP1_CONTROL_14                  0x10
-#define ADSP1_CONTROL_15                  0x11
-#define ADSP1_CONTROL_16                  0x12
-#define ADSP1_CONTROL_17                  0x13
-#define ADSP1_CONTROL_18                  0x14
-#define ADSP1_CONTROL_19                  0x16
-#define ADSP1_CONTROL_20                  0x17
-#define ADSP1_CONTROL_21                  0x18
-#define ADSP1_CONTROL_22                  0x1A
-#define ADSP1_CONTROL_23                  0x1B
-#define ADSP1_CONTROL_24                  0x1C
-#define ADSP1_CONTROL_25                  0x1E
-#define ADSP1_CONTROL_26                  0x20
-#define ADSP1_CONTROL_27                  0x21
-#define ADSP1_CONTROL_28                  0x22
-#define ADSP1_CONTROL_29                  0x23
-#define ADSP1_CONTROL_30                  0x24
-#define ADSP1_CONTROL_31                  0x26
-
-/*
- * ADSP1 Control 19
- */
-#define ADSP1_WDMA_BUFFER_LENGTH_MASK     0x00FF  /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */
-#define ADSP1_WDMA_BUFFER_LENGTH_SHIFT         0  /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */
-#define ADSP1_WDMA_BUFFER_LENGTH_WIDTH         8  /* DSP1_WDMA_BUFFER_LENGTH - [7:0] */
-
-
-/*
- * ADSP1 Control 30
- */
-#define ADSP1_DBG_CLK_ENA                 0x0008  /* DSP1_DBG_CLK_ENA */
-#define ADSP1_DBG_CLK_ENA_MASK            0x0008  /* DSP1_DBG_CLK_ENA */
-#define ADSP1_DBG_CLK_ENA_SHIFT                3  /* DSP1_DBG_CLK_ENA */
-#define ADSP1_DBG_CLK_ENA_WIDTH                1  /* DSP1_DBG_CLK_ENA */
-#define ADSP1_SYS_ENA                     0x0004  /* DSP1_SYS_ENA */
-#define ADSP1_SYS_ENA_MASK                0x0004  /* DSP1_SYS_ENA */
-#define ADSP1_SYS_ENA_SHIFT                    2  /* DSP1_SYS_ENA */
-#define ADSP1_SYS_ENA_WIDTH                    1  /* DSP1_SYS_ENA */
-#define ADSP1_CORE_ENA                    0x0002  /* DSP1_CORE_ENA */
-#define ADSP1_CORE_ENA_MASK               0x0002  /* DSP1_CORE_ENA */
-#define ADSP1_CORE_ENA_SHIFT                   1  /* DSP1_CORE_ENA */
-#define ADSP1_CORE_ENA_WIDTH                   1  /* DSP1_CORE_ENA */
-#define ADSP1_START                       0x0001  /* DSP1_START */
-#define ADSP1_START_MASK                  0x0001  /* DSP1_START */
-#define ADSP1_START_SHIFT                      0  /* DSP1_START */
-#define ADSP1_START_WIDTH                      1  /* DSP1_START */
-
-/*
- * ADSP1 Control 31
- */
-#define ADSP1_CLK_SEL_MASK                0x0007  /* CLK_SEL_ENA */
-#define ADSP1_CLK_SEL_SHIFT                    0  /* CLK_SEL_ENA */
-#define ADSP1_CLK_SEL_WIDTH                    3  /* CLK_SEL_ENA */
-
-#define ADSP2_CONTROL                     0x0
-#define ADSP2_CLOCKING                    0x1
-#define ADSP2V2_CLOCKING                  0x2
-#define ADSP2_STATUS1                     0x4
-#define ADSP2_WDMA_CONFIG_1               0x30
-#define ADSP2_WDMA_CONFIG_2               0x31
-#define ADSP2V2_WDMA_CONFIG_2             0x32
-#define ADSP2_RDMA_CONFIG_1               0x34
-
-#define ADSP2_SCRATCH0                    0x40
-#define ADSP2_SCRATCH1                    0x41
-#define ADSP2_SCRATCH2                    0x42
-#define ADSP2_SCRATCH3                    0x43
-
-#define ADSP2V2_SCRATCH0_1                0x40
-#define ADSP2V2_SCRATCH2_3                0x42
-
-/*
- * ADSP2 Control
- */
-
-#define ADSP2_MEM_ENA                     0x0010  /* DSP1_MEM_ENA */
-#define ADSP2_MEM_ENA_MASK                0x0010  /* DSP1_MEM_ENA */
-#define ADSP2_MEM_ENA_SHIFT                    4  /* DSP1_MEM_ENA */
-#define ADSP2_MEM_ENA_WIDTH                    1  /* DSP1_MEM_ENA */
-#define ADSP2_SYS_ENA                     0x0004  /* DSP1_SYS_ENA */
-#define ADSP2_SYS_ENA_MASK                0x0004  /* DSP1_SYS_ENA */
-#define ADSP2_SYS_ENA_SHIFT                    2  /* DSP1_SYS_ENA */
-#define ADSP2_SYS_ENA_WIDTH                    1  /* DSP1_SYS_ENA */
-#define ADSP2_CORE_ENA                    0x0002  /* DSP1_CORE_ENA */
-#define ADSP2_CORE_ENA_MASK               0x0002  /* DSP1_CORE_ENA */
-#define ADSP2_CORE_ENA_SHIFT                   1  /* DSP1_CORE_ENA */
-#define ADSP2_CORE_ENA_WIDTH                   1  /* DSP1_CORE_ENA */
-#define ADSP2_START                       0x0001  /* DSP1_START */
-#define ADSP2_START_MASK                  0x0001  /* DSP1_START */
-#define ADSP2_START_SHIFT                      0  /* DSP1_START */
-#define ADSP2_START_WIDTH                      1  /* DSP1_START */
-
-/*
- * ADSP2 clocking
- */
-#define ADSP2_CLK_SEL_MASK                0x0007  /* CLK_SEL_ENA */
-#define ADSP2_CLK_SEL_SHIFT                    0  /* CLK_SEL_ENA */
-#define ADSP2_CLK_SEL_WIDTH                    3  /* CLK_SEL_ENA */
-
-/*
- * ADSP2V2 clocking
- */
-#define ADSP2V2_CLK_SEL_MASK             0x70000  /* CLK_SEL_ENA */
-#define ADSP2V2_CLK_SEL_SHIFT                 16  /* CLK_SEL_ENA */
-#define ADSP2V2_CLK_SEL_WIDTH                  3  /* CLK_SEL_ENA */
-
-#define ADSP2V2_RATE_MASK                 0x7800  /* DSP_RATE */
-#define ADSP2V2_RATE_SHIFT                    11  /* DSP_RATE */
-#define ADSP2V2_RATE_WIDTH                     4  /* DSP_RATE */
-
-/*
- * ADSP2 Status 1
- */
-#define ADSP2_RAM_RDY                     0x0001
-#define ADSP2_RAM_RDY_MASK                0x0001
-#define ADSP2_RAM_RDY_SHIFT                    0
-#define ADSP2_RAM_RDY_WIDTH                    1
-
-/*
- * ADSP2 Lock support
- */
-#define ADSP2_LOCK_CODE_0                    0x5555
-#define ADSP2_LOCK_CODE_1                    0xAAAA
-
-#define ADSP2_WATCHDOG                       0x0A
-#define ADSP2_BUS_ERR_ADDR                   0x52
-#define ADSP2_REGION_LOCK_STATUS             0x64
-#define ADSP2_LOCK_REGION_1_LOCK_REGION_0    0x66
-#define ADSP2_LOCK_REGION_3_LOCK_REGION_2    0x68
-#define ADSP2_LOCK_REGION_5_LOCK_REGION_4    0x6A
-#define ADSP2_LOCK_REGION_7_LOCK_REGION_6    0x6C
-#define ADSP2_LOCK_REGION_9_LOCK_REGION_8    0x6E
-#define ADSP2_LOCK_REGION_CTRL               0x7A
-#define ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR    0x7C
-
-#define ADSP2_REGION_LOCK_ERR_MASK           0x8000
-#define ADSP2_ADDR_ERR_MASK                  0x4000
-#define ADSP2_WDT_TIMEOUT_STS_MASK           0x2000
-#define ADSP2_CTRL_ERR_PAUSE_ENA             0x0002
-#define ADSP2_CTRL_ERR_EINT                  0x0001
-
-#define ADSP2_BUS_ERR_ADDR_MASK              0x00FFFFFF
-#define ADSP2_XMEM_ERR_ADDR_MASK             0x0000FFFF
-#define ADSP2_PMEM_ERR_ADDR_MASK             0x7FFF0000
-#define ADSP2_PMEM_ERR_ADDR_SHIFT            16
-#define ADSP2_WDT_ENA_MASK                   0xFFFFFFFD
-
-#define ADSP2_LOCK_REGION_SHIFT              16
-
 #define ADSP_MAX_STD_CTRL_SIZE               512
 
-#define CS_DSP_ACKED_CTL_TIMEOUT_MS          100
-#define CS_DSP_ACKED_CTL_N_QUICKPOLLS        10
-#define CS_DSP_ACKED_CTL_MIN_VALUE           0
-#define CS_DSP_ACKED_CTL_MAX_VALUE           0xFFFFFF
-
-/*
- * Event control messages
- */
-#define CS_DSP_FW_EVENT_SHUTDOWN             0x000001
-
-/*
- * HALO system info
- */
-#define HALO_AHBM_WINDOW_DEBUG_0             0x02040
-#define HALO_AHBM_WINDOW_DEBUG_1             0x02044
-
-/*
- * HALO core
- */
-#define HALO_SCRATCH1                        0x005c0
-#define HALO_SCRATCH2                        0x005c8
-#define HALO_SCRATCH3                        0x005d0
-#define HALO_SCRATCH4                        0x005d8
-#define HALO_CCM_CORE_CONTROL                0x41000
-#define HALO_CORE_SOFT_RESET                 0x00010
-#define HALO_WDT_CONTROL                     0x47000
-
-/*
- * HALO MPU banks
- */
-#define HALO_MPU_XMEM_ACCESS_0               0x43000
-#define HALO_MPU_YMEM_ACCESS_0               0x43004
-#define HALO_MPU_WINDOW_ACCESS_0             0x43008
-#define HALO_MPU_XREG_ACCESS_0               0x4300C
-#define HALO_MPU_YREG_ACCESS_0               0x43014
-#define HALO_MPU_XMEM_ACCESS_1               0x43018
-#define HALO_MPU_YMEM_ACCESS_1               0x4301C
-#define HALO_MPU_WINDOW_ACCESS_1             0x43020
-#define HALO_MPU_XREG_ACCESS_1               0x43024
-#define HALO_MPU_YREG_ACCESS_1               0x4302C
-#define HALO_MPU_XMEM_ACCESS_2               0x43030
-#define HALO_MPU_YMEM_ACCESS_2               0x43034
-#define HALO_MPU_WINDOW_ACCESS_2             0x43038
-#define HALO_MPU_XREG_ACCESS_2               0x4303C
-#define HALO_MPU_YREG_ACCESS_2               0x43044
-#define HALO_MPU_XMEM_ACCESS_3               0x43048
-#define HALO_MPU_YMEM_ACCESS_3               0x4304C
-#define HALO_MPU_WINDOW_ACCESS_3             0x43050
-#define HALO_MPU_XREG_ACCESS_3               0x43054
-#define HALO_MPU_YREG_ACCESS_3               0x4305C
-#define HALO_MPU_XM_VIO_ADDR                 0x43100
-#define HALO_MPU_XM_VIO_STATUS               0x43104
-#define HALO_MPU_YM_VIO_ADDR                 0x43108
-#define HALO_MPU_YM_VIO_STATUS               0x4310C
-#define HALO_MPU_PM_VIO_ADDR                 0x43110
-#define HALO_MPU_PM_VIO_STATUS               0x43114
-#define HALO_MPU_LOCK_CONFIG                 0x43140
-
-/*
- * HALO_AHBM_WINDOW_DEBUG_1
- */
-#define HALO_AHBM_CORE_ERR_ADDR_MASK         0x0fffff00
-#define HALO_AHBM_CORE_ERR_ADDR_SHIFT                 8
-#define HALO_AHBM_FLAGS_ERR_MASK             0x000000ff
-
-/*
- * HALO_CCM_CORE_CONTROL
- */
-#define HALO_CORE_RESET                     0x00000200
-#define HALO_CORE_EN                        0x00000001
-
-/*
- * HALO_CORE_SOFT_RESET
- */
-#define HALO_CORE_SOFT_RESET_MASK           0x00000001
-
-/*
- * HALO_WDT_CONTROL
- */
-#define HALO_WDT_EN_MASK                    0x00000001
-
-/*
- * HALO_MPU_?M_VIO_STATUS
- */
-#define HALO_MPU_VIO_STS_MASK               0x007e0000
-#define HALO_MPU_VIO_STS_SHIFT                      17
-#define HALO_MPU_VIO_ERR_WR_MASK            0x00008000
-#define HALO_MPU_VIO_ERR_SRC_MASK           0x00007fff
-#define HALO_MPU_VIO_ERR_SRC_SHIFT                   0
-
-static const struct cs_dsp_ops cs_dsp_adsp1_ops;
-static const struct cs_dsp_ops cs_dsp_adsp2_ops[];
-static const struct cs_dsp_ops cs_dsp_halo_ops;
-
 static const struct cs_dsp_client_ops wm_adsp1_client_ops;
 static const struct cs_dsp_client_ops wm_adsp2_client_ops;
 
-struct cs_dsp_buf {
-	struct list_head list;
-	void *buf;
-};
-
-static struct cs_dsp_buf *cs_dsp_buf_alloc(const void *src, size_t len,
-					   struct list_head *list)
-{
-	struct cs_dsp_buf *buf = kzalloc(sizeof(*buf), GFP_KERNEL);
-
-	if (buf == NULL)
-		return NULL;
-
-	buf->buf = vmalloc(len);
-	if (!buf->buf) {
-		kfree(buf);
-		return NULL;
-	}
-	memcpy(buf->buf, src, len);
-
-	if (list)
-		list_add_tail(&buf->list, list);
-
-	return buf;
-}
-
-static void cs_dsp_buf_free(struct list_head *list)
-{
-	while (!list_empty(list)) {
-		struct cs_dsp_buf *buf = list_first_entry(list,
-							  struct cs_dsp_buf,
-							  list);
-		list_del(&buf->list);
-		vfree(buf->buf);
-		kfree(buf);
-	}
-}
-
 #define WM_ADSP_FW_MBC_VSS  0
 #define WM_ADSP_FW_HIFI     1
 #define WM_ADSP_FW_TX       2
@@ -483,8 +179,6 @@ struct wm_adsp_compr {
 	const char *name;
 };
 
-#define CS_DSP_DATA_WORD_SIZE          3
-
 #define WM_ADSP_MIN_FRAGMENTS          1
 #define WM_ADSP_MAX_FRAGMENTS          256
 #define WM_ADSP_MIN_FRAGMENT_SIZE      (64 * CS_DSP_DATA_WORD_SIZE)
@@ -616,166 +310,6 @@ struct wm_coeff_ctl {
 	struct work_struct work;
 };
 
-static const char *cs_dsp_mem_region_name(unsigned int type)
-{
-	switch (type) {
-	case WMFW_ADSP1_PM:
-		return "PM";
-	case WMFW_HALO_PM_PACKED:
-		return "PM_PACKED";
-	case WMFW_ADSP1_DM:
-		return "DM";
-	case WMFW_ADSP2_XM:
-		return "XM";
-	case WMFW_HALO_XM_PACKED:
-		return "XM_PACKED";
-	case WMFW_ADSP2_YM:
-		return "YM";
-	case WMFW_HALO_YM_PACKED:
-		return "YM_PACKED";
-	case WMFW_ADSP1_ZM:
-		return "ZM";
-	default:
-		return NULL;
-	}
-}
-
-#ifdef CONFIG_DEBUG_FS
-static void cs_dsp_debugfs_save_wmfwname(struct cs_dsp *dsp, const char *s)
-{
-	char *tmp = kasprintf(GFP_KERNEL, "%s\n", s);
-
-	kfree(dsp->wmfw_file_name);
-	dsp->wmfw_file_name = tmp;
-}
-
-static void cs_dsp_debugfs_save_binname(struct cs_dsp *dsp, const char *s)
-{
-	char *tmp = kasprintf(GFP_KERNEL, "%s\n", s);
-
-	kfree(dsp->bin_file_name);
-	dsp->bin_file_name = tmp;
-}
-
-static void cs_dsp_debugfs_clear(struct cs_dsp *dsp)
-{
-	kfree(dsp->wmfw_file_name);
-	kfree(dsp->bin_file_name);
-	dsp->wmfw_file_name = NULL;
-	dsp->bin_file_name = NULL;
-}
-
-static ssize_t cs_dsp_debugfs_wmfw_read(struct file *file,
-					char __user *user_buf,
-					size_t count, loff_t *ppos)
-{
-	struct cs_dsp *dsp = file->private_data;
-	ssize_t ret;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	if (!dsp->wmfw_file_name || !dsp->booted)
-		ret = 0;
-	else
-		ret = simple_read_from_buffer(user_buf, count, ppos,
-					      dsp->wmfw_file_name,
-					      strlen(dsp->wmfw_file_name));
-
-	mutex_unlock(&dsp->pwr_lock);
-	return ret;
-}
-
-static ssize_t cs_dsp_debugfs_bin_read(struct file *file,
-				       char __user *user_buf,
-				       size_t count, loff_t *ppos)
-{
-	struct cs_dsp *dsp = file->private_data;
-	ssize_t ret;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	if (!dsp->bin_file_name || !dsp->booted)
-		ret = 0;
-	else
-		ret = simple_read_from_buffer(user_buf, count, ppos,
-					      dsp->bin_file_name,
-					      strlen(dsp->bin_file_name));
-
-	mutex_unlock(&dsp->pwr_lock);
-	return ret;
-}
-
-static const struct {
-	const char *name;
-	const struct file_operations fops;
-} cs_dsp_debugfs_fops[] = {
-	{
-		.name = "wmfw_file_name",
-		.fops = {
-			.open = simple_open,
-			.read = cs_dsp_debugfs_wmfw_read,
-		},
-	},
-	{
-		.name = "bin_file_name",
-		.fops = {
-			.open = simple_open,
-			.read = cs_dsp_debugfs_bin_read,
-		},
-	},
-};
-
-static void cs_dsp_init_debugfs(struct cs_dsp *dsp,
-				struct dentry *debugfs_root)
-{
-	struct dentry *root = NULL;
-	int i;
-
-	root = debugfs_create_dir(dsp->name, debugfs_root);
-
-	debugfs_create_bool("booted", 0444, root, &dsp->booted);
-	debugfs_create_bool("running", 0444, root, &dsp->running);
-	debugfs_create_x32("fw_id", 0444, root, &dsp->fw_id);
-	debugfs_create_x32("fw_version", 0444, root, &dsp->fw_id_version);
-
-	for (i = 0; i < ARRAY_SIZE(cs_dsp_debugfs_fops); ++i)
-		debugfs_create_file(cs_dsp_debugfs_fops[i].name, 0444, root,
-				    dsp, &cs_dsp_debugfs_fops[i].fops);
-
-	dsp->debugfs_root = root;
-}
-
-static void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp)
-{
-	cs_dsp_debugfs_clear(dsp);
-	debugfs_remove_recursive(dsp->debugfs_root);
-	dsp->debugfs_root = NULL;
-}
-#else
-static inline void cs_dsp_init_debugfs(struct cs_dsp *dsp,
-				       struct dentry *debugfs_root)
-{
-}
-
-static inline void cs_dsp_cleanup_debugfs(struct cs_dsp *dsp)
-{
-}
-
-static inline void cs_dsp_debugfs_save_wmfwname(struct cs_dsp *dsp,
-						const char *s)
-{
-}
-
-static inline void cs_dsp_debugfs_save_binname(struct cs_dsp *dsp,
-					       const char *s)
-{
-}
-
-static inline void cs_dsp_debugfs_clear(struct cs_dsp *dsp)
-{
-}
-#endif
-
 int wm_adsp_fw_get(struct snd_kcontrol *kcontrol,
 		   struct snd_ctl_elem_value *ucontrol)
 {
@@ -827,126 +361,11 @@ const struct soc_enum wm_adsp_fw_enum[] = {
 };
 EXPORT_SYMBOL_GPL(wm_adsp_fw_enum);
 
-static const struct cs_dsp_region *cs_dsp_find_region(struct cs_dsp *dsp,
-						      int type)
-{
-	int i;
-
-	for (i = 0; i < dsp->num_mems; i++)
-		if (dsp->mem[i].type == type)
-			return &dsp->mem[i];
-
-	return NULL;
-}
-
-static unsigned int cs_dsp_region_to_reg(struct cs_dsp_region const *mem,
-					 unsigned int offset)
-{
-	switch (mem->type) {
-	case WMFW_ADSP1_PM:
-		return mem->base + (offset * 3);
-	case WMFW_ADSP1_DM:
-	case WMFW_ADSP2_XM:
-	case WMFW_ADSP2_YM:
-	case WMFW_ADSP1_ZM:
-		return mem->base + (offset * 2);
-	default:
-		WARN(1, "Unknown memory region type");
-		return offset;
-	}
-}
-
-static unsigned int cs_dsp_halo_region_to_reg(struct cs_dsp_region const *mem,
-					      unsigned int offset)
-{
-	switch (mem->type) {
-	case WMFW_ADSP2_XM:
-	case WMFW_ADSP2_YM:
-		return mem->base + (offset * 4);
-	case WMFW_HALO_XM_PACKED:
-	case WMFW_HALO_YM_PACKED:
-		return (mem->base + (offset * 3)) & ~0x3;
-	case WMFW_HALO_PM_PACKED:
-		return mem->base + (offset * 5);
-	default:
-		WARN(1, "Unknown memory region type");
-		return offset;
-	}
-}
-
-static void cs_dsp_read_fw_status(struct cs_dsp *dsp,
-				  int noffs, unsigned int *offs)
-{
-	unsigned int i;
-	int ret;
-
-	for (i = 0; i < noffs; ++i) {
-		ret = regmap_read(dsp->regmap, dsp->base + offs[i], &offs[i]);
-		if (ret) {
-			cs_dsp_err(dsp, "Failed to read SCRATCH%u: %d\n", i, ret);
-			return;
-		}
-	}
-}
-
-static void cs_dsp_adsp2_show_fw_status(struct cs_dsp *dsp)
-{
-	unsigned int offs[] = {
-		ADSP2_SCRATCH0, ADSP2_SCRATCH1, ADSP2_SCRATCH2, ADSP2_SCRATCH3,
-	};
-
-	cs_dsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs);
-
-	cs_dsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
-		   offs[0], offs[1], offs[2], offs[3]);
-}
-
-static void cs_dsp_adsp2v2_show_fw_status(struct cs_dsp *dsp)
-{
-	unsigned int offs[] = { ADSP2V2_SCRATCH0_1, ADSP2V2_SCRATCH2_3 };
-
-	cs_dsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs);
-
-	cs_dsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
-		   offs[0] & 0xFFFF, offs[0] >> 16,
-		   offs[1] & 0xFFFF, offs[1] >> 16);
-}
-
-static void cs_dsp_halo_show_fw_status(struct cs_dsp *dsp)
-{
-	unsigned int offs[] = {
-		HALO_SCRATCH1, HALO_SCRATCH2, HALO_SCRATCH3, HALO_SCRATCH4,
-	};
-
-	cs_dsp_read_fw_status(dsp, ARRAY_SIZE(offs), offs);
-
-	cs_dsp_dbg(dsp, "FW SCRATCH 0:0x%x 1:0x%x 2:0x%x 3:0x%x\n",
-		   offs[0], offs[1], offs[2], offs[3]);
-}
-
 static inline struct wm_coeff_ctl *bytes_ext_to_ctl(struct soc_bytes_ext *ext)
 {
 	return container_of(ext, struct wm_coeff_ctl, bytes_ext);
 }
 
-static int cs_dsp_coeff_base_reg(struct cs_dsp_coeff_ctl *ctl, unsigned int *reg)
-{
-	const struct cs_dsp_alg_region *alg_region = &ctl->alg_region;
-	struct cs_dsp *dsp = ctl->dsp;
-	const struct cs_dsp_region *mem;
-
-	mem = cs_dsp_find_region(dsp, alg_region->type);
-	if (!mem) {
-		cs_dsp_err(dsp, "No base for region %x\n",
-			   alg_region->type);
-		return -EINVAL;
-	}
-
-	*reg = dsp->ops->region_to_reg(mem, ctl->alg_region.base + ctl->offset);
-
-	return 0;
-}
-
 static int wm_coeff_info(struct snd_kcontrol *kctl,
 			 struct snd_ctl_elem_info *uinfo)
 {
@@ -972,117 +391,6 @@ static int wm_coeff_info(struct snd_kcontrol *kctl,
 	return 0;
 }
 
-static int cs_dsp_coeff_write_acked_control(struct cs_dsp_coeff_ctl *ctl,
-					    unsigned int event_id)
-{
-	struct cs_dsp *dsp = ctl->dsp;
-	__be32 val = cpu_to_be32(event_id);
-	unsigned int reg;
-	int i, ret;
-
-	if (!dsp->running)
-		return -EPERM;
-
-	ret = cs_dsp_coeff_base_reg(ctl, &reg);
-	if (ret)
-		return ret;
-
-	cs_dsp_dbg(dsp, "Sending 0x%x to acked control alg 0x%x %s:0x%x\n",
-		   event_id, ctl->alg_region.alg,
-		   cs_dsp_mem_region_name(ctl->alg_region.type), ctl->offset);
-
-	ret = regmap_raw_write(dsp->regmap, reg, &val, sizeof(val));
-	if (ret) {
-		cs_dsp_err(dsp, "Failed to write %x: %d\n", reg, ret);
-		return ret;
-	}
-
-	/*
-	 * Poll for ack, we initially poll at ~1ms intervals for firmwares
-	 * that respond quickly, then go to ~10ms polls. A firmware is unlikely
-	 * to ack instantly so we do the first 1ms delay before reading the
-	 * control to avoid a pointless bus transaction
-	 */
-	for (i = 0; i < CS_DSP_ACKED_CTL_TIMEOUT_MS;) {
-		switch (i) {
-		case 0 ... CS_DSP_ACKED_CTL_N_QUICKPOLLS - 1:
-			usleep_range(1000, 2000);
-			i++;
-			break;
-		default:
-			usleep_range(10000, 20000);
-			i += 10;
-			break;
-		}
-
-		ret = regmap_raw_read(dsp->regmap, reg, &val, sizeof(val));
-		if (ret) {
-			cs_dsp_err(dsp, "Failed to read %x: %d\n", reg, ret);
-			return ret;
-		}
-
-		if (val == 0) {
-			cs_dsp_dbg(dsp, "Acked control ACKED at poll %u\n", i);
-			return 0;
-		}
-	}
-
-	cs_dsp_warn(dsp, "Acked control @0x%x alg:0x%x %s:0x%x timed out\n",
-		    reg, ctl->alg_region.alg,
-		    cs_dsp_mem_region_name(ctl->alg_region.type),
-		    ctl->offset);
-
-	return -ETIMEDOUT;
-}
-
-static int cs_dsp_coeff_write_ctrl_raw(struct cs_dsp_coeff_ctl *ctl,
-				       const void *buf, size_t len)
-{
-	struct cs_dsp *dsp = ctl->dsp;
-	void *scratch;
-	int ret;
-	unsigned int reg;
-
-	ret = cs_dsp_coeff_base_reg(ctl, &reg);
-	if (ret)
-		return ret;
-
-	scratch = kmemdup(buf, len, GFP_KERNEL | GFP_DMA);
-	if (!scratch)
-		return -ENOMEM;
-
-	ret = regmap_raw_write(dsp->regmap, reg, scratch,
-			       len);
-	if (ret) {
-		cs_dsp_err(dsp, "Failed to write %zu bytes to %x: %d\n",
-			   len, reg, ret);
-		kfree(scratch);
-		return ret;
-	}
-	cs_dsp_dbg(dsp, "Wrote %zu bytes to %x\n", len, reg);
-
-	kfree(scratch);
-
-	return 0;
-}
-
-static int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl,
-				   const void *buf, size_t len)
-{
-	int ret = 0;
-
-	if (ctl->flags & WMFW_CTL_FLAG_VOLATILE)
-		ret = -EPERM;
-	else if (buf != ctl->cache)
-		memcpy(ctl->cache, buf, len);
-
-	ctl->set = 1;
-	if (ctl->enabled && ctl->dsp->running)
-		ret = cs_dsp_coeff_write_ctrl_raw(ctl, buf, len);
-
-	return ret;
-}
-
 static int wm_coeff_put(struct snd_kcontrol *kctl,
 			struct snd_ctl_elem_value *ucontrol)
 {
@@ -1146,65 +454,14 @@ static int wm_coeff_put_acked(struct snd_kcontrol *kctl,
 	return ret;
 }
 
-static int cs_dsp_coeff_read_ctrl_raw(struct cs_dsp_coeff_ctl *ctl,
-				      void *buf, size_t len)
+static int wm_coeff_get(struct snd_kcontrol *kctl,
+			struct snd_ctl_elem_value *ucontrol)
 {
-	struct cs_dsp *dsp = ctl->dsp;
-	void *scratch;
-	int ret;
-	unsigned int reg;
-
-	ret = cs_dsp_coeff_base_reg(ctl, &reg);
-	if (ret)
-		return ret;
-
-	scratch = kmalloc(len, GFP_KERNEL | GFP_DMA);
-	if (!scratch)
-		return -ENOMEM;
-
-	ret = regmap_raw_read(dsp->regmap, reg, scratch, len);
-	if (ret) {
-		cs_dsp_err(dsp, "Failed to read %zu bytes from %x: %d\n",
-			   len, reg, ret);
-		kfree(scratch);
-		return ret;
-	}
-	cs_dsp_dbg(dsp, "Read %zu bytes from %x\n", len, reg);
-
-	memcpy(buf, scratch, len);
-	kfree(scratch);
-
-	return 0;
-}
-
-static int cs_dsp_coeff_read_ctrl(struct cs_dsp_coeff_ctl *ctl, void *buf, size_t len)
-{
-	int ret = 0;
-
-	if (ctl->flags & WMFW_CTL_FLAG_VOLATILE) {
-		if (ctl->enabled && ctl->dsp->running)
-			return cs_dsp_coeff_read_ctrl_raw(ctl, buf, len);
-		else
-			return -EPERM;
-	} else {
-		if (!ctl->flags && ctl->enabled && ctl->dsp->running)
-			ret = cs_dsp_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len);
-
-		if (buf != ctl->cache)
-			memcpy(buf, ctl->cache, len);
-	}
-
-	return ret;
-}
-
-static int wm_coeff_get(struct snd_kcontrol *kctl,
-			struct snd_ctl_elem_value *ucontrol)
-{
-	struct soc_bytes_ext *bytes_ext =
-		(struct soc_bytes_ext *)kctl->private_value;
-	struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
-	struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl;
-	char *p = ucontrol->value.bytes.data;
+	struct soc_bytes_ext *bytes_ext =
+		(struct soc_bytes_ext *)kctl->private_value;
+	struct wm_coeff_ctl *ctl = bytes_ext_to_ctl(bytes_ext);
+	struct cs_dsp_coeff_ctl *cs_ctl = ctl->cs_ctl;
+	char *p = ucontrol->value.bytes.data;
 	int ret;
 
 	mutex_lock(&cs_ctl->dsp->pwr_lock);
@@ -1328,72 +585,6 @@ err_kcontrol:
 	return ret;
 }
 
-static int cs_dsp_coeff_init_control_caches(struct cs_dsp *dsp)
-{
-	struct cs_dsp_coeff_ctl *ctl;
-	int ret;
-
-	list_for_each_entry(ctl, &dsp->ctl_list, list) {
-		if (!ctl->enabled || ctl->set)
-			continue;
-		if (ctl->flags & WMFW_CTL_FLAG_VOLATILE)
-			continue;
-
-		/*
-		 * For readable controls populate the cache from the DSP memory.
-		 * For non-readable controls the cache was zero-filled when
-		 * created so we don't need to do anything.
-		 */
-		if (!ctl->flags || (ctl->flags & WMFW_CTL_FLAG_READABLE)) {
-			ret = cs_dsp_coeff_read_ctrl_raw(ctl, ctl->cache, ctl->len);
-			if (ret < 0)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
-static int cs_dsp_coeff_sync_controls(struct cs_dsp *dsp)
-{
-	struct cs_dsp_coeff_ctl *ctl;
-	int ret;
-
-	list_for_each_entry(ctl, &dsp->ctl_list, list) {
-		if (!ctl->enabled)
-			continue;
-		if (ctl->set && !(ctl->flags & WMFW_CTL_FLAG_VOLATILE)) {
-			ret = cs_dsp_coeff_write_ctrl_raw(ctl, ctl->cache,
-							  ctl->len);
-			if (ret < 0)
-				return ret;
-		}
-	}
-
-	return 0;
-}
-
-static void cs_dsp_signal_event_controls(struct cs_dsp *dsp,
-					 unsigned int event)
-{
-	struct cs_dsp_coeff_ctl *ctl;
-	int ret;
-
-	list_for_each_entry(ctl, &dsp->ctl_list, list) {
-		if (ctl->type != WMFW_CTL_TYPE_HOSTEVENT)
-			continue;
-
-		if (!ctl->enabled)
-			continue;
-
-		ret = cs_dsp_coeff_write_acked_control(ctl, event);
-		if (ret)
-			cs_dsp_warn(dsp,
-				    "Failed to send 0x%x event to alg 0x%x (%d)\n",
-				    event, ctl->alg_region.alg, ret);
-	}
-}
-
 static void wm_adsp_ctl_work(struct work_struct *work)
 {
 	struct wm_coeff_ctl *ctl = container_of(work,
@@ -1406,13 +597,6 @@ static void wm_adsp_ctl_work(struct work_struct *work)
 	wmfw_add_ctl(dsp, ctl);
 }
 
-static void cs_dsp_free_ctl_blk(struct cs_dsp_coeff_ctl *ctl)
-{
-	kfree(ctl->cache);
-	kfree(ctl->subname);
-	kfree(ctl);
-}
-
 static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl)
 {
 	struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp);
@@ -1498,315 +682,66 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl)
 	kfree(ctl);
 }
 
-static int cs_dsp_create_control(struct cs_dsp *dsp,
-				 const struct cs_dsp_alg_region *alg_region,
-				 unsigned int offset, unsigned int len,
-				 const char *subname, unsigned int subname_len,
-				 unsigned int flags, unsigned int type)
+int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
+		      unsigned int alg, void *buf, size_t len)
 {
-	struct cs_dsp_coeff_ctl *ctl;
+	struct cs_dsp_coeff_ctl *cs_ctl;
+	struct wm_coeff_ctl *ctl;
+	struct snd_kcontrol *kcontrol;
+	char ctl_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
 	int ret;
 
-	list_for_each_entry(ctl, &dsp->ctl_list, list) {
-		if (ctl->fw_name == dsp->fw_name &&
-		    ctl->alg_region.alg == alg_region->alg &&
-		    ctl->alg_region.type == alg_region->type) {
-			if ((!subname && !ctl->subname) ||
-			    (subname && !strncmp(ctl->subname, subname, ctl->subname_len))) {
-				if (!ctl->enabled)
-					ctl->enabled = 1;
-				return 0;
-			}
-		}
-	}
-
-	ctl = kzalloc(sizeof(*ctl), GFP_KERNEL);
-	if (!ctl)
-		return -ENOMEM;
-
-	ctl->fw_name = dsp->fw_name;
-	ctl->alg_region = *alg_region;
-	if (subname && dsp->fw_ver >= 2) {
-		ctl->subname_len = subname_len;
-		ctl->subname = kmemdup(subname,
-				       strlen(subname) + 1, GFP_KERNEL);
-		if (!ctl->subname) {
-			ret = -ENOMEM;
-			goto err_ctl;
-		}
-	}
-	ctl->enabled = 1;
-	ctl->set = 0;
-	ctl->dsp = dsp;
-
-	ctl->flags = flags;
-	ctl->type = type;
-	ctl->offset = offset;
-	ctl->len = len;
-	ctl->cache = kzalloc(ctl->len, GFP_KERNEL);
-	if (!ctl->cache) {
-		ret = -ENOMEM;
-		goto err_ctl_subname;
-	}
-
-	list_add(&ctl->list, &dsp->ctl_list);
-
-	if (dsp->client_ops->control_add) {
-		ret = dsp->client_ops->control_add(ctl);
-		if (ret)
-			goto err_list_del;
-	}
-
-	return 0;
-
-err_list_del:
-	list_del(&ctl->list);
-	kfree(ctl->cache);
-err_ctl_subname:
-	kfree(ctl->subname);
-err_ctl:
-	kfree(ctl);
-
-	return ret;
-}
+	cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+	if (!cs_ctl)
+		return -EINVAL;
 
-struct cs_dsp_coeff_parsed_alg {
-	int id;
-	const u8 *name;
-	int name_len;
-	int ncoeff;
-};
+	ctl = cs_ctl->priv;
 
-struct cs_dsp_coeff_parsed_coeff {
-	int offset;
-	int mem_type;
-	const u8 *name;
-	int name_len;
-	unsigned int ctl_type;
-	int flags;
-	int len;
-};
+	if (len > cs_ctl->len)
+		return -EINVAL;
 
-static int cs_dsp_coeff_parse_string(int bytes, const u8 **pos, const u8 **str)
-{
-	int length;
+	ret = cs_dsp_coeff_write_ctrl(cs_ctl, buf, len);
+	if (ret)
+		return ret;
 
-	switch (bytes) {
-	case 1:
-		length = **pos;
-		break;
-	case 2:
-		length = le16_to_cpu(*((__le16 *)*pos));
-		break;
-	default:
+	if (cs_ctl->flags & WMFW_CTL_FLAG_SYS)
 		return 0;
-	}
-
-	if (str)
-		*str = *pos + bytes;
-
-	*pos += ((length + bytes) + 3) & ~0x03;
-
-	return length;
-}
-
-static int cs_dsp_coeff_parse_int(int bytes, const u8 **pos)
-{
-	int val = 0;
-
-	switch (bytes) {
-	case 2:
-		val = le16_to_cpu(*((__le16 *)*pos));
-		break;
-	case 4:
-		val = le32_to_cpu(*((__le32 *)*pos));
-		break;
-	default:
-		break;
-	}
-
-	*pos += bytes;
-
-	return val;
-}
-
-static inline void cs_dsp_coeff_parse_alg(struct cs_dsp *dsp, const u8 **data,
-					  struct cs_dsp_coeff_parsed_alg *blk)
-{
-	const struct wmfw_adsp_alg_data *raw;
-
-	switch (dsp->fw_ver) {
-	case 0:
-	case 1:
-		raw = (const struct wmfw_adsp_alg_data *)*data;
-		*data = raw->data;
-
-		blk->id = le32_to_cpu(raw->id);
-		blk->name = raw->name;
-		blk->name_len = strlen(raw->name);
-		blk->ncoeff = le32_to_cpu(raw->ncoeff);
-		break;
-	default:
-		blk->id = cs_dsp_coeff_parse_int(sizeof(raw->id), data);
-		blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), data,
-							  &blk->name);
-		cs_dsp_coeff_parse_string(sizeof(u16), data, NULL);
-		blk->ncoeff = cs_dsp_coeff_parse_int(sizeof(raw->ncoeff), data);
-		break;
-	}
-
-	cs_dsp_dbg(dsp, "Algorithm ID: %#x\n", blk->id);
-	cs_dsp_dbg(dsp, "Algorithm name: %.*s\n", blk->name_len, blk->name);
-	cs_dsp_dbg(dsp, "# of coefficient descriptors: %#x\n", blk->ncoeff);
-}
-
-static inline void cs_dsp_coeff_parse_coeff(struct cs_dsp *dsp, const u8 **data,
-					    struct cs_dsp_coeff_parsed_coeff *blk)
-{
-	const struct wmfw_adsp_coeff_data *raw;
-	const u8 *tmp;
-	int length;
-
-	switch (dsp->fw_ver) {
-	case 0:
-	case 1:
-		raw = (const struct wmfw_adsp_coeff_data *)*data;
-		*data = *data + sizeof(raw->hdr) + le32_to_cpu(raw->hdr.size);
-
-		blk->offset = le16_to_cpu(raw->hdr.offset);
-		blk->mem_type = le16_to_cpu(raw->hdr.type);
-		blk->name = raw->name;
-		blk->name_len = strlen(raw->name);
-		blk->ctl_type = le16_to_cpu(raw->ctl_type);
-		blk->flags = le16_to_cpu(raw->flags);
-		blk->len = le32_to_cpu(raw->len);
-		break;
-	default:
-		tmp = *data;
-		blk->offset = cs_dsp_coeff_parse_int(sizeof(raw->hdr.offset), &tmp);
-		blk->mem_type = cs_dsp_coeff_parse_int(sizeof(raw->hdr.type), &tmp);
-		length = cs_dsp_coeff_parse_int(sizeof(raw->hdr.size), &tmp);
-		blk->name_len = cs_dsp_coeff_parse_string(sizeof(u8), &tmp,
-							  &blk->name);
-		cs_dsp_coeff_parse_string(sizeof(u8), &tmp, NULL);
-		cs_dsp_coeff_parse_string(sizeof(u16), &tmp, NULL);
-		blk->ctl_type = cs_dsp_coeff_parse_int(sizeof(raw->ctl_type), &tmp);
-		blk->flags = cs_dsp_coeff_parse_int(sizeof(raw->flags), &tmp);
-		blk->len = cs_dsp_coeff_parse_int(sizeof(raw->len), &tmp);
-
-		*data = *data + sizeof(raw->hdr) + length;
-		break;
-	}
 
-	cs_dsp_dbg(dsp, "\tCoefficient type: %#x\n", blk->mem_type);
-	cs_dsp_dbg(dsp, "\tCoefficient offset: %#x\n", blk->offset);
-	cs_dsp_dbg(dsp, "\tCoefficient name: %.*s\n", blk->name_len, blk->name);
-	cs_dsp_dbg(dsp, "\tCoefficient flags: %#x\n", blk->flags);
-	cs_dsp_dbg(dsp, "\tALSA control type: %#x\n", blk->ctl_type);
-	cs_dsp_dbg(dsp, "\tALSA control len: %#x\n", blk->len);
-}
+	if (dsp->component->name_prefix)
+		snprintf(ctl_name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s",
+			 dsp->component->name_prefix, ctl->name);
+	else
+		snprintf(ctl_name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s",
+			 ctl->name);
 
-static int cs_dsp_check_coeff_flags(struct cs_dsp *dsp,
-				    const struct cs_dsp_coeff_parsed_coeff *coeff_blk,
-				    unsigned int f_required,
-				    unsigned int f_illegal)
-{
-	if ((coeff_blk->flags & f_illegal) ||
-	    ((coeff_blk->flags & f_required) != f_required)) {
-		cs_dsp_err(dsp, "Illegal flags 0x%x for control type 0x%x\n",
-			   coeff_blk->flags, coeff_blk->ctl_type);
+	kcontrol = snd_soc_card_get_kcontrol(dsp->component->card, ctl_name);
+	if (!kcontrol) {
+		adsp_err(dsp, "Can't find kcontrol %s\n", ctl_name);
 		return -EINVAL;
 	}
 
-	return 0;
-}
-
-static int cs_dsp_parse_coeff(struct cs_dsp *dsp,
-			      const struct wmfw_region *region)
-{
-	struct cs_dsp_alg_region alg_region = {};
-	struct cs_dsp_coeff_parsed_alg alg_blk;
-	struct cs_dsp_coeff_parsed_coeff coeff_blk;
-	const u8 *data = region->data;
-	int i, ret;
-
-	cs_dsp_coeff_parse_alg(dsp, &data, &alg_blk);
-	for (i = 0; i < alg_blk.ncoeff; i++) {
-		cs_dsp_coeff_parse_coeff(dsp, &data, &coeff_blk);
-
-		switch (coeff_blk.ctl_type) {
-		case WMFW_CTL_TYPE_BYTES:
-			break;
-		case WMFW_CTL_TYPE_ACKED:
-			if (coeff_blk.flags & WMFW_CTL_FLAG_SYS)
-				continue;	/* ignore */
-
-			ret = cs_dsp_check_coeff_flags(dsp, &coeff_blk,
-						       WMFW_CTL_FLAG_VOLATILE |
-						       WMFW_CTL_FLAG_WRITEABLE |
-						       WMFW_CTL_FLAG_READABLE,
-						       0);
-			if (ret)
-				return -EINVAL;
-			break;
-		case WMFW_CTL_TYPE_HOSTEVENT:
-			ret = cs_dsp_check_coeff_flags(dsp, &coeff_blk,
-						       WMFW_CTL_FLAG_SYS |
-						       WMFW_CTL_FLAG_VOLATILE |
-						       WMFW_CTL_FLAG_WRITEABLE |
-						       WMFW_CTL_FLAG_READABLE,
-						       0);
-			if (ret)
-				return -EINVAL;
-			break;
-		case WMFW_CTL_TYPE_HOST_BUFFER:
-			ret = cs_dsp_check_coeff_flags(dsp, &coeff_blk,
-						       WMFW_CTL_FLAG_SYS |
-						       WMFW_CTL_FLAG_VOLATILE |
-						       WMFW_CTL_FLAG_READABLE,
-						       0);
-			if (ret)
-				return -EINVAL;
-			break;
-		default:
-			cs_dsp_err(dsp, "Unknown control type: %d\n",
-				   coeff_blk.ctl_type);
-			return -EINVAL;
-		}
-
-		alg_region.type = coeff_blk.mem_type;
-		alg_region.alg = alg_blk.id;
-
-		ret = cs_dsp_create_control(dsp, &alg_region,
-					    coeff_blk.offset,
-					    coeff_blk.len,
-					    coeff_blk.name,
-					    coeff_blk.name_len,
-					    coeff_blk.flags,
-					    coeff_blk.ctl_type);
-		if (ret < 0)
-			cs_dsp_err(dsp, "Failed to create control: %.*s, %d\n",
-				   coeff_blk.name_len, coeff_blk.name, ret);
-	}
+	snd_ctl_notify(dsp->component->card->snd_card,
+		       SNDRV_CTL_EVENT_MASK_VALUE, &kcontrol->id);
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(wm_adsp_write_ctl);
 
-static unsigned int cs_dsp_adsp1_parse_sizes(struct cs_dsp *dsp,
-					     const char * const file,
-					     unsigned int pos,
-					     const struct firmware *firmware)
+int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type,
+		     unsigned int alg, void *buf, size_t len)
 {
-	const struct wmfw_adsp1_sizes *adsp1_sizes;
+	struct cs_dsp_coeff_ctl *cs_ctl;
 
-	adsp1_sizes = (void *)&firmware->data[pos];
+	cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+	if (!cs_ctl)
+		return -EINVAL;
 
-	cs_dsp_dbg(dsp, "%s: %d DM, %d PM, %d ZM\n", file,
-		   le32_to_cpu(adsp1_sizes->dm), le32_to_cpu(adsp1_sizes->pm),
-		   le32_to_cpu(adsp1_sizes->zm));
+	if (len > cs_ctl->len)
+		return -EINVAL;
 
-	return pos + sizeof(*adsp1_sizes);
+	return cs_dsp_coeff_read_ctrl(cs_ctl, buf, len);
 }
+EXPORT_SYMBOL_GPL(wm_adsp_read_ctl);
 
 static void wm_adsp_release_firmware_files(struct wm_adsp *dsp,
 					   const struct firmware *wmfw_firmware,
@@ -1863,1314 +798,82 @@ static int wm_adsp_request_firmware_files(struct wm_adsp *dsp,
 	return 0;
 }
 
-static unsigned int cs_dsp_adsp2_parse_sizes(struct cs_dsp *dsp,
-					     const char * const file,
-					     unsigned int pos,
-					     const struct firmware *firmware)
+static int wm_adsp_common_init(struct wm_adsp *dsp)
 {
-	const struct wmfw_adsp2_sizes *adsp2_sizes;
-
-	adsp2_sizes = (void *)&firmware->data[pos];
+	char *p;
 
-	cs_dsp_dbg(dsp, "%s: %d XM, %d YM %d PM, %d ZM\n", file,
-		   le32_to_cpu(adsp2_sizes->xm), le32_to_cpu(adsp2_sizes->ym),
-		   le32_to_cpu(adsp2_sizes->pm), le32_to_cpu(adsp2_sizes->zm));
+	INIT_LIST_HEAD(&dsp->compr_list);
+	INIT_LIST_HEAD(&dsp->buffer_list);
 
-	return pos + sizeof(*adsp2_sizes);
-}
+	if (!dsp->fwf_name) {
+		p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
 
-static bool cs_dsp_validate_version(struct cs_dsp *dsp, unsigned int version)
-{
-	switch (version) {
-	case 0:
-		cs_dsp_warn(dsp, "Deprecated file format %d\n", version);
-		return true;
-	case 1:
-	case 2:
-		return true;
-	default:
-		return false;
+		dsp->fwf_name = p;
+		for (; *p != 0; ++p)
+			*p = tolower(*p);
 	}
-}
 
-static bool cs_dsp_halo_validate_version(struct cs_dsp *dsp, unsigned int version)
-{
-	switch (version) {
-	case 3:
-		return true;
-	default:
-		return false;
-	}
+	return 0;
 }
 
-static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware,
-		       const char *file)
+int wm_adsp1_init(struct wm_adsp *dsp)
 {
-	LIST_HEAD(buf_list);
-	struct regmap *regmap = dsp->regmap;
-	unsigned int pos = 0;
-	const struct wmfw_header *header;
-	const struct wmfw_adsp1_sizes *adsp1_sizes;
-	const struct wmfw_footer *footer;
-	const struct wmfw_region *region;
-	const struct cs_dsp_region *mem;
-	const char *region_name;
-	char *text = NULL;
-	struct cs_dsp_buf *buf;
-	unsigned int reg;
-	int regions = 0;
-	int ret, offset, type;
-
-	ret = -EINVAL;
-
-	pos = sizeof(*header) + sizeof(*adsp1_sizes) + sizeof(*footer);
-	if (pos >= firmware->size) {
-		cs_dsp_err(dsp, "%s: file too short, %zu bytes\n",
-			   file, firmware->size);
-		goto out_fw;
-	}
-
-	header = (void *)&firmware->data[0];
-
-	if (memcmp(&header->magic[0], "WMFW", 4) != 0) {
-		cs_dsp_err(dsp, "%s: invalid magic\n", file);
-		goto out_fw;
-	}
-
-	if (!dsp->ops->validate_version(dsp, header->ver)) {
-		cs_dsp_err(dsp, "%s: unknown file format %d\n",
-			   file, header->ver);
-		goto out_fw;
-	}
-
-	cs_dsp_info(dsp, "Firmware version: %d\n", header->ver);
-	dsp->fw_ver = header->ver;
+	int ret;
 
-	if (header->core != dsp->type) {
-		cs_dsp_err(dsp, "%s: invalid core %d != %d\n",
-			   file, header->core, dsp->type);
-		goto out_fw;
-	}
+	dsp->cs_dsp.client_ops = &wm_adsp1_client_ops;
 
-	pos = sizeof(*header);
-	pos = dsp->ops->parse_sizes(dsp, file, pos, firmware);
+	ret = cs_dsp_adsp1_init(&dsp->cs_dsp);
+	if (ret)
+		return ret;
 
-	footer = (void *)&firmware->data[pos];
-	pos += sizeof(*footer);
+	return wm_adsp_common_init(dsp);
+}
+EXPORT_SYMBOL_GPL(wm_adsp1_init);
 
-	if (le32_to_cpu(header->len) != pos) {
-		cs_dsp_err(dsp, "%s: unexpected header length %d\n",
-			   file, le32_to_cpu(header->len));
-		goto out_fw;
-	}
+int wm_adsp1_event(struct snd_soc_dapm_widget *w,
+		   struct snd_kcontrol *kcontrol,
+		   int event)
+{
+	struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
+	struct wm_adsp *dsps = snd_soc_component_get_drvdata(component);
+	struct wm_adsp *dsp = &dsps[w->shift];
+	int ret = 0;
+	char *wmfw_filename = NULL;
+	const struct firmware *wmfw_firmware = NULL;
+	char *coeff_filename = NULL;
+	const struct firmware *coeff_firmware = NULL;
 
-	cs_dsp_dbg(dsp, "%s: timestamp %llu\n", file,
-		   le64_to_cpu(footer->timestamp));
-
-	while (pos < firmware->size &&
-	       sizeof(*region) < firmware->size - pos) {
-		region = (void *)&(firmware->data[pos]);
-		region_name = "Unknown";
-		reg = 0;
-		text = NULL;
-		offset = le32_to_cpu(region->offset) & 0xffffff;
-		type = be32_to_cpu(region->type) & 0xff;
-
-		switch (type) {
-		case WMFW_NAME_TEXT:
-			region_name = "Firmware name";
-			text = kzalloc(le32_to_cpu(region->len) + 1,
-				       GFP_KERNEL);
-			break;
-		case WMFW_ALGORITHM_DATA:
-			region_name = "Algorithm";
-			ret = cs_dsp_parse_coeff(dsp, region);
-			if (ret != 0)
-				goto out_fw;
-			break;
-		case WMFW_INFO_TEXT:
-			region_name = "Information";
-			text = kzalloc(le32_to_cpu(region->len) + 1,
-				       GFP_KERNEL);
-			break;
-		case WMFW_ABSOLUTE:
-			region_name = "Absolute";
-			reg = offset;
-			break;
-		case WMFW_ADSP1_PM:
-		case WMFW_ADSP1_DM:
-		case WMFW_ADSP2_XM:
-		case WMFW_ADSP2_YM:
-		case WMFW_ADSP1_ZM:
-		case WMFW_HALO_PM_PACKED:
-		case WMFW_HALO_XM_PACKED:
-		case WMFW_HALO_YM_PACKED:
-			mem = cs_dsp_find_region(dsp, type);
-			if (!mem) {
-				cs_dsp_err(dsp, "No region of type: %x\n", type);
-				ret = -EINVAL;
-				goto out_fw;
-			}
-
-			region_name = cs_dsp_mem_region_name(type);
-			reg = dsp->ops->region_to_reg(mem, offset);
-			break;
-		default:
-			cs_dsp_warn(dsp,
-				    "%s.%d: Unknown region type %x at %d(%x)\n",
-				    file, regions, type, pos, pos);
-			break;
-		}
-
-		cs_dsp_dbg(dsp, "%s.%d: %d bytes at %d in %s\n", file,
-			   regions, le32_to_cpu(region->len), offset,
-			   region_name);
-
-		if (le32_to_cpu(region->len) >
-		    firmware->size - pos - sizeof(*region)) {
-			cs_dsp_err(dsp,
-				   "%s.%d: %s region len %d bytes exceeds file length %zu\n",
-				   file, regions, region_name,
-				   le32_to_cpu(region->len), firmware->size);
-			ret = -EINVAL;
-			goto out_fw;
-		}
-
-		if (text) {
-			memcpy(text, region->data, le32_to_cpu(region->len));
-			cs_dsp_info(dsp, "%s: %s\n", file, text);
-			kfree(text);
-			text = NULL;
-		}
-
-		if (reg) {
-			buf = cs_dsp_buf_alloc(region->data,
-					       le32_to_cpu(region->len),
-					       &buf_list);
-			if (!buf) {
-				cs_dsp_err(dsp, "Out of memory\n");
-				ret = -ENOMEM;
-				goto out_fw;
-			}
-
-			ret = regmap_raw_write_async(regmap, reg, buf->buf,
-						     le32_to_cpu(region->len));
-			if (ret != 0) {
-				cs_dsp_err(dsp,
-					   "%s.%d: Failed to write %d bytes at %d in %s: %d\n",
-					   file, regions,
-					   le32_to_cpu(region->len), offset,
-					   region_name, ret);
-				goto out_fw;
-			}
-		}
-
-		pos += le32_to_cpu(region->len) + sizeof(*region);
-		regions++;
-	}
-
-	ret = regmap_async_complete(regmap);
-	if (ret != 0) {
-		cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret);
-		goto out_fw;
-	}
-
-	if (pos > firmware->size)
-		cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n",
-			    file, regions, pos - firmware->size);
-
-	cs_dsp_debugfs_save_wmfwname(dsp, file);
-
-out_fw:
-	regmap_async_complete(regmap);
-	cs_dsp_buf_free(&buf_list);
-	kfree(text);
-
-	return ret;
-}
-
-/*
- * Find cs_dsp_coeff_ctl with input name as its subname
- * If not found, return NULL
- */
-static struct cs_dsp_coeff_ctl *cs_dsp_get_ctl(struct cs_dsp *dsp,
-					       const char *name, int type,
-					       unsigned int alg)
-{
-	struct cs_dsp_coeff_ctl *pos, *rslt = NULL;
-
-	list_for_each_entry(pos, &dsp->ctl_list, list) {
-		if (!pos->subname)
-			continue;
-		if (strncmp(pos->subname, name, pos->subname_len) == 0 &&
-		    pos->fw_name == dsp->fw_name &&
-		    pos->alg_region.alg == alg &&
-		    pos->alg_region.type == type) {
-			rslt = pos;
-			break;
-		}
-	}
-
-	return rslt;
-}
-
-int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
-		      unsigned int alg, void *buf, size_t len)
-{
-	struct cs_dsp_coeff_ctl *cs_ctl;
-	struct wm_coeff_ctl *ctl;
-	struct snd_kcontrol *kcontrol;
-	char ctl_name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN];
-	int ret;
-
-	cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
-	if (!cs_ctl)
-		return -EINVAL;
-
-	ctl = cs_ctl->priv;
-
-	if (len > cs_ctl->len)
-		return -EINVAL;
-
-	ret = cs_dsp_coeff_write_ctrl(cs_ctl, buf, len);
-	if (ret)
-		return ret;
-
-	if (cs_ctl->flags & WMFW_CTL_FLAG_SYS)
-		return 0;
-
-	if (dsp->component->name_prefix)
-		snprintf(ctl_name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s",
-			 dsp->component->name_prefix, ctl->name);
-	else
-		snprintf(ctl_name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s",
-			 ctl->name);
-
-	kcontrol = snd_soc_card_get_kcontrol(dsp->component->card, ctl_name);
-	if (!kcontrol) {
-		adsp_err(dsp, "Can't find kcontrol %s\n", ctl_name);
-		return -EINVAL;
-	}
-
-	snd_ctl_notify(dsp->component->card->snd_card,
-		       SNDRV_CTL_EVENT_MASK_VALUE, &kcontrol->id);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(wm_adsp_write_ctl);
-
-int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type,
-		     unsigned int alg, void *buf, size_t len)
-{
-	struct cs_dsp_coeff_ctl *cs_ctl;
-
-	cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
-	if (!cs_ctl)
-		return -EINVAL;
-
-	if (len > cs_ctl->len)
-		return -EINVAL;
-
-	return cs_dsp_coeff_read_ctrl(cs_ctl, buf, len);
-}
-EXPORT_SYMBOL_GPL(wm_adsp_read_ctl);
-
-static void cs_dsp_ctl_fixup_base(struct cs_dsp *dsp,
-				  const struct cs_dsp_alg_region *alg_region)
-{
-	struct cs_dsp_coeff_ctl *ctl;
-
-	list_for_each_entry(ctl, &dsp->ctl_list, list) {
-		if (ctl->fw_name == dsp->fw_name &&
-		    alg_region->alg == ctl->alg_region.alg &&
-		    alg_region->type == ctl->alg_region.type) {
-			ctl->alg_region.base = alg_region->base;
-		}
-	}
-}
-
-static void *cs_dsp_read_algs(struct cs_dsp *dsp, size_t n_algs,
-			      const struct cs_dsp_region *mem,
-			      unsigned int pos, unsigned int len)
-{
-	void *alg;
-	unsigned int reg;
-	int ret;
-	__be32 val;
-
-	if (n_algs == 0) {
-		cs_dsp_err(dsp, "No algorithms\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	if (n_algs > 1024) {
-		cs_dsp_err(dsp, "Algorithm count %zx excessive\n", n_algs);
-		return ERR_PTR(-EINVAL);
-	}
-
-	/* Read the terminator first to validate the length */
-	reg = dsp->ops->region_to_reg(mem, pos + len);
-
-	ret = regmap_raw_read(dsp->regmap, reg, &val, sizeof(val));
-	if (ret != 0) {
-		cs_dsp_err(dsp, "Failed to read algorithm list end: %d\n",
-			   ret);
-		return ERR_PTR(ret);
-	}
-
-	if (be32_to_cpu(val) != 0xbedead)
-		cs_dsp_warn(dsp, "Algorithm list end %x 0x%x != 0xbedead\n",
-			    reg, be32_to_cpu(val));
-
-	/* Convert length from DSP words to bytes */
-	len *= sizeof(u32);
-
-	alg = kzalloc(len, GFP_KERNEL | GFP_DMA);
-	if (!alg)
-		return ERR_PTR(-ENOMEM);
-
-	reg = dsp->ops->region_to_reg(mem, pos);
-
-	ret = regmap_raw_read(dsp->regmap, reg, alg, len);
-	if (ret != 0) {
-		cs_dsp_err(dsp, "Failed to read algorithm list: %d\n", ret);
-		kfree(alg);
-		return ERR_PTR(ret);
-	}
-
-	return alg;
-}
-
-static struct cs_dsp_alg_region *
-	cs_dsp_find_alg_region(struct cs_dsp *dsp, int type, unsigned int id)
-{
-	struct cs_dsp_alg_region *alg_region;
-
-	list_for_each_entry(alg_region, &dsp->alg_regions, list) {
-		if (id == alg_region->alg && type == alg_region->type)
-			return alg_region;
-	}
-
-	return NULL;
-}
-
-static struct cs_dsp_alg_region *cs_dsp_create_region(struct cs_dsp *dsp,
-						      int type, __be32 id,
-						      __be32 base)
-{
-	struct cs_dsp_alg_region *alg_region;
-
-	alg_region = kzalloc(sizeof(*alg_region), GFP_KERNEL);
-	if (!alg_region)
-		return ERR_PTR(-ENOMEM);
-
-	alg_region->type = type;
-	alg_region->alg = be32_to_cpu(id);
-	alg_region->base = be32_to_cpu(base);
-
-	list_add_tail(&alg_region->list, &dsp->alg_regions);
-
-	if (dsp->fw_ver > 0)
-		cs_dsp_ctl_fixup_base(dsp, alg_region);
-
-	return alg_region;
-}
-
-static void cs_dsp_free_alg_regions(struct cs_dsp *dsp)
-{
-	struct cs_dsp_alg_region *alg_region;
-
-	while (!list_empty(&dsp->alg_regions)) {
-		alg_region = list_first_entry(&dsp->alg_regions,
-					      struct cs_dsp_alg_region,
-					      list);
-		list_del(&alg_region->list);
-		kfree(alg_region);
-	}
-}
-
-static void cs_dsp_parse_wmfw_id_header(struct cs_dsp *dsp,
-					struct wmfw_id_hdr *fw, int nalgs)
-{
-	dsp->fw_id = be32_to_cpu(fw->id);
-	dsp->fw_id_version = be32_to_cpu(fw->ver);
-
-	cs_dsp_info(dsp, "Firmware: %x v%d.%d.%d, %d algorithms\n",
-		    dsp->fw_id, (dsp->fw_id_version & 0xff0000) >> 16,
-		    (dsp->fw_id_version & 0xff00) >> 8, dsp->fw_id_version & 0xff,
-		    nalgs);
-}
-
-static void cs_dsp_parse_wmfw_v3_id_header(struct cs_dsp *dsp,
-					   struct wmfw_v3_id_hdr *fw, int nalgs)
-{
-	dsp->fw_id = be32_to_cpu(fw->id);
-	dsp->fw_id_version = be32_to_cpu(fw->ver);
-	dsp->fw_vendor_id = be32_to_cpu(fw->vendor_id);
-
-	cs_dsp_info(dsp, "Firmware: %x vendor: 0x%x v%d.%d.%d, %d algorithms\n",
-		    dsp->fw_id, dsp->fw_vendor_id,
-		    (dsp->fw_id_version & 0xff0000) >> 16,
-		    (dsp->fw_id_version & 0xff00) >> 8, dsp->fw_id_version & 0xff,
-		    nalgs);
-}
-
-static int cs_dsp_create_regions(struct cs_dsp *dsp, __be32 id, int nregions,
-				 const int *type, __be32 *base)
-{
-	struct cs_dsp_alg_region *alg_region;
-	int i;
-
-	for (i = 0; i < nregions; i++) {
-		alg_region = cs_dsp_create_region(dsp, type[i], id, base[i]);
-		if (IS_ERR(alg_region))
-			return PTR_ERR(alg_region);
-	}
-
-	return 0;
-}
-
-static int cs_dsp_adsp1_setup_algs(struct cs_dsp *dsp)
-{
-	struct wmfw_adsp1_id_hdr adsp1_id;
-	struct wmfw_adsp1_alg_hdr *adsp1_alg;
-	struct cs_dsp_alg_region *alg_region;
-	const struct cs_dsp_region *mem;
-	unsigned int pos, len;
-	size_t n_algs;
-	int i, ret;
-
-	mem = cs_dsp_find_region(dsp, WMFW_ADSP1_DM);
-	if (WARN_ON(!mem))
-		return -EINVAL;
-
-	ret = regmap_raw_read(dsp->regmap, mem->base, &adsp1_id,
-			      sizeof(adsp1_id));
-	if (ret != 0) {
-		cs_dsp_err(dsp, "Failed to read algorithm info: %d\n",
-			   ret);
-		return ret;
-	}
-
-	n_algs = be32_to_cpu(adsp1_id.n_algs);
-
-	cs_dsp_parse_wmfw_id_header(dsp, &adsp1_id.fw, n_algs);
-
-	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_ZM,
-					  adsp1_id.fw.id, adsp1_id.zm);
-	if (IS_ERR(alg_region))
-		return PTR_ERR(alg_region);
-
-	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_DM,
-					  adsp1_id.fw.id, adsp1_id.dm);
-	if (IS_ERR(alg_region))
-		return PTR_ERR(alg_region);
-
-	/* Calculate offset and length in DSP words */
-	pos = sizeof(adsp1_id) / sizeof(u32);
-	len = (sizeof(*adsp1_alg) * n_algs) / sizeof(u32);
-
-	adsp1_alg = cs_dsp_read_algs(dsp, n_algs, mem, pos, len);
-	if (IS_ERR(adsp1_alg))
-		return PTR_ERR(adsp1_alg);
-
-	for (i = 0; i < n_algs; i++) {
-		cs_dsp_info(dsp, "%d: ID %x v%d.%d.%d DM@%x ZM@%x\n",
-			    i, be32_to_cpu(adsp1_alg[i].alg.id),
-			    (be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff0000) >> 16,
-			    (be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff00) >> 8,
-			    be32_to_cpu(adsp1_alg[i].alg.ver) & 0xff,
-			    be32_to_cpu(adsp1_alg[i].dm),
-			    be32_to_cpu(adsp1_alg[i].zm));
-
-		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_DM,
-						  adsp1_alg[i].alg.id,
-						  adsp1_alg[i].dm);
-		if (IS_ERR(alg_region)) {
-			ret = PTR_ERR(alg_region);
-			goto out;
-		}
-		if (dsp->fw_ver == 0) {
-			if (i + 1 < n_algs) {
-				len = be32_to_cpu(adsp1_alg[i + 1].dm);
-				len -= be32_to_cpu(adsp1_alg[i].dm);
-				len *= 4;
-				cs_dsp_create_control(dsp, alg_region, 0,
-						      len, NULL, 0, 0,
-						      WMFW_CTL_TYPE_BYTES);
-			} else {
-				cs_dsp_warn(dsp, "Missing length info for region DM with ID %x\n",
-					    be32_to_cpu(adsp1_alg[i].alg.id));
-			}
-		}
-
-		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP1_ZM,
-						  adsp1_alg[i].alg.id,
-						  adsp1_alg[i].zm);
-		if (IS_ERR(alg_region)) {
-			ret = PTR_ERR(alg_region);
-			goto out;
-		}
-		if (dsp->fw_ver == 0) {
-			if (i + 1 < n_algs) {
-				len = be32_to_cpu(adsp1_alg[i + 1].zm);
-				len -= be32_to_cpu(adsp1_alg[i].zm);
-				len *= 4;
-				cs_dsp_create_control(dsp, alg_region, 0,
-						      len, NULL, 0, 0,
-						      WMFW_CTL_TYPE_BYTES);
-			} else {
-				cs_dsp_warn(dsp, "Missing length info for region ZM with ID %x\n",
-					    be32_to_cpu(adsp1_alg[i].alg.id));
-			}
-		}
-	}
-
-out:
-	kfree(adsp1_alg);
-	return ret;
-}
-
-static int cs_dsp_adsp2_setup_algs(struct cs_dsp *dsp)
-{
-	struct wmfw_adsp2_id_hdr adsp2_id;
-	struct wmfw_adsp2_alg_hdr *adsp2_alg;
-	struct cs_dsp_alg_region *alg_region;
-	const struct cs_dsp_region *mem;
-	unsigned int pos, len;
-	size_t n_algs;
-	int i, ret;
-
-	mem = cs_dsp_find_region(dsp, WMFW_ADSP2_XM);
-	if (WARN_ON(!mem))
-		return -EINVAL;
-
-	ret = regmap_raw_read(dsp->regmap, mem->base, &adsp2_id,
-			      sizeof(adsp2_id));
-	if (ret != 0) {
-		cs_dsp_err(dsp, "Failed to read algorithm info: %d\n",
-			   ret);
-		return ret;
-	}
-
-	n_algs = be32_to_cpu(adsp2_id.n_algs);
-
-	cs_dsp_parse_wmfw_id_header(dsp, &adsp2_id.fw, n_algs);
-
-	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_XM,
-					  adsp2_id.fw.id, adsp2_id.xm);
-	if (IS_ERR(alg_region))
-		return PTR_ERR(alg_region);
-
-	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_YM,
-					  adsp2_id.fw.id, adsp2_id.ym);
-	if (IS_ERR(alg_region))
-		return PTR_ERR(alg_region);
-
-	alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_ZM,
-					  adsp2_id.fw.id, adsp2_id.zm);
-	if (IS_ERR(alg_region))
-		return PTR_ERR(alg_region);
-
-	/* Calculate offset and length in DSP words */
-	pos = sizeof(adsp2_id) / sizeof(u32);
-	len = (sizeof(*adsp2_alg) * n_algs) / sizeof(u32);
-
-	adsp2_alg = cs_dsp_read_algs(dsp, n_algs, mem, pos, len);
-	if (IS_ERR(adsp2_alg))
-		return PTR_ERR(adsp2_alg);
-
-	for (i = 0; i < n_algs; i++) {
-		cs_dsp_info(dsp,
-			    "%d: ID %x v%d.%d.%d XM@%x YM@%x ZM@%x\n",
-			    i, be32_to_cpu(adsp2_alg[i].alg.id),
-			    (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff0000) >> 16,
-			    (be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff00) >> 8,
-			    be32_to_cpu(adsp2_alg[i].alg.ver) & 0xff,
-			    be32_to_cpu(adsp2_alg[i].xm),
-			    be32_to_cpu(adsp2_alg[i].ym),
-			    be32_to_cpu(adsp2_alg[i].zm));
-
-		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_XM,
-						  adsp2_alg[i].alg.id,
-						  adsp2_alg[i].xm);
-		if (IS_ERR(alg_region)) {
-			ret = PTR_ERR(alg_region);
-			goto out;
-		}
-		if (dsp->fw_ver == 0) {
-			if (i + 1 < n_algs) {
-				len = be32_to_cpu(adsp2_alg[i + 1].xm);
-				len -= be32_to_cpu(adsp2_alg[i].xm);
-				len *= 4;
-				cs_dsp_create_control(dsp, alg_region, 0,
-						      len, NULL, 0, 0,
-						      WMFW_CTL_TYPE_BYTES);
-			} else {
-				cs_dsp_warn(dsp, "Missing length info for region XM with ID %x\n",
-					    be32_to_cpu(adsp2_alg[i].alg.id));
-			}
-		}
-
-		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_YM,
-						  adsp2_alg[i].alg.id,
-						  adsp2_alg[i].ym);
-		if (IS_ERR(alg_region)) {
-			ret = PTR_ERR(alg_region);
-			goto out;
-		}
-		if (dsp->fw_ver == 0) {
-			if (i + 1 < n_algs) {
-				len = be32_to_cpu(adsp2_alg[i + 1].ym);
-				len -= be32_to_cpu(adsp2_alg[i].ym);
-				len *= 4;
-				cs_dsp_create_control(dsp, alg_region, 0,
-						      len, NULL, 0, 0,
-						      WMFW_CTL_TYPE_BYTES);
-			} else {
-				cs_dsp_warn(dsp, "Missing length info for region YM with ID %x\n",
-					    be32_to_cpu(adsp2_alg[i].alg.id));
-			}
-		}
-
-		alg_region = cs_dsp_create_region(dsp, WMFW_ADSP2_ZM,
-						  adsp2_alg[i].alg.id,
-						  adsp2_alg[i].zm);
-		if (IS_ERR(alg_region)) {
-			ret = PTR_ERR(alg_region);
-			goto out;
-		}
-		if (dsp->fw_ver == 0) {
-			if (i + 1 < n_algs) {
-				len = be32_to_cpu(adsp2_alg[i + 1].zm);
-				len -= be32_to_cpu(adsp2_alg[i].zm);
-				len *= 4;
-				cs_dsp_create_control(dsp, alg_region, 0,
-						      len, NULL, 0, 0,
-						      WMFW_CTL_TYPE_BYTES);
-			} else {
-				cs_dsp_warn(dsp, "Missing length info for region ZM with ID %x\n",
-					    be32_to_cpu(adsp2_alg[i].alg.id));
-			}
-		}
-	}
-
-out:
-	kfree(adsp2_alg);
-	return ret;
-}
-
-static int cs_dsp_halo_create_regions(struct cs_dsp *dsp, __be32 id,
-				      __be32 xm_base, __be32 ym_base)
-{
-	static const int types[] = {
-		WMFW_ADSP2_XM, WMFW_HALO_XM_PACKED,
-		WMFW_ADSP2_YM, WMFW_HALO_YM_PACKED
-	};
-	__be32 bases[] = { xm_base, xm_base, ym_base, ym_base };
-
-	return cs_dsp_create_regions(dsp, id, ARRAY_SIZE(types), types, bases);
-}
-
-static int cs_dsp_halo_setup_algs(struct cs_dsp *dsp)
-{
-	struct wmfw_halo_id_hdr halo_id;
-	struct wmfw_halo_alg_hdr *halo_alg;
-	const struct cs_dsp_region *mem;
-	unsigned int pos, len;
-	size_t n_algs;
-	int i, ret;
-
-	mem = cs_dsp_find_region(dsp, WMFW_ADSP2_XM);
-	if (WARN_ON(!mem))
-		return -EINVAL;
-
-	ret = regmap_raw_read(dsp->regmap, mem->base, &halo_id,
-			      sizeof(halo_id));
-	if (ret != 0) {
-		cs_dsp_err(dsp, "Failed to read algorithm info: %d\n",
-			   ret);
-		return ret;
-	}
-
-	n_algs = be32_to_cpu(halo_id.n_algs);
-
-	cs_dsp_parse_wmfw_v3_id_header(dsp, &halo_id.fw, n_algs);
-
-	ret = cs_dsp_halo_create_regions(dsp, halo_id.fw.id,
-					 halo_id.xm_base, halo_id.ym_base);
-	if (ret)
-		return ret;
-
-	/* Calculate offset and length in DSP words */
-	pos = sizeof(halo_id) / sizeof(u32);
-	len = (sizeof(*halo_alg) * n_algs) / sizeof(u32);
-
-	halo_alg = cs_dsp_read_algs(dsp, n_algs, mem, pos, len);
-	if (IS_ERR(halo_alg))
-		return PTR_ERR(halo_alg);
-
-	for (i = 0; i < n_algs; i++) {
-		cs_dsp_info(dsp,
-			    "%d: ID %x v%d.%d.%d XM@%x YM@%x\n",
-			    i, be32_to_cpu(halo_alg[i].alg.id),
-			    (be32_to_cpu(halo_alg[i].alg.ver) & 0xff0000) >> 16,
-			    (be32_to_cpu(halo_alg[i].alg.ver) & 0xff00) >> 8,
-			    be32_to_cpu(halo_alg[i].alg.ver) & 0xff,
-			    be32_to_cpu(halo_alg[i].xm_base),
-			    be32_to_cpu(halo_alg[i].ym_base));
-
-		ret = cs_dsp_halo_create_regions(dsp, halo_alg[i].alg.id,
-						 halo_alg[i].xm_base,
-						 halo_alg[i].ym_base);
-		if (ret)
-			goto out;
-	}
-
-out:
-	kfree(halo_alg);
-	return ret;
-}
-
-static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware,
-			     const char *file)
-{
-	LIST_HEAD(buf_list);
-	struct regmap *regmap = dsp->regmap;
-	struct wmfw_coeff_hdr *hdr;
-	struct wmfw_coeff_item *blk;
-	const struct cs_dsp_region *mem;
-	struct cs_dsp_alg_region *alg_region;
-	const char *region_name;
-	int ret, pos, blocks, type, offset, reg;
-	struct cs_dsp_buf *buf;
-
-	if (!firmware)
-		return 0;
-
-	ret = -EINVAL;
-
-	if (sizeof(*hdr) >= firmware->size) {
-		cs_dsp_err(dsp, "%s: coefficient file too short, %zu bytes\n",
-			   file, firmware->size);
-		goto out_fw;
-	}
-
-	hdr = (void *)&firmware->data[0];
-	if (memcmp(hdr->magic, "WMDR", 4) != 0) {
-		cs_dsp_err(dsp, "%s: invalid coefficient magic\n", file);
-		goto out_fw;
-	}
-
-	switch (be32_to_cpu(hdr->rev) & 0xff) {
-	case 1:
-		break;
-	default:
-		cs_dsp_err(dsp, "%s: Unsupported coefficient file format %d\n",
-			   file, be32_to_cpu(hdr->rev) & 0xff);
-		ret = -EINVAL;
-		goto out_fw;
-	}
-
-	cs_dsp_dbg(dsp, "%s: v%d.%d.%d\n", file,
-		   (le32_to_cpu(hdr->ver) >> 16) & 0xff,
-		   (le32_to_cpu(hdr->ver) >>  8) & 0xff,
-		   le32_to_cpu(hdr->ver) & 0xff);
-
-	pos = le32_to_cpu(hdr->len);
-
-	blocks = 0;
-	while (pos < firmware->size &&
-	       sizeof(*blk) < firmware->size - pos) {
-		blk = (void *)(&firmware->data[pos]);
-
-		type = le16_to_cpu(blk->type);
-		offset = le16_to_cpu(blk->offset);
-
-		cs_dsp_dbg(dsp, "%s.%d: %x v%d.%d.%d\n",
-			   file, blocks, le32_to_cpu(blk->id),
-			   (le32_to_cpu(blk->ver) >> 16) & 0xff,
-			   (le32_to_cpu(blk->ver) >>  8) & 0xff,
-			   le32_to_cpu(blk->ver) & 0xff);
-		cs_dsp_dbg(dsp, "%s.%d: %d bytes at 0x%x in %x\n",
-			   file, blocks, le32_to_cpu(blk->len), offset, type);
-
-		reg = 0;
-		region_name = "Unknown";
-		switch (type) {
-		case (WMFW_NAME_TEXT << 8):
-		case (WMFW_INFO_TEXT << 8):
-		case (WMFW_METADATA << 8):
-			break;
-		case (WMFW_ABSOLUTE << 8):
-			/*
-			 * Old files may use this for global
-			 * coefficients.
-			 */
-			if (le32_to_cpu(blk->id) == dsp->fw_id &&
-			    offset == 0) {
-				region_name = "global coefficients";
-				mem = cs_dsp_find_region(dsp, type);
-				if (!mem) {
-					cs_dsp_err(dsp, "No ZM\n");
-					break;
-				}
-				reg = dsp->ops->region_to_reg(mem, 0);
-
-			} else {
-				region_name = "register";
-				reg = offset;
-			}
-			break;
-
-		case WMFW_ADSP1_DM:
-		case WMFW_ADSP1_ZM:
-		case WMFW_ADSP2_XM:
-		case WMFW_ADSP2_YM:
-		case WMFW_HALO_XM_PACKED:
-		case WMFW_HALO_YM_PACKED:
-		case WMFW_HALO_PM_PACKED:
-			cs_dsp_dbg(dsp, "%s.%d: %d bytes in %x for %x\n",
-				   file, blocks, le32_to_cpu(blk->len),
-				   type, le32_to_cpu(blk->id));
-
-			mem = cs_dsp_find_region(dsp, type);
-			if (!mem) {
-				cs_dsp_err(dsp, "No base for region %x\n", type);
-				break;
-			}
-
-			alg_region = cs_dsp_find_alg_region(dsp, type,
-							    le32_to_cpu(blk->id));
-			if (alg_region) {
-				reg = alg_region->base;
-				reg = dsp->ops->region_to_reg(mem, reg);
-				reg += offset;
-			} else {
-				cs_dsp_err(dsp, "No %x for algorithm %x\n",
-					   type, le32_to_cpu(blk->id));
-			}
-			break;
-
-		default:
-			cs_dsp_err(dsp, "%s.%d: Unknown region type %x at %d\n",
-				   file, blocks, type, pos);
-			break;
-		}
-
-		if (reg) {
-			if (le32_to_cpu(blk->len) >
-			    firmware->size - pos - sizeof(*blk)) {
-				cs_dsp_err(dsp,
-					   "%s.%d: %s region len %d bytes exceeds file length %zu\n",
-					   file, blocks, region_name,
-					   le32_to_cpu(blk->len),
-					   firmware->size);
-				ret = -EINVAL;
-				goto out_fw;
-			}
-
-			buf = cs_dsp_buf_alloc(blk->data,
-					       le32_to_cpu(blk->len),
-					       &buf_list);
-			if (!buf) {
-				cs_dsp_err(dsp, "Out of memory\n");
-				ret = -ENOMEM;
-				goto out_fw;
-			}
-
-			cs_dsp_dbg(dsp, "%s.%d: Writing %d bytes at %x\n",
-				   file, blocks, le32_to_cpu(blk->len),
-				   reg);
-			ret = regmap_raw_write_async(regmap, reg, buf->buf,
-						     le32_to_cpu(blk->len));
-			if (ret != 0) {
-				cs_dsp_err(dsp,
-					   "%s.%d: Failed to write to %x in %s: %d\n",
-					   file, blocks, reg, region_name, ret);
-			}
-		}
-
-		pos += (le32_to_cpu(blk->len) + sizeof(*blk) + 3) & ~0x03;
-		blocks++;
-	}
-
-	ret = regmap_async_complete(regmap);
-	if (ret != 0)
-		cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret);
-
-	if (pos > firmware->size)
-		cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n",
-			    file, blocks, pos - firmware->size);
-
-	cs_dsp_debugfs_save_binname(dsp, file);
-
-out_fw:
-	regmap_async_complete(regmap);
-	cs_dsp_buf_free(&buf_list);
-	return ret;
-}
-
-static int cs_dsp_create_name(struct cs_dsp *dsp)
-{
-	if (!dsp->name) {
-		dsp->name = devm_kasprintf(dsp->dev, GFP_KERNEL, "DSP%d",
-					   dsp->num);
-		if (!dsp->name)
-			return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static int cs_dsp_common_init(struct cs_dsp *dsp)
-{
-	int ret;
-
-	ret = cs_dsp_create_name(dsp);
-	if (ret)
-		return ret;
-
-	INIT_LIST_HEAD(&dsp->alg_regions);
-	INIT_LIST_HEAD(&dsp->ctl_list);
-
-	mutex_init(&dsp->pwr_lock);
-
-	return 0;
-}
-
-static int wm_adsp_common_init(struct wm_adsp *dsp)
-{
-	char *p;
-
-	INIT_LIST_HEAD(&dsp->compr_list);
-	INIT_LIST_HEAD(&dsp->buffer_list);
-
-	if (!dsp->fwf_name) {
-		p = devm_kstrdup(dsp->cs_dsp.dev, dsp->cs_dsp.name, GFP_KERNEL);
-		if (!p)
-			return -ENOMEM;
-
-		dsp->fwf_name = p;
-		for (; *p != 0; ++p)
-			*p = tolower(*p);
-	}
-
-	return 0;
-}
-
-static int cs_dsp_adsp1_init(struct cs_dsp *dsp)
-{
-	dsp->ops = &cs_dsp_adsp1_ops;
-
-	return cs_dsp_common_init(dsp);
-}
-
-int wm_adsp1_init(struct wm_adsp *dsp)
-{
-	int ret;
-
-	dsp->cs_dsp.client_ops = &wm_adsp1_client_ops;
-
-	ret = cs_dsp_adsp1_init(&dsp->cs_dsp);
-	if (ret)
-		return ret;
-
-	return wm_adsp_common_init(dsp);
-}
-EXPORT_SYMBOL_GPL(wm_adsp1_init);
-
-static int cs_dsp_adsp1_power_up(struct cs_dsp *dsp,
-				 const struct firmware *wmfw_firmware, char *wmfw_filename,
-				 const struct firmware *coeff_firmware, char *coeff_filename,
-				 const char *fw_name)
-{
-	unsigned int val;
-	int ret;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	dsp->fw_name = fw_name;
-
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
-			   ADSP1_SYS_ENA, ADSP1_SYS_ENA);
-
-	/*
-	 * For simplicity set the DSP clock rate to be the
-	 * SYSCLK rate rather than making it configurable.
-	 */
-	if (dsp->sysclk_reg) {
-		ret = regmap_read(dsp->regmap, dsp->sysclk_reg, &val);
-		if (ret != 0) {
-			cs_dsp_err(dsp, "Failed to read SYSCLK state: %d\n", ret);
-			goto err_mutex;
-		}
-
-		val = (val & dsp->sysclk_mask) >> dsp->sysclk_shift;
-
-		ret = regmap_update_bits(dsp->regmap,
-					 dsp->base + ADSP1_CONTROL_31,
-					 ADSP1_CLK_SEL_MASK, val);
-		if (ret != 0) {
-			cs_dsp_err(dsp, "Failed to set clock rate: %d\n", ret);
-			goto err_mutex;
-		}
-	}
-
-	ret = cs_dsp_load(dsp, wmfw_firmware, wmfw_filename);
-	if (ret != 0)
-		goto err_ena;
-
-	ret = cs_dsp_adsp1_setup_algs(dsp);
-	if (ret != 0)
-		goto err_ena;
-
-	ret = cs_dsp_load_coeff(dsp, coeff_firmware, coeff_filename);
-	if (ret != 0)
-		goto err_ena;
-
-	/* Initialize caches for enabled and unset controls */
-	ret = cs_dsp_coeff_init_control_caches(dsp);
-	if (ret != 0)
-		goto err_ena;
-
-	/* Sync set controls */
-	ret = cs_dsp_coeff_sync_controls(dsp);
-	if (ret != 0)
-		goto err_ena;
-
-	dsp->booted = true;
-
-	/* Start the core running */
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
-			   ADSP1_CORE_ENA | ADSP1_START,
-			   ADSP1_CORE_ENA | ADSP1_START);
-
-	dsp->running = true;
-
-	mutex_unlock(&dsp->pwr_lock);
-
-	return 0;
-
-err_ena:
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
-			   ADSP1_SYS_ENA, 0);
-err_mutex:
-	mutex_unlock(&dsp->pwr_lock);
-	return ret;
-}
-
-static void cs_dsp_adsp1_power_down(struct cs_dsp *dsp)
-{
-	struct cs_dsp_coeff_ctl *ctl;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	dsp->running = false;
-	dsp->booted = false;
-
-	/* Halt the core */
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
-			   ADSP1_CORE_ENA | ADSP1_START, 0);
-
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_19,
-			   ADSP1_WDMA_BUFFER_LENGTH_MASK, 0);
-
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP1_CONTROL_30,
-			   ADSP1_SYS_ENA, 0);
-
-	list_for_each_entry(ctl, &dsp->ctl_list, list)
-		ctl->enabled = 0;
-
-	cs_dsp_free_alg_regions(dsp);
-
-	mutex_unlock(&dsp->pwr_lock);
-}
-
-int wm_adsp1_event(struct snd_soc_dapm_widget *w,
-		   struct snd_kcontrol *kcontrol,
-		   int event)
-{
-	struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
-	struct wm_adsp *dsps = snd_soc_component_get_drvdata(component);
-	struct wm_adsp *dsp = &dsps[w->shift];
-	int ret = 0;
-	char *wmfw_filename = NULL;
-	const struct firmware *wmfw_firmware = NULL;
-	char *coeff_filename = NULL;
-	const struct firmware *coeff_firmware = NULL;
-
-	dsp->component = component;
+	dsp->component = component;
 
 	switch (event) {
 	case SND_SOC_DAPM_POST_PMU:
 		ret = wm_adsp_request_firmware_files(dsp,
 						     &wmfw_firmware, &wmfw_filename,
 						     &coeff_firmware, &coeff_filename);
-		if (ret)
-			break;
-
-		ret = cs_dsp_adsp1_power_up(&dsp->cs_dsp,
-					    wmfw_firmware, wmfw_filename,
-					    coeff_firmware, coeff_filename,
-					    wm_adsp_fw_text[dsp->fw]);
-
-		wm_adsp_release_firmware_files(dsp,
-					       wmfw_firmware, wmfw_filename,
-					       coeff_firmware, coeff_filename);
-		break;
-	case SND_SOC_DAPM_PRE_PMD:
-		cs_dsp_adsp1_power_down(&dsp->cs_dsp);
-		break;
-	default:
-		break;
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(wm_adsp1_event);
-
-static int cs_dsp_adsp2v2_enable_core(struct cs_dsp *dsp)
-{
-	unsigned int val;
-	int ret, count;
-
-	/* Wait for the RAM to start, should be near instantaneous */
-	for (count = 0; count < 10; ++count) {
-		ret = regmap_read(dsp->regmap, dsp->base + ADSP2_STATUS1, &val);
-		if (ret != 0)
-			return ret;
-
-		if (val & ADSP2_RAM_RDY)
-			break;
-
-		usleep_range(250, 500);
-	}
-
-	if (!(val & ADSP2_RAM_RDY)) {
-		cs_dsp_err(dsp, "Failed to start DSP RAM\n");
-		return -EBUSY;
-	}
-
-	cs_dsp_dbg(dsp, "RAM ready after %d polls\n", count);
-
-	return 0;
-}
-
-static int cs_dsp_adsp2_enable_core(struct cs_dsp *dsp)
-{
-	int ret;
-
-	ret = regmap_update_bits_async(dsp->regmap, dsp->base + ADSP2_CONTROL,
-				       ADSP2_SYS_ENA, ADSP2_SYS_ENA);
-	if (ret != 0)
-		return ret;
-
-	return cs_dsp_adsp2v2_enable_core(dsp);
-}
-
-static int cs_dsp_adsp2_lock(struct cs_dsp *dsp, unsigned int lock_regions)
-{
-	struct regmap *regmap = dsp->regmap;
-	unsigned int code0, code1, lock_reg;
-
-	if (!(lock_regions & CS_ADSP2_REGION_ALL))
-		return 0;
-
-	lock_regions &= CS_ADSP2_REGION_ALL;
-	lock_reg = dsp->base + ADSP2_LOCK_REGION_1_LOCK_REGION_0;
-
-	while (lock_regions) {
-		code0 = code1 = 0;
-		if (lock_regions & BIT(0)) {
-			code0 = ADSP2_LOCK_CODE_0;
-			code1 = ADSP2_LOCK_CODE_1;
-		}
-		if (lock_regions & BIT(1)) {
-			code0 |= ADSP2_LOCK_CODE_0 << ADSP2_LOCK_REGION_SHIFT;
-			code1 |= ADSP2_LOCK_CODE_1 << ADSP2_LOCK_REGION_SHIFT;
-		}
-		regmap_write(regmap, lock_reg, code0);
-		regmap_write(regmap, lock_reg, code1);
-		lock_regions >>= 2;
-		lock_reg += 2;
-	}
-
-	return 0;
-}
-
-static int cs_dsp_adsp2_enable_memory(struct cs_dsp *dsp)
-{
-	return regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
-				  ADSP2_MEM_ENA, ADSP2_MEM_ENA);
-}
-
-static void cs_dsp_adsp2_disable_memory(struct cs_dsp *dsp)
-{
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
-			   ADSP2_MEM_ENA, 0);
-}
-
-static void cs_dsp_adsp2_disable_core(struct cs_dsp *dsp)
-{
-	regmap_write(dsp->regmap, dsp->base + ADSP2_RDMA_CONFIG_1, 0);
-	regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_1, 0);
-	regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_2, 0);
-
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
-			   ADSP2_SYS_ENA, 0);
-}
-
-static void cs_dsp_adsp2v2_disable_core(struct cs_dsp *dsp)
-{
-	regmap_write(dsp->regmap, dsp->base + ADSP2_RDMA_CONFIG_1, 0);
-	regmap_write(dsp->regmap, dsp->base + ADSP2_WDMA_CONFIG_1, 0);
-	regmap_write(dsp->regmap, dsp->base + ADSP2V2_WDMA_CONFIG_2, 0);
-}
-
-static int cs_dsp_halo_configure_mpu(struct cs_dsp *dsp, unsigned int lock_regions)
-{
-	struct reg_sequence config[] = {
-		{ dsp->base + HALO_MPU_LOCK_CONFIG,     0x5555 },
-		{ dsp->base + HALO_MPU_LOCK_CONFIG,     0xAAAA },
-		{ dsp->base + HALO_MPU_XMEM_ACCESS_0,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_YMEM_ACCESS_0,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_WINDOW_ACCESS_0, lock_regions },
-		{ dsp->base + HALO_MPU_XREG_ACCESS_0,   lock_regions },
-		{ dsp->base + HALO_MPU_YREG_ACCESS_0,   lock_regions },
-		{ dsp->base + HALO_MPU_XMEM_ACCESS_1,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_YMEM_ACCESS_1,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_WINDOW_ACCESS_1, lock_regions },
-		{ dsp->base + HALO_MPU_XREG_ACCESS_1,   lock_regions },
-		{ dsp->base + HALO_MPU_YREG_ACCESS_1,   lock_regions },
-		{ dsp->base + HALO_MPU_XMEM_ACCESS_2,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_YMEM_ACCESS_2,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_WINDOW_ACCESS_2, lock_regions },
-		{ dsp->base + HALO_MPU_XREG_ACCESS_2,   lock_regions },
-		{ dsp->base + HALO_MPU_YREG_ACCESS_2,   lock_regions },
-		{ dsp->base + HALO_MPU_XMEM_ACCESS_3,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_YMEM_ACCESS_3,   0xFFFFFFFF },
-		{ dsp->base + HALO_MPU_WINDOW_ACCESS_3, lock_regions },
-		{ dsp->base + HALO_MPU_XREG_ACCESS_3,   lock_regions },
-		{ dsp->base + HALO_MPU_YREG_ACCESS_3,   lock_regions },
-		{ dsp->base + HALO_MPU_LOCK_CONFIG,     0 },
-	};
-
-	return regmap_multi_reg_write(dsp->regmap, config, ARRAY_SIZE(config));
-}
+		if (ret)
+			break;
 
-static int cs_dsp_set_dspclk(struct cs_dsp *dsp, unsigned int freq)
-{
-	int ret;
+		ret = cs_dsp_adsp1_power_up(&dsp->cs_dsp,
+					    wmfw_firmware, wmfw_filename,
+					    coeff_firmware, coeff_filename,
+					    wm_adsp_fw_text[dsp->fw]);
 
-	ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CLOCKING,
-				 ADSP2_CLK_SEL_MASK,
-				 freq << ADSP2_CLK_SEL_SHIFT);
-	if (ret)
-		cs_dsp_err(dsp, "Failed to set clock rate: %d\n", ret);
+		wm_adsp_release_firmware_files(dsp,
+					       wmfw_firmware, wmfw_filename,
+					       coeff_firmware, coeff_filename);
+		break;
+	case SND_SOC_DAPM_PRE_PMD:
+		cs_dsp_adsp1_power_down(&dsp->cs_dsp);
+		break;
+	default:
+		break;
+	}
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(wm_adsp1_event);
 
 int wm_adsp2_set_dspclk(struct snd_soc_dapm_widget *w, unsigned int freq)
 {
@@ -3225,104 +928,6 @@ int wm_adsp2_preloader_put(struct snd_kcontrol *kcontrol,
 }
 EXPORT_SYMBOL_GPL(wm_adsp2_preloader_put);
 
-static void cs_dsp_stop_watchdog(struct cs_dsp *dsp)
-{
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_WATCHDOG,
-			   ADSP2_WDT_ENA_MASK, 0);
-}
-
-static void cs_dsp_halo_stop_watchdog(struct cs_dsp *dsp)
-{
-	regmap_update_bits(dsp->regmap, dsp->base + HALO_WDT_CONTROL,
-			   HALO_WDT_EN_MASK, 0);
-}
-
-static int cs_dsp_power_up(struct cs_dsp *dsp,
-			   const struct firmware *wmfw_firmware, char *wmfw_filename,
-			   const struct firmware *coeff_firmware, char *coeff_filename,
-			   const char *fw_name)
-{
-	int ret;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	dsp->fw_name = fw_name;
-
-	if (dsp->ops->enable_memory) {
-		ret = dsp->ops->enable_memory(dsp);
-		if (ret != 0)
-			goto err_mutex;
-	}
-
-	if (dsp->ops->enable_core) {
-		ret = dsp->ops->enable_core(dsp);
-		if (ret != 0)
-			goto err_mem;
-	}
-
-	ret = cs_dsp_load(dsp, wmfw_firmware, wmfw_filename);
-	if (ret != 0)
-		goto err_ena;
-
-	ret = dsp->ops->setup_algs(dsp);
-	if (ret != 0)
-		goto err_ena;
-
-	ret = cs_dsp_load_coeff(dsp, coeff_firmware, coeff_filename);
-	if (ret != 0)
-		goto err_ena;
-
-	/* Initialize caches for enabled and unset controls */
-	ret = cs_dsp_coeff_init_control_caches(dsp);
-	if (ret != 0)
-		goto err_ena;
-
-	if (dsp->ops->disable_core)
-		dsp->ops->disable_core(dsp);
-
-	dsp->booted = true;
-
-	mutex_unlock(&dsp->pwr_lock);
-
-	return 0;
-err_ena:
-	if (dsp->ops->disable_core)
-		dsp->ops->disable_core(dsp);
-err_mem:
-	if (dsp->ops->disable_memory)
-		dsp->ops->disable_memory(dsp);
-err_mutex:
-	mutex_unlock(&dsp->pwr_lock);
-
-	return ret;
-}
-
-static void cs_dsp_power_down(struct cs_dsp *dsp)
-{
-	struct cs_dsp_coeff_ctl *ctl;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	cs_dsp_debugfs_clear(dsp);
-
-	dsp->fw_id = 0;
-	dsp->fw_id_version = 0;
-
-	dsp->booted = false;
-
-	if (dsp->ops->disable_memory)
-		dsp->ops->disable_memory(dsp);
-
-	list_for_each_entry(ctl, &dsp->ctl_list, list)
-		ctl->enabled = 0;
-
-	cs_dsp_free_alg_regions(dsp);
-
-	mutex_unlock(&dsp->pwr_lock);
-
-	cs_dsp_dbg(dsp, "Shutdown complete\n");
-}
-
 static void wm_adsp_boot_work(struct work_struct *work)
 {
 	struct wm_adsp *dsp = container_of(work,
@@ -3372,19 +977,6 @@ int wm_adsp_early_event(struct snd_soc_dapm_widget *w,
 }
 EXPORT_SYMBOL_GPL(wm_adsp_early_event);
 
-static int cs_dsp_adsp2_start_core(struct cs_dsp *dsp)
-{
-	return regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
-				 ADSP2_CORE_ENA | ADSP2_START,
-				 ADSP2_CORE_ENA | ADSP2_START);
-}
-
-static void cs_dsp_adsp2_stop_core(struct cs_dsp *dsp)
-{
-	regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
-			   ADSP2_CORE_ENA | ADSP2_START, 0);
-}
-
 static int wm_adsp_event_post_run(struct cs_dsp *cs_dsp)
 {
 	struct wm_adsp *dsp = container_of(cs_dsp, struct wm_adsp, cs_dsp);
@@ -3405,93 +997,6 @@ static void wm_adsp_event_post_stop(struct cs_dsp *cs_dsp)
 	dsp->fatal_error = false;
 }
 
-static int cs_dsp_run(struct cs_dsp *dsp)
-{
-	int ret;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	if (!dsp->booted) {
-		ret = -EIO;
-		goto err;
-	}
-
-	if (dsp->ops->enable_core) {
-		ret = dsp->ops->enable_core(dsp);
-		if (ret != 0)
-			goto err;
-	}
-
-	/* Sync set controls */
-	ret = cs_dsp_coeff_sync_controls(dsp);
-	if (ret != 0)
-		goto err;
-
-	if (dsp->ops->lock_memory) {
-		ret = dsp->ops->lock_memory(dsp, dsp->lock_regions);
-		if (ret != 0) {
-			cs_dsp_err(dsp, "Error configuring MPU: %d\n", ret);
-			goto err;
-		}
-	}
-
-	if (dsp->ops->start_core) {
-		ret = dsp->ops->start_core(dsp);
-		if (ret != 0)
-			goto err;
-	}
-
-	dsp->running = true;
-
-	if (dsp->client_ops->post_run) {
-		ret = dsp->client_ops->post_run(dsp);
-		if (ret)
-			goto err;
-	}
-
-	mutex_unlock(&dsp->pwr_lock);
-
-	return 0;
-
-err:
-	if (dsp->ops->stop_core)
-		dsp->ops->stop_core(dsp);
-	if (dsp->ops->disable_core)
-		dsp->ops->disable_core(dsp);
-	mutex_unlock(&dsp->pwr_lock);
-
-	return ret;
-}
-
-static void cs_dsp_stop(struct cs_dsp *dsp)
-{
-	/* Tell the firmware to cleanup */
-	cs_dsp_signal_event_controls(dsp, CS_DSP_FW_EVENT_SHUTDOWN);
-
-	if (dsp->ops->stop_watchdog)
-		dsp->ops->stop_watchdog(dsp);
-
-	/* Log firmware state, it can be useful for analysis */
-	if (dsp->ops->show_fw_status)
-		dsp->ops->show_fw_status(dsp);
-
-	mutex_lock(&dsp->pwr_lock);
-
-	dsp->running = false;
-
-	if (dsp->ops->stop_core)
-		dsp->ops->stop_core(dsp);
-	if (dsp->ops->disable_core)
-		dsp->ops->disable_core(dsp);
-
-	if (dsp->client_ops->post_stop)
-		dsp->client_ops->post_stop(dsp);
-
-	mutex_unlock(&dsp->pwr_lock);
-
-	cs_dsp_dbg(dsp, "Execution stopped\n");
-}
-
 int wm_adsp_event(struct snd_soc_dapm_widget *w,
 		  struct snd_kcontrol *kcontrol, int event)
 {
@@ -3516,24 +1021,6 @@ int wm_adsp_event(struct snd_soc_dapm_widget *w,
 }
 EXPORT_SYMBOL_GPL(wm_adsp_event);
 
-static int cs_dsp_halo_start_core(struct cs_dsp *dsp)
-{
-	return regmap_update_bits(dsp->regmap,
-				  dsp->base + HALO_CCM_CORE_CONTROL,
-				  HALO_CORE_RESET | HALO_CORE_EN,
-				  HALO_CORE_RESET | HALO_CORE_EN);
-}
-
-static void cs_dsp_halo_stop_core(struct cs_dsp *dsp)
-{
-	regmap_update_bits(dsp->regmap, dsp->base + HALO_CCM_CORE_CONTROL,
-			   HALO_CORE_EN, 0);
-
-	/* reset halo core with CORE_SOFT_RESET */
-	regmap_update_bits(dsp->regmap, dsp->base + HALO_CORE_SOFT_RESET,
-			   HALO_CORE_SOFT_RESET_MASK, 1);
-}
-
 int wm_adsp2_component_probe(struct wm_adsp *dsp, struct snd_soc_component *component)
 {
 	char preload[32];
@@ -3557,37 +1044,6 @@ int wm_adsp2_component_remove(struct wm_adsp *dsp, struct snd_soc_component *com
 }
 EXPORT_SYMBOL_GPL(wm_adsp2_component_remove);
 
-static int cs_dsp_adsp2_init(struct cs_dsp *dsp)
-{
-	int ret;
-
-	switch (dsp->rev) {
-	case 0:
-		/*
-		 * Disable the DSP memory by default when in reset for a small
-		 * power saving.
-		 */
-		ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL,
-					 ADSP2_MEM_ENA, 0);
-		if (ret) {
-			cs_dsp_err(dsp,
-				   "Failed to clear memory retention: %d\n", ret);
-			return ret;
-		}
-
-		dsp->ops = &cs_dsp_adsp2_ops[0];
-		break;
-	case 1:
-		dsp->ops = &cs_dsp_adsp2_ops[1];
-		break;
-	default:
-		dsp->ops = &cs_dsp_adsp2_ops[2];
-		break;
-	}
-
-	return cs_dsp_common_init(dsp);
-}
-
 int wm_adsp2_init(struct wm_adsp *dsp)
 {
 	int ret;
@@ -3605,13 +1061,6 @@ int wm_adsp2_init(struct wm_adsp *dsp)
 }
 EXPORT_SYMBOL_GPL(wm_adsp2_init);
 
-static int cs_dsp_halo_init(struct cs_dsp *dsp)
-{
-	dsp->ops = &cs_dsp_halo_ops;
-
-	return cs_dsp_common_init(dsp);
-}
-
 int wm_halo_init(struct wm_adsp *dsp)
 {
 	int ret;
@@ -3629,21 +1078,6 @@ int wm_halo_init(struct wm_adsp *dsp)
 }
 EXPORT_SYMBOL_GPL(wm_halo_init);
 
-static void cs_dsp_remove(struct cs_dsp *dsp)
-{
-	struct cs_dsp_coeff_ctl *ctl;
-
-	while (!list_empty(&dsp->ctl_list)) {
-		ctl = list_first_entry(&dsp->ctl_list, struct cs_dsp_coeff_ctl, list);
-
-		if (dsp->client_ops->control_remove)
-			dsp->client_ops->control_remove(ctl);
-
-		list_del(&ctl->list);
-		cs_dsp_free_ctl_blk(ctl);
-	}
-}
-
 void wm_adsp2_remove(struct wm_adsp *dsp)
 {
 	cs_dsp_remove(&dsp->cs_dsp);
@@ -3873,57 +1307,6 @@ int wm_adsp_compr_get_caps(struct snd_soc_component *component,
 }
 EXPORT_SYMBOL_GPL(wm_adsp_compr_get_caps);
 
-static int cs_dsp_read_raw_data_block(struct cs_dsp *dsp, int mem_type,
-				      unsigned int mem_addr,
-				      unsigned int num_words, __be32 *data)
-{
-	struct cs_dsp_region const *mem = cs_dsp_find_region(dsp, mem_type);
-	unsigned int reg;
-	int ret;
-
-	if (!mem)
-		return -EINVAL;
-
-	reg = dsp->ops->region_to_reg(mem, mem_addr);
-
-	ret = regmap_raw_read(dsp->regmap, reg, data,
-			      sizeof(*data) * num_words);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int cs_dsp_read_data_word(struct cs_dsp *dsp, int mem_type,
-				 unsigned int mem_addr, u32 *data)
-{
-	__be32 raw;
-	int ret;
-
-	ret = cs_dsp_read_raw_data_block(dsp, mem_type, mem_addr, 1, &raw);
-	if (ret < 0)
-		return ret;
-
-	*data = be32_to_cpu(raw) & 0x00ffffffu;
-
-	return 0;
-}
-
-static int cs_dsp_write_data_word(struct cs_dsp *dsp, int mem_type,
-				  unsigned int mem_addr, u32 data)
-{
-	struct cs_dsp_region const *mem = cs_dsp_find_region(dsp, mem_type);
-	__be32 val = cpu_to_be32(data & 0x00ffffffu);
-	unsigned int reg;
-
-	if (!mem)
-		return -EINVAL;
-
-	reg = dsp->ops->region_to_reg(mem, mem_addr);
-
-	return regmap_raw_write(dsp->regmap, reg, &val, sizeof(val));
-}
-
 static inline int wm_adsp_buffer_read(struct wm_adsp_compr_buf *buf,
 				      unsigned int field_offset, u32 *data)
 {
@@ -3939,25 +1322,6 @@ static inline int wm_adsp_buffer_write(struct wm_adsp_compr_buf *buf,
 				      data);
 }
 
-static void cs_dsp_remove_padding(u32 *buf, int nwords)
-{
-	const __be32 *pack_in = (__be32 *)buf;
-	u8 *pack_out = (u8 *)buf;
-	int i;
-
-	/*
-	 * DSP words from the register map have pad bytes and the data bytes
-	 * are in swapped order. This swaps back to the original little-endian
-	 * order and strips the pad bytes.
-	 */
-	for (i = 0; i < nwords; i++) {
-		u32 word = be32_to_cpu(*pack_in++);
-		*pack_out++ = (u8)word;
-		*pack_out++ = (u8)(word >> 8);
-		*pack_out++ = (u8)(word >> 16);
-	}
-}
-
 static int wm_adsp_buffer_populate(struct wm_adsp_compr_buf *buf)
 {
 	const struct wm_adsp_fw_caps *caps = wm_adsp_fw[buf->dsp->fw].caps;
@@ -4568,69 +1932,6 @@ static void wm_adsp_fatal_error(struct cs_dsp *cs_dsp)
 	}
 }
 
-static void cs_dsp_adsp2_bus_error(struct cs_dsp *dsp)
-{
-	unsigned int val;
-	struct regmap *regmap = dsp->regmap;
-	int ret = 0;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	ret = regmap_read(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL, &val);
-	if (ret) {
-		cs_dsp_err(dsp,
-			   "Failed to read Region Lock Ctrl register: %d\n", ret);
-		goto error;
-	}
-
-	if (val & ADSP2_WDT_TIMEOUT_STS_MASK) {
-		cs_dsp_err(dsp, "watchdog timeout error\n");
-		dsp->ops->stop_watchdog(dsp);
-		if (dsp->client_ops->watchdog_expired)
-			dsp->client_ops->watchdog_expired(dsp);
-	}
-
-	if (val & (ADSP2_ADDR_ERR_MASK | ADSP2_REGION_LOCK_ERR_MASK)) {
-		if (val & ADSP2_ADDR_ERR_MASK)
-			cs_dsp_err(dsp, "bus error: address error\n");
-		else
-			cs_dsp_err(dsp, "bus error: region lock error\n");
-
-		ret = regmap_read(regmap, dsp->base + ADSP2_BUS_ERR_ADDR, &val);
-		if (ret) {
-			cs_dsp_err(dsp,
-				   "Failed to read Bus Err Addr register: %d\n",
-				   ret);
-			goto error;
-		}
-
-		cs_dsp_err(dsp, "bus error address = 0x%x\n",
-			   val & ADSP2_BUS_ERR_ADDR_MASK);
-
-		ret = regmap_read(regmap,
-				  dsp->base + ADSP2_PMEM_ERR_ADDR_XMEM_ERR_ADDR,
-				  &val);
-		if (ret) {
-			cs_dsp_err(dsp,
-				   "Failed to read Pmem Xmem Err Addr register: %d\n",
-				   ret);
-			goto error;
-		}
-
-		cs_dsp_err(dsp, "xmem error address = 0x%x\n",
-			   val & ADSP2_XMEM_ERR_ADDR_MASK);
-		cs_dsp_err(dsp, "pmem error address = 0x%x\n",
-			   (val & ADSP2_PMEM_ERR_ADDR_MASK) >>
-			   ADSP2_PMEM_ERR_ADDR_SHIFT);
-	}
-
-	regmap_update_bits(regmap, dsp->base + ADSP2_LOCK_REGION_CTRL,
-			   ADSP2_CTRL_ERR_EINT, ADSP2_CTRL_ERR_EINT);
-
-error:
-	mutex_unlock(&dsp->pwr_lock);
-}
-
 irqreturn_t wm_adsp2_bus_error(int irq, void *data)
 {
 	struct wm_adsp *dsp = (struct wm_adsp *)data;
@@ -4641,59 +1942,6 @@ irqreturn_t wm_adsp2_bus_error(int irq, void *data)
 }
 EXPORT_SYMBOL_GPL(wm_adsp2_bus_error);
 
-static void cs_dsp_halo_bus_error(struct cs_dsp *dsp)
-{
-	struct regmap *regmap = dsp->regmap;
-	unsigned int fault[6];
-	struct reg_sequence clear[] = {
-		{ dsp->base + HALO_MPU_XM_VIO_STATUS,     0x0 },
-		{ dsp->base + HALO_MPU_YM_VIO_STATUS,     0x0 },
-		{ dsp->base + HALO_MPU_PM_VIO_STATUS,     0x0 },
-	};
-	int ret;
-
-	mutex_lock(&dsp->pwr_lock);
-
-	ret = regmap_read(regmap, dsp->base_sysinfo + HALO_AHBM_WINDOW_DEBUG_1,
-			  fault);
-	if (ret) {
-		cs_dsp_warn(dsp, "Failed to read AHB DEBUG_1: %d\n", ret);
-		goto exit_unlock;
-	}
-
-	cs_dsp_warn(dsp, "AHB: STATUS: 0x%x ADDR: 0x%x\n",
-		    *fault & HALO_AHBM_FLAGS_ERR_MASK,
-		    (*fault & HALO_AHBM_CORE_ERR_ADDR_MASK) >>
-		    HALO_AHBM_CORE_ERR_ADDR_SHIFT);
-
-	ret = regmap_read(regmap, dsp->base_sysinfo + HALO_AHBM_WINDOW_DEBUG_0,
-			  fault);
-	if (ret) {
-		cs_dsp_warn(dsp, "Failed to read AHB DEBUG_0: %d\n", ret);
-		goto exit_unlock;
-	}
-
-	cs_dsp_warn(dsp, "AHB: SYS_ADDR: 0x%x\n", *fault);
-
-	ret = regmap_bulk_read(regmap, dsp->base + HALO_MPU_XM_VIO_ADDR,
-			       fault, ARRAY_SIZE(fault));
-	if (ret) {
-		cs_dsp_warn(dsp, "Failed to read MPU fault info: %d\n", ret);
-		goto exit_unlock;
-	}
-
-	cs_dsp_warn(dsp, "XM: STATUS:0x%x ADDR:0x%x\n", fault[1], fault[0]);
-	cs_dsp_warn(dsp, "YM: STATUS:0x%x ADDR:0x%x\n", fault[3], fault[2]);
-	cs_dsp_warn(dsp, "PM: STATUS:0x%x ADDR:0x%x\n", fault[5], fault[4]);
-
-	ret = regmap_multi_reg_write(dsp->regmap, clear, ARRAY_SIZE(clear));
-	if (ret)
-		cs_dsp_warn(dsp, "Failed to clear MPU status: %d\n", ret);
-
-exit_unlock:
-	mutex_unlock(&dsp->pwr_lock);
-}
-
 irqreturn_t wm_halo_bus_error(int irq, void *data)
 {
 	struct wm_adsp *dsp = (struct wm_adsp *)data;
@@ -4704,19 +1952,6 @@ irqreturn_t wm_halo_bus_error(int irq, void *data)
 }
 EXPORT_SYMBOL_GPL(wm_halo_bus_error);
 
-static void cs_dsp_halo_wdt_expire(struct cs_dsp *dsp)
-{
-	mutex_lock(&dsp->pwr_lock);
-
-	cs_dsp_warn(dsp, "WDT Expiry Fault\n");
-
-	dsp->ops->stop_watchdog(dsp);
-	if (dsp->client_ops->watchdog_expired)
-		dsp->client_ops->watchdog_expired(dsp);
-
-	mutex_unlock(&dsp->pwr_lock);
-}
-
 irqreturn_t wm_halo_wdt_expire(int irq, void *data)
 {
 	struct wm_adsp *dsp = data;
@@ -4727,90 +1962,11 @@ irqreturn_t wm_halo_wdt_expire(int irq, void *data)
 }
 EXPORT_SYMBOL_GPL(wm_halo_wdt_expire);
 
-static const struct cs_dsp_ops cs_dsp_adsp1_ops = {
-	.validate_version = cs_dsp_validate_version,
-	.parse_sizes = cs_dsp_adsp1_parse_sizes,
-	.region_to_reg = cs_dsp_region_to_reg,
-};
-
 static const struct cs_dsp_client_ops wm_adsp1_client_ops = {
 	.control_add = wm_adsp_control_add,
 	.control_remove = wm_adsp_control_remove,
 };
 
-static const struct cs_dsp_ops cs_dsp_adsp2_ops[] = {
-	{
-		.parse_sizes = cs_dsp_adsp2_parse_sizes,
-		.validate_version = cs_dsp_validate_version,
-		.setup_algs = cs_dsp_adsp2_setup_algs,
-		.region_to_reg = cs_dsp_region_to_reg,
-
-		.show_fw_status = cs_dsp_adsp2_show_fw_status,
-
-		.enable_memory = cs_dsp_adsp2_enable_memory,
-		.disable_memory = cs_dsp_adsp2_disable_memory,
-
-		.enable_core = cs_dsp_adsp2_enable_core,
-		.disable_core = cs_dsp_adsp2_disable_core,
-
-		.start_core = cs_dsp_adsp2_start_core,
-		.stop_core = cs_dsp_adsp2_stop_core,
-
-	},
-	{
-		.parse_sizes = cs_dsp_adsp2_parse_sizes,
-		.validate_version = cs_dsp_validate_version,
-		.setup_algs = cs_dsp_adsp2_setup_algs,
-		.region_to_reg = cs_dsp_region_to_reg,
-
-		.show_fw_status = cs_dsp_adsp2v2_show_fw_status,
-
-		.enable_memory = cs_dsp_adsp2_enable_memory,
-		.disable_memory = cs_dsp_adsp2_disable_memory,
-		.lock_memory = cs_dsp_adsp2_lock,
-
-		.enable_core = cs_dsp_adsp2v2_enable_core,
-		.disable_core = cs_dsp_adsp2v2_disable_core,
-
-		.start_core = cs_dsp_adsp2_start_core,
-		.stop_core = cs_dsp_adsp2_stop_core,
-	},
-	{
-		.parse_sizes = cs_dsp_adsp2_parse_sizes,
-		.validate_version = cs_dsp_validate_version,
-		.setup_algs = cs_dsp_adsp2_setup_algs,
-		.region_to_reg = cs_dsp_region_to_reg,
-
-		.show_fw_status = cs_dsp_adsp2v2_show_fw_status,
-		.stop_watchdog = cs_dsp_stop_watchdog,
-
-		.enable_memory = cs_dsp_adsp2_enable_memory,
-		.disable_memory = cs_dsp_adsp2_disable_memory,
-		.lock_memory = cs_dsp_adsp2_lock,
-
-		.enable_core = cs_dsp_adsp2v2_enable_core,
-		.disable_core = cs_dsp_adsp2v2_disable_core,
-
-		.start_core = cs_dsp_adsp2_start_core,
-		.stop_core = cs_dsp_adsp2_stop_core,
-	},
-};
-
-static const struct cs_dsp_ops cs_dsp_halo_ops = {
-	.parse_sizes = cs_dsp_adsp2_parse_sizes,
-	.validate_version = cs_dsp_halo_validate_version,
-	.setup_algs = cs_dsp_halo_setup_algs,
-	.region_to_reg = cs_dsp_halo_region_to_reg,
-
-	.show_fw_status = cs_dsp_halo_show_fw_status,
-	.stop_watchdog = cs_dsp_halo_stop_watchdog,
-
-	.lock_memory = cs_dsp_halo_configure_mpu,
-
-	.start_core = cs_dsp_halo_start_core,
-	.stop_core = cs_dsp_halo_stop_core,
-};
-
 static const struct cs_dsp_client_ops wm_adsp2_client_ops = {
 	.control_add = wm_adsp_control_add,
 	.control_remove = wm_adsp_control_remove,
diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h
index 25aaef74654c..0e2f113bd342 100644
--- a/sound/soc/codecs/wm_adsp.h
+++ b/sound/soc/codecs/wm_adsp.h
@@ -10,113 +10,19 @@
 #ifndef __WM_ADSP_H
 #define __WM_ADSP_H
 
+#include <linux/firmware/cirrus/cs_dsp.h>
+#include <linux/firmware/cirrus/wmfw.h>
+
 #include <sound/soc.h>
 #include <sound/soc-dapm.h>
 #include <sound/compress_driver.h>
 
-#include "wmfw.h"
-
 /* Return values for wm_adsp_compr_handle_irq */
 #define WM_ADSP_COMPR_OK                 0
 #define WM_ADSP_COMPR_VOICE_TRIGGER      1
 
-#define CS_ADSP2_REGION_0 BIT(0)
-#define CS_ADSP2_REGION_1 BIT(1)
-#define CS_ADSP2_REGION_2 BIT(2)
-#define CS_ADSP2_REGION_3 BIT(3)
-#define CS_ADSP2_REGION_4 BIT(4)
-#define CS_ADSP2_REGION_5 BIT(5)
-#define CS_ADSP2_REGION_6 BIT(6)
-#define CS_ADSP2_REGION_7 BIT(7)
-#define CS_ADSP2_REGION_8 BIT(8)
-#define CS_ADSP2_REGION_9 BIT(9)
-#define CS_ADSP2_REGION_1_9 (CS_ADSP2_REGION_1 | \
-		CS_ADSP2_REGION_2 | CS_ADSP2_REGION_3 | \
-		CS_ADSP2_REGION_4 | CS_ADSP2_REGION_5 | \
-		CS_ADSP2_REGION_6 | CS_ADSP2_REGION_7 | \
-		CS_ADSP2_REGION_8 | CS_ADSP2_REGION_9)
-#define CS_ADSP2_REGION_ALL (CS_ADSP2_REGION_0 | CS_ADSP2_REGION_1_9)
-
-struct cs_dsp_region {
-	int type;
-	unsigned int base;
-};
-
-struct cs_dsp_alg_region {
-	struct list_head list;
-	unsigned int alg;
-	int type;
-	unsigned int base;
-};
-
 struct wm_adsp_compr;
 struct wm_adsp_compr_buf;
-struct cs_dsp_ops;
-struct cs_dsp_client_ops;
-
-struct cs_dsp_coeff_ctl {
-	const char *fw_name;
-	/* Subname is needed to match with firmware */
-	const char *subname;
-	unsigned int subname_len;
-	struct cs_dsp_alg_region alg_region;
-	struct cs_dsp *dsp;
-	unsigned int enabled:1;
-	struct list_head list;
-	void *cache;
-	unsigned int offset;
-	size_t len;
-	unsigned int set:1;
-	unsigned int flags;
-	unsigned int type;
-
-	void *priv;
-};
-
-struct cs_dsp {
-	const char *name;
-	int rev;
-	int num;
-	int type;
-	struct device *dev;
-	struct regmap *regmap;
-
-	const struct cs_dsp_ops *ops;
-	const struct cs_dsp_client_ops *client_ops;
-
-	unsigned int base;
-	unsigned int base_sysinfo;
-	unsigned int sysclk_reg;
-	unsigned int sysclk_mask;
-	unsigned int sysclk_shift;
-
-	struct list_head alg_regions;
-
-	const char *fw_name;
-	unsigned int fw_id;
-	unsigned int fw_id_version;
-	unsigned int fw_vendor_id;
-
-	const struct cs_dsp_region *mem;
-	int num_mems;
-
-	int fw_ver;
-
-	bool booted;
-	bool running;
-
-	struct list_head ctl_list;
-
-	struct mutex pwr_lock;
-
-	unsigned int lock_regions;
-
-#ifdef CONFIG_DEBUG_FS
-	struct dentry *debugfs_root;
-	char *wmfw_file_name;
-	char *bin_file_name;
-#endif
-};
 
 struct wm_adsp {
 	struct cs_dsp cs_dsp;
@@ -137,30 +43,6 @@ struct wm_adsp {
 	struct list_head buffer_list;
 };
 
-struct cs_dsp_ops {
-	bool (*validate_version)(struct cs_dsp *dsp, unsigned int version);
-	unsigned int (*parse_sizes)(struct cs_dsp *dsp,
-				    const char * const file,
-				    unsigned int pos,
-				    const struct firmware *firmware);
-	int (*setup_algs)(struct cs_dsp *dsp);
-	unsigned int (*region_to_reg)(struct cs_dsp_region const *mem,
-				      unsigned int offset);
-
-	void (*show_fw_status)(struct cs_dsp *dsp);
-	void (*stop_watchdog)(struct cs_dsp *dsp);
-
-	int (*enable_memory)(struct cs_dsp *dsp);
-	void (*disable_memory)(struct cs_dsp *dsp);
-	int (*lock_memory)(struct cs_dsp *dsp, unsigned int lock_regions);
-
-	int (*enable_core)(struct cs_dsp *dsp);
-	void (*disable_core)(struct cs_dsp *dsp);
-
-	int (*start_core)(struct cs_dsp *dsp);
-	void (*stop_core)(struct cs_dsp *dsp);
-};
-
 #define WM_ADSP1(wname, num) \
 	SND_SOC_DAPM_PGA_E(wname, SND_SOC_NOPM, num, 0, NULL, 0, \
 		wm_adsp1_event, SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_PRE_PMD)
@@ -239,12 +121,4 @@ int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name,  int type,
 int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name,  int type,
 		      unsigned int alg, void *buf, size_t len);
 
-struct cs_dsp_client_ops {
-	int (*control_add)(struct cs_dsp_coeff_ctl *ctl);
-	void (*control_remove)(struct cs_dsp_coeff_ctl *ctl);
-	int (*post_run)(struct cs_dsp *dsp);
-	void (*post_stop)(struct cs_dsp *dsp);
-	void (*watchdog_expired)(struct cs_dsp *dsp);
-};
-
 #endif
diff --git a/sound/soc/codecs/wmfw.h b/sound/soc/codecs/wmfw.h
deleted file mode 100644
index a19bf7c6fc8b..000000000000
--- a/sound/soc/codecs/wmfw.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * wmfw.h - Wolfson firmware format information
- *
- * Copyright 2012 Wolfson Microelectronics plc
- *
- * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
- */
-
-#ifndef __WMFW_H
-#define __WMFW_H
-
-#include <linux/types.h>
-
-#define WMFW_MAX_ALG_NAME         256
-#define WMFW_MAX_ALG_DESCR_NAME   256
-
-#define WMFW_MAX_COEFF_NAME       256
-#define WMFW_MAX_COEFF_DESCR_NAME 256
-
-#define WMFW_CTL_FLAG_SYS         0x8000
-#define WMFW_CTL_FLAG_VOLATILE    0x0004
-#define WMFW_CTL_FLAG_WRITEABLE   0x0002
-#define WMFW_CTL_FLAG_READABLE    0x0001
-
-#define WMFW_CTL_TYPE_BYTES       0x0004 /* byte control */
-
-/* Non-ALSA coefficient types start at 0x1000 */
-#define WMFW_CTL_TYPE_ACKED       0x1000 /* acked control */
-#define WMFW_CTL_TYPE_HOSTEVENT   0x1001 /* event control */
-#define WMFW_CTL_TYPE_HOST_BUFFER 0x1002 /* host buffer pointer */
-
-struct wmfw_header {
-	char magic[4];
-	__le32 len;
-	__le16 rev;
-	u8 core;
-	u8 ver;
-} __packed;
-
-struct wmfw_footer {
-	__le64 timestamp;
-	__le32 checksum;
-} __packed;
-
-struct wmfw_adsp1_sizes {
-	__le32 dm;
-	__le32 pm;
-	__le32 zm;
-} __packed;
-
-struct wmfw_adsp2_sizes {
-	__le32 xm;
-	__le32 ym;
-	__le32 pm;
-	__le32 zm;
-} __packed;
-
-struct wmfw_region {
-	union {
-		__be32 type;
-		__le32 offset;
-	};
-	__le32 len;
-	u8 data[];
-} __packed;
-
-struct wmfw_id_hdr {
-	__be32 core_id;
-	__be32 core_rev;
-	__be32 id;
-	__be32 ver;
-} __packed;
-
-struct wmfw_v3_id_hdr {
-	__be32 core_id;
-	__be32 block_rev;
-	__be32 vendor_id;
-	__be32 id;
-	__be32 ver;
-} __packed;
-
-struct wmfw_adsp1_id_hdr {
-	struct wmfw_id_hdr fw;
-	__be32 zm;
-	__be32 dm;
-	__be32 n_algs;
-} __packed;
-
-struct wmfw_adsp2_id_hdr {
-	struct wmfw_id_hdr fw;
-	__be32 zm;
-	__be32 xm;
-	__be32 ym;
-	__be32 n_algs;
-} __packed;
-
-struct wmfw_halo_id_hdr {
-	struct wmfw_v3_id_hdr fw;
-	__be32 xm_base;
-	__be32 xm_size;
-	__be32 ym_base;
-	__be32 ym_size;
-	__be32 n_algs;
-} __packed;
-
-struct wmfw_alg_hdr {
-	__be32 id;
-	__be32 ver;
-} __packed;
-
-struct wmfw_adsp1_alg_hdr {
-	struct wmfw_alg_hdr alg;
-	__be32 zm;
-	__be32 dm;
-} __packed;
-
-struct wmfw_adsp2_alg_hdr {
-	struct wmfw_alg_hdr alg;
-	__be32 zm;
-	__be32 xm;
-	__be32 ym;
-} __packed;
-
-struct wmfw_halo_alg_hdr {
-	struct wmfw_alg_hdr alg;
-	__be32 xm_base;
-	__be32 xm_size;
-	__be32 ym_base;
-	__be32 ym_size;
-} __packed;
-
-struct wmfw_adsp_alg_data {
-	__le32 id;
-	u8 name[WMFW_MAX_ALG_NAME];
-	u8 descr[WMFW_MAX_ALG_DESCR_NAME];
-	__le32 ncoeff;
-	u8 data[];
-} __packed;
-
-struct wmfw_adsp_coeff_data {
-	struct {
-		__le16 offset;
-		__le16 type;
-		__le32 size;
-	} hdr;
-	u8 name[WMFW_MAX_COEFF_NAME];
-	u8 descr[WMFW_MAX_COEFF_DESCR_NAME];
-	__le16 ctl_type;
-	__le16 flags;
-	__le32 len;
-	u8 data[];
-} __packed;
-
-struct wmfw_coeff_hdr {
-	u8 magic[4];
-	__le32 len;
-	union {
-		__be32 rev;
-		__le32 ver;
-	};
-	union {
-		__be32 core;
-		__le32 core_ver;
-	};
-	u8 data[];
-} __packed;
-
-struct wmfw_coeff_item {
-	__le16 offset;
-	__le16 type;
-	__le32 id;
-	__le32 ver;
-	__le32 sr;
-	__le32 len;
-	u8 data[];
-} __packed;
-
-#define WMFW_ADSP1 1
-#define WMFW_ADSP2 2
-#define WMFW_HALO 4
-
-#define WMFW_ABSOLUTE         0xf0
-#define WMFW_ALGORITHM_DATA   0xf2
-#define WMFW_METADATA         0xfc
-#define WMFW_NAME_TEXT        0xfe
-#define WMFW_INFO_TEXT        0xff
-
-#define WMFW_ADSP1_PM 2
-#define WMFW_ADSP1_DM 3
-#define WMFW_ADSP1_ZM 4
-
-#define WMFW_ADSP2_PM 2
-#define WMFW_ADSP2_ZM 4
-#define WMFW_ADSP2_XM 5
-#define WMFW_ADSP2_YM 6
-
-#define WMFW_HALO_PM_PACKED 0x10
-#define WMFW_HALO_XM_PACKED 0x11
-#define WMFW_HALO_YM_PACKED 0x12
-
-#endif
-- 
cgit v1.2.3


From c25303281d79299e9f35d4b2e496a8bd134d5715 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 5 Jun 2021 23:20:17 -0400
Subject: mm: Convert get_page_unless_zero() to return bool

atomic_add_unless() returns bool, so remove the widening casts to int
in page_ref_add_unless() and get_page_unless_zero().  This causes gcc
to produce slightly larger code in isolate_migratepages_block(), but
it's not clear that it's worse code.  Net +19 bytes of text.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h       | 2 +-
 include/linux/page_ref.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..1de8864a1e28 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -754,7 +754,7 @@ static inline int put_page_testzero(struct page *page)
  * This can be called when MMU is off so it must not access
  * any of the virtual mappings.
  */
-static inline int get_page_unless_zero(struct page *page)
+static inline bool get_page_unless_zero(struct page *page)
 {
 	return page_ref_add_unless(page, 1, 0);
 }
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 7ad46f45df39..3a799de8ad52 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -161,9 +161,9 @@ static inline int page_ref_dec_return(struct page *page)
 	return ret;
 }
 
-static inline int page_ref_add_unless(struct page *page, int nr, int u)
+static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 {
-	int ret = atomic_add_unless(&page->_refcount, nr, u);
+	bool ret = atomic_add_unless(&page->_refcount, nr, u);
 
 	if (page_ref_tracepoint_active(page_ref_mod_unless))
 		__page_ref_mod_unless(page, nr, ret);
-- 
cgit v1.2.3


From 7b230db3b8d373219f88a3d25c8fbbf12cc7f233 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 6 Dec 2020 22:22:48 -0500
Subject: mm: Introduce struct folio

A struct folio is a new abstraction to replace the venerable struct page.
A function which takes a struct folio argument declares that it will
operate on the entire (possibly compound) page, not just PAGE_SIZE bytes.
In return, the caller guarantees that the pointer it is passing does
not point to a tail page.  No change to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 Documentation/core-api/mm-api.rst |  1 +
 include/linux/mm.h                | 75 +++++++++++++++++++++++++++++++++++++++
 include/linux/mm_types.h          | 60 +++++++++++++++++++++++++++++++
 include/linux/page-flags.h        | 28 +++++++++++++++
 4 files changed, 164 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst
index a42f9baddfbf..2a94e6164f80 100644
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@@ -95,6 +95,7 @@ More Memory Management Functions
 .. kernel-doc:: mm/mempolicy.c
 .. kernel-doc:: include/linux/mm_types.h
    :internal:
+.. kernel-doc:: include/linux/page-flags.h
 .. kernel-doc:: include/linux/mm.h
    :internal:
 .. kernel-doc:: include/linux/mmzone.h
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1de8864a1e28..9057b8406acf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -950,6 +950,20 @@ static inline unsigned int compound_order(struct page *page)
 	return page[1].compound_order;
 }
 
+/**
+ * folio_order - The allocation order of a folio.
+ * @folio: The folio.
+ *
+ * A folio is composed of 2^order pages.  See get_order() for the definition
+ * of order.
+ *
+ * Return: The order of the folio.
+ */
+static inline unsigned int folio_order(struct folio *folio)
+{
+	return compound_order(&folio->page);
+}
+
 static inline bool hpage_pincount_available(struct page *page)
 {
 	/*
@@ -1595,6 +1609,66 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
 #endif
 }
 
+/**
+ * folio_nr_pages - The number of pages in the folio.
+ * @folio: The folio.
+ *
+ * Return: A positive power of two.
+ */
+static inline long folio_nr_pages(struct folio *folio)
+{
+	return compound_nr(&folio->page);
+}
+
+/**
+ * folio_next - Move to the next physical folio.
+ * @folio: The folio we're currently operating on.
+ *
+ * If you have physically contiguous memory which may span more than
+ * one folio (eg a &struct bio_vec), use this function to move from one
+ * folio to the next.  Do not use it if the memory is only virtually
+ * contiguous as the folios are almost certainly not adjacent to each
+ * other.  This is the folio equivalent to writing ``page++``.
+ *
+ * Context: We assume that the folios are refcounted and/or locked at a
+ * higher level and do not adjust the reference counts.
+ * Return: The next struct folio.
+ */
+static inline struct folio *folio_next(struct folio *folio)
+{
+	return (struct folio *)folio_page(folio, folio_nr_pages(folio));
+}
+
+/**
+ * folio_shift - The size of the memory described by this folio.
+ * @folio: The folio.
+ *
+ * A folio represents a number of bytes which is a power-of-two in size.
+ * This function tells you which power-of-two the folio is.  See also
+ * folio_size() and folio_order().
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split.  It is not necessary for the folio to be locked.
+ * Return: The base-2 logarithm of the size of this folio.
+ */
+static inline unsigned int folio_shift(struct folio *folio)
+{
+	return PAGE_SHIFT + folio_order(folio);
+}
+
+/**
+ * folio_size - The number of bytes in a folio.
+ * @folio: The folio.
+ *
+ * Context: The caller should have a reference on the folio to prevent
+ * it from being split.  It is not necessary for the folio to be locked.
+ * Return: The number of bytes in this folio.
+ */
+static inline size_t folio_size(struct folio *folio)
+{
+	return PAGE_SIZE << folio_order(folio);
+}
+
 /*
  * Some inline functions in vmstat.h depend on page_zone()
  */
@@ -1700,6 +1774,7 @@ extern void pagefault_out_of_memory(void);
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 #define offset_in_thp(page, p)	((unsigned long)(p) & (thp_size(page) - 1))
+#define offset_in_folio(folio, p) ((unsigned long)(p) & (folio_size(folio) - 1))
 
 /*
  * Flags passed to show_mem() and show_free_areas() to suppress output in
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..6908186629b4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -239,6 +239,66 @@ struct page {
 #endif
 } _struct_page_alignment;
 
+/**
+ * struct folio - Represents a contiguous set of bytes.
+ * @flags: Identical to the page flags.
+ * @lru: Least Recently Used list; tracks how recently this folio was used.
+ * @mapping: The file this page belongs to, or refers to the anon_vma for
+ *    anonymous memory.
+ * @index: Offset within the file, in units of pages.  For anonymous memory,
+ *    this is the index from the beginning of the mmap.
+ * @private: Filesystem per-folio data (see folio_attach_private()).
+ *    Used for swp_entry_t if folio_test_swapcache().
+ * @_mapcount: Do not access this member directly.  Use folio_mapcount() to
+ *    find out how many times this folio is mapped by userspace.
+ * @_refcount: Do not access this member directly.  Use folio_ref_count()
+ *    to find how many references there are to this folio.
+ * @memcg_data: Memory Control Group data.
+ *
+ * A folio is a physically, virtually and logically contiguous set
+ * of bytes.  It is a power-of-two in size, and it is aligned to that
+ * same power-of-two.  It is at least as large as %PAGE_SIZE.  If it is
+ * in the page cache, it is at a file offset which is a multiple of that
+ * power-of-two.  It may be mapped into userspace at an address which is
+ * at an arbitrary page offset, but its kernel virtual address is aligned
+ * to its size.
+ */
+struct folio {
+	/* private: don't document the anon union */
+	union {
+		struct {
+	/* public: */
+			unsigned long flags;
+			struct list_head lru;
+			struct address_space *mapping;
+			pgoff_t index;
+			void *private;
+			atomic_t _mapcount;
+			atomic_t _refcount;
+#ifdef CONFIG_MEMCG
+			unsigned long memcg_data;
+#endif
+	/* private: the union with struct page is transitional */
+		};
+		struct page page;
+	};
+};
+
+static_assert(sizeof(struct page) == sizeof(struct folio));
+#define FOLIO_MATCH(pg, fl)						\
+	static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl))
+FOLIO_MATCH(flags, flags);
+FOLIO_MATCH(lru, lru);
+FOLIO_MATCH(compound_head, lru);
+FOLIO_MATCH(index, index);
+FOLIO_MATCH(private, private);
+FOLIO_MATCH(_mapcount, _mapcount);
+FOLIO_MATCH(_refcount, _refcount);
+#ifdef CONFIG_MEMCG
+FOLIO_MATCH(memcg_data, memcg_data);
+#endif
+#undef FOLIO_MATCH
+
 static inline atomic_t *compound_mapcount_ptr(struct page *page)
 {
 	return &page[1].compound_mapcount;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a558d67ee86f..e9b0723a637d 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -193,6 +193,34 @@ static inline unsigned long _compound_head(const struct page *page)
 
 #define compound_head(page)	((typeof(page))_compound_head(page))
 
+/**
+ * page_folio - Converts from page to folio.
+ * @p: The page.
+ *
+ * Every page is part of a folio.  This function cannot be called on a
+ * NULL pointer.
+ *
+ * Context: No reference, nor lock is required on @page.  If the caller
+ * does not hold a reference, this call may race with a folio split, so
+ * it should re-check the folio still contains this page after gaining
+ * a reference on the folio.
+ * Return: The folio which contains this page.
+ */
+#define page_folio(p)		(_Generic((p),				\
+	const struct page *:	(const struct folio *)_compound_head(p), \
+	struct page *:		(struct folio *)_compound_head(p)))
+
+/**
+ * folio_page - Return a page from a folio.
+ * @folio: The folio.
+ * @n: The page number to return.
+ *
+ * @n is relative to the start of the folio.  This function does not
+ * check that the page number lies within @folio; the caller is presumed
+ * to have a reference to the page.
+ */
+#define folio_page(folio, n)	nth_page(&(folio)->page, n)
+
 static __always_inline int PageTail(struct page *page)
 {
 	return READ_ONCE(page->compound_head) & 1;
-- 
cgit v1.2.3


From 32b8fc486524044f2643c5d3340fa436b761ba71 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 18 Jan 2021 07:40:36 -0500
Subject: mm: Add folio_pgdat(), folio_zone() and folio_zonenum()

These are just convenience wrappers for callers with folios; pgdat and
zone can be reached from tail pages as well as head pages.  No change
to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/mm.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9057b8406acf..3fc524f8c2a2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1145,6 +1145,11 @@ static inline enum zone_type page_zonenum(const struct page *page)
 	return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
 
+static inline enum zone_type folio_zonenum(const struct folio *folio)
+{
+	return page_zonenum(&folio->page);
+}
+
 #ifdef CONFIG_ZONE_DEVICE
 static inline bool is_zone_device_page(const struct page *page)
 {
@@ -1560,6 +1565,16 @@ static inline pg_data_t *page_pgdat(const struct page *page)
 	return NODE_DATA(page_to_nid(page));
 }
 
+static inline struct zone *folio_zone(const struct folio *folio)
+{
+	return page_zone(&folio->page);
+}
+
+static inline pg_data_t *folio_pgdat(const struct folio *folio)
+{
+	return page_pgdat(&folio->page);
+}
+
 #ifdef SECTION_IN_PAGE_FLAGS
 static inline void set_page_section(struct page *page, unsigned long section)
 {
-- 
cgit v1.2.3


From a53e17e4e97b4519882d640984a386c522e9fba3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 18 Jan 2021 08:14:00 -0500
Subject: mm/vmstat: Add functions to account folio statistics

Allow page counters to be more readily modified by callers which have
a folio.  Name these wrappers with 'stat' instead of 'state' as requested
by Linus here:
https://lore.kernel.org/linux-mm/CAHk-=wj847SudR-kt+46fT3+xFFgiwpgThvm7DJWGdi4cVrbnQ@mail.gmail.com/
No change to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/vmstat.h | 107 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d6a6cf53b127..241bd0f53fb9 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -415,6 +415,78 @@ static inline void drain_zonestat(struct zone *zone,
 			struct per_cpu_zonestat *pzstats) { }
 #endif		/* CONFIG_SMP */
 
+static inline void __zone_stat_mod_folio(struct folio *folio,
+		enum zone_stat_item item, long nr)
+{
+	__mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void __zone_stat_add_folio(struct folio *folio,
+		enum zone_stat_item item)
+{
+	__mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __zone_stat_sub_folio(struct folio *folio,
+		enum zone_stat_item item)
+{
+	__mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void zone_stat_mod_folio(struct folio *folio,
+		enum zone_stat_item item, long nr)
+{
+	mod_zone_page_state(folio_zone(folio), item, nr);
+}
+
+static inline void zone_stat_add_folio(struct folio *folio,
+		enum zone_stat_item item)
+{
+	mod_zone_page_state(folio_zone(folio), item, folio_nr_pages(folio));
+}
+
+static inline void zone_stat_sub_folio(struct folio *folio,
+		enum zone_stat_item item)
+{
+	mod_zone_page_state(folio_zone(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void __node_stat_mod_folio(struct folio *folio,
+		enum node_stat_item item, long nr)
+{
+	__mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void __node_stat_add_folio(struct folio *folio,
+		enum node_stat_item item)
+{
+	__mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void __node_stat_sub_folio(struct folio *folio,
+		enum node_stat_item item)
+{
+	__mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
+static inline void node_stat_mod_folio(struct folio *folio,
+		enum node_stat_item item, long nr)
+{
+	mod_node_page_state(folio_pgdat(folio), item, nr);
+}
+
+static inline void node_stat_add_folio(struct folio *folio,
+		enum node_stat_item item)
+{
+	mod_node_page_state(folio_pgdat(folio), item, folio_nr_pages(folio));
+}
+
+static inline void node_stat_sub_folio(struct folio *folio,
+		enum node_stat_item item)
+{
+	mod_node_page_state(folio_pgdat(folio), item, -folio_nr_pages(folio));
+}
+
 static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
 					     int migratetype)
 {
@@ -543,6 +615,24 @@ static inline void __dec_lruvec_page_state(struct page *page,
 	__mod_lruvec_page_state(page, idx, -1);
 }
 
+static inline void __lruvec_stat_mod_folio(struct folio *folio,
+					   enum node_stat_item idx, int val)
+{
+	__mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void __lruvec_stat_add_folio(struct folio *folio,
+					   enum node_stat_item idx)
+{
+	__lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void __lruvec_stat_sub_folio(struct folio *folio,
+					   enum node_stat_item idx)
+{
+	__lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
+
 static inline void inc_lruvec_page_state(struct page *page,
 					 enum node_stat_item idx)
 {
@@ -555,4 +645,21 @@ static inline void dec_lruvec_page_state(struct page *page,
 	mod_lruvec_page_state(page, idx, -1);
 }
 
+static inline void lruvec_stat_mod_folio(struct folio *folio,
+					 enum node_stat_item idx, int val)
+{
+	mod_lruvec_page_state(&folio->page, idx, val);
+}
+
+static inline void lruvec_stat_add_folio(struct folio *folio,
+					 enum node_stat_item idx)
+{
+	lruvec_stat_mod_folio(folio, idx, folio_nr_pages(folio));
+}
+
+static inline void lruvec_stat_sub_folio(struct folio *folio,
+					 enum node_stat_item idx)
+{
+	lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio));
+}
 #endif /* _LINUX_VMSTAT_H */
-- 
cgit v1.2.3


From 9e9edb2094db7eb4c6da21e02ac885955350ecf9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 15 Jan 2021 10:52:37 -0500
Subject: mm/debug: Add VM_BUG_ON_FOLIO() and VM_WARN_ON_ONCE_FOLIO()

These are the folio equivalents of VM_BUG_ON_PAGE and
VM_WARN_ON_ONCE_PAGE.  No change to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/mmdebug.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 1935d4c72d10..d7285f8148a3 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -22,6 +22,13 @@ void dump_mm(const struct mm_struct *mm);
 			BUG();						\
 		}							\
 	} while (0)
+#define VM_BUG_ON_FOLIO(cond, folio)					\
+	do {								\
+		if (unlikely(cond)) {					\
+			dump_page(&folio->page, "VM_BUG_ON_FOLIO(" __stringify(cond)")");\
+			BUG();						\
+		}							\
+	} while (0)
 #define VM_BUG_ON_VMA(cond, vma)					\
 	do {								\
 		if (unlikely(cond)) {					\
@@ -47,6 +54,17 @@ void dump_mm(const struct mm_struct *mm);
 	}								\
 	unlikely(__ret_warn_once);					\
 })
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio)	({			\
+	static bool __section(".data.once") __warned;			\
+	int __ret_warn_once = !!(cond);					\
+									\
+	if (unlikely(__ret_warn_once && !__warned)) {			\
+		dump_page(&folio->page, "VM_WARN_ON_ONCE_FOLIO(" __stringify(cond)")");\
+		__warned = true;					\
+		WARN_ON(1);						\
+	}								\
+	unlikely(__ret_warn_once);					\
+})
 
 #define VM_WARN_ON(cond) (void)WARN_ON(cond)
 #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond)
@@ -55,11 +73,13 @@ void dump_mm(const struct mm_struct *mm);
 #else
 #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond)
+#define VM_BUG_ON_FOLIO(cond, folio) VM_BUG_ON(cond)
 #define VM_BUG_ON_VMA(cond, vma) VM_BUG_ON(cond)
 #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond)
 #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ON_ONCE_PAGE(cond, page)  BUILD_BUG_ON_INVALID(cond)
+#define VM_WARN_ON_ONCE_FOLIO(cond, folio)  BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
 #endif
-- 
cgit v1.2.3


From c24016ac3a629655ea164b1129816660187943c0 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 31 Mar 2021 10:39:55 -0400
Subject: mm: Add folio reference count functions

These functions mirror their page reference counterparts.  Also add
the kernel-doc to the mm-api and correct the return type of
page_ref_add_unless() to bool.  No change to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 Documentation/core-api/mm-api.rst |  1 +
 include/linux/page_ref.h          | 88 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst
index 2a94e6164f80..5c459ee2acce 100644
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@@ -98,4 +98,5 @@ More Memory Management Functions
 .. kernel-doc:: include/linux/page-flags.h
 .. kernel-doc:: include/linux/mm.h
    :internal:
+.. kernel-doc:: include/linux/page_ref.h
 .. kernel-doc:: include/linux/mmzone.h
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 3a799de8ad52..717d53c9ddf1 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -67,9 +67,31 @@ static inline int page_ref_count(const struct page *page)
 	return atomic_read(&page->_refcount);
 }
 
+/**
+ * folio_ref_count - The reference count on this folio.
+ * @folio: The folio.
+ *
+ * The refcount is usually incremented by calls to folio_get() and
+ * decremented by calls to folio_put().  Some typical users of the
+ * folio refcount:
+ *
+ * - Each reference from a page table
+ * - The page cache
+ * - Filesystem private data
+ * - The LRU list
+ * - Pipes
+ * - Direct IO which references this page in the process address space
+ *
+ * Return: The number of references to this folio.
+ */
+static inline int folio_ref_count(const struct folio *folio)
+{
+	return page_ref_count(&folio->page);
+}
+
 static inline int page_count(const struct page *page)
 {
-	return atomic_read(&compound_head(page)->_refcount);
+	return folio_ref_count(page_folio(page));
 }
 
 static inline void set_page_count(struct page *page, int v)
@@ -79,6 +101,11 @@ static inline void set_page_count(struct page *page, int v)
 		__page_ref_set(page, v);
 }
 
+static inline void folio_set_count(struct folio *folio, int v)
+{
+	set_page_count(&folio->page, v);
+}
+
 /*
  * Setup the page count before being freed into the page allocator for
  * the first time (boot or memory hotplug)
@@ -95,6 +122,11 @@ static inline void page_ref_add(struct page *page, int nr)
 		__page_ref_mod(page, nr);
 }
 
+static inline void folio_ref_add(struct folio *folio, int nr)
+{
+	page_ref_add(&folio->page, nr);
+}
+
 static inline void page_ref_sub(struct page *page, int nr)
 {
 	atomic_sub(nr, &page->_refcount);
@@ -102,6 +134,11 @@ static inline void page_ref_sub(struct page *page, int nr)
 		__page_ref_mod(page, -nr);
 }
 
+static inline void folio_ref_sub(struct folio *folio, int nr)
+{
+	page_ref_sub(&folio->page, nr);
+}
+
 static inline int page_ref_sub_return(struct page *page, int nr)
 {
 	int ret = atomic_sub_return(nr, &page->_refcount);
@@ -111,6 +148,11 @@ static inline int page_ref_sub_return(struct page *page, int nr)
 	return ret;
 }
 
+static inline int folio_ref_sub_return(struct folio *folio, int nr)
+{
+	return page_ref_sub_return(&folio->page, nr);
+}
+
 static inline void page_ref_inc(struct page *page)
 {
 	atomic_inc(&page->_refcount);
@@ -118,6 +160,11 @@ static inline void page_ref_inc(struct page *page)
 		__page_ref_mod(page, 1);
 }
 
+static inline void folio_ref_inc(struct folio *folio)
+{
+	page_ref_inc(&folio->page);
+}
+
 static inline void page_ref_dec(struct page *page)
 {
 	atomic_dec(&page->_refcount);
@@ -125,6 +172,11 @@ static inline void page_ref_dec(struct page *page)
 		__page_ref_mod(page, -1);
 }
 
+static inline void folio_ref_dec(struct folio *folio)
+{
+	page_ref_dec(&folio->page);
+}
+
 static inline int page_ref_sub_and_test(struct page *page, int nr)
 {
 	int ret = atomic_sub_and_test(nr, &page->_refcount);
@@ -134,6 +186,11 @@ static inline int page_ref_sub_and_test(struct page *page, int nr)
 	return ret;
 }
 
+static inline int folio_ref_sub_and_test(struct folio *folio, int nr)
+{
+	return page_ref_sub_and_test(&folio->page, nr);
+}
+
 static inline int page_ref_inc_return(struct page *page)
 {
 	int ret = atomic_inc_return(&page->_refcount);
@@ -143,6 +200,11 @@ static inline int page_ref_inc_return(struct page *page)
 	return ret;
 }
 
+static inline int folio_ref_inc_return(struct folio *folio)
+{
+	return page_ref_inc_return(&folio->page);
+}
+
 static inline int page_ref_dec_and_test(struct page *page)
 {
 	int ret = atomic_dec_and_test(&page->_refcount);
@@ -152,6 +214,11 @@ static inline int page_ref_dec_and_test(struct page *page)
 	return ret;
 }
 
+static inline int folio_ref_dec_and_test(struct folio *folio)
+{
+	return page_ref_dec_and_test(&folio->page);
+}
+
 static inline int page_ref_dec_return(struct page *page)
 {
 	int ret = atomic_dec_return(&page->_refcount);
@@ -161,6 +228,11 @@ static inline int page_ref_dec_return(struct page *page)
 	return ret;
 }
 
+static inline int folio_ref_dec_return(struct folio *folio)
+{
+	return page_ref_dec_return(&folio->page);
+}
+
 static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 {
 	bool ret = atomic_add_unless(&page->_refcount, nr, u);
@@ -170,6 +242,11 @@ static inline bool page_ref_add_unless(struct page *page, int nr, int u)
 	return ret;
 }
 
+static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
+{
+	return page_ref_add_unless(&folio->page, nr, u);
+}
+
 static inline int page_ref_freeze(struct page *page, int count)
 {
 	int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
@@ -179,6 +256,11 @@ static inline int page_ref_freeze(struct page *page, int count)
 	return ret;
 }
 
+static inline int folio_ref_freeze(struct folio *folio, int count)
+{
+	return page_ref_freeze(&folio->page, count);
+}
+
 static inline void page_ref_unfreeze(struct page *page, int count)
 {
 	VM_BUG_ON_PAGE(page_count(page) != 0, page);
@@ -189,4 +271,8 @@ static inline void page_ref_unfreeze(struct page *page, int count)
 		__page_ref_unfreeze(page, count);
 }
 
+static inline void folio_ref_unfreeze(struct folio *folio, int count)
+{
+	page_ref_unfreeze(&folio->page, count);
+}
 #endif
-- 
cgit v1.2.3


From b620f63358cd35c8e9084ee9fc460153f64714f6 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 6 Dec 2020 23:04:57 -0500
Subject: mm: Add folio_put()

If we know we have a folio, we can call folio_put() instead of put_page()
and save the overhead of calling compound_head().  Also skips the
devmap checks.

This commit looks like it should be a no-op, but actually saves 684 bytes
of text with the distro-derived config that I'm testing.  Some functions
grow a little while others shrink.  I presume the compiler is making
different inlining decisions.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/mm.h | 33 ++++++++++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3fc524f8c2a2..28a84d82247b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -748,6 +748,11 @@ static inline int put_page_testzero(struct page *page)
 	return page_ref_dec_and_test(page);
 }
 
+static inline int folio_put_testzero(struct folio *folio)
+{
+	return put_page_testzero(&folio->page);
+}
+
 /*
  * Try to grab a ref unless the page has a refcount of zero, return false if
  * that is the case.
@@ -1247,9 +1252,28 @@ static inline __must_check bool try_get_page(struct page *page)
 	return true;
 }
 
+/**
+ * folio_put - Decrement the reference count on a folio.
+ * @folio: The folio.
+ *
+ * If the folio's reference count reaches zero, the memory will be
+ * released back to the page allocator and may be used by another
+ * allocation immediately.  Do not access the memory or the struct folio
+ * after calling folio_put() unless you can be sure that it wasn't the
+ * last reference.
+ *
+ * Context: May be called in process or interrupt context, but not in NMI
+ * context.  May be called while holding a spinlock.
+ */
+static inline void folio_put(struct folio *folio)
+{
+	if (folio_put_testzero(folio))
+		__put_page(&folio->page);
+}
+
 static inline void put_page(struct page *page)
 {
-	page = compound_head(page);
+	struct folio *folio = page_folio(page);
 
 	/*
 	 * For devmap managed pages we need to catch refcount transition from
@@ -1257,13 +1281,12 @@ static inline void put_page(struct page *page)
 	 * need to inform the device driver through callback. See
 	 * include/linux/memremap.h and HMM for details.
 	 */
-	if (page_is_devmap_managed(page)) {
-		put_devmap_managed_page(page);
+	if (page_is_devmap_managed(&folio->page)) {
+		put_devmap_managed_page(&folio->page);
 		return;
 	}
 
-	if (put_page_testzero(page))
-		__put_page(page);
+	folio_put(folio);
 }
 
 /*
-- 
cgit v1.2.3


From 86d234cb0499c6466ccfc45f6501bc0cd4621c60 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 6 Dec 2020 23:04:57 -0500
Subject: mm: Add folio_get()

If we know we have a folio, we can call folio_get() instead
of get_page() and save the overhead of calling compound_head().
No change to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/mm.h | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 28a84d82247b..63685d953ce0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1224,18 +1224,26 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
 }
 
 /* 127: arbitrary random number, small enough to assemble well */
-#define page_ref_zero_or_close_to_overflow(page) \
-	((unsigned int) page_ref_count(page) + 127u <= 127u)
+#define folio_ref_zero_or_close_to_overflow(folio) \
+	((unsigned int) folio_ref_count(folio) + 127u <= 127u)
+
+/**
+ * folio_get - Increment the reference count on a folio.
+ * @folio: The folio.
+ *
+ * Context: May be called in any context, as long as you know that
+ * you have a refcount on the folio.  If you do not already have one,
+ * folio_try_get() may be the right interface for you to use.
+ */
+static inline void folio_get(struct folio *folio)
+{
+	VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio);
+	folio_ref_inc(folio);
+}
 
 static inline void get_page(struct page *page)
 {
-	page = compound_head(page);
-	/*
-	 * Getting a normal page or the head of a compound page
-	 * requires to already have an elevated page->_refcount.
-	 */
-	VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
-	page_ref_inc(page);
+	folio_get(page_folio(page));
 }
 
 bool __must_check try_grab_page(struct page *page, unsigned int flags);
-- 
cgit v1.2.3


From 020853b6f5ead2849b055e9873ff7267ce584256 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 10 May 2021 16:33:22 -0400
Subject: mm: Add folio_try_get_rcu()

This is the equivalent of page_cache_get_speculative().  Also add
folio_ref_try_add_rcu (the equivalent of page_cache_add_speculative)
and folio_get_unless_zero() (the equivalent of get_page_unless_zero()).

The new kernel-doc attempts to explain from the user's point of view
when to use folio_try_get_rcu() and when to use folio_get_unless_zero(),
because there seems to be some confusion currently between the users of
page_cache_get_speculative() and get_page_unless_zero().

Reimplement page_cache_add_speculative() and page_cache_get_speculative()
as wrappers around the folio equivalents, but leave get_page_unless_zero()
alone for now.  This commit reduces text size by 3 bytes due to slightly
different register allocation & instruction selections.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/page_ref.h | 66 +++++++++++++++++++++++++++++++++++++
 include/linux/pagemap.h  | 84 +++---------------------------------------------
 mm/filemap.c             | 20 ++++++++++++
 3 files changed, 90 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 717d53c9ddf1..2e677e6ad09f 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -247,6 +247,72 @@ static inline bool folio_ref_add_unless(struct folio *folio, int nr, int u)
 	return page_ref_add_unless(&folio->page, nr, u);
 }
 
+/**
+ * folio_try_get - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * If you do not already have a reference to a folio, you can attempt to
+ * get one using this function.  It may fail if, for example, the folio
+ * has been freed since you found a pointer to it, or it is frozen for
+ * the purposes of splitting or migration.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get(struct folio *folio)
+{
+	return folio_ref_add_unless(folio, 1, 0);
+}
+
+static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)
+{
+#ifdef CONFIG_TINY_RCU
+	/*
+	 * The caller guarantees the folio will not be freed from interrupt
+	 * context, so (on !SMP) we only need preemption to be disabled
+	 * and TINY_RCU does that for us.
+	 */
+# ifdef CONFIG_PREEMPT_COUNT
+	VM_BUG_ON(!in_atomic() && !irqs_disabled());
+# endif
+	VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);
+	folio_ref_add(folio, count);
+#else
+	if (unlikely(!folio_ref_add_unless(folio, count, 0))) {
+		/* Either the folio has been freed, or will be freed. */
+		return false;
+	}
+#endif
+	return true;
+}
+
+/**
+ * folio_try_get_rcu - Attempt to increase the refcount on a folio.
+ * @folio: The folio.
+ *
+ * This is a version of folio_try_get() optimised for non-SMP kernels.
+ * If you are still holding the rcu_read_lock() after looking up the
+ * page and know that the page cannot have its refcount decreased to
+ * zero in interrupt context, you can use this instead of folio_try_get().
+ *
+ * Example users include get_user_pages_fast() (as pages are not unmapped
+ * from interrupt context) and the page cache lookups (as pages are not
+ * truncated from interrupt context).  We also know that pages are not
+ * frozen in interrupt context for the purposes of splitting or migration.
+ *
+ * You can also use this function if you're holding a lock that prevents
+ * pages being frozen & removed; eg the i_pages lock for the page cache
+ * or the mmap_sem or page table lock for page tables.  In this case,
+ * it will always succeed, and you could have used a plain folio_get(),
+ * but it's sometimes more convenient to have a common function called
+ * from both locked and RCU-protected contexts.
+ *
+ * Return: True if the reference count was successfully incremented.
+ */
+static inline bool folio_try_get_rcu(struct folio *folio)
+{
+	return folio_ref_try_add_rcu(folio, 1);
+}
+
 static inline int page_ref_freeze(struct page *page, int count)
 {
 	int ret = likely(atomic_cmpxchg(&page->_refcount, count, 0) == count);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 62db6b0176b9..f3ec26551082 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -172,91 +172,15 @@ static inline struct address_space *page_mapping_file(struct page *page)
 	return page_mapping(page);
 }
 
-/*
- * speculatively take a reference to a page.
- * If the page is free (_refcount == 0), then _refcount is untouched, and 0
- * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
- *
- * This function must be called inside the same rcu_read_lock() section as has
- * been used to lookup the page in the pagecache radix-tree (or page table):
- * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
- *
- * Unless an RCU grace period has passed, the count of all pages coming out
- * of the allocator must be considered unstable. page_count may return higher
- * than expected, and put_page must be able to do the right thing when the
- * page has been finished with, no matter what it is subsequently allocated
- * for (because put_page is what is used here to drop an invalid speculative
- * reference).
- *
- * This is the interesting part of the lockless pagecache (and lockless
- * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
- * has the following pattern:
- * 1. find page in radix tree
- * 2. conditionally increment refcount
- * 3. check the page is still in pagecache (if no, goto 1)
- *
- * Remove-side that cares about stability of _refcount (eg. reclaim) has the
- * following (with the i_pages lock held):
- * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
- * B. remove page from pagecache
- * C. free the page
- *
- * There are 2 critical interleavings that matter:
- * - 2 runs before A: in this case, A sees elevated refcount and bails out
- * - A runs before 2: in this case, 2 sees zero refcount and retries;
- *   subsequently, B will complete and 1 will find no page, causing the
- *   lookup to return NULL.
- *
- * It is possible that between 1 and 2, the page is removed then the exact same
- * page is inserted into the same position in pagecache. That's OK: the
- * old find_get_page using a lock could equally have run before or after
- * such a re-insertion, depending on order that locks are granted.
- *
- * Lookups racing against pagecache insertion isn't a big problem: either 1
- * will find the page or it will not. Likewise, the old find_get_page could run
- * either before the insertion or afterwards, depending on timing.
- */
-static inline int __page_cache_add_speculative(struct page *page, int count)
+static inline bool page_cache_add_speculative(struct page *page, int count)
 {
-#ifdef CONFIG_TINY_RCU
-# ifdef CONFIG_PREEMPT_COUNT
-	VM_BUG_ON(!in_atomic() && !irqs_disabled());
-# endif
-	/*
-	 * Preempt must be disabled here - we rely on rcu_read_lock doing
-	 * this for us.
-	 *
-	 * Pagecache won't be truncated from interrupt context, so if we have
-	 * found a page in the radix tree here, we have pinned its refcount by
-	 * disabling preempt, and hence no need for the "speculative get" that
-	 * SMP requires.
-	 */
-	VM_BUG_ON_PAGE(page_count(page) == 0, page);
-	page_ref_add(page, count);
-
-#else
-	if (unlikely(!page_ref_add_unless(page, count, 0))) {
-		/*
-		 * Either the page has been freed, or will be freed.
-		 * In either case, retry here and the caller should
-		 * do the right thing (see comments above).
-		 */
-		return 0;
-	}
-#endif
 	VM_BUG_ON_PAGE(PageTail(page), page);
-
-	return 1;
-}
-
-static inline int page_cache_get_speculative(struct page *page)
-{
-	return __page_cache_add_speculative(page, 1);
+	return folio_ref_try_add_rcu((struct folio *)page, count);
 }
 
-static inline int page_cache_add_speculative(struct page *page, int count)
+static inline bool page_cache_get_speculative(struct page *page)
 {
-	return __page_cache_add_speculative(page, count);
+	return page_cache_add_speculative(page, 1);
 }
 
 /**
diff --git a/mm/filemap.c b/mm/filemap.c
index dae481293b5d..9fcc3d94cfcd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1801,6 +1801,26 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 }
 EXPORT_SYMBOL(page_cache_prev_miss);
 
+/*
+ * Lockless page cache protocol:
+ * On the lookup side:
+ * 1. Load the folio from i_pages
+ * 2. Increment the refcount if it's not zero
+ * 3. If the folio is not found by xas_reload(), put the refcount and retry
+ *
+ * On the removal side:
+ * A. Freeze the page (by zeroing the refcount if nobody else has a reference)
+ * B. Remove the page from i_pages
+ * C. Return the page to the page allocator
+ *
+ * This means that any page may have its reference count temporarily
+ * increased by a speculative page cache (or fast GUP) lookup as it can
+ * be allocated by another user before the RCU grace period expires.
+ * Because the refcount temporarily acquired here may end up being the
+ * last refcount on the page, any page allocation must be freeable by
+ * folio_put().
+ */
+
 /*
  * mapping_get_entry - Get a page cache entry.
  * @mapping: the address_space to search
-- 
cgit v1.2.3


From d389a4a8115518e2cc232649b012b72113fe8b67 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 7 Dec 2020 15:42:09 -0500
Subject: mm: Add folio flag manipulation functions

These new functions are the folio analogues of the various PageFlags
functions.  If CONFIG_DEBUG_VM_PGFLAGS is enabled, we check the folio
is not a tail page at every invocation.  This will also catch the
PagePoisoned case as a poisoned page has every bit set, which would
include PageTail.

This saves 1684 bytes of text with the distro-derived config that
I'm testing due to removing a double call to compound_head() in
PageSwapCache().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/page-flags.h | 219 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 156 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e9b0723a637d..2491e84b8e52 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -143,6 +143,8 @@ enum pageflags {
 #endif
 	__NR_PAGEFLAGS,
 
+	PG_readahead = PG_reclaim,
+
 	/* Filesystems */
 	PG_checked = PG_owner_priv_1,
 
@@ -245,6 +247,15 @@ static inline void page_init_poison(struct page *page, size_t size)
 }
 #endif
 
+static unsigned long *folio_flags(struct folio *folio, unsigned n)
+{
+	struct page *page = &folio->page;
+
+	VM_BUG_ON_PGFLAGS(PageTail(page), page);
+	VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
+	return &page[n].flags;
+}
+
 /*
  * Page flags policies wrt compound pages
  *
@@ -289,36 +300,64 @@ static inline void page_init_poison(struct page *page, size_t size)
 		VM_BUG_ON_PGFLAGS(!PageHead(page), page);		\
 		PF_POISONED_CHECK(&page[1]); })
 
+/* Which page is the flag stored in */
+#define FOLIO_PF_ANY		0
+#define FOLIO_PF_HEAD		0
+#define FOLIO_PF_ONLY_HEAD	0
+#define FOLIO_PF_NO_TAIL	0
+#define FOLIO_PF_NO_COMPOUND	0
+#define FOLIO_PF_SECOND		1
+
 /*
  * Macros to create function definitions for page flags
  */
 #define TESTPAGEFLAG(uname, lname, policy)				\
+static __always_inline bool folio_test_##lname(struct folio *folio)	\
+{ return test_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }	\
 static __always_inline int Page##uname(struct page *page)		\
-	{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
+{ return test_bit(PG_##lname, &policy(page, 0)->flags); }
 
 #define SETPAGEFLAG(uname, lname, policy)				\
+static __always_inline							\
+void folio_set_##lname(struct folio *folio)				\
+{ set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
 static __always_inline void SetPage##uname(struct page *page)		\
-	{ set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define CLEARPAGEFLAG(uname, lname, policy)				\
+static __always_inline							\
+void folio_clear_##lname(struct folio *folio)				\
+{ clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
 static __always_inline void ClearPage##uname(struct page *page)		\
-	{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define __SETPAGEFLAG(uname, lname, policy)				\
+static __always_inline							\
+void __folio_set_##lname(struct folio *folio)				\
+{ __set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }		\
 static __always_inline void __SetPage##uname(struct page *page)		\
-	{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define __CLEARPAGEFLAG(uname, lname, policy)				\
+static __always_inline							\
+void __folio_clear_##lname(struct folio *folio)				\
+{ __clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); }	\
 static __always_inline void __ClearPage##uname(struct page *page)	\
-	{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ __clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define TESTSETFLAG(uname, lname, policy)				\
+static __always_inline							\
+bool folio_test_set_##lname(struct folio *folio)			\
+{ return test_and_set_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
 static __always_inline int TestSetPage##uname(struct page *page)	\
-	{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_set_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define TESTCLEARFLAG(uname, lname, policy)				\
+static __always_inline							\
+bool folio_test_clear_##lname(struct folio *folio)			\
+{ return test_and_clear_bit(PG_##lname, folio_flags(folio, FOLIO_##policy)); } \
 static __always_inline int TestClearPage##uname(struct page *page)	\
-	{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
+{ return test_and_clear_bit(PG_##lname, &policy(page, 1)->flags); }
 
 #define PAGEFLAG(uname, lname, policy)					\
 	TESTPAGEFLAG(uname, lname, policy)				\
@@ -334,29 +373,37 @@ static __always_inline int TestClearPage##uname(struct page *page)	\
 	TESTSETFLAG(uname, lname, policy)				\
 	TESTCLEARFLAG(uname, lname, policy)
 
-#define TESTPAGEFLAG_FALSE(uname)					\
+#define TESTPAGEFLAG_FALSE(uname, lname)				\
+static inline bool folio_test_##lname(const struct folio *folio) { return 0; } \
 static inline int Page##uname(const struct page *page) { return 0; }
 
-#define SETPAGEFLAG_NOOP(uname)						\
+#define SETPAGEFLAG_NOOP(uname, lname)					\
+static inline void folio_set_##lname(struct folio *folio) { }		\
 static inline void SetPage##uname(struct page *page) {  }
 
-#define CLEARPAGEFLAG_NOOP(uname)					\
+#define CLEARPAGEFLAG_NOOP(uname, lname)				\
+static inline void folio_clear_##lname(struct folio *folio) { }		\
 static inline void ClearPage##uname(struct page *page) {  }
 
-#define __CLEARPAGEFLAG_NOOP(uname)					\
+#define __CLEARPAGEFLAG_NOOP(uname, lname)				\
+static inline void __folio_clear_##lname(struct folio *folio) { }	\
 static inline void __ClearPage##uname(struct page *page) {  }
 
-#define TESTSETFLAG_FALSE(uname)					\
+#define TESTSETFLAG_FALSE(uname, lname)					\
+static inline bool folio_test_set_##lname(struct folio *folio)		\
+{ return 0; }								\
 static inline int TestSetPage##uname(struct page *page) { return 0; }
 
-#define TESTCLEARFLAG_FALSE(uname)					\
+#define TESTCLEARFLAG_FALSE(uname, lname)				\
+static inline bool folio_test_clear_##lname(struct folio *folio)	\
+{ return 0; }								\
 static inline int TestClearPage##uname(struct page *page) { return 0; }
 
-#define PAGEFLAG_FALSE(uname) TESTPAGEFLAG_FALSE(uname)			\
-	SETPAGEFLAG_NOOP(uname) CLEARPAGEFLAG_NOOP(uname)
+#define PAGEFLAG_FALSE(uname, lname) TESTPAGEFLAG_FALSE(uname, lname)	\
+	SETPAGEFLAG_NOOP(uname, lname) CLEARPAGEFLAG_NOOP(uname, lname)
 
-#define TESTSCFLAG_FALSE(uname)						\
-	TESTSETFLAG_FALSE(uname) TESTCLEARFLAG_FALSE(uname)
+#define TESTSCFLAG_FALSE(uname, lname)					\
+	TESTSETFLAG_FALSE(uname, lname) TESTCLEARFLAG_FALSE(uname, lname)
 
 __PAGEFLAG(Locked, locked, PF_NO_TAIL)
 PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD)
@@ -412,8 +459,8 @@ PAGEFLAG(MappedToDisk, mappedtodisk, PF_NO_TAIL)
 /* PG_readahead is only used for reads; PG_reclaim is only for writes */
 PAGEFLAG(Reclaim, reclaim, PF_NO_TAIL)
 	TESTCLEARFLAG(Reclaim, reclaim, PF_NO_TAIL)
-PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
-	TESTCLEARFLAG(Readahead, reclaim, PF_NO_COMPOUND)
+PAGEFLAG(Readahead, readahead, PF_NO_COMPOUND)
+	TESTCLEARFLAG(Readahead, readahead, PF_NO_COMPOUND)
 
 #ifdef CONFIG_HIGHMEM
 /*
@@ -422,22 +469,25 @@ PAGEFLAG(Readahead, reclaim, PF_NO_COMPOUND)
  */
 #define PageHighMem(__p) is_highmem_idx(page_zonenum(__p))
 #else
-PAGEFLAG_FALSE(HighMem)
+PAGEFLAG_FALSE(HighMem, highmem)
 #endif
 
 #ifdef CONFIG_SWAP
-static __always_inline int PageSwapCache(struct page *page)
+static __always_inline bool folio_test_swapcache(struct folio *folio)
 {
-#ifdef CONFIG_THP_SWAP
-	page = compound_head(page);
-#endif
-	return PageSwapBacked(page) && test_bit(PG_swapcache, &page->flags);
+	return folio_test_swapbacked(folio) &&
+			test_bit(PG_swapcache, folio_flags(folio, 0));
+}
 
+static __always_inline bool PageSwapCache(struct page *page)
+{
+	return folio_test_swapcache(page_folio(page));
 }
+
 SETPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
 CLEARPAGEFLAG(SwapCache, swapcache, PF_NO_TAIL)
 #else
-PAGEFLAG_FALSE(SwapCache)
+PAGEFLAG_FALSE(SwapCache, swapcache)
 #endif
 
 PAGEFLAG(Unevictable, unevictable, PF_HEAD)
@@ -449,14 +499,14 @@ PAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
 	__CLEARPAGEFLAG(Mlocked, mlocked, PF_NO_TAIL)
 	TESTSCFLAG(Mlocked, mlocked, PF_NO_TAIL)
 #else
-PAGEFLAG_FALSE(Mlocked) __CLEARPAGEFLAG_NOOP(Mlocked)
-	TESTSCFLAG_FALSE(Mlocked)
+PAGEFLAG_FALSE(Mlocked, mlocked) __CLEARPAGEFLAG_NOOP(Mlocked, mlocked)
+	TESTSCFLAG_FALSE(Mlocked, mlocked)
 #endif
 
 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
 PAGEFLAG(Uncached, uncached, PF_NO_COMPOUND)
 #else
-PAGEFLAG_FALSE(Uncached)
+PAGEFLAG_FALSE(Uncached, uncached)
 #endif
 
 #ifdef CONFIG_MEMORY_FAILURE
@@ -465,7 +515,7 @@ TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
 extern bool take_page_off_buddy(struct page *page);
 #else
-PAGEFLAG_FALSE(HWPoison)
+PAGEFLAG_FALSE(HWPoison, hwpoison)
 #define __PG_HWPOISON 0
 #endif
 
@@ -479,7 +529,7 @@ PAGEFLAG(Idle, idle, PF_ANY)
 #ifdef CONFIG_KASAN_HW_TAGS
 PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD)
 #else
-PAGEFLAG_FALSE(SkipKASanPoison)
+PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison)
 #endif
 
 /*
@@ -517,10 +567,14 @@ static __always_inline int PageMappingFlags(struct page *page)
 	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) != 0;
 }
 
-static __always_inline int PageAnon(struct page *page)
+static __always_inline bool folio_test_anon(struct folio *folio)
+{
+	return ((unsigned long)folio->mapping & PAGE_MAPPING_ANON) != 0;
+}
+
+static __always_inline bool PageAnon(struct page *page)
 {
-	page = compound_head(page);
-	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
+	return folio_test_anon(page_folio(page));
 }
 
 static __always_inline int __PageMovable(struct page *page)
@@ -536,30 +590,32 @@ static __always_inline int __PageMovable(struct page *page)
  * is found in VM_MERGEABLE vmas.  It's a PageAnon page, pointing not to any
  * anon_vma, but to that page's node of the stable tree.
  */
-static __always_inline int PageKsm(struct page *page)
+static __always_inline bool folio_test_ksm(struct folio *folio)
 {
-	page = compound_head(page);
-	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
+	return ((unsigned long)folio->mapping & PAGE_MAPPING_FLAGS) ==
 				PAGE_MAPPING_KSM;
 }
+
+static __always_inline bool PageKsm(struct page *page)
+{
+	return folio_test_ksm(page_folio(page));
+}
 #else
-TESTPAGEFLAG_FALSE(Ksm)
+TESTPAGEFLAG_FALSE(Ksm, ksm)
 #endif
 
 u64 stable_page_flags(struct page *page);
 
-static inline int PageUptodate(struct page *page)
+static inline bool folio_test_uptodate(struct folio *folio)
 {
-	int ret;
-	page = compound_head(page);
-	ret = test_bit(PG_uptodate, &(page)->flags);
+	bool ret = test_bit(PG_uptodate, folio_flags(folio, 0));
 	/*
-	 * Must ensure that the data we read out of the page is loaded
-	 * _after_ we've loaded page->flags to check for PageUptodate.
-	 * We can skip the barrier if the page is not uptodate, because
+	 * Must ensure that the data we read out of the folio is loaded
+	 * _after_ we've loaded folio->flags to check the uptodate bit.
+	 * We can skip the barrier if the folio is not uptodate, because
 	 * we wouldn't be reading anything from it.
 	 *
-	 * See SetPageUptodate() for the other side of the story.
+	 * See folio_mark_uptodate() for the other side of the story.
 	 */
 	if (ret)
 		smp_rmb();
@@ -567,23 +623,36 @@ static inline int PageUptodate(struct page *page)
 	return ret;
 }
 
-static __always_inline void __SetPageUptodate(struct page *page)
+static inline int PageUptodate(struct page *page)
+{
+	return folio_test_uptodate(page_folio(page));
+}
+
+static __always_inline void __folio_mark_uptodate(struct folio *folio)
 {
-	VM_BUG_ON_PAGE(PageTail(page), page);
 	smp_wmb();
-	__set_bit(PG_uptodate, &page->flags);
+	__set_bit(PG_uptodate, folio_flags(folio, 0));
 }
 
-static __always_inline void SetPageUptodate(struct page *page)
+static __always_inline void folio_mark_uptodate(struct folio *folio)
 {
-	VM_BUG_ON_PAGE(PageTail(page), page);
 	/*
 	 * Memory barrier must be issued before setting the PG_uptodate bit,
-	 * so that all previous stores issued in order to bring the page
-	 * uptodate are actually visible before PageUptodate becomes true.
+	 * so that all previous stores issued in order to bring the folio
+	 * uptodate are actually visible before folio_test_uptodate becomes true.
 	 */
 	smp_wmb();
-	set_bit(PG_uptodate, &page->flags);
+	set_bit(PG_uptodate, folio_flags(folio, 0));
+}
+
+static __always_inline void __SetPageUptodate(struct page *page)
+{
+	__folio_mark_uptodate((struct folio *)page);
+}
+
+static __always_inline void SetPageUptodate(struct page *page)
+{
+	folio_mark_uptodate((struct folio *)page);
 }
 
 CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
@@ -608,6 +677,17 @@ static inline void set_page_writeback_keepwrite(struct page *page)
 
 __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
 
+/* Whether there are one or multiple pages in a folio */
+static inline bool folio_test_single(struct folio *folio)
+{
+	return !folio_test_head(folio);
+}
+
+static inline bool folio_test_multi(struct folio *folio)
+{
+	return folio_test_head(folio);
+}
+
 static __always_inline void set_compound_head(struct page *page, struct page *head)
 {
 	WRITE_ONCE(page->compound_head, (unsigned long)head + 1);
@@ -631,12 +711,15 @@ static inline void ClearPageCompound(struct page *page)
 #ifdef CONFIG_HUGETLB_PAGE
 int PageHuge(struct page *page);
 int PageHeadHuge(struct page *page);
+static inline bool folio_test_hugetlb(struct folio *folio)
+{
+	return PageHeadHuge(&folio->page);
+}
 #else
-TESTPAGEFLAG_FALSE(Huge)
-TESTPAGEFLAG_FALSE(HeadHuge)
+TESTPAGEFLAG_FALSE(Huge, hugetlb)
+TESTPAGEFLAG_FALSE(HeadHuge, headhuge)
 #endif
 
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * PageHuge() only returns true for hugetlbfs pages, but not for
@@ -652,6 +735,11 @@ static inline int PageTransHuge(struct page *page)
 	return PageHead(page);
 }
 
+static inline bool folio_test_transhuge(struct folio *folio)
+{
+	return folio_test_head(folio);
+}
+
 /*
  * PageTransCompound returns true for both transparent huge pages
  * and hugetlbfs pages, so it should only be called when it's known
@@ -688,12 +776,12 @@ static inline int PageTransTail(struct page *page)
 PAGEFLAG(DoubleMap, double_map, PF_SECOND)
 	TESTSCFLAG(DoubleMap, double_map, PF_SECOND)
 #else
-TESTPAGEFLAG_FALSE(TransHuge)
-TESTPAGEFLAG_FALSE(TransCompound)
-TESTPAGEFLAG_FALSE(TransCompoundMap)
-TESTPAGEFLAG_FALSE(TransTail)
-PAGEFLAG_FALSE(DoubleMap)
-	TESTSCFLAG_FALSE(DoubleMap)
+TESTPAGEFLAG_FALSE(TransHuge, transhuge)
+TESTPAGEFLAG_FALSE(TransCompound, transcompound)
+TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap)
+TESTPAGEFLAG_FALSE(TransTail, transtail)
+PAGEFLAG_FALSE(DoubleMap, double_map)
+	TESTSCFLAG_FALSE(DoubleMap, double_map)
 #endif
 
 /*
@@ -877,6 +965,11 @@ static inline int page_has_private(struct page *page)
 	return !!(page->flags & PAGE_FLAGS_PRIVATE);
 }
 
+static inline bool folio_has_private(struct folio *folio)
+{
+	return page_has_private(&folio->page);
+}
+
 #undef PF_ANY
 #undef PF_HEAD
 #undef PF_ONLY_HEAD
-- 
cgit v1.2.3


From 889a3747b3b7661b089ba4eae081a3b6bb351a23 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 25 Feb 2021 09:47:41 -0500
Subject: mm/lru: Add folio LRU functions

Handle arbitrary-order folios being added to the LRU.  By definition,
all pages being added to the LRU were already head or base pages, but
call page_folio() on them anyway to get the type right and avoid the
buried calls to compound_head().

Saves 783 bytes of kernel text; no functions grow.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Yu Zhao <yuzhao@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 Documentation/core-api/mm-api.rst |   1 +
 include/linux/mm_inline.h         | 103 +++++++++++++++++++++++++-------------
 include/trace/events/pagemap.h    |   2 +-
 3 files changed, 69 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst
index 5c459ee2acce..971149f5d241 100644
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@@ -95,6 +95,7 @@ More Memory Management Functions
 .. kernel-doc:: mm/mempolicy.c
 .. kernel-doc:: include/linux/mm_types.h
    :internal:
+.. kernel-doc:: include/linux/mm_inline.h
 .. kernel-doc:: include/linux/page-flags.h
 .. kernel-doc:: include/linux/mm.h
    :internal:
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 355ea1ee32bd..e2ec68b0515c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -6,27 +6,33 @@
 #include <linux/swap.h>
 
 /**
- * page_is_file_lru - should the page be on a file LRU or anon LRU?
- * @page: the page to test
- *
- * Returns 1 if @page is a regular filesystem backed page cache page or a lazily
- * freed anonymous page (e.g. via MADV_FREE).  Returns 0 if @page is a normal
- * anonymous page, a tmpfs page or otherwise ram or swap backed page.  Used by
- * functions that manipulate the LRU lists, to sort a page onto the right LRU
- * list.
+ * folio_is_file_lru - Should the folio be on a file LRU or anon LRU?
+ * @folio: The folio to test.
  *
  * We would like to get this info without a page flag, but the state
- * needs to survive until the page is last deleted from the LRU, which
+ * needs to survive until the folio is last deleted from the LRU, which
  * could be as far down as __page_cache_release.
+ *
+ * Return: An integer (not a boolean!) used to sort a folio onto the
+ * right LRU list and to account folios correctly.
+ * 1 if @folio is a regular filesystem backed page cache folio
+ * or a lazily freed anonymous folio (e.g. via MADV_FREE).
+ * 0 if @folio is a normal anonymous folio, a tmpfs folio or otherwise
+ * ram or swap backed folio.
  */
+static inline int folio_is_file_lru(struct folio *folio)
+{
+	return !folio_test_swapbacked(folio);
+}
+
 static inline int page_is_file_lru(struct page *page)
 {
-	return !PageSwapBacked(page);
+	return folio_is_file_lru(page_folio(page));
 }
 
 static __always_inline void update_lru_size(struct lruvec *lruvec,
 				enum lru_list lru, enum zone_type zid,
-				int nr_pages)
+				long nr_pages)
 {
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
@@ -39,69 +45,94 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
 }
 
 /**
- * __clear_page_lru_flags - clear page lru flags before releasing a page
- * @page: the page that was on lru and now has a zero reference
+ * __folio_clear_lru_flags - Clear page lru flags before releasing a page.
+ * @folio: The folio that was on lru and now has a zero reference.
  */
-static __always_inline void __clear_page_lru_flags(struct page *page)
+static __always_inline void __folio_clear_lru_flags(struct folio *folio)
 {
-	VM_BUG_ON_PAGE(!PageLRU(page), page);
+	VM_BUG_ON_FOLIO(!folio_test_lru(folio), folio);
 
-	__ClearPageLRU(page);
+	__folio_clear_lru(folio);
 
 	/* this shouldn't happen, so leave the flags to bad_page() */
-	if (PageActive(page) && PageUnevictable(page))
+	if (folio_test_active(folio) && folio_test_unevictable(folio))
 		return;
 
-	__ClearPageActive(page);
-	__ClearPageUnevictable(page);
+	__folio_clear_active(folio);
+	__folio_clear_unevictable(folio);
+}
+
+static __always_inline void __clear_page_lru_flags(struct page *page)
+{
+	__folio_clear_lru_flags(page_folio(page));
 }
 
 /**
- * page_lru - which LRU list should a page be on?
- * @page: the page to test
+ * folio_lru_list - Which LRU list should a folio be on?
+ * @folio: The folio to test.
  *
- * Returns the LRU list a page should be on, as an index
+ * Return: The LRU list a folio should be on, as an index
  * into the array of LRU lists.
  */
-static __always_inline enum lru_list page_lru(struct page *page)
+static __always_inline enum lru_list folio_lru_list(struct folio *folio)
 {
 	enum lru_list lru;
 
-	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+	VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio);
 
-	if (PageUnevictable(page))
+	if (folio_test_unevictable(folio))
 		return LRU_UNEVICTABLE;
 
-	lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
-	if (PageActive(page))
+	lru = folio_is_file_lru(folio) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+	if (folio_test_active(folio))
 		lru += LRU_ACTIVE;
 
 	return lru;
 }
 
+static __always_inline
+void lruvec_add_folio(struct lruvec *lruvec, struct folio *folio)
+{
+	enum lru_list lru = folio_lru_list(folio);
+
+	update_lru_size(lruvec, lru, folio_zonenum(folio),
+			folio_nr_pages(folio));
+	list_add(&folio->lru, &lruvec->lists[lru]);
+}
+
 static __always_inline void add_page_to_lru_list(struct page *page,
 				struct lruvec *lruvec)
 {
-	enum lru_list lru = page_lru(page);
+	lruvec_add_folio(lruvec, page_folio(page));
+}
 
-	update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
-	list_add(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_add_folio_tail(struct lruvec *lruvec, struct folio *folio)
+{
+	enum lru_list lru = folio_lru_list(folio);
+
+	update_lru_size(lruvec, lru, folio_zonenum(folio),
+			folio_nr_pages(folio));
+	list_add_tail(&folio->lru, &lruvec->lists[lru]);
 }
 
 static __always_inline void add_page_to_lru_list_tail(struct page *page,
 				struct lruvec *lruvec)
 {
-	enum lru_list lru = page_lru(page);
+	lruvec_add_folio_tail(lruvec, page_folio(page));
+}
 
-	update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
-	list_add_tail(&page->lru, &lruvec->lists[lru]);
+static __always_inline
+void lruvec_del_folio(struct lruvec *lruvec, struct folio *folio)
+{
+	list_del(&folio->lru);
+	update_lru_size(lruvec, folio_lru_list(folio), folio_zonenum(folio),
+			-folio_nr_pages(folio));
 }
 
 static __always_inline void del_page_from_lru_list(struct page *page,
 				struct lruvec *lruvec)
 {
-	list_del(&page->lru);
-	update_lru_size(lruvec, page_lru(page), page_zonenum(page),
-			-thp_nr_pages(page));
+	lruvec_del_folio(lruvec, page_folio(page));
 }
 #endif
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index 1d28431e85bd..92ad176210ff 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -41,7 +41,7 @@ TRACE_EVENT(mm_lru_insertion,
 	TP_fast_assign(
 		__entry->page	= page;
 		__entry->pfn	= page_to_pfn(page);
-		__entry->lru	= page_lru(page);
+		__entry->lru	= folio_lru_list(page_folio(page));
 		__entry->flags	= trace_pagemap_flags(page);
 	),
 
-- 
cgit v1.2.3


From 85d0a2ed3747da7f9aedacb72478bbedf06f9f2e Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 11 Jan 2021 10:04:40 -0500
Subject: mm: Handle per-folio private data

Add folio_get_private() which mirrors page_private() -- ie folio private
data is the same as page private data.  The only difference is that these
return a void * instead of an unsigned long, which matches the majority
of users.

Turn attach_page_private() into folio_attach_private() and reimplement
attach_page_private() as a wrapper.  No filesystem which uses page private
data currently supports compound pages, so we're free to define the rules.
attach_page_private() may only be called on a head page; if you want
to add private data to a tail page, you can call set_page_private()
directly (and shouldn't increment the page refcount!  That should be
done when adding private data to the head page / folio).

This saves 813 bytes of text with the distro-derived config that I'm
testing due to removing the calls to compound_head() in get_page()
& put_page().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/mm_types.h | 11 +++++++++++
 include/linux/pagemap.h  | 48 +++++++++++++++++++++++++++++-------------------
 2 files changed, 40 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6908186629b4..5ebcb86ac934 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -317,6 +317,12 @@ static inline atomic_t *compound_pincount_ptr(struct page *page)
 #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 
+/*
+ * page_private can be used on tail pages.  However, PagePrivate is only
+ * checked by the VM on the head page.  So page_private on the tail pages
+ * should be used for data that's ancillary to the head page (eg attaching
+ * buffer heads to tail pages after attaching buffer heads to the head page)
+ */
 #define page_private(page)		((page)->private)
 
 static inline void set_page_private(struct page *page, unsigned long private)
@@ -324,6 +330,11 @@ static inline void set_page_private(struct page *page, unsigned long private)
 	page->private = private;
 }
 
+static inline void *folio_get_private(struct folio *folio)
+{
+	return folio->private;
+}
+
 struct page_frag_cache {
 	void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index f3ec26551082..b0f5bf1cb540 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -184,42 +184,52 @@ static inline bool page_cache_get_speculative(struct page *page)
 }
 
 /**
- * attach_page_private - Attach private data to a page.
- * @page: Page to attach data to.
- * @data: Data to attach to page.
+ * folio_attach_private - Attach private data to a folio.
+ * @folio: Folio to attach data to.
+ * @data: Data to attach to folio.
  *
- * Attaching private data to a page increments the page's reference count.
- * The data must be detached before the page will be freed.
+ * Attaching private data to a folio increments the page's reference count.
+ * The data must be detached before the folio will be freed.
  */
-static inline void attach_page_private(struct page *page, void *data)
+static inline void folio_attach_private(struct folio *folio, void *data)
 {
-	get_page(page);
-	set_page_private(page, (unsigned long)data);
-	SetPagePrivate(page);
+	folio_get(folio);
+	folio->private = data;
+	folio_set_private(folio);
 }
 
 /**
- * detach_page_private - Detach private data from a page.
- * @page: Page to detach data from.
+ * folio_detach_private - Detach private data from a folio.
+ * @folio: Folio to detach data from.
  *
- * Removes the data that was previously attached to the page and decrements
+ * Removes the data that was previously attached to the folio and decrements
  * the refcount on the page.
  *
- * Return: Data that was attached to the page.
+ * Return: Data that was attached to the folio.
  */
-static inline void *detach_page_private(struct page *page)
+static inline void *folio_detach_private(struct folio *folio)
 {
-	void *data = (void *)page_private(page);
+	void *data = folio_get_private(folio);
 
-	if (!PagePrivate(page))
+	if (!folio_test_private(folio))
 		return NULL;
-	ClearPagePrivate(page);
-	set_page_private(page, 0);
-	put_page(page);
+	folio_clear_private(folio);
+	folio->private = NULL;
+	folio_put(folio);
 
 	return data;
 }
 
+static inline void attach_page_private(struct page *page, void *data)
+{
+	folio_attach_private(page_folio(page), data);
+}
+
+static inline void *detach_page_private(struct page *page)
+{
+	return folio_detach_private(page_folio(page));
+}
+
 #ifdef CONFIG_NUMA
 extern struct page *__page_cache_alloc(gfp_t gfp);
 #else
-- 
cgit v1.2.3


From 9257e15677384d1ccdfe0619c4455e34cb0342c7 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 15 Jan 2021 23:39:21 -0500
Subject: mm/filemap: Add folio_index(), folio_file_page() and folio_contains()

folio_index() is the equivalent of page_index() for folios.
folio_file_page() is the equivalent of find_subpage().
folio_contains() is the equivalent of thp_contains().

No changes to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/pagemap.h | 56 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b0f5bf1cb540..e7b46223239a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -386,6 +386,62 @@ static inline bool thp_contains(struct page *head, pgoff_t index)
 	return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
 }
 
+#define swapcache_index(folio)	__page_file_index(&(folio)->page)
+
+/**
+ * folio_index - File index of a folio.
+ * @folio: The folio.
+ *
+ * For a folio which is either in the page cache or the swap cache,
+ * return its index within the address_space it belongs to.  If you know
+ * the page is definitely in the page cache, you can look at the folio's
+ * index directly.
+ *
+ * Return: The index (offset in units of pages) of a folio in its file.
+ */
+static inline pgoff_t folio_index(struct folio *folio)
+{
+        if (unlikely(folio_test_swapcache(folio)))
+                return swapcache_index(folio);
+        return folio->index;
+}
+
+/**
+ * folio_file_page - The page for a particular index.
+ * @folio: The folio which contains this index.
+ * @index: The index we want to look up.
+ *
+ * Sometimes after looking up a folio in the page cache, we need to
+ * obtain the specific page for an index (eg a page fault).
+ *
+ * Return: The page containing the file data for this index.
+ */
+static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
+{
+	/* HugeTLBfs indexes the page cache in units of hpage_size */
+	if (folio_test_hugetlb(folio))
+		return &folio->page;
+	return folio_page(folio, index & (folio_nr_pages(folio) - 1));
+}
+
+/**
+ * folio_contains - Does this folio contain this index?
+ * @folio: The folio.
+ * @index: The page index within the file.
+ *
+ * Context: The caller should have the page locked in order to prevent
+ * (eg) shmem from moving the page between the page cache and swap cache
+ * and changing its index in the middle of the operation.
+ * Return: true or false.
+ */
+static inline bool folio_contains(struct folio *folio, pgoff_t index)
+{
+	/* HugeTLBfs indexes the page cache in units of hpage_size */
+	if (folio_test_hugetlb(folio))
+		return folio->index == index;
+	return index - folio_index(folio) < folio_nr_pages(folio);
+}
+
 /*
  * Given the page we found in the page cache, return the page corresponding
  * to this index in the file
-- 
cgit v1.2.3


From f94b18f6653ab6d3aa503213c2e1e79b18f05a30 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 21 Mar 2021 16:24:31 -0400
Subject: mm/filemap: Add folio_next_index()

This helper returns the page index of the next folio in the file (ie
the end of this folio, plus one).

No changes to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/pagemap.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e7b46223239a..95e83a9e73c7 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -406,6 +406,17 @@ static inline pgoff_t folio_index(struct folio *folio)
         return folio->index;
 }
 
+/**
+ * folio_next_index - Get the index of the next folio.
+ * @folio: The current folio.
+ *
+ * Return: The index of the folio which follows this folio in the file.
+ */
+static inline pgoff_t folio_next_index(struct folio *folio)
+{
+	return folio->index + folio_nr_pages(folio);
+}
+
 /**
  * folio_file_page - The page for a particular index.
  * @folio: The folio which contains this index.
-- 
cgit v1.2.3


From 352b47a6984470954e1e262126b1da5312c40b09 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 24 Dec 2020 07:25:19 -0500
Subject: mm/filemap: Add folio_pos() and folio_file_pos()

These are just wrappers around page_offset() and page_file_offset()
respectively.  No change to generated code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 include/linux/pagemap.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 95e83a9e73c7..3354117a1dae 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -561,6 +561,27 @@ static inline loff_t page_file_offset(struct page *page)
 	return ((loff_t)page_index(page)) << PAGE_SHIFT;
 }
 
+/**
+ * folio_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ */
+static inline loff_t folio_pos(struct folio *folio)
+{
+	return page_offset(&folio->page);
+}
+
+/**
+ * folio_file_pos - Returns the byte position of this folio in its file.
+ * @folio: The folio.
+ *
+ * This differs from folio_pos() for folios which belong to a swap file.
+ * NFS is the only filesystem today which needs to use folio_file_pos().
+ */
+static inline loff_t folio_file_pos(struct folio *folio)
+{
+	return page_file_offset(&folio->page);
+}
+
 extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
 				     unsigned long address);
 
-- 
cgit v1.2.3


From 2f52578f9c64d7d9a96ab81c243cc20804fabf2b Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 10 Dec 2020 10:55:05 -0500
Subject: mm/util: Add folio_mapping() and folio_file_mapping()

These are the folio equivalent of page_mapping() and page_file_mapping().
Add an out-of-line page_mapping() wrapper around folio_mapping()
in order to prevent the page_folio() call from bloating every caller
of page_mapping().  Adjust page_file_mapping() and page_mapping_file()
to use folios internally.  Rename __page_file_mapping() to
swapcache_mapping() and change it to take a folio.

This ends up saving 122 bytes of text overall.  folio_mapping() is
45 bytes shorter than page_mapping() was, but the new page_mapping()
wrapper is 30 bytes.  The major reduction is a few bytes less in dozens
of nfs functions (which call page_file_mapping()).  Most of these appear
to be a slight change in gcc's register allocation decisions, which allow:

   48 8b 56 08         mov    0x8(%rsi),%rdx
   48 8d 42 ff         lea    -0x1(%rdx),%rax
   83 e2 01            and    $0x1,%edx
   48 0f 44 c6         cmove  %rsi,%rax

to become:

   48 8b 46 08         mov    0x8(%rsi),%rax
   48 8d 78 ff         lea    -0x1(%rax),%rdi
   a8 01               test   $0x1,%al
   48 0f 44 fe         cmove  %rsi,%rdi

for a reduction of a single byte.  Once the NFS client is converted to
use folios, this entire sequence will disappear.

Also add folio_mapping() documentation.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 Documentation/core-api/mm-api.rst |  2 ++
 include/linux/mm.h                | 14 --------------
 include/linux/pagemap.h           | 35 +++++++++++++++++++++++++++++++++--
 include/linux/swap.h              |  6 ++++++
 mm/Makefile                       |  2 +-
 mm/folio-compat.c                 | 13 +++++++++++++
 mm/swapfile.c                     |  8 ++++----
 mm/util.c                         | 30 ++++++++++++++++++------------
 8 files changed, 77 insertions(+), 33 deletions(-)
 create mode 100644 mm/folio-compat.c

(limited to 'include/linux')

diff --git a/Documentation/core-api/mm-api.rst b/Documentation/core-api/mm-api.rst
index 971149f5d241..395835f9289f 100644
--- a/Documentation/core-api/mm-api.rst
+++ b/Documentation/core-api/mm-api.rst
@@ -101,3 +101,5 @@ More Memory Management Functions
    :internal:
 .. kernel-doc:: include/linux/page_ref.h
 .. kernel-doc:: include/linux/mmzone.h
+.. kernel-doc:: mm/util.c
+   :functions: folio_mapping
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 63685d953ce0..bc5c38e1f780 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1755,19 +1755,6 @@ void page_address_init(void);
 
 extern void *page_rmapping(struct page *page);
 extern struct anon_vma *page_anon_vma(struct page *page);
-extern struct address_space *page_mapping(struct page *page);
-
-extern struct address_space *__page_file_mapping(struct page *);
-
-static inline
-struct address_space *page_file_mapping(struct page *page)
-{
-	if (unlikely(PageSwapCache(page)))
-		return __page_file_mapping(page);
-
-	return page->mapping;
-}
-
 extern pgoff_t __page_file_index(struct page *page);
 
 /*
@@ -1782,7 +1769,6 @@ static inline pgoff_t page_index(struct page *page)
 }
 
 bool page_mapped(struct page *page);
-struct address_space *page_mapping(struct page *page);
 
 /*
  * Return true only if the page has been allocated with
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 3354117a1dae..0ca96a40fabe 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -162,14 +162,45 @@ static inline void filemap_nr_thps_dec(struct address_space *mapping)
 
 void release_pages(struct page **pages, int nr);
 
+struct address_space *page_mapping(struct page *);
+struct address_space *folio_mapping(struct folio *);
+struct address_space *swapcache_mapping(struct folio *);
+
+/**
+ * folio_file_mapping - Find the mapping this folio belongs to.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * page belongs to.  Folios in the swap cache return the mapping of the
+ * swap file or swap device where the data is stored.  This is different
+ * from the mapping returned by folio_mapping().  The only reason to
+ * use it is if, like NFS, you return 0 from ->activate_swapfile.
+ *
+ * Do not call this for folios which aren't in the page cache or swap cache.
+ */
+static inline struct address_space *folio_file_mapping(struct folio *folio)
+{
+	if (unlikely(folio_test_swapcache(folio)))
+		return swapcache_mapping(folio);
+
+	return folio->mapping;
+}
+
+static inline struct address_space *page_file_mapping(struct page *page)
+{
+	return folio_file_mapping(page_folio(page));
+}
+
 /*
  * For file cache pages, return the address_space, otherwise return NULL
  */
 static inline struct address_space *page_mapping_file(struct page *page)
 {
-	if (unlikely(PageSwapCache(page)))
+	struct folio *folio = page_folio(page);
+
+	if (unlikely(folio_test_swapcache(folio)))
 		return NULL;
-	return page_mapping(page);
+	return folio_mapping(folio);
 }
 
 static inline bool page_cache_add_speculative(struct page *page, int count)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba52f3a3478e..85607c6c0cba 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,6 +320,12 @@ struct vma_swap_readahead {
 #endif
 };
 
+static inline swp_entry_t folio_swap_entry(struct folio *folio)
+{
+	swp_entry_t entry = { .val = page_private(&folio->page) };
+	return entry;
+}
+
 /* linux/mm/workingset.c */
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
 void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
diff --git a/mm/Makefile b/mm/Makefile
index fc60a40ce954..d6c0042e3aa0 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -46,7 +46,7 @@ mmu-$(CONFIG_MMU)	+= process_vm_access.o
 endif
 
 obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
-			   maccess.o page-writeback.o \
+			   maccess.o page-writeback.o folio-compat.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o percpu.o slab_common.o \
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
new file mode 100644
index 000000000000..5e107aa30a62
--- /dev/null
+++ b/mm/folio-compat.c
@@ -0,0 +1,13 @@
+/*
+ * Compatibility functions which bloat the callers too much to make inline.
+ * All of the callers of these functions should be converted to use folios
+ * eventually.
+ */
+
+#include <linux/pagemap.h>
+
+struct address_space *page_mapping(struct page *page)
+{
+	return folio_mapping(page_folio(page));
+}
+EXPORT_SYMBOL(page_mapping);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 22d10f713848..e3dcaeecc50f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3534,13 +3534,13 @@ struct swap_info_struct *page_swap_info(struct page *page)
 }
 
 /*
- * out-of-line __page_file_ methods to avoid include hell.
+ * out-of-line methods to avoid include hell.
  */
-struct address_space *__page_file_mapping(struct page *page)
+struct address_space *swapcache_mapping(struct folio *folio)
 {
-	return page_swap_info(page)->swap_file->f_mapping;
+	return page_swap_info(&folio->page)->swap_file->f_mapping;
 }
-EXPORT_SYMBOL_GPL(__page_file_mapping);
+EXPORT_SYMBOL_GPL(swapcache_mapping);
 
 pgoff_t __page_file_index(struct page *page)
 {
diff --git a/mm/util.c b/mm/util.c
index bacabe446906..6c1fe9bee30a 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -705,30 +705,36 @@ struct anon_vma *page_anon_vma(struct page *page)
 	return __page_rmapping(page);
 }
 
-struct address_space *page_mapping(struct page *page)
+/**
+ * folio_mapping - Find the mapping where this folio is stored.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the mapping that this
+ * page belongs to.  Folios in the swap cache return the swap mapping
+ * this page is stored in (which is different from the mapping for the
+ * swap file or swap device where the data is stored).
+ *
+ * You can call this for folios which aren't in the swap cache or page
+ * cache and it will return NULL.
+ */
+struct address_space *folio_mapping(struct folio *folio)
 {
 	struct address_space *mapping;
 
-	page = compound_head(page);
-
 	/* This happens if someone calls flush_dcache_page on slab page */
-	if (unlikely(PageSlab(page)))
+	if (unlikely(folio_test_slab(folio)))
 		return NULL;
 
-	if (unlikely(PageSwapCache(page))) {
-		swp_entry_t entry;
-
-		entry.val = page_private(page);
-		return swap_address_space(entry);
-	}
+	if (unlikely(folio_test_swapcache(folio)))
+		return swap_address_space(folio_swap_entry(folio));
 
-	mapping = page->mapping;
+	mapping = folio->mapping;
 	if ((unsigned long)mapping & PAGE_MAPPING_ANON)
 		return NULL;
 
 	return (void *)((unsigned long)mapping & ~PAGE_MAPPING_FLAGS);
 }
-EXPORT_SYMBOL(page_mapping);
+EXPORT_SYMBOL(folio_mapping);
 
 /* Slow path of page_mapcount() for compound pages */
 int __page_mapcount(struct page *page)
-- 
cgit v1.2.3


From 4e1364286d0a2dd384bceb6db6185b99c0e2c0bc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 7 Dec 2020 15:44:35 -0500
Subject: mm/filemap: Add folio_unlock()

Convert unlock_page() to call folio_unlock().  By using a folio we
avoid a call to compound_head().  This shortens the function from 39
bytes to 25 and removes 4 instructions on x86-64.  Because we still
have unlock_page(), it's a net increase of 16 bytes of text for the
kernel as a whole, but any path that uses folio_unlock() will execute
4 fewer instructions.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/pagemap.h |  3 ++-
 mm/filemap.c            | 29 ++++++++++++-----------------
 mm/folio-compat.c       |  6 ++++++
 3 files changed, 20 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0ca96a40fabe..8087921641a3 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -657,7 +657,8 @@ extern int __lock_page_killable(struct page *page);
 extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				unsigned int flags);
-extern void unlock_page(struct page *page);
+void unlock_page(struct page *page);
+void folio_unlock(struct folio *folio);
 
 /*
  * Return true if the page was successfully locked
diff --git a/mm/filemap.c b/mm/filemap.c
index 9fcc3d94cfcd..191d7d39e838 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1490,29 +1490,24 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
 #endif
 
 /**
- * unlock_page - unlock a locked page
- * @page: the page
+ * folio_unlock - Unlock a locked folio.
+ * @folio: The folio.
  *
- * Unlocks the page and wakes up sleepers in wait_on_page_locked().
- * Also wakes sleepers in wait_on_page_writeback() because the wakeup
- * mechanism between PageLocked pages and PageWriteback pages is shared.
- * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
+ * Unlocks the folio and wakes up any thread sleeping on the page lock.
  *
- * Note that this depends on PG_waiters being the sign bit in the byte
- * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
- * clear the PG_locked bit and test PG_waiters at the same time fairly
- * portably (architectures that do LL/SC can test any bit, while x86 can
- * test the sign bit).
+ * Context: May be called from interrupt or process context.  May not be
+ * called from NMI context.
  */
-void unlock_page(struct page *page)
+void folio_unlock(struct folio *folio)
 {
+	/* Bit 7 allows x86 to check the byte's sign bit */
 	BUILD_BUG_ON(PG_waiters != 7);
-	page = compound_head(page);
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
-		wake_up_page_bit(page, PG_locked);
+	BUILD_BUG_ON(PG_locked > 7);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0)))
+		wake_up_page_bit(&folio->page, PG_locked);
 }
-EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(folio_unlock);
 
 /**
  * end_page_private_2 - Clear PG_private_2 and release any waiters
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 5e107aa30a62..91b3d00a92f7 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -11,3 +11,9 @@ struct address_space *page_mapping(struct page *page)
 	return folio_mapping(page_folio(page));
 }
 EXPORT_SYMBOL(page_mapping);
+
+void unlock_page(struct page *page)
+{
+	return folio_unlock(page_folio(page));
+}
+EXPORT_SYMBOL(unlock_page);
-- 
cgit v1.2.3


From 7c23c782d5d57df97509ed2fc17f9b9490f18f1b Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 1 Mar 2021 19:38:25 -0500
Subject: mm/filemap: Add folio_lock()

This is like lock_page() but for use by callers who know they have a folio.
Convert __lock_page() to be __folio_lock().  This saves one call to
compound_head() per contended call to lock_page().

Saves 455 bytes of text; mostly from improved register allocation and
inlining decisions.  __folio_lock is 59 bytes while __lock_page was 79.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/pagemap.h | 24 +++++++++++++++++++-----
 mm/filemap.c            | 29 +++++++++++++++--------------
 2 files changed, 34 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8087921641a3..6481a431ea40 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -652,7 +652,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
 	return true;
 }
 
-extern void __lock_page(struct page *page);
+void __folio_lock(struct folio *folio);
 extern int __lock_page_killable(struct page *page);
 extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
@@ -660,13 +660,24 @@ extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 void unlock_page(struct page *page);
 void folio_unlock(struct folio *folio);
 
+static inline bool folio_trylock(struct folio *folio)
+{
+	return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
+}
+
 /*
  * Return true if the page was successfully locked
  */
 static inline int trylock_page(struct page *page)
 {
-	page = compound_head(page);
-	return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
+	return folio_trylock(page_folio(page));
+}
+
+static inline void folio_lock(struct folio *folio)
+{
+	might_sleep();
+	if (!folio_trylock(folio))
+		__folio_lock(folio);
 }
 
 /*
@@ -674,9 +685,12 @@ static inline int trylock_page(struct page *page)
  */
 static inline void lock_page(struct page *page)
 {
+	struct folio *folio;
 	might_sleep();
-	if (!trylock_page(page))
-		__lock_page(page);
+
+	folio = page_folio(page);
+	if (!folio_trylock(folio))
+		__folio_lock(folio);
 }
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index 191d7d39e838..a1d3f67e1b49 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1242,7 +1242,7 @@ static void wake_up_page(struct page *page, int bit)
  */
 enum behavior {
 	EXCLUSIVE,	/* Hold ref to page and take the bit when woken, like
-			 * __lock_page() waiting on then setting PG_locked.
+			 * __folio_lock() waiting on then setting PG_locked.
 			 */
 	SHARED,		/* Hold ref to page and check the bit when woken, like
 			 * wait_on_page_writeback() waiting on PG_writeback.
@@ -1633,17 +1633,16 @@ void page_endio(struct page *page, bool is_write, int err)
 EXPORT_SYMBOL_GPL(page_endio);
 
 /**
- * __lock_page - get a lock on the page, assuming we need to sleep to get it
- * @__page: the page to lock
+ * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it.
+ * @folio: The folio to lock
  */
-void __lock_page(struct page *__page)
+void __folio_lock(struct folio *folio)
 {
-	struct page *page = compound_head(__page);
-	wait_queue_head_t *q = page_waitqueue(page);
-	wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
+	wait_queue_head_t *q = page_waitqueue(&folio->page);
+	wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_UNINTERRUPTIBLE,
 				EXCLUSIVE);
 }
-EXPORT_SYMBOL(__lock_page);
+EXPORT_SYMBOL(__folio_lock);
 
 int __lock_page_killable(struct page *__page)
 {
@@ -1718,10 +1717,10 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			return 0;
 		}
 	} else {
-		__lock_page(page);
+		__folio_lock(page_folio(page));
 	}
-	return 1;
 
+	return 1;
 }
 
 /**
@@ -2915,7 +2914,9 @@ unlock:
 static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
 				     struct file **fpin)
 {
-	if (trylock_page(page))
+	struct folio *folio = page_folio(page);
+
+	if (folio_trylock(folio))
 		return 1;
 
 	/*
@@ -2928,7 +2929,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
 
 	*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
 	if (vmf->flags & FAULT_FLAG_KILLABLE) {
-		if (__lock_page_killable(page)) {
+		if (__lock_page_killable(&folio->page)) {
 			/*
 			 * We didn't have the right flags to drop the mmap_lock,
 			 * but all fault_handlers only check for fatal signals
@@ -2940,11 +2941,11 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
 			return 0;
 		}
 	} else
-		__lock_page(page);
+		__folio_lock(folio);
+
 	return 1;
 }
 
-
 /*
  * Synchronous readahead happens when we don't even find a page in the page
  * cache at all.  We don't want to perform IO under the mmap sem, so if we have
-- 
cgit v1.2.3


From af7f29d9e1a7bda1429923327421367b69aa2e70 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 8 Dec 2020 00:07:31 -0500
Subject: mm/filemap: Add folio_lock_killable()

This is like lock_page_killable() but for use by callers who
know they have a folio.  Convert __lock_page_killable() to be
__folio_lock_killable().  This saves one call to compound_head() per
contended call to lock_page_killable().

__folio_lock_killable() is 19 bytes smaller than __lock_page_killable()
was.  filemap_fault() shrinks by 74 bytes and __lock_page_or_retry()
shrinks by 71 bytes.  That's a total of 164 bytes of text saved.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 include/linux/pagemap.h | 15 ++++++++++-----
 mm/filemap.c            | 17 +++++++++--------
 2 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6481a431ea40..cf0ebd8c9e86 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -653,7 +653,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
 }
 
 void __folio_lock(struct folio *folio);
-extern int __lock_page_killable(struct page *page);
+int __folio_lock_killable(struct folio *folio);
 extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				unsigned int flags);
@@ -693,6 +693,14 @@ static inline void lock_page(struct page *page)
 		__folio_lock(folio);
 }
 
+static inline int folio_lock_killable(struct folio *folio)
+{
+	might_sleep();
+	if (!folio_trylock(folio))
+		return __folio_lock_killable(folio);
+	return 0;
+}
+
 /*
  * lock_page_killable is like lock_page but can be interrupted by fatal
  * signals.  It returns 0 if it locked the page and -EINTR if it was
@@ -700,10 +708,7 @@ static inline void lock_page(struct page *page)
  */
 static inline int lock_page_killable(struct page *page)
 {
-	might_sleep();
-	if (!trylock_page(page))
-		return __lock_page_killable(page);
-	return 0;
+	return folio_lock_killable(page_folio(page));
 }
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index a1d3f67e1b49..17803f785a34 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1644,14 +1644,13 @@ void __folio_lock(struct folio *folio)
 }
 EXPORT_SYMBOL(__folio_lock);
 
-int __lock_page_killable(struct page *__page)
+int __folio_lock_killable(struct folio *folio)
 {
-	struct page *page = compound_head(__page);
-	wait_queue_head_t *q = page_waitqueue(page);
-	return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
+	wait_queue_head_t *q = page_waitqueue(&folio->page);
+	return wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_KILLABLE,
 					EXCLUSIVE);
 }
-EXPORT_SYMBOL_GPL(__lock_page_killable);
+EXPORT_SYMBOL_GPL(__folio_lock_killable);
 
 int __lock_page_async(struct page *page, struct wait_page_queue *wait)
 {
@@ -1693,6 +1692,8 @@ int __lock_page_async(struct page *page, struct wait_page_queue *wait)
 int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 			 unsigned int flags)
 {
+	struct folio *folio = page_folio(page);
+
 	if (fault_flag_allow_retry_first(flags)) {
 		/*
 		 * CAUTION! In this case, mmap_lock is not released
@@ -1711,13 +1712,13 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 	if (flags & FAULT_FLAG_KILLABLE) {
 		int ret;
 
-		ret = __lock_page_killable(page);
+		ret = __folio_lock_killable(folio);
 		if (ret) {
 			mmap_read_unlock(mm);
 			return 0;
 		}
 	} else {
-		__folio_lock(page_folio(page));
+		__folio_lock(folio);
 	}
 
 	return 1;
@@ -2929,7 +2930,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
 
 	*fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
 	if (vmf->flags & FAULT_FLAG_KILLABLE) {
-		if (__lock_page_killable(&folio->page)) {
+		if (__folio_lock_killable(folio)) {
 			/*
 			 * We didn't have the right flags to drop the mmap_lock,
 			 * but all fault_handlers only check for fatal signals
-- 
cgit v1.2.3


From ffdc8dabf20b1b894eda63e7ec9ca15ab0b7292c Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 30 Dec 2020 17:58:40 -0500
Subject: mm/filemap: Add __folio_lock_async()

There aren't any actual callers of lock_page_async(), so remove it.
Convert filemap_update_page() to call __folio_lock_async().

__folio_lock_async() is 21 bytes smaller than __lock_page_async(),
but the real savings come from using a folio in filemap_update_page(),
shrinking it from 515 bytes to 404 bytes, saving 110 bytes.  The text
shrinks by 132 bytes in total.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 fs/io_uring.c           |  2 +-
 include/linux/pagemap.h | 17 -----------------
 mm/filemap.c            | 27 ++++++++++++++-------------
 3 files changed, 15 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 82f867983bb3..2c913698e428 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3364,7 +3364,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 }
 
 /*
- * This is our waitqueue callback handler, registered through lock_page_async()
+ * This is our waitqueue callback handler, registered through __folio_lock_async()
  * when we initially tried to do the IO with the iocb armed our waitqueue.
  * This gets called when the page is unlocked, and we generally expect that to
  * happen when the page IO is completed and the page is now uptodate. This will
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf0ebd8c9e86..5b5e8bd0b3fb 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -654,7 +654,6 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
 
 void __folio_lock(struct folio *folio);
 int __folio_lock_killable(struct folio *folio);
-extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
 extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				unsigned int flags);
 void unlock_page(struct page *page);
@@ -711,22 +710,6 @@ static inline int lock_page_killable(struct page *page)
 	return folio_lock_killable(page_folio(page));
 }
 
-/*
- * lock_page_async - Lock the page, unless this would block. If the page
- * is already locked, then queue a callback when the page becomes unlocked.
- * This callback can then retry the operation.
- *
- * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
- * was already locked and the callback defined in 'wait' was queued.
- */
-static inline int lock_page_async(struct page *page,
-				  struct wait_page_queue *wait)
-{
-	if (!trylock_page(page))
-		return __lock_page_async(page, wait);
-	return 0;
-}
-
 /*
  * lock_page_or_retry - Lock the page, unless this would block and the
  * caller indicated that it can handle a retry.
diff --git a/mm/filemap.c b/mm/filemap.c
index 17803f785a34..655145d27fff 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1652,18 +1652,18 @@ int __folio_lock_killable(struct folio *folio)
 }
 EXPORT_SYMBOL_GPL(__folio_lock_killable);
 
-int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
 {
-	struct wait_queue_head *q = page_waitqueue(page);
+	struct wait_queue_head *q = page_waitqueue(&folio->page);
 	int ret = 0;
 
-	wait->page = page;
+	wait->page = &folio->page;
 	wait->bit_nr = PG_locked;
 
 	spin_lock_irq(&q->lock);
 	__add_wait_queue_entry_tail(q, &wait->wait);
-	SetPageWaiters(page);
-	ret = !trylock_page(page);
+	folio_set_waiters(folio);
+	ret = !folio_trylock(folio);
 	/*
 	 * If we were successful now, we know we're still on the
 	 * waitqueue as we're still under the lock. This means it's
@@ -2436,6 +2436,7 @@ static int filemap_update_page(struct kiocb *iocb,
 		struct address_space *mapping, struct iov_iter *iter,
 		struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	int error;
 
 	if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -2445,40 +2446,40 @@ static int filemap_update_page(struct kiocb *iocb,
 		filemap_invalidate_lock_shared(mapping);
 	}
 
-	if (!trylock_page(page)) {
+	if (!folio_trylock(folio)) {
 		error = -EAGAIN;
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
 			goto unlock_mapping;
 		if (!(iocb->ki_flags & IOCB_WAITQ)) {
 			filemap_invalidate_unlock_shared(mapping);
-			put_and_wait_on_page_locked(page, TASK_KILLABLE);
+			put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE);
 			return AOP_TRUNCATED_PAGE;
 		}
-		error = __lock_page_async(page, iocb->ki_waitq);
+		error = __folio_lock_async(folio, iocb->ki_waitq);
 		if (error)
 			goto unlock_mapping;
 	}
 
 	error = AOP_TRUNCATED_PAGE;
-	if (!page->mapping)
+	if (!folio->mapping)
 		goto unlock;
 
 	error = 0;
-	if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
+	if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page))
 		goto unlock;
 
 	error = -EAGAIN;
 	if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
 		goto unlock;
 
-	error = filemap_read_page(iocb->ki_filp, mapping, page);
+	error = filemap_read_page(iocb->ki_filp, mapping, &folio->page);
 	goto unlock_mapping;
 unlock:
-	unlock_page(page);
+	folio_unlock(folio);
 unlock_mapping:
 	filemap_invalidate_unlock_shared(mapping);
 	if (error == AOP_TRUNCATED_PAGE)
-		put_page(page);
+		folio_put(folio);
 	return error;
 }
 
-- 
cgit v1.2.3


From 6baa8d602e84d97a7541ed94ccaeb6a3f9763111 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 4 Mar 2021 10:21:02 -0500
Subject: mm/filemap: Add folio_wait_locked()

Also add folio_wait_locked_killable().  Turn wait_on_page_locked() and
wait_on_page_locked_killable() into wrappers.  This eliminates a call
to compound_head() from each call-site, reducing text size by 193 bytes
for me.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/pagemap.h | 26 ++++++++++++++++++--------
 mm/filemap.c            |  4 ++--
 2 files changed, 20 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5b5e8bd0b3fb..77dbb7f625af 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -732,23 +732,33 @@ extern void wait_on_page_bit(struct page *page, int bit_nr);
 extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
 
 /* 
- * Wait for a page to be unlocked.
+ * Wait for a folio to be unlocked.
  *
- * This must be called with the caller "holding" the page,
- * ie with increased "page->count" so that the page won't
+ * This must be called with the caller "holding" the folio,
+ * ie with increased "page->count" so that the folio won't
  * go away during the wait..
  */
+static inline void folio_wait_locked(struct folio *folio)
+{
+	if (folio_test_locked(folio))
+		wait_on_page_bit(&folio->page, PG_locked);
+}
+
+static inline int folio_wait_locked_killable(struct folio *folio)
+{
+	if (!folio_test_locked(folio))
+		return 0;
+	return wait_on_page_bit_killable(&folio->page, PG_locked);
+}
+
 static inline void wait_on_page_locked(struct page *page)
 {
-	if (PageLocked(page))
-		wait_on_page_bit(compound_head(page), PG_locked);
+	folio_wait_locked(page_folio(page));
 }
 
 static inline int wait_on_page_locked_killable(struct page *page)
 {
-	if (!PageLocked(page))
-		return 0;
-	return wait_on_page_bit_killable(compound_head(page), PG_locked);
+	return folio_wait_locked_killable(page_folio(page));
 }
 
 int put_and_wait_on_page_locked(struct page *page, int state);
diff --git a/mm/filemap.c b/mm/filemap.c
index 655145d27fff..b17b58c29ed7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1704,9 +1704,9 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 
 		mmap_read_unlock(mm);
 		if (flags & FAULT_FLAG_KILLABLE)
-			wait_on_page_locked_killable(page);
+			folio_wait_locked_killable(folio);
 		else
-			wait_on_page_locked(page);
+			folio_wait_locked(folio);
 		return 0;
 	}
 	if (flags & FAULT_FLAG_KILLABLE) {
-- 
cgit v1.2.3


From 9138e47ed425246102317dbe452f7f0d4a54c4a2 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 18 Mar 2021 21:39:45 -0400
Subject: mm/filemap: Add __folio_lock_or_retry()

Convert __lock_page_or_retry() to __folio_lock_or_retry().  This actually
saves 4 bytes in the only caller of lock_page_or_retry() (due to better
register allocation) and saves the 14 byte cost of calling page_folio()
in __folio_lock_or_retry() for a total saving of 18 bytes.  Also use
a bool for the return type.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/pagemap.h | 11 +++++++----
 mm/filemap.c            | 22 ++++++++++------------
 mm/memory.c             |  8 ++++----
 3 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 77dbb7f625af..7cf140f98910 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -654,7 +654,7 @@ static inline bool wake_page_match(struct wait_page_queue *wait_page,
 
 void __folio_lock(struct folio *folio);
 int __folio_lock_killable(struct folio *folio);
-extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
 				unsigned int flags);
 void unlock_page(struct page *page);
 void folio_unlock(struct folio *folio);
@@ -715,13 +715,16 @@ static inline int lock_page_killable(struct page *page)
  * caller indicated that it can handle a retry.
  *
  * Return value and mmap_lock implications depend on flags; see
- * __lock_page_or_retry().
+ * __folio_lock_or_retry().
  */
-static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
+static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm,
 				     unsigned int flags)
 {
+	struct folio *folio;
 	might_sleep();
-	return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
+
+	folio = page_folio(page);
+	return folio_trylock(folio) || __folio_lock_or_retry(folio, mm, flags);
 }
 
 /*
diff --git a/mm/filemap.c b/mm/filemap.c
index b17b58c29ed7..706d121ca9cc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1680,48 +1680,46 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
 
 /*
  * Return values:
- * 1 - page is locked; mmap_lock is still held.
- * 0 - page is not locked.
+ * true - folio is locked; mmap_lock is still held.
+ * false - folio is not locked.
  *     mmap_lock has been released (mmap_read_unlock(), unless flags had both
  *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
  *     which case mmap_lock is still held.
  *
- * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
- * with the page locked and the mmap_lock unperturbed.
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return true
+ * with the folio locked and the mmap_lock unperturbed.
  */
-int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
 			 unsigned int flags)
 {
-	struct folio *folio = page_folio(page);
-
 	if (fault_flag_allow_retry_first(flags)) {
 		/*
 		 * CAUTION! In this case, mmap_lock is not released
 		 * even though return 0.
 		 */
 		if (flags & FAULT_FLAG_RETRY_NOWAIT)
-			return 0;
+			return false;
 
 		mmap_read_unlock(mm);
 		if (flags & FAULT_FLAG_KILLABLE)
 			folio_wait_locked_killable(folio);
 		else
 			folio_wait_locked(folio);
-		return 0;
+		return false;
 	}
 	if (flags & FAULT_FLAG_KILLABLE) {
-		int ret;
+		bool ret;
 
 		ret = __folio_lock_killable(folio);
 		if (ret) {
 			mmap_read_unlock(mm);
-			return 0;
+			return false;
 		}
 	} else {
 		__folio_lock(folio);
 	}
 
-	return 1;
+	return true;
 }
 
 /**
diff --git a/mm/memory.c b/mm/memory.c
index adf9b9ef8277..269992ba3fa3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4258,7 +4258,7 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
  * We enter with non-exclusive mmap_lock (to exclude vma changes,
  * but allow concurrent faults).
  * The mmap_lock may have been released depending on flags and our
- * return value.  See filemap_fault() and __lock_page_or_retry().
+ * return value.  See filemap_fault() and __folio_lock_or_retry().
  * If mmap_lock is released, vma may become invalid (for example
  * by other thread calling munmap()).
  */
@@ -4499,7 +4499,7 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
  * concurrent faults).
  *
  * The mmap_lock may have been released depending on flags and our return value.
- * See filemap_fault() and __lock_page_or_retry().
+ * See filemap_fault() and __folio_lock_or_retry().
  */
 static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 {
@@ -4603,7 +4603,7 @@ unlock:
  * By the time we get here, we already hold the mm semaphore
  *
  * The mmap_lock may have been released depending on flags and our
- * return value.  See filemap_fault() and __lock_page_or_retry().
+ * return value.  See filemap_fault() and __folio_lock_or_retry().
  */
 static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
@@ -4759,7 +4759,7 @@ static inline void mm_account_fault(struct pt_regs *regs,
  * By the time we get here, we already hold the mm semaphore
  *
  * The mmap_lock may have been released depending on flags and our
- * return value.  See filemap_fault() and __lock_page_or_retry().
+ * return value.  See filemap_fault() and __folio_lock_or_retry().
  */
 vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 			   unsigned int flags, struct pt_regs *regs)
-- 
cgit v1.2.3


From 575ced1c8b0d3b578b933a68ce67ddaff3df9506 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 8 Dec 2020 01:25:39 -0500
Subject: mm/swap: Add folio_rotate_reclaimable()

Convert rotate_reclaimable_page() to folio_rotate_reclaimable().  This
eliminates all five of the calls to compound_head() in this function,
saving 75 bytes at the cost of adding 15 bytes to its one caller,
end_page_writeback().  We also save 36 bytes from pagevec_move_tail_fn()
due to using folios there.  Net 96 bytes savings.

Also move its declaration to mm/internal.h as it's only used by filemap.c.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 include/linux/swap.h |  1 -
 mm/filemap.c         |  3 ++-
 mm/internal.h        |  1 +
 mm/page_io.c         |  4 ++--
 mm/swap.c            | 30 ++++++++++++++++--------------
 5 files changed, 21 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 85607c6c0cba..c7ecd3ad8e2e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -371,7 +371,6 @@ extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_cpu_zone(struct zone *zone);
 extern void lru_add_drain_all(void);
-extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void mark_page_lazyfree(struct page *page);
diff --git a/mm/filemap.c b/mm/filemap.c
index 706d121ca9cc..6a5bdb4f7c73 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1584,8 +1584,9 @@ void end_page_writeback(struct page *page)
 	 * ever page writeback.
 	 */
 	if (PageReclaim(page)) {
+		struct folio *folio = page_folio(page);
 		ClearPageReclaim(page);
-		rotate_reclaimable_page(page);
+		folio_rotate_reclaimable(folio);
 	}
 
 	/*
diff --git a/mm/internal.h b/mm/internal.h
index cf3cb933eba3..1a84484f8650 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -35,6 +35,7 @@
 void page_writeback_init(void);
 
 vm_fault_t do_swap_page(struct vm_fault *vmf);
+void folio_rotate_reclaimable(struct folio *folio);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
diff --git a/mm/page_io.c b/mm/page_io.c
index c493ce9ebcf5..d597bc6e6e45 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -38,7 +38,7 @@ void end_swap_bio_write(struct bio *bio)
 		 * Also print a dire warning that things will go BAD (tm)
 		 * very quickly.
 		 *
-		 * Also clear PG_reclaim to avoid rotate_reclaimable_page()
+		 * Also clear PG_reclaim to avoid folio_rotate_reclaimable()
 		 */
 		set_page_dirty(page);
 		pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
@@ -317,7 +317,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 			 * temporary failure if the system has limited
 			 * memory for allocating transmit buffers.
 			 * Mark the page dirty and avoid
-			 * rotate_reclaimable_page but rate-limit the
+			 * folio_rotate_reclaimable but rate-limit the
 			 * messages but do not flag PageError like
 			 * the normal direct-to-bio case as it could
 			 * be temporary.
diff --git a/mm/swap.c b/mm/swap.c
index af3cad4e5378..0edbcb9c8876 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -206,11 +206,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 
 static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec)
 {
-	if (!PageUnevictable(page)) {
-		del_page_from_lru_list(page, lruvec);
-		ClearPageActive(page);
-		add_page_to_lru_list_tail(page, lruvec);
-		__count_vm_events(PGROTATED, thp_nr_pages(page));
+	struct folio *folio = page_folio(page);
+
+	if (!folio_test_unevictable(folio)) {
+		lruvec_del_folio(lruvec, folio);
+		folio_clear_active(folio);
+		lruvec_add_folio_tail(lruvec, folio);
+		__count_vm_events(PGROTATED, folio_nr_pages(folio));
 	}
 }
 
@@ -227,23 +229,23 @@ static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page)
 }
 
 /*
- * Writeback is about to end against a page which has been marked for immediate
- * reclaim.  If it still appears to be reclaimable, move it to the tail of the
- * inactive list.
+ * Writeback is about to end against a folio which has been marked for
+ * immediate reclaim.  If it still appears to be reclaimable, move it
+ * to the tail of the inactive list.
  *
- * rotate_reclaimable_page() must disable IRQs, to prevent nasty races.
+ * folio_rotate_reclaimable() must disable IRQs, to prevent nasty races.
  */
-void rotate_reclaimable_page(struct page *page)
+void folio_rotate_reclaimable(struct folio *folio)
 {
-	if (!PageLocked(page) && !PageDirty(page) &&
-	    !PageUnevictable(page) && PageLRU(page)) {
+	if (!folio_test_locked(folio) && !folio_test_dirty(folio) &&
+	    !folio_test_unevictable(folio) && folio_test_lru(folio)) {
 		struct pagevec *pvec;
 		unsigned long flags;
 
-		get_page(page);
+		folio_get(folio);
 		local_lock_irqsave(&lru_rotate.lock, flags);
 		pvec = this_cpu_ptr(&lru_rotate.pvec);
-		if (pagevec_add_and_need_flush(pvec, page))
+		if (pagevec_add_and_need_flush(pvec, &folio->page))
 			pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
 		local_unlock_irqrestore(&lru_rotate.lock, flags);
 	}
-- 
cgit v1.2.3


From 4268b48077e55a93959f368aa9d3103ede5d3f0f Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 3 Mar 2021 15:21:55 -0500
Subject: mm/filemap: Add folio_end_writeback()

Add an end_page_writeback() wrapper function for users that are not yet
converted to folios.

folio_end_writeback() is less than half the size of end_page_writeback()
at just 105 bytes compared to 228 bytes, due to removing all the
compound_head() calls.  The 30 byte wrapper function makes this a net
saving of 93 bytes.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/pagemap.h |  3 ++-
 mm/filemap.c            | 43 +++++++++++++++++++++----------------------
 mm/folio-compat.c       |  6 ++++++
 3 files changed, 29 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7cf140f98910..d9dcd335cc18 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -767,7 +767,8 @@ static inline int wait_on_page_locked_killable(struct page *page)
 int put_and_wait_on_page_locked(struct page *page, int state);
 void wait_on_page_writeback(struct page *page);
 int wait_on_page_writeback_killable(struct page *page);
-extern void end_page_writeback(struct page *page);
+void end_page_writeback(struct page *page);
+void folio_end_writeback(struct folio *folio);
 void wait_for_stable_page(struct page *page);
 
 void __set_page_dirty(struct page *, struct address_space *, int warn);
diff --git a/mm/filemap.c b/mm/filemap.c
index 6a5bdb4f7c73..4e41bfdb6555 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1230,11 +1230,11 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 
-static void wake_up_page(struct page *page, int bit)
+static void folio_wake(struct folio *folio, int bit)
 {
-	if (!PageWaiters(page))
+	if (!folio_test_waiters(folio))
 		return;
-	wake_up_page_bit(page, bit);
+	wake_up_page_bit(&folio->page, bit);
 }
 
 /*
@@ -1571,39 +1571,38 @@ int wait_on_page_private_2_killable(struct page *page)
 EXPORT_SYMBOL(wait_on_page_private_2_killable);
 
 /**
- * end_page_writeback - end writeback against a page
- * @page: the page
+ * folio_end_writeback - End writeback against a folio.
+ * @folio: The folio.
  */
-void end_page_writeback(struct page *page)
+void folio_end_writeback(struct folio *folio)
 {
 	/*
-	 * TestClearPageReclaim could be used here but it is an atomic
-	 * operation and overkill in this particular case. Failing to
-	 * shuffle a page marked for immediate reclaim is too mild to
-	 * justify taking an atomic operation penalty at the end of
-	 * ever page writeback.
+	 * folio_test_clear_reclaim() could be used here but it is an
+	 * atomic operation and overkill in this particular case. Failing
+	 * to shuffle a folio marked for immediate reclaim is too mild
+	 * a gain to justify taking an atomic operation penalty at the
+	 * end of every folio writeback.
 	 */
-	if (PageReclaim(page)) {
-		struct folio *folio = page_folio(page);
-		ClearPageReclaim(page);
+	if (folio_test_reclaim(folio)) {
+		folio_clear_reclaim(folio);
 		folio_rotate_reclaimable(folio);
 	}
 
 	/*
-	 * Writeback does not hold a page reference of its own, relying
+	 * Writeback does not hold a folio reference of its own, relying
 	 * on truncation to wait for the clearing of PG_writeback.
-	 * But here we must make sure that the page is not freed and
-	 * reused before the wake_up_page().
+	 * But here we must make sure that the folio is not freed and
+	 * reused before the folio_wake().
 	 */
-	get_page(page);
-	if (!test_clear_page_writeback(page))
+	folio_get(folio);
+	if (!test_clear_page_writeback(&folio->page))
 		BUG();
 
 	smp_mb__after_atomic();
-	wake_up_page(page, PG_writeback);
-	put_page(page);
+	folio_wake(folio, PG_writeback);
+	folio_put(folio);
 }
-EXPORT_SYMBOL(end_page_writeback);
+EXPORT_SYMBOL(folio_end_writeback);
 
 /*
  * After completing I/O on a page, call this routine to update the page
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 91b3d00a92f7..526843d03d58 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -17,3 +17,9 @@ void unlock_page(struct page *page)
 	return folio_unlock(page_folio(page));
 }
 EXPORT_SYMBOL(unlock_page);
+
+void end_page_writeback(struct page *page)
+{
+	return folio_end_writeback(page_folio(page));
+}
+EXPORT_SYMBOL(end_page_writeback);
-- 
cgit v1.2.3


From 490e016f229a79dc7551e7f0e989d2304416c189 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 4 Mar 2021 11:09:17 -0500
Subject: mm/writeback: Add folio_wait_writeback()

wait_on_page_writeback_killable() only has one caller, so convert it to
call folio_wait_writeback_killable().  For the wait_on_page_writeback()
callers, add a compatibility wrapper around folio_wait_writeback().

Turning PageWriteback() into folio_test_writeback() eliminates a call
to compound_head() which saves 8 bytes and 15 bytes in the two
functions.  Unfortunately, that is more than offset by adding the
wait_on_page_writeback compatibility wrapper for a net increase in text
of 7 bytes.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 fs/afs/write.c          |  9 +++++----
 include/linux/pagemap.h |  3 ++-
 mm/folio-compat.c       |  6 ++++++
 mm/page-writeback.c     | 48 +++++++++++++++++++++++++++++++++---------------
 4 files changed, 46 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 2dfe3b3a53d6..5103328e528d 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -861,7 +861,8 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  */
 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 {
-	struct page *page = thp_head(vmf->page);
+	struct folio *folio = page_folio(vmf->page);
+	struct page *page = &folio->page;
 	struct file *file = vmf->vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
@@ -884,7 +885,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 		goto out;
 #endif
 
-	if (wait_on_page_writeback_killable(page))
+	if (folio_wait_writeback_killable(folio))
 		goto out;
 
 	if (lock_page_killable(page) < 0)
@@ -894,8 +895,8 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 	 * details the portion of the page we need to write back and we might
 	 * need to redirty the page if there's a problem.
 	 */
-	if (wait_on_page_writeback_killable(page) < 0) {
-		unlock_page(page);
+	if (folio_wait_writeback_killable(folio) < 0) {
+		folio_unlock(folio);
 		goto out;
 	}
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index d9dcd335cc18..e6013c063986 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -766,7 +766,8 @@ static inline int wait_on_page_locked_killable(struct page *page)
 
 int put_and_wait_on_page_locked(struct page *page, int state);
 void wait_on_page_writeback(struct page *page);
-int wait_on_page_writeback_killable(struct page *page);
+void folio_wait_writeback(struct folio *folio);
+int folio_wait_writeback_killable(struct folio *folio);
 void end_page_writeback(struct page *page);
 void folio_end_writeback(struct folio *folio);
 void wait_for_stable_page(struct page *page);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 526843d03d58..41275dac7a92 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -23,3 +23,9 @@ void end_page_writeback(struct page *page)
 	return folio_end_writeback(page_folio(page));
 }
 EXPORT_SYMBOL(end_page_writeback);
+
+void wait_on_page_writeback(struct page *page)
+{
+	return folio_wait_writeback(page_folio(page));
+}
+EXPORT_SYMBOL_GPL(wait_on_page_writeback);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4812a17b288c..c7f40c954217 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2873,33 +2873,51 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 }
 EXPORT_SYMBOL(__test_set_page_writeback);
 
-/*
- * Wait for a page to complete writeback
+/**
+ * folio_wait_writeback - Wait for a folio to finish writeback.
+ * @folio: The folio to wait for.
+ *
+ * If the folio is currently being written back to storage, wait for the
+ * I/O to complete.
+ *
+ * Context: Sleeps.  Must be called in process context and with
+ * no spinlocks held.  Caller should hold a reference on the folio.
+ * If the folio is not locked, writeback may start again after writeback
+ * has finished.
  */
-void wait_on_page_writeback(struct page *page)
+void folio_wait_writeback(struct folio *folio)
 {
-	while (PageWriteback(page)) {
-		trace_wait_on_page_writeback(page, page_mapping(page));
-		wait_on_page_bit(page, PG_writeback);
+	while (folio_test_writeback(folio)) {
+		trace_wait_on_page_writeback(&folio->page, folio_mapping(folio));
+		wait_on_page_bit(&folio->page, PG_writeback);
 	}
 }
-EXPORT_SYMBOL_GPL(wait_on_page_writeback);
+EXPORT_SYMBOL_GPL(folio_wait_writeback);
 
-/*
- * Wait for a page to complete writeback.  Returns -EINTR if we get a
- * fatal signal while waiting.
+/**
+ * folio_wait_writeback_killable - Wait for a folio to finish writeback.
+ * @folio: The folio to wait for.
+ *
+ * If the folio is currently being written back to storage, wait for the
+ * I/O to complete or a fatal signal to arrive.
+ *
+ * Context: Sleeps.  Must be called in process context and with
+ * no spinlocks held.  Caller should hold a reference on the folio.
+ * If the folio is not locked, writeback may start again after writeback
+ * has finished.
+ * Return: 0 on success, -EINTR if we get a fatal signal while waiting.
  */
-int wait_on_page_writeback_killable(struct page *page)
+int folio_wait_writeback_killable(struct folio *folio)
 {
-	while (PageWriteback(page)) {
-		trace_wait_on_page_writeback(page, page_mapping(page));
-		if (wait_on_page_bit_killable(page, PG_writeback))
+	while (folio_test_writeback(folio)) {
+		trace_wait_on_page_writeback(&folio->page, folio_mapping(folio));
+		if (wait_on_page_bit_killable(&folio->page, PG_writeback))
 			return -EINTR;
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable);
+EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
 
 /**
  * wait_for_stable_page() - wait for writeback to finish, if necessary.
-- 
cgit v1.2.3


From a49d0c507759214a7cfd26555382c314db486792 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 4 Mar 2021 11:25:25 -0500
Subject: mm/writeback: Add folio_wait_stable()

Move wait_for_stable_page() into the folio compatibility file.
folio_wait_stable() avoids a call to compound_head() and is 14 bytes
smaller than wait_for_stable_page() was.  The net text size grows by 16
bytes as a result of this patch.  We can also remove thp_head() as this
was the last user.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 include/linux/huge_mm.h | 15 ---------------
 include/linux/pagemap.h |  1 +
 mm/folio-compat.c       |  6 ++++++
 mm/page-writeback.c     | 24 ++++++++++++++----------
 4 files changed, 21 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index f123e15d966e..f280f33ff223 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -250,15 +250,6 @@ static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
 		return NULL;
 }
 
-/**
- * thp_head - Head page of a transparent huge page.
- * @page: Any page (tail, head or regular) found in the page cache.
- */
-static inline struct page *thp_head(struct page *page)
-{
-	return compound_head(page);
-}
-
 /**
  * thp_order - Order of a transparent huge page.
  * @page: Head page of a transparent huge page.
@@ -336,12 +327,6 @@ static inline struct list_head *page_deferred_list(struct page *page)
 #define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; })
 #define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; })
 
-static inline struct page *thp_head(struct page *page)
-{
-	VM_BUG_ON_PGFLAGS(PageTail(page), page);
-	return page;
-}
-
 static inline unsigned int thp_order(struct page *page)
 {
 	VM_BUG_ON_PGFLAGS(PageTail(page), page);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index e6013c063986..ee39ad7b42f1 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -771,6 +771,7 @@ int folio_wait_writeback_killable(struct folio *folio);
 void end_page_writeback(struct page *page);
 void folio_end_writeback(struct folio *folio);
 void wait_for_stable_page(struct page *page);
+void folio_wait_stable(struct folio *folio);
 
 void __set_page_dirty(struct page *, struct address_space *, int warn);
 int __set_page_dirty_nobuffers(struct page *page);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 41275dac7a92..3c83f03b80d7 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -29,3 +29,9 @@ void wait_on_page_writeback(struct page *page)
 	return folio_wait_writeback(page_folio(page));
 }
 EXPORT_SYMBOL_GPL(wait_on_page_writeback);
+
+void wait_for_stable_page(struct page *page)
+{
+	return folio_wait_stable(page_folio(page));
+}
+EXPORT_SYMBOL_GPL(wait_for_stable_page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c7f40c954217..0b0b7cd81a93 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2920,17 +2920,21 @@ int folio_wait_writeback_killable(struct folio *folio)
 EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
 
 /**
- * wait_for_stable_page() - wait for writeback to finish, if necessary.
- * @page:	The page to wait on.
+ * folio_wait_stable() - wait for writeback to finish, if necessary.
+ * @folio: The folio to wait on.
  *
- * This function determines if the given page is related to a backing device
- * that requires page contents to be held stable during writeback.  If so, then
- * it will wait for any pending writeback to complete.
+ * This function determines if the given folio is related to a backing
+ * device that requires folio contents to be held stable during writeback.
+ * If so, then it will wait for any pending writeback to complete.
+ *
+ * Context: Sleeps.  Must be called in process context and with
+ * no spinlocks held.  Caller should hold a reference on the folio.
+ * If the folio is not locked, writeback may start again after writeback
+ * has finished.
  */
-void wait_for_stable_page(struct page *page)
+void folio_wait_stable(struct folio *folio)
 {
-	page = thp_head(page);
-	if (page->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
-		wait_on_page_writeback(page);
+	if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
+		folio_wait_writeback(folio);
 }
-EXPORT_SYMBOL_GPL(wait_for_stable_page);
+EXPORT_SYMBOL_GPL(folio_wait_stable);
-- 
cgit v1.2.3


From 101c0bf67f50ca0e8b9da97b26f8dc7cb232b4d3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 4 Mar 2021 12:02:54 -0500
Subject: mm/filemap: Add folio_wait_bit()

Rename wait_on_page_bit() to folio_wait_bit().  We must always wait on
the folio, otherwise we won't be woken up due to the tail page hashing
to a different bucket from the head page.

This commit shrinks the kernel by 770 bytes, mostly due to moving
the page waitqueue lookup into folio_wait_bit_common().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/pagemap.h | 10 +++----
 mm/filemap.c            | 77 +++++++++++++++++++++++--------------------------
 mm/page-writeback.c     |  4 +--
 3 files changed, 43 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ee39ad7b42f1..2f481327dee8 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -728,11 +728,11 @@ static inline bool lock_page_or_retry(struct page *page, struct mm_struct *mm,
 }
 
 /*
- * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
+ * This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
  * and should not be used directly.
  */
-extern void wait_on_page_bit(struct page *page, int bit_nr);
-extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
+void folio_wait_bit(struct folio *folio, int bit_nr);
+int folio_wait_bit_killable(struct folio *folio, int bit_nr);
 
 /* 
  * Wait for a folio to be unlocked.
@@ -744,14 +744,14 @@ extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
 static inline void folio_wait_locked(struct folio *folio)
 {
 	if (folio_test_locked(folio))
-		wait_on_page_bit(&folio->page, PG_locked);
+		folio_wait_bit(folio, PG_locked);
 }
 
 static inline int folio_wait_locked_killable(struct folio *folio)
 {
 	if (!folio_test_locked(folio))
 		return 0;
-	return wait_on_page_bit_killable(&folio->page, PG_locked);
+	return folio_wait_bit_killable(folio, PG_locked);
 }
 
 static inline void wait_on_page_locked(struct page *page)
diff --git a/mm/filemap.c b/mm/filemap.c
index 4e41bfdb6555..13663f0cfd51 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1157,7 +1157,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 	 *
 	 * So update the flags atomically, and wake up the waiter
 	 * afterwards to avoid any races. This store-release pairs
-	 * with the load-acquire in wait_on_page_bit_common().
+	 * with the load-acquire in folio_wait_bit_common().
 	 */
 	smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
 	wake_up_state(wait->private, mode);
@@ -1238,7 +1238,7 @@ static void folio_wake(struct folio *folio, int bit)
 }
 
 /*
- * A choice of three behaviors for wait_on_page_bit_common():
+ * A choice of three behaviors for folio_wait_bit_common():
  */
 enum behavior {
 	EXCLUSIVE,	/* Hold ref to page and take the bit when woken, like
@@ -1253,16 +1253,16 @@ enum behavior {
 };
 
 /*
- * Attempt to check (or get) the page bit, and mark us done
+ * Attempt to check (or get) the folio flag, and mark us done
  * if successful.
  */
-static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
+static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
 					struct wait_queue_entry *wait)
 {
 	if (wait->flags & WQ_FLAG_EXCLUSIVE) {
-		if (test_and_set_bit(bit_nr, &page->flags))
+		if (test_and_set_bit(bit_nr, &folio->flags))
 			return false;
-	} else if (test_bit(bit_nr, &page->flags))
+	} else if (test_bit(bit_nr, &folio->flags))
 		return false;
 
 	wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
@@ -1272,9 +1272,10 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
 /* How many times do we accept lock stealing from under a waiter? */
 int sysctl_page_lock_unfairness = 5;
 
-static inline int wait_on_page_bit_common(wait_queue_head_t *q,
-	struct page *page, int bit_nr, int state, enum behavior behavior)
+static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
+		int state, enum behavior behavior)
 {
+	wait_queue_head_t *q = page_waitqueue(&folio->page);
 	int unfairness = sysctl_page_lock_unfairness;
 	struct wait_page_queue wait_page;
 	wait_queue_entry_t *wait = &wait_page.wait;
@@ -1283,8 +1284,8 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	unsigned long pflags;
 
 	if (bit_nr == PG_locked &&
-	    !PageUptodate(page) && PageWorkingset(page)) {
-		if (!PageSwapBacked(page)) {
+	    !folio_test_uptodate(folio) && folio_test_workingset(folio)) {
+		if (!folio_test_swapbacked(folio)) {
 			delayacct_thrashing_start();
 			delayacct = true;
 		}
@@ -1294,7 +1295,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 
 	init_wait(wait);
 	wait->func = wake_page_function;
-	wait_page.page = page;
+	wait_page.page = &folio->page;
 	wait_page.bit_nr = bit_nr;
 
 repeat:
@@ -1309,7 +1310,7 @@ repeat:
 	 * Do one last check whether we can get the
 	 * page bit synchronously.
 	 *
-	 * Do the SetPageWaiters() marking before that
+	 * Do the folio_set_waiters() marking before that
 	 * to let any waker we _just_ missed know they
 	 * need to wake us up (otherwise they'll never
 	 * even go to the slow case that looks at the
@@ -1320,8 +1321,8 @@ repeat:
 	 * lock to avoid races.
 	 */
 	spin_lock_irq(&q->lock);
-	SetPageWaiters(page);
-	if (!trylock_page_bit_common(page, bit_nr, wait))
+	folio_set_waiters(folio);
+	if (!folio_trylock_flag(folio, bit_nr, wait))
 		__add_wait_queue_entry_tail(q, wait);
 	spin_unlock_irq(&q->lock);
 
@@ -1331,10 +1332,10 @@ repeat:
 	 * see whether the page bit testing has already
 	 * been done by the wake function.
 	 *
-	 * We can drop our reference to the page.
+	 * We can drop our reference to the folio.
 	 */
 	if (behavior == DROP)
-		put_page(page);
+		folio_put(folio);
 
 	/*
 	 * Note that until the "finish_wait()", or until
@@ -1371,7 +1372,7 @@ repeat:
 		 *
 		 * And if that fails, we'll have to retry this all.
 		 */
-		if (unlikely(test_and_set_bit(bit_nr, &page->flags)))
+		if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0))))
 			goto repeat;
 
 		wait->flags |= WQ_FLAG_DONE;
@@ -1380,7 +1381,7 @@ repeat:
 
 	/*
 	 * If a signal happened, this 'finish_wait()' may remove the last
-	 * waiter from the wait-queues, but the PageWaiters bit will remain
+	 * waiter from the wait-queues, but the folio waiters bit will remain
 	 * set. That's ok. The next wakeup will take care of it, and trying
 	 * to do it here would be difficult and prone to races.
 	 */
@@ -1411,19 +1412,17 @@ repeat:
 	return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
 }
 
-void wait_on_page_bit(struct page *page, int bit_nr)
+void folio_wait_bit(struct folio *folio, int bit_nr)
 {
-	wait_queue_head_t *q = page_waitqueue(page);
-	wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
+	folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
 }
-EXPORT_SYMBOL(wait_on_page_bit);
+EXPORT_SYMBOL(folio_wait_bit);
 
-int wait_on_page_bit_killable(struct page *page, int bit_nr)
+int folio_wait_bit_killable(struct folio *folio, int bit_nr)
 {
-	wait_queue_head_t *q = page_waitqueue(page);
-	return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
+	return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED);
 }
-EXPORT_SYMBOL(wait_on_page_bit_killable);
+EXPORT_SYMBOL(folio_wait_bit_killable);
 
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
@@ -1440,11 +1439,8 @@ EXPORT_SYMBOL(wait_on_page_bit_killable);
  */
 int put_and_wait_on_page_locked(struct page *page, int state)
 {
-	wait_queue_head_t *q;
-
-	page = compound_head(page);
-	q = page_waitqueue(page);
-	return wait_on_page_bit_common(q, page, PG_locked, state, DROP);
+	return folio_wait_bit_common(page_folio(page), PG_locked, state,
+			DROP);
 }
 
 /**
@@ -1538,9 +1534,10 @@ EXPORT_SYMBOL(end_page_private_2);
  */
 void wait_on_page_private_2(struct page *page)
 {
-	page = compound_head(page);
-	while (PagePrivate2(page))
-		wait_on_page_bit(page, PG_private_2);
+	struct folio *folio = page_folio(page);
+
+	while (folio_test_private_2(folio))
+		folio_wait_bit(folio, PG_private_2);
 }
 EXPORT_SYMBOL(wait_on_page_private_2);
 
@@ -1557,11 +1554,11 @@ EXPORT_SYMBOL(wait_on_page_private_2);
  */
 int wait_on_page_private_2_killable(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	int ret = 0;
 
-	page = compound_head(page);
-	while (PagePrivate2(page)) {
-		ret = wait_on_page_bit_killable(page, PG_private_2);
+	while (folio_test_private_2(folio)) {
+		ret = folio_wait_bit_killable(folio, PG_private_2);
 		if (ret < 0)
 			break;
 	}
@@ -1638,16 +1635,14 @@ EXPORT_SYMBOL_GPL(page_endio);
  */
 void __folio_lock(struct folio *folio)
 {
-	wait_queue_head_t *q = page_waitqueue(&folio->page);
-	wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_UNINTERRUPTIBLE,
+	folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE,
 				EXCLUSIVE);
 }
 EXPORT_SYMBOL(__folio_lock);
 
 int __folio_lock_killable(struct folio *folio)
 {
-	wait_queue_head_t *q = page_waitqueue(&folio->page);
-	return wait_on_page_bit_common(q, &folio->page, PG_locked, TASK_KILLABLE,
+	return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE,
 					EXCLUSIVE);
 }
 EXPORT_SYMBOL_GPL(__folio_lock_killable);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b0b7cd81a93..1d8f2ee2e065 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2889,7 +2889,7 @@ void folio_wait_writeback(struct folio *folio)
 {
 	while (folio_test_writeback(folio)) {
 		trace_wait_on_page_writeback(&folio->page, folio_mapping(folio));
-		wait_on_page_bit(&folio->page, PG_writeback);
+		folio_wait_bit(folio, PG_writeback);
 	}
 }
 EXPORT_SYMBOL_GPL(folio_wait_writeback);
@@ -2911,7 +2911,7 @@ int folio_wait_writeback_killable(struct folio *folio)
 {
 	while (folio_test_writeback(folio)) {
 		trace_wait_on_page_writeback(&folio->page, folio_mapping(folio));
-		if (wait_on_page_bit_killable(&folio->page, PG_writeback))
+		if (folio_wait_bit_killable(folio, PG_writeback))
 			return -EINTR;
 	}
 
-- 
cgit v1.2.3


From df4d4f12739495332e0d1f916ef4270f7d25d207 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 16 Jan 2021 11:22:14 -0500
Subject: mm/filemap: Convert page wait queues to be folios

Reinforce that page flags are actually in the head page by changing the
type from page to folio.  Increases the size of cachefiles by two bytes,
but the kernel core is unchanged in size.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 fs/cachefiles/rdwr.c    | 16 ++++++++--------
 include/linux/pagemap.h |  8 ++++----
 mm/filemap.c            | 38 +++++++++++++++++++-------------------
 3 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 8ffc40e84a59..fcf4f3b72923 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -25,20 +25,20 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
 	struct cachefiles_object *object;
 	struct fscache_retrieval *op = monitor->op;
 	struct wait_page_key *key = _key;
-	struct page *page = wait->private;
+	struct folio *folio = wait->private;
 
 	ASSERT(key);
 
 	_enter("{%lu},%u,%d,{%p,%u}",
 	       monitor->netfs_page->index, mode, sync,
-	       key->page, key->bit_nr);
+	       key->folio, key->bit_nr);
 
-	if (key->page != page || key->bit_nr != PG_locked)
+	if (key->folio != folio || key->bit_nr != PG_locked)
 		return 0;
 
-	_debug("--- monitor %p %lx ---", page, page->flags);
+	_debug("--- monitor %p %lx ---", folio, folio->flags);
 
-	if (!PageUptodate(page) && !PageError(page)) {
+	if (!folio_test_uptodate(folio) && !folio_test_error(folio)) {
 		/* unlocked, not uptodate and not erronous? */
 		_debug("page probably truncated");
 	}
@@ -107,7 +107,7 @@ static int cachefiles_read_reissue(struct cachefiles_object *object,
 	put_page(backpage2);
 
 	INIT_LIST_HEAD(&monitor->op_link);
-	add_page_wait_queue(backpage, &monitor->monitor);
+	folio_add_wait_queue(page_folio(backpage), &monitor->monitor);
 
 	if (trylock_page(backpage)) {
 		ret = -EIO;
@@ -294,7 +294,7 @@ monitor_backing_page:
 	get_page(backpage);
 	monitor->back_page = backpage;
 	monitor->monitor.private = backpage;
-	add_page_wait_queue(backpage, &monitor->monitor);
+	folio_add_wait_queue(page_folio(backpage), &monitor->monitor);
 	monitor = NULL;
 
 	/* but the page may have been read before the monitor was installed, so
@@ -548,7 +548,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
 		get_page(backpage);
 		monitor->back_page = backpage;
 		monitor->monitor.private = backpage;
-		add_page_wait_queue(backpage, &monitor->monitor);
+		folio_add_wait_queue(page_folio(backpage), &monitor->monitor);
 		monitor = NULL;
 
 		/* but the page may have been read before the monitor was
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2f481327dee8..ebc62e9e453b 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -628,13 +628,13 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 }
 
 struct wait_page_key {
-	struct page *page;
+	struct folio *folio;
 	int bit_nr;
 	int page_match;
 };
 
 struct wait_page_queue {
-	struct page *page;
+	struct folio *folio;
 	int bit_nr;
 	wait_queue_entry_t wait;
 };
@@ -642,7 +642,7 @@ struct wait_page_queue {
 static inline bool wake_page_match(struct wait_page_queue *wait_page,
 				  struct wait_page_key *key)
 {
-	if (wait_page->page != key->page)
+	if (wait_page->folio != key->folio)
 	       return false;
 	key->page_match = 1;
 
@@ -802,7 +802,7 @@ int wait_on_page_private_2_killable(struct page *page);
 /*
  * Add an arbitrary waiter to a page's wait queue
  */
-extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
+void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
 
 /*
  * Fault everything in given userspace address range in.
diff --git a/mm/filemap.c b/mm/filemap.c
index a896348ff4de..1f8c00c2a4b7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1074,11 +1074,11 @@ EXPORT_SYMBOL(filemap_invalidate_unlock_two);
  */
 #define PAGE_WAIT_TABLE_BITS 8
 #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
-static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
+static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
 
-static wait_queue_head_t *page_waitqueue(struct page *page)
+static wait_queue_head_t *folio_waitqueue(struct folio *folio)
 {
-	return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
+	return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)];
 }
 
 void __init pagecache_init(void)
@@ -1086,7 +1086,7 @@ void __init pagecache_init(void)
 	int i;
 
 	for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
-		init_waitqueue_head(&page_wait_table[i]);
+		init_waitqueue_head(&folio_wait_table[i]);
 
 	page_writeback_init();
 }
@@ -1141,10 +1141,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 	 */
 	flags = wait->flags;
 	if (flags & WQ_FLAG_EXCLUSIVE) {
-		if (test_bit(key->bit_nr, &key->page->flags))
+		if (test_bit(key->bit_nr, &key->folio->flags))
 			return -1;
 		if (flags & WQ_FLAG_CUSTOM) {
-			if (test_and_set_bit(key->bit_nr, &key->page->flags))
+			if (test_and_set_bit(key->bit_nr, &key->folio->flags))
 				return -1;
 			flags |= WQ_FLAG_DONE;
 		}
@@ -1178,12 +1178,12 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
 
 static void folio_wake_bit(struct folio *folio, int bit_nr)
 {
-	wait_queue_head_t *q = page_waitqueue(&folio->page);
+	wait_queue_head_t *q = folio_waitqueue(folio);
 	struct wait_page_key key;
 	unsigned long flags;
 	wait_queue_entry_t bookmark;
 
-	key.page = &folio->page;
+	key.folio = folio;
 	key.bit_nr = bit_nr;
 	key.page_match = 0;
 
@@ -1275,7 +1275,7 @@ int sysctl_page_lock_unfairness = 5;
 static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 		int state, enum behavior behavior)
 {
-	wait_queue_head_t *q = page_waitqueue(&folio->page);
+	wait_queue_head_t *q = folio_waitqueue(folio);
 	int unfairness = sysctl_page_lock_unfairness;
 	struct wait_page_queue wait_page;
 	wait_queue_entry_t *wait = &wait_page.wait;
@@ -1295,7 +1295,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
 
 	init_wait(wait);
 	wait->func = wake_page_function;
-	wait_page.page = &folio->page;
+	wait_page.folio = folio;
 	wait_page.bit_nr = bit_nr;
 
 repeat:
@@ -1444,23 +1444,23 @@ int put_and_wait_on_page_locked(struct page *page, int state)
 }
 
 /**
- * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
- * @page: Page defining the wait queue of interest
+ * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue
+ * @folio: Folio defining the wait queue of interest
  * @waiter: Waiter to add to the queue
  *
- * Add an arbitrary @waiter to the wait queue for the nominated @page.
+ * Add an arbitrary @waiter to the wait queue for the nominated @folio.
  */
-void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
+void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
 {
-	wait_queue_head_t *q = page_waitqueue(page);
+	wait_queue_head_t *q = folio_waitqueue(folio);
 	unsigned long flags;
 
 	spin_lock_irqsave(&q->lock, flags);
 	__add_wait_queue_entry_tail(q, waiter);
-	SetPageWaiters(page);
+	folio_set_waiters(folio);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
-EXPORT_SYMBOL_GPL(add_page_wait_queue);
+EXPORT_SYMBOL_GPL(folio_add_wait_queue);
 
 #ifndef clear_bit_unlock_is_negative_byte
 
@@ -1650,10 +1650,10 @@ EXPORT_SYMBOL_GPL(__folio_lock_killable);
 
 static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
 {
-	struct wait_queue_head *q = page_waitqueue(&folio->page);
+	struct wait_queue_head *q = folio_waitqueue(folio);
 	int ret = 0;
 
-	wait->page = &folio->page;
+	wait->folio = folio;
 	wait->bit_nr = PG_locked;
 
 	spin_lock_irq(&q->lock);
-- 
cgit v1.2.3


From b47393f8448ade8bafe09ed302ce2a15093e9718 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 22 Apr 2021 22:58:32 -0400
Subject: mm/filemap: Add folio private_2 functions

end_page_private_2() becomes folio_end_private_2(),
wait_on_page_private_2() becomes folio_wait_private_2() and
wait_on_page_private_2_killable() becomes folio_wait_private_2_killable().

Adjust the fscache equivalents to call page_folio() before calling these
functions to avoid adding wrappers.  Ends up costing 1 byte of text
in ceph & netfs, but the core shrinks by three calls to page_folio().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/netfs.h   |  6 +++---
 include/linux/pagemap.h |  6 +++---
 mm/filemap.c            | 41 ++++++++++++++++++-----------------------
 3 files changed, 24 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5d6a4158a9a6..3d4cbf2f7dc4 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -55,7 +55,7 @@ static inline void set_page_fscache(struct page *page)
  */
 static inline void end_page_fscache(struct page *page)
 {
-	end_page_private_2(page);
+	folio_end_private_2(page_folio(page));
 }
 
 /**
@@ -66,7 +66,7 @@ static inline void end_page_fscache(struct page *page)
  */
 static inline void wait_on_page_fscache(struct page *page)
 {
-	wait_on_page_private_2(page);
+	folio_wait_private_2(page_folio(page));
 }
 
 /**
@@ -82,7 +82,7 @@ static inline void wait_on_page_fscache(struct page *page)
  */
 static inline int wait_on_page_fscache_killable(struct page *page)
 {
-	return wait_on_page_private_2_killable(page);
+	return folio_wait_private_2_killable(page_folio(page));
 }
 
 enum netfs_read_source {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index ebc62e9e453b..05f91bfc048d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -795,9 +795,9 @@ static inline void set_page_private_2(struct page *page)
 	SetPagePrivate2(page);
 }
 
-void end_page_private_2(struct page *page);
-void wait_on_page_private_2(struct page *page);
-int wait_on_page_private_2_killable(struct page *page);
+void folio_end_private_2(struct folio *folio);
+void folio_wait_private_2(struct folio *folio);
+int folio_wait_private_2_killable(struct folio *folio);
 
 /*
  * Add an arbitrary waiter to a page's wait queue
diff --git a/mm/filemap.c b/mm/filemap.c
index 1f8c00c2a4b7..d74be9fb3aa2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1506,56 +1506,51 @@ void folio_unlock(struct folio *folio)
 EXPORT_SYMBOL(folio_unlock);
 
 /**
- * end_page_private_2 - Clear PG_private_2 and release any waiters
- * @page: The page
+ * folio_end_private_2 - Clear PG_private_2 and wake any waiters.
+ * @folio: The folio.
  *
- * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for
- * this.  The page ref held for PG_private_2 being set is released.
+ * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for
+ * it.  The folio reference held for PG_private_2 being set is released.
  *
- * This is, for example, used when a netfs page is being written to a local
- * disk cache, thereby allowing writes to the cache for the same page to be
+ * This is, for example, used when a netfs folio is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same folio to be
  * serialised.
  */
-void end_page_private_2(struct page *page)
+void folio_end_private_2(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
-
 	VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio);
 	clear_bit_unlock(PG_private_2, folio_flags(folio, 0));
 	folio_wake_bit(folio, PG_private_2);
 	folio_put(folio);
 }
-EXPORT_SYMBOL(end_page_private_2);
+EXPORT_SYMBOL(folio_end_private_2);
 
 /**
- * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio.
+ * @folio: The folio to wait on.
  *
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio.
  */
-void wait_on_page_private_2(struct page *page)
+void folio_wait_private_2(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
-
 	while (folio_test_private_2(folio))
 		folio_wait_bit(folio, PG_private_2);
 }
-EXPORT_SYMBOL(wait_on_page_private_2);
+EXPORT_SYMBOL(folio_wait_private_2);
 
 /**
- * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio.
+ * @folio: The folio to wait on.
  *
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a
  * fatal signal is received by the calling task.
  *
  * Return:
  * - 0 if successful.
  * - -EINTR if a fatal signal was encountered.
  */
-int wait_on_page_private_2_killable(struct page *page)
+int folio_wait_private_2_killable(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
 	int ret = 0;
 
 	while (folio_test_private_2(folio)) {
@@ -1566,7 +1561,7 @@ int wait_on_page_private_2_killable(struct page *page)
 
 	return ret;
 }
-EXPORT_SYMBOL(wait_on_page_private_2_killable);
+EXPORT_SYMBOL(folio_wait_private_2_killable);
 
 /**
  * folio_end_writeback - End writeback against a folio.
-- 
cgit v1.2.3


From 6abbaa5b01730a1f0883d199cf5a90ae5c5dccea Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 27 Apr 2021 14:24:30 -0400
Subject: fs/netfs: Add folio fscache functions

Match the page writeback functions by adding
folio_start_fscache(), folio_end_fscache(), folio_wait_fscache() and
folio_wait_fscache_killable().  Remove set_page_private_2().  Also rewrite
the kernel-doc to describe when to use the function rather than what the
function does, and include the kernel-doc in the appropriate rst file.
Saves 31 bytes of text in netfs_rreq_unlock() due to set_page_fscache()
calling page_folio() once instead of three times.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 Documentation/filesystems/netfs_library.rst |  2 +
 include/linux/netfs.h                       | 75 ++++++++++++++++++-----------
 include/linux/pagemap.h                     | 16 ------
 3 files changed, 50 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst
index 57a641847818..bb68d39f03b7 100644
--- a/Documentation/filesystems/netfs_library.rst
+++ b/Documentation/filesystems/netfs_library.rst
@@ -524,3 +524,5 @@ Note that these methods are passed a pointer to the cache resource structure,
 not the read request structure as they could be used in other situations where
 there isn't a read request structure as well, such as writing dirty data to the
 cache.
+
+.. kernel-doc:: include/linux/netfs.h
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 3d4cbf2f7dc4..12c4177f7703 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -22,6 +22,7 @@
  * Overload PG_private_2 to give us PG_fscache - this is used to indicate that
  * a page is currently backed by a local disk cache
  */
+#define folio_test_fscache(folio)	folio_test_private_2(folio)
 #define PageFsCache(page)		PagePrivate2((page))
 #define SetPageFsCache(page)		SetPagePrivate2((page))
 #define ClearPageFsCache(page)		ClearPagePrivate2((page))
@@ -29,57 +30,77 @@
 #define TestClearPageFsCache(page)	TestClearPagePrivate2((page))
 
 /**
- * set_page_fscache - Set PG_fscache on a page and take a ref
- * @page: The page.
+ * folio_start_fscache - Start an fscache write on a folio.
+ * @folio: The folio.
  *
- * Set the PG_fscache (PG_private_2) flag on a page and take the reference
- * needed for the VM to handle its lifetime correctly.  This sets the flag and
- * takes the reference unconditionally, so care must be taken not to set the
- * flag again if it's already set.
+ * Call this function before writing a folio to a local cache.  Starting a
+ * second write before the first one finishes is not allowed.
  */
-static inline void set_page_fscache(struct page *page)
+static inline void folio_start_fscache(struct folio *folio)
 {
-	set_page_private_2(page);
+	VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
+	folio_get(folio);
+	folio_set_private_2(folio);
 }
 
 /**
- * end_page_fscache - Clear PG_fscache and release any waiters
- * @page: The page
- *
- * Clear the PG_fscache (PG_private_2) bit on a page and wake up any sleepers
- * waiting for this.  The page ref held for PG_private_2 being set is released.
+ * folio_end_fscache - End an fscache write on a folio.
+ * @folio: The folio.
  *
- * This is, for example, used when a netfs page is being written to a local
- * disk cache, thereby allowing writes to the cache for the same page to be
- * serialised.
+ * Call this function after the folio has been written to the local cache.
+ * This will wake any sleepers waiting on this folio.
  */
-static inline void end_page_fscache(struct page *page)
+static inline void folio_end_fscache(struct folio *folio)
 {
-	folio_end_private_2(page_folio(page));
+	folio_end_private_2(folio);
 }
 
 /**
- * wait_on_page_fscache - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
  *
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page.
+ * If this folio is currently being written to a local cache, wait for
+ * the write to finish.  Another write may start after this one finishes,
+ * unless the caller holds the folio lock.
  */
-static inline void wait_on_page_fscache(struct page *page)
+static inline void folio_wait_fscache(struct folio *folio)
 {
-	folio_wait_private_2(page_folio(page));
+	folio_wait_private_2(folio);
 }
 
 /**
- * wait_on_page_fscache_killable - Wait for PG_fscache to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_fscache_killable - Wait for an fscache write on this folio to end.
+ * @folio: The folio.
  *
- * Wait for PG_fscache (aka PG_private_2) to be cleared on a page or until a
- * fatal signal is received by the calling task.
+ * If this folio is currently being written to a local cache, wait
+ * for the write to finish or for a fatal signal to be received.
+ * Another write may start after this one finishes, unless the caller
+ * holds the folio lock.
  *
  * Return:
  * - 0 if successful.
  * - -EINTR if a fatal signal was encountered.
  */
+static inline int folio_wait_fscache_killable(struct folio *folio)
+{
+	return folio_wait_private_2_killable(folio);
+}
+
+static inline void set_page_fscache(struct page *page)
+{
+	folio_start_fscache(page_folio(page));
+}
+
+static inline void end_page_fscache(struct page *page)
+{
+	folio_end_private_2(page_folio(page));
+}
+
+static inline void wait_on_page_fscache(struct page *page)
+{
+	folio_wait_private_2(page_folio(page));
+}
+
 static inline int wait_on_page_fscache_killable(struct page *page)
 {
 	return folio_wait_private_2_killable(page_folio(page));
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 05f91bfc048d..bdbd7be67812 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -779,22 +779,6 @@ int __set_page_dirty_no_writeback(struct page *page);
 
 void page_endio(struct page *page, bool is_write, int err);
 
-/**
- * set_page_private_2 - Set PG_private_2 on a page and take a ref
- * @page: The page.
- *
- * Set the PG_private_2 flag on a page and take the reference needed for the VM
- * to handle its lifetime correctly.  This sets the flag and takes the
- * reference unconditionally, so care must be taken not to set the flag again
- * if it's already set.
- */
-static inline void set_page_private_2(struct page *page)
-{
-	page = compound_head(page);
-	get_page(page);
-	SetPagePrivate2(page);
-}
-
 void folio_end_private_2(struct folio *folio);
 void folio_wait_private_2(struct folio *folio);
 int folio_wait_private_2_killable(struct folio *folio);
-- 
cgit v1.2.3


From dd10ab049beb479dc83bb14a7b5cd68c363983ce Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 12 Apr 2021 16:45:17 -0400
Subject: mm: Add folio_mapped()

This function is the equivalent of page_mapped().  It is slightly
shorter as we do not need to handle the PageTail() case.  Reimplement
page_mapped() as a wrapper around folio_mapped().  folio_mapped()
is 13 bytes smaller than page_mapped(), but the page_mapped() wrapper
is 30 bytes, for a net increase of 17 bytes of text.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
---
 include/linux/mm.h       |  1 +
 include/linux/mm_types.h |  6 ++++++
 mm/folio-compat.c        |  6 ++++++
 mm/util.c                | 29 ++++++++++++++++-------------
 4 files changed, 29 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc5c38e1f780..95e36d0475ac 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1769,6 +1769,7 @@ static inline pgoff_t page_index(struct page *page)
 }
 
 bool page_mapped(struct page *page);
+bool folio_mapped(struct folio *folio);
 
 /*
  * Return true only if the page has been allocated with
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5ebcb86ac934..82dab23205c3 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -299,6 +299,12 @@ FOLIO_MATCH(memcg_data, memcg_data);
 #endif
 #undef FOLIO_MATCH
 
+static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
+{
+	struct page *tail = &folio->page + 1;
+	return &tail->compound_mapcount;
+}
+
 static inline atomic_t *compound_mapcount_ptr(struct page *page)
 {
 	return &page[1].compound_mapcount;
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 3c83f03b80d7..7044fcc8a8aa 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -35,3 +35,9 @@ void wait_for_stable_page(struct page *page)
 	return folio_wait_stable(page_folio(page));
 }
 EXPORT_SYMBOL_GPL(wait_for_stable_page);
+
+bool page_mapped(struct page *page)
+{
+	return folio_mapped(page_folio(page));
+}
+EXPORT_SYMBOL(page_mapped);
diff --git a/mm/util.c b/mm/util.c
index 6c1fe9bee30a..e322a42090e5 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -671,28 +671,31 @@ void *page_rmapping(struct page *page)
 	return __page_rmapping(page);
 }
 
-/*
- * Return true if this page is mapped into pagetables.
- * For compound page it returns true if any subpage of compound page is mapped.
+/**
+ * folio_mapped - Is this folio mapped into userspace?
+ * @folio: The folio.
+ *
+ * Return: True if any page in this folio is referenced by user page tables.
  */
-bool page_mapped(struct page *page)
+bool folio_mapped(struct folio *folio)
 {
-	int i;
+	long i, nr;
 
-	if (likely(!PageCompound(page)))
-		return atomic_read(&page->_mapcount) >= 0;
-	page = compound_head(page);
-	if (atomic_read(compound_mapcount_ptr(page)) >= 0)
+	if (folio_test_single(folio))
+		return atomic_read(&folio->_mapcount) >= 0;
+	if (atomic_read(folio_mapcount_ptr(folio)) >= 0)
 		return true;
-	if (PageHuge(page))
+	if (folio_test_hugetlb(folio))
 		return false;
-	for (i = 0; i < compound_nr(page); i++) {
-		if (atomic_read(&page[i]._mapcount) >= 0)
+
+	nr = folio_nr_pages(folio);
+	for (i = 0; i < nr; i++) {
+		if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0)
 			return true;
 	}
 	return false;
 }
-EXPORT_SYMBOL(page_mapped);
+EXPORT_SYMBOL(folio_mapped);
 
 struct anon_vma *page_anon_vma(struct page *page)
 {
-- 
cgit v1.2.3


From 874fd90cafdca9d678bceb88f91322af8c9a9d2d Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 25 Jun 2021 09:27:29 -0400
Subject: mm: Add folio_nid()

This is the folio equivalent of page_to_nid().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 95e36d0475ac..04e41d4d85ea 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1429,6 +1429,11 @@ static inline int page_to_nid(const struct page *page)
 }
 #endif
 
+static inline int folio_nid(const struct folio *folio)
+{
+	return page_to_nid(&folio->page);
+}
+
 #ifdef CONFIG_NUMA_BALANCING
 static inline int cpu_pid_to_cpupid(int cpu, int pid)
 {
-- 
cgit v1.2.3


From 1b7e4464d43a488e383843bf96ec62d12393bff1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 14:59:26 -0400
Subject: mm/memcg: Add folio_memcg() and related functions

memcg information is only stored in the head page, so the memcg
subsystem needs to assure that all accesses are to the head page.
The first step is converting page_memcg() to folio_memcg().

The callers of page_memcg() and PageMemcgKmem() are not yet ready to be
converted to use folios, so retain them as wrappers around folio_memcg()
and folio_memcg_kmem().  They will be converted in a later patch set.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 110 ++++++++++++++++++++++++++-------------------
 mm/memcontrol.c            |  21 +++++----
 2 files changed, 77 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3096c9a0ee01..06659670db32 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -369,7 +369,7 @@ enum page_memcg_data_flags {
 
 #define MEMCG_DATA_FLAGS_MASK (__NR_MEMCG_DATA_FLAGS - 1)
 
-static inline bool PageMemcgKmem(struct page *page);
+static inline bool folio_memcg_kmem(struct folio *folio);
 
 /*
  * After the initialization objcg->memcg is always pointing at
@@ -384,73 +384,77 @@ static inline struct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
 }
 
 /*
- * __page_memcg - get the memory cgroup associated with a non-kmem page
- * @page: a pointer to the page struct
+ * __folio_memcg - Get the memory cgroup associated with a non-kmem folio
+ * @folio: Pointer to the folio.
  *
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
  * proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * kmem pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * kmem folios.
  */
-static inline struct mem_cgroup *__page_memcg(struct page *page)
+static inline struct mem_cgroup *__folio_memcg(struct folio *folio)
 {
-	unsigned long memcg_data = page->memcg_data;
+	unsigned long memcg_data = folio->memcg_data;
 
-	VM_BUG_ON_PAGE(PageSlab(page), page);
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_KMEM, page);
+	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_KMEM, folio);
 
 	return (struct mem_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
 
 /*
- * __page_objcg - get the object cgroup associated with a kmem page
- * @page: a pointer to the page struct
+ * __folio_objcg - get the object cgroup associated with a kmem folio.
+ * @folio: Pointer to the folio.
  *
- * Returns a pointer to the object cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the object cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
  * proper object cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages or
- * LRU pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios or
+ * LRU folios.
  */
-static inline struct obj_cgroup *__page_objcg(struct page *page)
+static inline struct obj_cgroup *__folio_objcg(struct folio *folio)
 {
-	unsigned long memcg_data = page->memcg_data;
+	unsigned long memcg_data = folio->memcg_data;
 
-	VM_BUG_ON_PAGE(PageSlab(page), page);
-	VM_BUG_ON_PAGE(memcg_data & MEMCG_DATA_OBJCGS, page);
-	VM_BUG_ON_PAGE(!(memcg_data & MEMCG_DATA_KMEM), page);
+	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
+	VM_BUG_ON_FOLIO(memcg_data & MEMCG_DATA_OBJCGS, folio);
+	VM_BUG_ON_FOLIO(!(memcg_data & MEMCG_DATA_KMEM), folio);
 
 	return (struct obj_cgroup *)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
 
 /*
- * page_memcg - get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+ * folio_memcg - Get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
  *
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * Returns a pointer to the memory cgroup associated with the folio,
+ * or NULL. This function assumes that the folio is known to have a
  * proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
  *
- * For a non-kmem page any of the following ensures page and memcg binding
+ * For a non-kmem folio any of the following ensures folio and memcg binding
  * stability:
  *
- * - the page lock
+ * - the folio lock
  * - LRU isolation
  * - lock_page_memcg()
  * - exclusive reference
  *
- * For a kmem page a caller should hold an rcu read lock to protect memcg
- * associated with a kmem page from being released.
+ * For a kmem folio a caller should hold an rcu read lock to protect memcg
+ * associated with a kmem folio from being released.
  */
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+	if (folio_memcg_kmem(folio))
+		return obj_cgroup_memcg(__folio_objcg(folio));
+	return __folio_memcg(folio);
+}
+
 static inline struct mem_cgroup *page_memcg(struct page *page)
 {
-	if (PageMemcgKmem(page))
-		return obj_cgroup_memcg(__page_objcg(page));
-	else
-		return __page_memcg(page);
+	return folio_memcg(page_folio(page));
 }
 
 /*
@@ -523,17 +527,18 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
 
 #ifdef CONFIG_MEMCG_KMEM
 /*
- * PageMemcgKmem - check if the page has MemcgKmem flag set
- * @page: a pointer to the page struct
+ * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set.
+ * @folio: Pointer to the folio.
  *
- * Checks if the page has MemcgKmem flag set. The caller must ensure that
- * the page has an associated memory cgroup. It's not safe to call this function
- * against some types of pages, e.g. slab pages.
+ * Checks if the folio has MemcgKmem flag set. The caller must ensure
+ * that the folio has an associated memory cgroup. It's not safe to call
+ * this function against some types of folios, e.g. slab folios.
  */
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
 {
-	VM_BUG_ON_PAGE(page->memcg_data & MEMCG_DATA_OBJCGS, page);
-	return page->memcg_data & MEMCG_DATA_KMEM;
+	VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
+	VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJCGS, folio);
+	return folio->memcg_data & MEMCG_DATA_KMEM;
 }
 
 /*
@@ -577,7 +582,7 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 }
 
 #else
-static inline bool PageMemcgKmem(struct page *page)
+static inline bool folio_memcg_kmem(struct folio *folio)
 {
 	return false;
 }
@@ -593,6 +598,11 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 }
 #endif
 
+static inline bool PageMemcgKmem(struct page *page)
+{
+	return folio_memcg_kmem(page_folio(page));
+}
+
 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 {
 	return (memcg == root_mem_cgroup);
@@ -1115,6 +1125,11 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 #define MEM_CGROUP_ID_SHIFT	0
 #define MEM_CGROUP_ID_MAX	0
 
+static inline struct mem_cgroup *folio_memcg(struct folio *folio)
+{
+	return NULL;
+}
+
 static inline struct mem_cgroup *page_memcg(struct page *page)
 {
 	return NULL;
@@ -1131,6 +1146,11 @@ static inline struct mem_cgroup *page_memcg_check(struct page *page)
 	return NULL;
 }
 
+static inline bool folio_memcg_kmem(struct folio *folio)
+{
+	return false;
+}
+
 static inline bool PageMemcgKmem(struct page *page)
 {
 	return false;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a064a85d51da..1385ac6f688e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2992,15 +2992,16 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
  */
 void __memcg_kmem_uncharge_page(struct page *page, int order)
 {
+	struct folio *folio = page_folio(page);
 	struct obj_cgroup *objcg;
 	unsigned int nr_pages = 1 << order;
 
-	if (!PageMemcgKmem(page))
+	if (!folio_memcg_kmem(folio))
 		return;
 
-	objcg = __page_objcg(page);
+	objcg = __folio_objcg(folio);
 	obj_cgroup_uncharge_pages(objcg, nr_pages);
-	page->memcg_data = 0;
+	folio->memcg_data = 0;
 	obj_cgroup_put(objcg);
 }
 
@@ -3234,17 +3235,18 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
  */
 void split_page_memcg(struct page *head, unsigned int nr)
 {
-	struct mem_cgroup *memcg = page_memcg(head);
+	struct folio *folio = page_folio(head);
+	struct mem_cgroup *memcg = folio_memcg(folio);
 	int i;
 
 	if (mem_cgroup_disabled() || !memcg)
 		return;
 
 	for (i = 1; i < nr; i++)
-		head[i].memcg_data = head->memcg_data;
+		folio_page(folio, i)->memcg_data = folio->memcg_data;
 
-	if (PageMemcgKmem(head))
-		obj_cgroup_get_many(__page_objcg(head), nr - 1);
+	if (folio_memcg_kmem(folio))
+		obj_cgroup_get_many(__folio_objcg(folio), nr - 1);
 	else
 		css_get_many(&memcg->css, nr - 1);
 }
@@ -6811,6 +6813,7 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 
 static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 {
+	struct folio *folio = page_folio(page);
 	unsigned long nr_pages;
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
@@ -6824,14 +6827,14 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
 	 * exclusive access to the page.
 	 */
 	if (use_objcg) {
-		objcg = __page_objcg(page);
+		objcg = __folio_objcg(folio);
 		/*
 		 * This get matches the put at the end of the function and
 		 * kmem pages do not hold memcg references anymore.
 		 */
 		memcg = get_mem_cgroup_from_objcg(objcg);
 	} else {
-		memcg = __page_memcg(page);
+		memcg = __folio_memcg(folio);
 	}
 
 	if (!memcg)
-- 
cgit v1.2.3


From 8f425e4ed0eb3ef0b2d85a9efccf947ca6aa9b1c Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 25 Jun 2021 09:27:04 -0400
Subject: mm/memcg: Convert mem_cgroup_charge() to take a folio

Convert all callers of mem_cgroup_charge() to call page_folio() on the
page they're currently passing in.  Many of them will be converted to
use folios themselves soon.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 28 +++++++++++++++++++++-------
 kernel/events/uprobes.c    |  3 ++-
 mm/filemap.c               |  2 +-
 mm/huge_memory.c           |  2 +-
 mm/khugepaged.c            |  4 ++--
 mm/ksm.c                   |  3 ++-
 mm/memcontrol.c            | 28 +++++++---------------------
 mm/memory.c                |  9 +++++----
 mm/migrate.c               |  2 +-
 mm/shmem.c                 |  2 +-
 mm/userfaultfd.c           |  2 +-
 11 files changed, 44 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 06659670db32..19a51729e00c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -694,14 +694,28 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
 		page_counter_read(&memcg->memory);
 }
 
-int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
-			gfp_t gfp_mask);
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
-				    gfp_t gfp_mask)
+int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);
+
+/**
+ * mem_cgroup_charge - Charge a newly allocated folio to a cgroup.
+ * @folio: Folio to charge.
+ * @mm: mm context of the allocating task.
+ * @gfp: Reclaim mode.
+ *
+ * Try to charge @folio to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp if necessary.  If @mm is NULL, try to
+ * charge to the active memcg.
+ *
+ * Do not use this for folios allocated for swapin.
+ *
+ * Return: 0 on success. Otherwise, an error code is returned.
+ */
+static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
+				    gfp_t gfp)
 {
 	if (mem_cgroup_disabled())
 		return 0;
-	return __mem_cgroup_charge(page, mm, gfp_mask);
+	return __mem_cgroup_charge(folio, mm, gfp);
 }
 
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
@@ -1199,8 +1213,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
 	return false;
 }
 
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
-				    gfp_t gfp_mask)
+static inline int mem_cgroup_charge(struct folio *folio,
+		struct mm_struct *mm, gfp_t gfp)
 {
 	return 0;
 }
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index af24dc3febbe..6357c3580d07 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,7 +167,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 				addr + PAGE_SIZE);
 
 	if (new_page) {
-		err = mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL);
+		err = mem_cgroup_charge(page_folio(new_page), vma->vm_mm,
+					GFP_KERNEL);
 		if (err)
 			return err;
 	}
diff --git a/mm/filemap.c b/mm/filemap.c
index d74be9fb3aa2..816af226f49d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -889,7 +889,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
 	page->index = offset;
 
 	if (!huge) {
-		error = mem_cgroup_charge(page, NULL, gfp);
+		error = mem_cgroup_charge(page_folio(page), NULL, gfp);
 		if (error)
 			goto error;
 		charged = true;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5e9ef0fc261e..d49986a10d83 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -603,7 +603,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_charge(page, vma->vm_mm, gfp)) {
+	if (mem_cgroup_charge(page_folio(page), vma->vm_mm, gfp)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		count_vm_event(THP_FAULT_FALLBACK_CHARGE);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 045cc579f724..8480a3b05bcc 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1087,7 +1087,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) {
+	if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out_nolock;
 	}
@@ -1658,7 +1658,7 @@ static void collapse_file(struct mm_struct *mm,
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_charge(new_page, mm, gfp))) {
+	if (unlikely(mem_cgroup_charge(page_folio(new_page), mm, gfp))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out;
 	}
diff --git a/mm/ksm.c b/mm/ksm.c
index a5716fdec1aa..c246a0b0ac75 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2578,7 +2578,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
 		return page;		/* let do_swap_page report the error */
 
 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
-	if (new_page && mem_cgroup_charge(new_page, vma->vm_mm, GFP_KERNEL)) {
+	if (new_page &&
+	    mem_cgroup_charge(page_folio(new_page), vma->vm_mm, GFP_KERNEL)) {
 		put_page(new_page);
 		new_page = NULL;
 	}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e352225970d2..dbca7bf92737 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6660,9 +6660,9 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 			atomic_long_read(&parent->memory.children_low_usage)));
 }
 
-static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp)
+static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
+			gfp_t gfp)
 {
-	struct folio *folio = page_folio(page);
 	long nr_pages = folio_nr_pages(folio);
 	int ret;
 
@@ -6675,34 +6675,19 @@ static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp)
 
 	local_irq_disable();
 	mem_cgroup_charge_statistics(memcg, nr_pages);
-	memcg_check_events(memcg, page_to_nid(page));
+	memcg_check_events(memcg, folio_nid(folio));
 	local_irq_enable();
 out:
 	return ret;
 }
 
-/**
- * __mem_cgroup_charge - charge a newly allocated page to a cgroup
- * @page: page to charge
- * @mm: mm context of the victim
- * @gfp_mask: reclaim mode
- *
- * Try to charge @page to the memcg that @mm belongs to, reclaiming
- * pages according to @gfp_mask if necessary. if @mm is NULL, try to
- * charge to the active memcg.
- *
- * Do not use this for pages allocated for swapin.
- *
- * Returns 0 on success. Otherwise, an error code is returned.
- */
-int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
-			gfp_t gfp_mask)
+int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
 {
 	struct mem_cgroup *memcg;
 	int ret;
 
 	memcg = get_mem_cgroup_from_mm(mm);
-	ret = charge_memcg(page, memcg, gfp_mask);
+	ret = charge_memcg(folio, memcg, gfp);
 	css_put(&memcg->css);
 
 	return ret;
@@ -6723,6 +6708,7 @@ int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry)
 {
+	struct folio *folio = page_folio(page);
 	struct mem_cgroup *memcg;
 	unsigned short id;
 	int ret;
@@ -6737,7 +6723,7 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 		memcg = get_mem_cgroup_from_mm(mm);
 	rcu_read_unlock();
 
-	ret = charge_memcg(page, memcg, gfp);
+	ret = charge_memcg(folio, memcg, gfp);
 
 	css_put(&memcg->css);
 	return ret;
diff --git a/mm/memory.c b/mm/memory.c
index 269992ba3fa3..b67d80526bee 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -990,7 +990,7 @@ page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma,
 	if (!new_page)
 		return NULL;
 
-	if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) {
+	if (mem_cgroup_charge(page_folio(new_page), src_mm, GFP_KERNEL)) {
 		put_page(new_page);
 		return NULL;
 	}
@@ -3019,7 +3019,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		}
 	}
 
-	if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
+	if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL))
 		goto oom_free_new;
 	cgroup_throttle_swaprate(new_page, GFP_KERNEL);
 
@@ -3769,7 +3769,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	if (!page)
 		goto oom;
 
-	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
+	if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL))
 		goto oom_free_page;
 	cgroup_throttle_swaprate(page, GFP_KERNEL);
 
@@ -4193,7 +4193,8 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf)
 	if (!vmf->cow_page)
 		return VM_FAULT_OOM;
 
-	if (mem_cgroup_charge(vmf->cow_page, vma->vm_mm, GFP_KERNEL)) {
+	if (mem_cgroup_charge(page_folio(vmf->cow_page), vma->vm_mm,
+				GFP_KERNEL)) {
 		put_page(vmf->cow_page);
 		return VM_FAULT_OOM;
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index a6a7743ee98f..da55d2a8638d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2846,7 +2846,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 
 	if (unlikely(anon_vma_prepare(vma)))
 		goto abort;
-	if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
+	if (mem_cgroup_charge(page_folio(page), vma->vm_mm, GFP_KERNEL))
 		goto abort;
 
 	/*
diff --git a/mm/shmem.c b/mm/shmem.c
index b5860f4a2738..a2e653aeb536 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -710,7 +710,7 @@ static int shmem_add_to_page_cache(struct page *page,
 	page->index = index;
 
 	if (!PageSwapCache(page)) {
-		error = mem_cgroup_charge(page, charge_mm, gfp);
+		error = mem_cgroup_charge(page_folio(page), charge_mm, gfp);
 		if (error) {
 			if (PageTransHuge(page)) {
 				count_vm_event(THP_FILE_FALLBACK);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 7a9008415534..36e5f6ab976f 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -164,7 +164,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 	__SetPageUptodate(page);
 
 	ret = -ENOMEM;
-	if (mem_cgroup_charge(page, dst_mm, GFP_KERNEL))
+	if (mem_cgroup_charge(page_folio(page), dst_mm, GFP_KERNEL))
 		goto out_release;
 
 	ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-- 
cgit v1.2.3


From bbc6b703b21963e909f633cf7718903ed5094319 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 1 May 2021 20:42:23 -0400
Subject: mm/memcg: Convert mem_cgroup_uncharge() to take a folio

Convert all the callers to call page_folio().  Most of them were already
using a head page, but a few of them I can't prove were, so this may
actually fix a bug.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 15 +++++++++++----
 mm/filemap.c               |  2 +-
 mm/khugepaged.c            |  4 ++--
 mm/memcontrol.c            | 14 ++++----------
 mm/memory-failure.c        |  2 +-
 mm/memremap.c              |  2 +-
 mm/page_alloc.c            |  2 +-
 mm/swap.c                  |  2 +-
 8 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 19a51729e00c..b4bc052db32b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -722,12 +722,19 @@ int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
 				  gfp_t gfp, swp_entry_t entry);
 void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
 
-void __mem_cgroup_uncharge(struct page *page);
-static inline void mem_cgroup_uncharge(struct page *page)
+void __mem_cgroup_uncharge(struct folio *folio);
+
+/**
+ * mem_cgroup_uncharge - Uncharge a folio.
+ * @folio: Folio to uncharge.
+ *
+ * Uncharge a folio previously charged with mem_cgroup_charge().
+ */
+static inline void mem_cgroup_uncharge(struct folio *folio)
 {
 	if (mem_cgroup_disabled())
 		return;
-	__mem_cgroup_uncharge(page);
+	__mem_cgroup_uncharge(folio);
 }
 
 void __mem_cgroup_uncharge_list(struct list_head *page_list);
@@ -1229,7 +1236,7 @@ static inline void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry)
 {
 }
 
-static inline void mem_cgroup_uncharge(struct page *page)
+static inline void mem_cgroup_uncharge(struct folio *folio)
 {
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 816af226f49d..44fcd9d1dd65 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -940,7 +940,7 @@ unlock:
 	if (xas_error(&xas)) {
 		error = xas_error(&xas);
 		if (charged)
-			mem_cgroup_uncharge(page);
+			mem_cgroup_uncharge(page_folio(page));
 		goto error;
 	}
 
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 8480a3b05bcc..6d56e7abd2b8 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1211,7 +1211,7 @@ out_up_write:
 	mmap_write_unlock(mm);
 out_nolock:
 	if (!IS_ERR_OR_NULL(*hpage))
-		mem_cgroup_uncharge(*hpage);
+		mem_cgroup_uncharge(page_folio(*hpage));
 	trace_mm_collapse_huge_page(mm, isolated, result);
 	return;
 }
@@ -1975,7 +1975,7 @@ xa_unlocked:
 out:
 	VM_BUG_ON(!list_empty(&pagelist));
 	if (!IS_ERR_OR_NULL(*hpage))
-		mem_cgroup_uncharge(*hpage);
+		mem_cgroup_uncharge(page_folio(*hpage));
 	/* TODO: tracepoints */
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 64eac157db79..6321ed6d6e5a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6858,22 +6858,16 @@ static void uncharge_folio(struct folio *folio, struct uncharge_gather *ug)
 	css_put(&memcg->css);
 }
 
-/**
- * __mem_cgroup_uncharge - uncharge a page
- * @page: page to uncharge
- *
- * Uncharge a page previously charged with __mem_cgroup_charge().
- */
-void __mem_cgroup_uncharge(struct page *page)
+void __mem_cgroup_uncharge(struct folio *folio)
 {
 	struct uncharge_gather ug;
 
-	/* Don't touch page->lru of any random page, pre-check: */
-	if (!page_memcg(page))
+	/* Don't touch folio->lru of any random page, pre-check: */
+	if (!folio_memcg(folio))
 		return;
 
 	uncharge_gather_clear(&ug);
-	uncharge_folio(page_folio(page), &ug);
+	uncharge_folio(folio, &ug);
 	uncharge_batch(&ug);
 }
 
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3e6449f2102a..fffe4afaff43 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -762,7 +762,7 @@ static int delete_from_lru_cache(struct page *p)
 		 * Poisoned page might never drop its ref count to 0 so we have
 		 * to uncharge it manually from its memcg.
 		 */
-		mem_cgroup_uncharge(p);
+		mem_cgroup_uncharge(page_folio(p));
 
 		/*
 		 * drop the page count elevated by isolate_lru_page()
diff --git a/mm/memremap.c b/mm/memremap.c
index ed593bf87109..5a66a71ab591 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -505,7 +505,7 @@ void free_devmap_managed_page(struct page *page)
 
 	__ClearPageWaiters(page);
 
-	mem_cgroup_uncharge(page);
+	mem_cgroup_uncharge(page_folio(page));
 
 	/*
 	 * When a device_private page is freed, the page->mapping field
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37435c274cf..869d0b06e1ef 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -724,7 +724,7 @@ static inline void free_the_page(struct page *page, unsigned int order)
 
 void free_compound_page(struct page *page)
 {
-	mem_cgroup_uncharge(page);
+	mem_cgroup_uncharge(page_folio(page));
 	free_the_page(page, compound_order(page));
 }
 
diff --git a/mm/swap.c b/mm/swap.c
index 0edbcb9c8876..5679ce5bc362 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -94,7 +94,7 @@ static void __page_cache_release(struct page *page)
 static void __put_single_page(struct page *page)
 {
 	__page_cache_release(page);
-	mem_cgroup_uncharge(page);
+	mem_cgroup_uncharge(page_folio(page));
 	free_unref_page(page, 0);
 }
 
-- 
cgit v1.2.3


From d21bba2b7d0ae19dd1279e10aee61c37a17aba74 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 6 May 2021 18:14:59 -0400
Subject: mm/memcg: Convert mem_cgroup_migrate() to take folios

Convert all callers of mem_cgroup_migrate() to call page_folio() first.
They all look like they're using head pages already, but this proves it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h |  4 ++--
 mm/filemap.c               |  4 +++-
 mm/memcontrol.c            | 35 +++++++++++++++++------------------
 mm/migrate.c               |  4 +++-
 mm/shmem.c                 |  5 ++++-
 5 files changed, 29 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b4bc052db32b..07eda24ec581 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -745,7 +745,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
 	__mem_cgroup_uncharge_list(page_list);
 }
 
-void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
+void mem_cgroup_migrate(struct folio *old, struct folio *new);
 
 /**
  * mem_cgroup_lruvec - get the lru list vector for a memcg & node
@@ -1244,7 +1244,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
 {
 }
 
-static inline void mem_cgroup_migrate(struct page *old, struct page *new)
+static inline void mem_cgroup_migrate(struct folio *old, struct folio *new)
 {
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 44fcd9d1dd65..5368a4dcc35e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -835,6 +835,8 @@ EXPORT_SYMBOL(file_write_and_wait_range);
  */
 void replace_page_cache_page(struct page *old, struct page *new)
 {
+	struct folio *fold = page_folio(old);
+	struct folio *fnew = page_folio(new);
 	struct address_space *mapping = old->mapping;
 	void (*freepage)(struct page *) = mapping->a_ops->freepage;
 	pgoff_t offset = old->index;
@@ -848,7 +850,7 @@ void replace_page_cache_page(struct page *old, struct page *new)
 	new->mapping = mapping;
 	new->index = offset;
 
-	mem_cgroup_migrate(old, new);
+	mem_cgroup_migrate(fold, fnew);
 
 	xas_lock_irq(&xas);
 	xas_store(&xas, new);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6321ed6d6e5a..c83d2f862f8a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6891,36 +6891,35 @@ void __mem_cgroup_uncharge_list(struct list_head *page_list)
 }
 
 /**
- * mem_cgroup_migrate - charge a page's replacement
- * @oldpage: currently circulating page
- * @newpage: replacement page
+ * mem_cgroup_migrate - Charge a folio's replacement.
+ * @old: Currently circulating folio.
+ * @new: Replacement folio.
  *
- * Charge @newpage as a replacement page for @oldpage. @oldpage will
+ * Charge @new as a replacement folio for @old. @old will
  * be uncharged upon free.
  *
- * Both pages must be locked, @newpage->mapping must be set up.
+ * Both folios must be locked, @new->mapping must be set up.
  */
-void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
+void mem_cgroup_migrate(struct folio *old, struct folio *new)
 {
-	struct folio *newfolio = page_folio(newpage);
 	struct mem_cgroup *memcg;
-	long nr_pages = folio_nr_pages(newfolio);
+	long nr_pages = folio_nr_pages(new);
 	unsigned long flags;
 
-	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
-	VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio);
-	VM_BUG_ON_FOLIO(PageAnon(oldpage) != folio_test_anon(newfolio), newfolio);
-	VM_BUG_ON_FOLIO(compound_nr(oldpage) != nr_pages, newfolio);
+	VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
+	VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
+	VM_BUG_ON_FOLIO(folio_test_anon(old) != folio_test_anon(new), new);
+	VM_BUG_ON_FOLIO(folio_nr_pages(old) != nr_pages, new);
 
 	if (mem_cgroup_disabled())
 		return;
 
-	/* Page cache replacement: new page already charged? */
-	if (folio_memcg(newfolio))
+	/* Page cache replacement: new folio already charged? */
+	if (folio_memcg(new))
 		return;
 
-	memcg = page_memcg(oldpage);
-	VM_WARN_ON_ONCE_PAGE(!memcg, oldpage);
+	memcg = folio_memcg(old);
+	VM_WARN_ON_ONCE_FOLIO(!memcg, old);
 	if (!memcg)
 		return;
 
@@ -6932,11 +6931,11 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage)
 	}
 
 	css_get(&memcg->css);
-	commit_charge(newfolio, memcg);
+	commit_charge(new, memcg);
 
 	local_irq_save(flags);
 	mem_cgroup_charge_statistics(memcg, nr_pages);
-	memcg_check_events(memcg, page_to_nid(newpage));
+	memcg_check_events(memcg, folio_nid(new));
 	local_irq_restore(flags);
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index da55d2a8638d..bfb8ba490479 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -542,6 +542,8 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_states(struct page *newpage, struct page *page)
 {
+	struct folio *folio = page_folio(page);
+	struct folio *newfolio = page_folio(newpage);
 	int cpupid;
 
 	if (PageError(page))
@@ -609,7 +611,7 @@ void migrate_page_states(struct page *newpage, struct page *page)
 	copy_page_owner(page, newpage);
 
 	if (!PageHuge(page))
-		mem_cgroup_migrate(page, newpage);
+		mem_cgroup_migrate(folio, newfolio);
 }
 EXPORT_SYMBOL(migrate_page_states);
 
diff --git a/mm/shmem.c b/mm/shmem.c
index a2e653aeb536..1588f33d009a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1637,6 +1637,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 				struct shmem_inode_info *info, pgoff_t index)
 {
 	struct page *oldpage, *newpage;
+	struct folio *old, *new;
 	struct address_space *swap_mapping;
 	swp_entry_t entry;
 	pgoff_t swap_index;
@@ -1673,7 +1674,9 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 	xa_lock_irq(&swap_mapping->i_pages);
 	error = shmem_replace_entry(swap_mapping, swap_index, oldpage, newpage);
 	if (!error) {
-		mem_cgroup_migrate(oldpage, newpage);
+		old = page_folio(oldpage);
+		new = page_folio(newpage);
+		mem_cgroup_migrate(old, new);
 		__inc_lruvec_page_state(newpage, NR_FILE_PAGES);
 		__dec_lruvec_page_state(oldpage, NR_FILE_PAGES);
 	}
-- 
cgit v1.2.3


From 9d8053fc7a21ee2b3a540165d09418955258d9e8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 4 May 2021 11:43:01 -0400
Subject: mm/memcg: Convert mem_cgroup_track_foreign_dirty_slowpath() to folio

The page was only being used for the memcg and to gather trace
information, so this is a simple conversion.  The only caller of
mem_cgroup_track_foreign_dirty() will be converted to folios in a later
patch, so doing this now makes that patch simpler.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h       | 7 ++++---
 include/trace/events/writeback.h | 8 ++++----
 mm/memcontrol.c                  | 6 +++---
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 07eda24ec581..1c2776c3a223 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1599,17 +1599,18 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 			 unsigned long *pheadroom, unsigned long *pdirty,
 			 unsigned long *pwriteback);
 
-void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
 					     struct bdi_writeback *wb);
 
 static inline void mem_cgroup_track_foreign_dirty(struct page *page,
 						  struct bdi_writeback *wb)
 {
+	struct folio *folio = page_folio(page);
 	if (mem_cgroup_disabled())
 		return;
 
-	if (unlikely(&page_memcg(page)->css != wb->memcg_css))
-		mem_cgroup_track_foreign_dirty_slowpath(page, wb);
+	if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
+		mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
 }
 
 void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 840d1ba84cf5..297871ca0004 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -236,9 +236,9 @@ TRACE_EVENT(inode_switch_wbs,
 
 TRACE_EVENT(track_foreign_dirty,
 
-	TP_PROTO(struct page *page, struct bdi_writeback *wb),
+	TP_PROTO(struct folio *folio, struct bdi_writeback *wb),
 
-	TP_ARGS(page, wb),
+	TP_ARGS(folio, wb),
 
 	TP_STRUCT__entry(
 		__array(char,		name, 32)
@@ -250,7 +250,7 @@ TRACE_EVENT(track_foreign_dirty,
 	),
 
 	TP_fast_assign(
-		struct address_space *mapping = page_mapping(page);
+		struct address_space *mapping = folio_mapping(folio);
 		struct inode *inode = mapping ? mapping->host : NULL;
 
 		strscpy_pad(__entry->name, bdi_dev_name(wb->bdi), 32);
@@ -258,7 +258,7 @@ TRACE_EVENT(track_foreign_dirty,
 		__entry->ino		= inode ? inode->i_ino : 0;
 		__entry->memcg_id	= wb->memcg_css->id;
 		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
-		__entry->page_cgroup_ino = cgroup_ino(page_memcg(page)->css.cgroup);
+		__entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
 	),
 
 	TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c83d2f862f8a..4a04bddefdbc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4516,17 +4516,17 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
  * As being wrong occasionally doesn't matter, updates and accesses to the
  * records are lockless and racy.
  */
-void mem_cgroup_track_foreign_dirty_slowpath(struct page *page,
+void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
 					     struct bdi_writeback *wb)
 {
-	struct mem_cgroup *memcg = page_memcg(page);
+	struct mem_cgroup *memcg = folio_memcg(folio);
 	struct memcg_cgwb_frn *frn;
 	u64 now = get_jiffies_64();
 	u64 oldest_at = now;
 	int oldest = -1;
 	int i;
 
-	trace_track_foreign_dirty(page, wb);
+	trace_track_foreign_dirty(folio, wb);
 
 	/*
 	 * Pick the slot to use.  If there is already a slot for @wb, keep
-- 
cgit v1.2.3


From f70ad448741580bf61cdfbeb02229c581409760a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 17:26:00 -0400
Subject: mm/memcg: Add folio_memcg_lock() and folio_memcg_unlock()

These are the folio equivalents of lock_page_memcg() and
unlock_page_memcg().

lock_page_memcg() and unlock_page_memcg() have too many callers to be
easily replaced in a single patch, so reimplement them as wrappers for
now to be cleaned up later when enough callers have been converted to
use folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 10 ++++++++++
 mm/memcontrol.c            | 45 +++++++++++++++++++++++++++++----------------
 2 files changed, 39 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1c2776c3a223..be85450f066f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -978,6 +978,8 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg);
 extern bool cgroup_memory_noswap;
 #endif
 
+void folio_memcg_lock(struct folio *folio);
+void folio_memcg_unlock(struct folio *folio);
 void lock_page_memcg(struct page *page);
 void unlock_page_memcg(struct page *page);
 
@@ -1397,6 +1399,14 @@ static inline void unlock_page_memcg(struct page *page)
 {
 }
 
+static inline void folio_memcg_lock(struct folio *folio)
+{
+}
+
+static inline void folio_memcg_unlock(struct folio *folio)
+{
+}
+
 static inline void mem_cgroup_handle_over_high(void)
 {
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4a04bddefdbc..23fe124fe78d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1933,18 +1933,17 @@ void mem_cgroup_print_oom_group(struct mem_cgroup *memcg)
 }
 
 /**
- * lock_page_memcg - lock a page and memcg binding
- * @page: the page
+ * folio_memcg_lock - Bind a folio to its memcg.
+ * @folio: The folio.
  *
- * This function protects unlocked LRU pages from being moved to
+ * This function prevents unlocked LRU folios from being moved to
  * another cgroup.
  *
- * It ensures lifetime of the locked memcg. Caller is responsible
- * for the lifetime of the page.
+ * It ensures lifetime of the bound memcg.  The caller is responsible
+ * for the lifetime of the folio.
  */
-void lock_page_memcg(struct page *page)
+void folio_memcg_lock(struct folio *folio)
 {
-	struct page *head = compound_head(page); /* rmap on tail pages */
 	struct mem_cgroup *memcg;
 	unsigned long flags;
 
@@ -1958,7 +1957,7 @@ void lock_page_memcg(struct page *page)
 	if (mem_cgroup_disabled())
 		return;
 again:
-	memcg = page_memcg(head);
+	memcg = folio_memcg(folio);
 	if (unlikely(!memcg))
 		return;
 
@@ -1972,7 +1971,7 @@ again:
 		return;
 
 	spin_lock_irqsave(&memcg->move_lock, flags);
-	if (memcg != page_memcg(head)) {
+	if (memcg != folio_memcg(folio)) {
 		spin_unlock_irqrestore(&memcg->move_lock, flags);
 		goto again;
 	}
@@ -1986,9 +1985,15 @@ again:
 	memcg->move_lock_task = current;
 	memcg->move_lock_flags = flags;
 }
+EXPORT_SYMBOL(folio_memcg_lock);
+
+void lock_page_memcg(struct page *page)
+{
+	folio_memcg_lock(page_folio(page));
+}
 EXPORT_SYMBOL(lock_page_memcg);
 
-static void __unlock_page_memcg(struct mem_cgroup *memcg)
+static void __folio_memcg_unlock(struct mem_cgroup *memcg)
 {
 	if (memcg && memcg->move_lock_task == current) {
 		unsigned long flags = memcg->move_lock_flags;
@@ -2003,14 +2008,22 @@ static void __unlock_page_memcg(struct mem_cgroup *memcg)
 }
 
 /**
- * unlock_page_memcg - unlock a page and memcg binding
- * @page: the page
+ * folio_memcg_unlock - Release the binding between a folio and its memcg.
+ * @folio: The folio.
+ *
+ * This releases the binding created by folio_memcg_lock().  This does
+ * not change the accounting of this folio to its memcg, but it does
+ * permit others to change it.
  */
-void unlock_page_memcg(struct page *page)
+void folio_memcg_unlock(struct folio *folio)
 {
-	struct page *head = compound_head(page);
+	__folio_memcg_unlock(folio_memcg(folio));
+}
+EXPORT_SYMBOL(folio_memcg_unlock);
 
-	__unlock_page_memcg(page_memcg(head));
+void unlock_page_memcg(struct page *page)
+{
+	folio_memcg_unlock(page_folio(page));
 }
 EXPORT_SYMBOL(unlock_page_memcg);
 
@@ -5643,7 +5656,7 @@ static int mem_cgroup_move_account(struct page *page,
 
 	page->memcg_data = (unsigned long)to;
 
-	__unlock_page_memcg(from);
+	__folio_memcg_unlock(from);
 
 	ret = 0;
 	nid = page_to_nid(page);
-- 
cgit v1.2.3


From b1baabd995ab8e830dbf647fe731b51e12b8cedd Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 20:00:28 -0400
Subject: mm/memcg: Add folio_lruvec()

This replaces mem_cgroup_page_lruvec().  All callers converted.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 20 +++++++++-----------
 mm/compaction.c            |  2 +-
 mm/memcontrol.c            |  9 ++++++---
 mm/swap.c                  |  3 ++-
 mm/workingset.c            |  3 ++-
 5 files changed, 20 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index be85450f066f..35577caf27d9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -784,18 +784,17 @@ out:
 }
 
 /**
- * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
- * @page: the page
+ * folio_lruvec - return lruvec for isolating/putting an LRU folio
+ * @folio: Pointer to the folio.
  *
- * This function relies on page->mem_cgroup being stable.
+ * This function relies on folio->mem_cgroup being stable.
  */
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
 {
-	pg_data_t *pgdat = page_pgdat(page);
-	struct mem_cgroup *memcg = page_memcg(page);
+	struct mem_cgroup *memcg = folio_memcg(folio);
 
-	VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
-	return mem_cgroup_lruvec(memcg, pgdat);
+	VM_WARN_ON_ONCE_FOLIO(!memcg && !mem_cgroup_disabled(), folio);
+	return mem_cgroup_lruvec(memcg, folio_pgdat(folio));
 }
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -1256,10 +1255,9 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
 	return &pgdat->__lruvec;
 }
 
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec(struct folio *folio)
 {
-	pg_data_t *pgdat = page_pgdat(page);
-
+	struct pglist_data *pgdat = folio_pgdat(folio);
 	return &pgdat->__lruvec;
 }
 
diff --git a/mm/compaction.c b/mm/compaction.c
index bfc93da1c2c7..37dfdf2b2287 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1022,7 +1022,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (!TestClearPageLRU(page))
 			goto isolate_fail_put;
 
-		lruvec = mem_cgroup_page_lruvec(page);
+		lruvec = folio_lruvec(page_folio(page));
 
 		/* If we already hold the lock, we can skip some rechecking */
 		if (lruvec != locked) {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c17681defeec..ea4f879d2771 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1154,9 +1154,10 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
  */
 struct lruvec *lock_page_lruvec(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	struct lruvec *lruvec;
 
-	lruvec = mem_cgroup_page_lruvec(page);
+	lruvec = folio_lruvec(folio);
 	spin_lock(&lruvec->lru_lock);
 
 	lruvec_memcg_debug(lruvec, page);
@@ -1166,9 +1167,10 @@ struct lruvec *lock_page_lruvec(struct page *page)
 
 struct lruvec *lock_page_lruvec_irq(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	struct lruvec *lruvec;
 
-	lruvec = mem_cgroup_page_lruvec(page);
+	lruvec = folio_lruvec(folio);
 	spin_lock_irq(&lruvec->lru_lock);
 
 	lruvec_memcg_debug(lruvec, page);
@@ -1178,9 +1180,10 @@ struct lruvec *lock_page_lruvec_irq(struct page *page)
 
 struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
 {
+	struct folio *folio = page_folio(page);
 	struct lruvec *lruvec;
 
-	lruvec = mem_cgroup_page_lruvec(page);
+	lruvec = folio_lruvec(folio);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
 
 	lruvec_memcg_debug(lruvec, page);
diff --git a/mm/swap.c b/mm/swap.c
index 5679ce5bc362..65a74c89e7cf 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -293,7 +293,8 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
 
 void lru_note_cost_page(struct page *page)
 {
-	lru_note_cost(mem_cgroup_page_lruvec(page),
+	struct folio *folio = page_folio(page);
+	lru_note_cost(folio_lruvec(folio),
 		      page_is_file_lru(page), thp_nr_pages(page));
 }
 
diff --git a/mm/workingset.c b/mm/workingset.c
index d5b81e4f4cbe..3deb408a240d 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -397,6 +397,7 @@ out:
  */
 void workingset_activation(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
 
@@ -411,7 +412,7 @@ void workingset_activation(struct page *page)
 	memcg = page_memcg_rcu(page);
 	if (!mem_cgroup_disabled() && !memcg)
 		goto out;
-	lruvec = mem_cgroup_page_lruvec(page);
+	lruvec = folio_lruvec(folio);
 	workingset_age_nonresident(lruvec, thp_nr_pages(page));
 out:
 	rcu_read_unlock();
-- 
cgit v1.2.3


From e809c3fedeeb806993349e7bf797b4c2b728be7d Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 28 Jun 2021 21:59:47 -0400
Subject: mm/memcg: Add folio_lruvec_lock() and similar functions

These are the folio equivalents of lock_page_lruvec() and similar
functions.  Also convert lruvec_memcg_debug() to take a folio.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 32 ++++++++++---------
 mm/compaction.c            |  2 +-
 mm/huge_memory.c           |  5 +--
 mm/memcontrol.c            | 77 +++++++++++++++++++++++++++++-----------------
 mm/rmap.c                  |  2 +-
 mm/swap.c                  |  8 +++--
 mm/vmscan.c                |  3 +-
 7 files changed, 79 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 35577caf27d9..30d2cd7b5c9e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -801,15 +801,16 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
-struct lruvec *lock_page_lruvec(struct page *page);
-struct lruvec *lock_page_lruvec_irq(struct page *page);
-struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+struct lruvec *folio_lruvec_lock(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
+struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 						unsigned long *flags);
 
 #ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
 #else
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
 {
 }
 #endif
@@ -1261,7 +1262,8 @@ static inline struct lruvec *folio_lruvec(struct folio *folio)
 	return &pgdat->__lruvec;
 }
 
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+static inline
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
 {
 }
 
@@ -1291,26 +1293,26 @@ static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
 
-static inline struct lruvec *lock_page_lruvec(struct page *page)
+static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
 {
-	struct pglist_data *pgdat = page_pgdat(page);
+	struct pglist_data *pgdat = folio_pgdat(folio);
 
 	spin_lock(&pgdat->__lruvec.lru_lock);
 	return &pgdat->__lruvec;
 }
 
-static inline struct lruvec *lock_page_lruvec_irq(struct page *page)
+static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
 {
-	struct pglist_data *pgdat = page_pgdat(page);
+	struct pglist_data *pgdat = folio_pgdat(folio);
 
 	spin_lock_irq(&pgdat->__lruvec.lru_lock);
 	return &pgdat->__lruvec;
 }
 
-static inline struct lruvec *lock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 		unsigned long *flagsp)
 {
-	struct pglist_data *pgdat = page_pgdat(page);
+	struct pglist_data *pgdat = folio_pgdat(folio);
 
 	spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
 	return &pgdat->__lruvec;
@@ -1576,6 +1578,7 @@ static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
 static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
 		struct lruvec *locked_lruvec)
 {
+	struct folio *folio = page_folio(page);
 	if (locked_lruvec) {
 		if (page_matches_lruvec(page, locked_lruvec))
 			return locked_lruvec;
@@ -1583,13 +1586,14 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
 		unlock_page_lruvec_irq(locked_lruvec);
 	}
 
-	return lock_page_lruvec_irq(page);
+	return folio_lruvec_lock_irq(folio);
 }
 
 /* Don't lock again iff page's lruvec locked */
 static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
 		struct lruvec *locked_lruvec, unsigned long *flags)
 {
+	struct folio *folio = page_folio(page);
 	if (locked_lruvec) {
 		if (page_matches_lruvec(page, locked_lruvec))
 			return locked_lruvec;
@@ -1597,7 +1601,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
 		unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
 	}
 
-	return lock_page_lruvec_irqsave(page, flags);
+	return folio_lruvec_lock_irqsave(folio, flags);
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
diff --git a/mm/compaction.c b/mm/compaction.c
index 37dfdf2b2287..fbc60f964c38 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1032,7 +1032,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 			compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
 			locked = lruvec;
 
-			lruvec_memcg_debug(lruvec, page);
+			lruvec_memcg_debug(lruvec, page_folio(page));
 
 			/* Try get exclusive access under lock */
 			if (!skip_updated) {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d49986a10d83..e5ea5f775d5c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2405,7 +2405,8 @@ static void __split_huge_page_tail(struct page *head, int tail,
 static void __split_huge_page(struct page *page, struct list_head *list,
 		pgoff_t end)
 {
-	struct page *head = compound_head(page);
+	struct folio *folio = page_folio(page);
+	struct page *head = &folio->page;
 	struct lruvec *lruvec;
 	struct address_space *swap_cache = NULL;
 	unsigned long offset = 0;
@@ -2424,7 +2425,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	}
 
 	/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
-	lruvec = lock_page_lruvec(head);
+	lruvec = folio_lruvec_lock(folio);
 
 	for (i = nr - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ea4f879d2771..8dab23a71fc4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1126,67 +1126,88 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 }
 
 #ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
+void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
 {
 	struct mem_cgroup *memcg;
 
 	if (mem_cgroup_disabled())
 		return;
 
-	memcg = page_memcg(page);
+	memcg = folio_memcg(folio);
 
 	if (!memcg)
-		VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page);
+		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != root_mem_cgroup, folio);
 	else
-		VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page);
+		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
 }
 #endif
 
 /**
- * lock_page_lruvec - lock and return lruvec for a given page.
- * @page: the page
+ * folio_lruvec_lock - Lock the lruvec for a folio.
+ * @folio: Pointer to the folio.
  *
  * These functions are safe to use under any of the following conditions:
- * - page locked
- * - PageLRU cleared
- * - lock_page_memcg()
- * - page->_refcount is zero
+ * - folio locked
+ * - folio_test_lru false
+ * - folio_memcg_lock()
+ * - folio frozen (refcount of 0)
+ *
+ * Return: The lruvec this folio is on with its lock held.
  */
-struct lruvec *lock_page_lruvec(struct page *page)
+struct lruvec *folio_lruvec_lock(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
-	struct lruvec *lruvec;
+	struct lruvec *lruvec = folio_lruvec(folio);
 
-	lruvec = folio_lruvec(folio);
 	spin_lock(&lruvec->lru_lock);
-
-	lruvec_memcg_debug(lruvec, page);
+	lruvec_memcg_debug(lruvec, folio);
 
 	return lruvec;
 }
 
-struct lruvec *lock_page_lruvec_irq(struct page *page)
+/**
+ * folio_lruvec_lock_irq - Lock the lruvec for a folio.
+ * @folio: Pointer to the folio.
+ *
+ * These functions are safe to use under any of the following conditions:
+ * - folio locked
+ * - folio_test_lru false
+ * - folio_memcg_lock()
+ * - folio frozen (refcount of 0)
+ *
+ * Return: The lruvec this folio is on with its lock held and interrupts
+ * disabled.
+ */
+struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
-	struct lruvec *lruvec;
+	struct lruvec *lruvec = folio_lruvec(folio);
 
-	lruvec = folio_lruvec(folio);
 	spin_lock_irq(&lruvec->lru_lock);
-
-	lruvec_memcg_debug(lruvec, page);
+	lruvec_memcg_debug(lruvec, folio);
 
 	return lruvec;
 }
 
-struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
+/**
+ * folio_lruvec_lock_irqsave - Lock the lruvec for a folio.
+ * @folio: Pointer to the folio.
+ * @flags: Pointer to irqsave flags.
+ *
+ * These functions are safe to use under any of the following conditions:
+ * - folio locked
+ * - folio_test_lru false
+ * - folio_memcg_lock()
+ * - folio frozen (refcount of 0)
+ *
+ * Return: The lruvec this folio is on with its lock held and interrupts
+ * disabled.
+ */
+struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
+		unsigned long *flags)
 {
-	struct folio *folio = page_folio(page);
-	struct lruvec *lruvec;
+	struct lruvec *lruvec = folio_lruvec(folio);
 
-	lruvec = folio_lruvec(folio);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
-
-	lruvec_memcg_debug(lruvec, page);
+	lruvec_memcg_debug(lruvec, folio);
 
 	return lruvec;
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 6aebd1747251..059556dbefec 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -34,7 +34,7 @@
  *                   mapping->private_lock (in __set_page_dirty_buffers)
  *                     lock_page_memcg move_lock (in __set_page_dirty_buffers)
  *                       i_pages lock (widely used)
- *                         lruvec->lru_lock (in lock_page_lruvec_irq)
+ *                         lruvec->lru_lock (in folio_lruvec_lock_irq)
  *                   inode->i_lock (in set_page_dirty's __mark_inode_dirty)
  *                   bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
  *                     sb_lock (within inode_lock in fs/fs-writeback.c)
diff --git a/mm/swap.c b/mm/swap.c
index 65a74c89e7cf..d1fc964def12 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -80,10 +80,11 @@ static DEFINE_PER_CPU(struct lru_pvecs, lru_pvecs) = {
 static void __page_cache_release(struct page *page)
 {
 	if (PageLRU(page)) {
+		struct folio *folio = page_folio(page);
 		struct lruvec *lruvec;
 		unsigned long flags;
 
-		lruvec = lock_page_lruvec_irqsave(page, &flags);
+		lruvec = folio_lruvec_lock_irqsave(folio, &flags);
 		del_page_from_lru_list(page, lruvec);
 		__clear_page_lru_flags(page);
 		unlock_page_lruvec_irqrestore(lruvec, flags);
@@ -350,11 +351,12 @@ static inline void activate_page_drain(int cpu)
 
 static void activate_page(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	struct lruvec *lruvec;
 
-	page = compound_head(page);
+	page = &folio->page;
 	if (TestClearPageLRU(page)) {
-		lruvec = lock_page_lruvec_irq(page);
+		lruvec = folio_lruvec_lock_irq(folio);
 		__activate_page(page, lruvec);
 		unlock_page_lruvec_irq(lruvec);
 		SetPageLRU(page);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74296c2d1fed..8694e1549bcd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2090,6 +2090,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
  */
 int isolate_lru_page(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	int ret = -EBUSY;
 
 	VM_BUG_ON_PAGE(!page_count(page), page);
@@ -2099,7 +2100,7 @@ int isolate_lru_page(struct page *page)
 		struct lruvec *lruvec;
 
 		get_page(page);
-		lruvec = lock_page_lruvec_irq(page);
+		lruvec = folio_lruvec_lock_irq(folio);
 		del_page_from_lru_list(page, lruvec);
 		unlock_page_lruvec_irq(lruvec);
 		ret = 0;
-- 
cgit v1.2.3


From 0de340cbed3359423e38ed49242ac9d6986b5cfd Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 29 Jun 2021 22:27:31 -0400
Subject: mm/memcg: Add folio_lruvec_relock_irq() and
 folio_lruvec_relock_irqsave()

These are the folio equivalents of relock_page_lruvec_irq() and
folio_lruvec_relock_irqsave().  Also convert page_matches_lruvec()
to folio_matches_lruvec().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 17 ++++++++---------
 mm/mlock.c                 |  3 ++-
 mm/swap.c                  | 13 ++++++++-----
 mm/vmscan.c                |  5 +++--
 4 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 30d2cd7b5c9e..05094eaf1d61 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1568,19 +1568,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
 }
 
 /* Test requires a stable page->memcg binding, see page_memcg() */
-static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
+static inline bool folio_matches_lruvec(struct folio *folio,
+		struct lruvec *lruvec)
 {
-	return lruvec_pgdat(lruvec) == page_pgdat(page) &&
-	       lruvec_memcg(lruvec) == page_memcg(page);
+	return lruvec_pgdat(lruvec) == folio_pgdat(folio) &&
+	       lruvec_memcg(lruvec) == folio_memcg(folio);
 }
 
 /* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irq(struct folio *folio,
 		struct lruvec *locked_lruvec)
 {
-	struct folio *folio = page_folio(page);
 	if (locked_lruvec) {
-		if (page_matches_lruvec(page, locked_lruvec))
+		if (folio_matches_lruvec(folio, locked_lruvec))
 			return locked_lruvec;
 
 		unlock_page_lruvec_irq(locked_lruvec);
@@ -1590,12 +1590,11 @@ static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
 }
 
 /* Don't lock again iff page's lruvec locked */
-static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
+static inline struct lruvec *folio_lruvec_relock_irqsave(struct folio *folio,
 		struct lruvec *locked_lruvec, unsigned long *flags)
 {
-	struct folio *folio = page_folio(page);
 	if (locked_lruvec) {
-		if (page_matches_lruvec(page, locked_lruvec))
+		if (folio_matches_lruvec(folio, locked_lruvec))
 			return locked_lruvec;
 
 		unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
diff --git a/mm/mlock.c b/mm/mlock.c
index 16d2ee160d43..e263d62ae2d0 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -271,6 +271,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 	/* Phase 1: page isolation */
 	for (i = 0; i < nr; i++) {
 		struct page *page = pvec->pages[i];
+		struct folio *folio = page_folio(page);
 
 		if (TestClearPageMlocked(page)) {
 			/*
@@ -278,7 +279,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
 			 * so we can spare the get_page() here.
 			 */
 			if (TestClearPageLRU(page)) {
-				lruvec = relock_page_lruvec_irq(page, lruvec);
+				lruvec = folio_lruvec_relock_irq(folio, lruvec);
 				del_page_from_lru_list(page, lruvec);
 				continue;
 			} else
diff --git a/mm/swap.c b/mm/swap.c
index d1fc964def12..57791ae80f2e 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -189,12 +189,13 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
+		struct folio *folio = page_folio(page);
 
 		/* block memcg migration during page moving between lru */
 		if (!TestClearPageLRU(page))
 			continue;
 
-		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
+		lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags);
 		(*move_fn)(page, lruvec);
 
 		SetPageLRU(page);
@@ -893,11 +894,12 @@ void release_pages(struct page **pages, int nr)
 	int i;
 	LIST_HEAD(pages_to_free);
 	struct lruvec *lruvec = NULL;
-	unsigned long flags;
+	unsigned long flags = 0;
 	unsigned int lock_batch;
 
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
+		struct folio *folio = page_folio(page);
 
 		/*
 		 * Make sure the IRQ-safe lock-holding time does not get
@@ -909,7 +911,7 @@ void release_pages(struct page **pages, int nr)
 			lruvec = NULL;
 		}
 
-		page = compound_head(page);
+		page = &folio->page;
 		if (is_huge_zero_page(page))
 			continue;
 
@@ -948,7 +950,7 @@ void release_pages(struct page **pages, int nr)
 		if (PageLRU(page)) {
 			struct lruvec *prev_lruvec = lruvec;
 
-			lruvec = relock_page_lruvec_irqsave(page, lruvec,
+			lruvec = folio_lruvec_relock_irqsave(folio, lruvec,
 									&flags);
 			if (prev_lruvec != lruvec)
 				lock_batch = 0;
@@ -1052,8 +1054,9 @@ void __pagevec_lru_add(struct pagevec *pvec)
 
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
+		struct folio *folio = page_folio(page);
 
-		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
+		lruvec = folio_lruvec_relock_irqsave(folio, lruvec, &flags);
 		__pagevec_lru_add_fn(page, lruvec);
 	}
 	if (lruvec)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8694e1549bcd..306229c4313f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2200,7 +2200,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec,
 		 * All pages were isolated from the same lruvec (and isolation
 		 * inhibits memcg migration).
 		 */
-		VM_BUG_ON_PAGE(!page_matches_lruvec(page, lruvec), page);
+		VM_BUG_ON_PAGE(!folio_matches_lruvec(page_folio(page), lruvec), page);
 		add_page_to_lru_list(page, lruvec);
 		nr_pages = thp_nr_pages(page);
 		nr_moved += nr_pages;
@@ -4666,6 +4666,7 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 
 	for (i = 0; i < pvec->nr; i++) {
 		struct page *page = pvec->pages[i];
+		struct folio *folio = page_folio(page);
 		int nr_pages;
 
 		if (PageTransTail(page))
@@ -4678,7 +4679,7 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 		if (!TestClearPageLRU(page))
 			continue;
 
-		lruvec = relock_page_lruvec_irq(page, lruvec);
+		lruvec = folio_lruvec_relock_irq(folio, lruvec);
 		if (page_evictable(page) && PageUnevictable(page)) {
 			del_page_from_lru_list(page, lruvec);
 			ClearPageUnevictable(page);
-- 
cgit v1.2.3


From c5ce619a77ce00d537ef512e7a823c99ce890a40 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 4 May 2021 17:19:13 -0400
Subject: mm/workingset: Convert workingset_activation to take a folio

This function already assumed it was being passed a head page.  No real
change here, except that thp_nr_pages() compiles away on kernels with
THP compiled out while folio_nr_pages() is always present.  Also convert
page_memcg_rcu() to folio_memcg_rcu().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h | 22 ++++++++++++----------
 include/linux/swap.h       |  2 +-
 mm/swap.c                  |  2 +-
 mm/workingset.c            | 11 ++++-------
 4 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 05094eaf1d61..7bd78c13d1fa 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -457,20 +457,22 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
 	return folio_memcg(page_folio(page));
 }
 
-/*
- * page_memcg_rcu - locklessly get the memory cgroup associated with a page
- * @page: a pointer to the page struct
+/**
+ * folio_memcg_rcu - Locklessly get the memory cgroup associated with a folio.
+ * @folio: Pointer to the folio.
  *
- * Returns a pointer to the memory cgroup associated with the page,
- * or NULL. This function assumes that the page is known to have a
+ * This function assumes that the folio is known to have a
  * proper memory cgroup pointer. It's not safe to call this function
- * against some type of pages, e.g. slab pages or ex-slab pages.
+ * against some type of folios, e.g. slab folios or ex-slab folios.
+ *
+ * Return: A pointer to the memory cgroup associated with the folio,
+ * or NULL.
  */
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
 {
-	unsigned long memcg_data = READ_ONCE(page->memcg_data);
+	unsigned long memcg_data = READ_ONCE(folio->memcg_data);
 
-	VM_BUG_ON_PAGE(PageSlab(page), page);
+	VM_BUG_ON_FOLIO(folio_test_slab(folio), folio);
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
 	if (memcg_data & MEMCG_DATA_KMEM) {
@@ -1158,7 +1160,7 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
 	return NULL;
 }
 
-static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+static inline struct mem_cgroup *folio_memcg_rcu(struct folio *folio)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	return NULL;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c7ecd3ad8e2e..0fc84797623f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -330,7 +330,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
 void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
 void workingset_refault(struct page *page, void *shadow);
-void workingset_activation(struct page *page);
+void workingset_activation(struct folio *folio);
 
 /* Only track the nodes of mappings with shadow entries */
 void workingset_update_node(struct xa_node *node);
diff --git a/mm/swap.c b/mm/swap.c
index 57791ae80f2e..5c688897c013 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -429,7 +429,7 @@ void mark_page_accessed(struct page *page)
 		else
 			__lru_cache_activate_page(page);
 		ClearPageReferenced(page);
-		workingset_activation(page);
+		workingset_activation(page_folio(page));
 	}
 	if (page_is_idle(page))
 		clear_page_idle(page);
diff --git a/mm/workingset.c b/mm/workingset.c
index 3deb408a240d..1c96ed525a0e 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -393,13 +393,11 @@ out:
 
 /**
  * workingset_activation - note a page activation
- * @page: page that is being activated
+ * @folio: Folio that is being activated.
  */
-void workingset_activation(struct page *page)
+void workingset_activation(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
 	struct mem_cgroup *memcg;
-	struct lruvec *lruvec;
 
 	rcu_read_lock();
 	/*
@@ -409,11 +407,10 @@ void workingset_activation(struct page *page)
 	 * XXX: See workingset_refault() - this should return
 	 * root_mem_cgroup even for !CONFIG_MEMCG.
 	 */
-	memcg = page_memcg_rcu(page);
+	memcg = folio_memcg_rcu(folio);
 	if (!mem_cgroup_disabled() && !memcg)
 		goto out;
-	lruvec = folio_lruvec(folio);
-	workingset_age_nonresident(lruvec, thp_nr_pages(page));
+	workingset_age_nonresident(folio_lruvec(folio), folio_nr_pages(folio));
 out:
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From bf6bd276b374d44f6e7146d52aa6097eb91384a3 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 7 May 2021 10:55:27 -0400
Subject: mm: Add folio_pfn()

This is the folio equivalent of page_to_pfn().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 04e41d4d85ea..47143f3e7f0a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1624,6 +1624,20 @@ static inline unsigned long page_to_section(const struct page *page)
 }
 #endif
 
+/**
+ * folio_pfn - Return the Page Frame Number of a folio.
+ * @folio: The folio.
+ *
+ * A folio may contain multiple pages.  The pages have consecutive
+ * Page Frame Numbers.
+ *
+ * Return: The Page Frame Number of the first page in the folio.
+ */
+static inline unsigned long folio_pfn(struct folio *folio)
+{
+	return page_to_pfn(&folio->page);
+}
+
 /* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
 #ifdef CONFIG_MIGRATION
 static inline bool is_pinnable_page(struct page *page)
-- 
cgit v1.2.3


From 2a5a8fa8b23144d14567d6f8293dd6fbeecee393 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 Sep 2021 18:16:01 +0200
Subject: leds: trigger: use RCU to protect the led_cdevs list

Even with the previous commit 27af8e2c90fb
("leds: trigger: fix potential deadlock with libata")
to this file, we still get lockdep unhappy, and Boqun
explained the report here:
https://lore.kernel.org/r/YNA+d1X4UkoQ7g8a@boqun-archlinux

Effectively, this means that the read_lock_irqsave() isn't
enough here because another CPU might be trying to do a
write lock, and thus block the readers.

This is all pretty messy, but it doesn't seem right that
the LEDs framework imposes some locking requirements on
users, in particular we'd have to make the spinlock in the
iwlwifi driver always disable IRQs, even if we don't need
that for any other reason, just to avoid this deadlock.

Since writes to the led_cdevs list are rare (and are done
by userspace), just switch the list to RCU. This costs a
synchronize_rcu() at removal time so we can ensure things
are correct, but that seems like a small price to pay for
getting lock-free iterations and no deadlocks (nor any
locking requirements imposed on users.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/led-triggers.c | 41 +++++++++++++++++++++--------------------
 include/linux/leds.h        |  2 +-
 2 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c
index 4e7b78a84149..072491d3e17b 100644
--- a/drivers/leds/led-triggers.c
+++ b/drivers/leds/led-triggers.c
@@ -157,7 +157,6 @@ EXPORT_SYMBOL_GPL(led_trigger_read);
 /* Caller must ensure led_cdev->trigger_lock held */
 int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 {
-	unsigned long flags;
 	char *event = NULL;
 	char *envp[2];
 	const char *name;
@@ -171,10 +170,13 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 
 	/* Remove any existing trigger */
 	if (led_cdev->trigger) {
-		write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
-		list_del(&led_cdev->trig_list);
-		write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock,
-			flags);
+		spin_lock(&led_cdev->trigger->leddev_list_lock);
+		list_del_rcu(&led_cdev->trig_list);
+		spin_unlock(&led_cdev->trigger->leddev_list_lock);
+
+		/* ensure it's no longer visible on the led_cdevs list */
+		synchronize_rcu();
+
 		cancel_work_sync(&led_cdev->set_brightness_work);
 		led_stop_software_blink(led_cdev);
 		if (led_cdev->trigger->deactivate)
@@ -186,9 +188,9 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig)
 		led_set_brightness(led_cdev, LED_OFF);
 	}
 	if (trig) {
-		write_lock_irqsave(&trig->leddev_list_lock, flags);
-		list_add_tail(&led_cdev->trig_list, &trig->led_cdevs);
-		write_unlock_irqrestore(&trig->leddev_list_lock, flags);
+		spin_lock(&trig->leddev_list_lock);
+		list_add_tail_rcu(&led_cdev->trig_list, &trig->led_cdevs);
+		spin_unlock(&trig->leddev_list_lock);
 		led_cdev->trigger = trig;
 
 		if (trig->activate)
@@ -223,9 +225,10 @@ err_add_groups:
 		trig->deactivate(led_cdev);
 err_activate:
 
-	write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags);
-	list_del(&led_cdev->trig_list);
-	write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, flags);
+	spin_lock(&led_cdev->trigger->leddev_list_lock);
+	list_del_rcu(&led_cdev->trig_list);
+	spin_unlock(&led_cdev->trigger->leddev_list_lock);
+	synchronize_rcu();
 	led_cdev->trigger = NULL;
 	led_cdev->trigger_data = NULL;
 	led_set_brightness(led_cdev, LED_OFF);
@@ -285,7 +288,7 @@ int led_trigger_register(struct led_trigger *trig)
 	struct led_classdev *led_cdev;
 	struct led_trigger *_trig;
 
-	rwlock_init(&trig->leddev_list_lock);
+	spin_lock_init(&trig->leddev_list_lock);
 	INIT_LIST_HEAD(&trig->led_cdevs);
 
 	down_write(&triggers_list_lock);
@@ -378,15 +381,14 @@ void led_trigger_event(struct led_trigger *trig,
 			enum led_brightness brightness)
 {
 	struct led_classdev *led_cdev;
-	unsigned long flags;
 
 	if (!trig)
 		return;
 
-	read_lock_irqsave(&trig->leddev_list_lock, flags);
-	list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list)
 		led_set_brightness(led_cdev, brightness);
-	read_unlock_irqrestore(&trig->leddev_list_lock, flags);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(led_trigger_event);
 
@@ -397,20 +399,19 @@ static void led_trigger_blink_setup(struct led_trigger *trig,
 			     int invert)
 {
 	struct led_classdev *led_cdev;
-	unsigned long flags;
 
 	if (!trig)
 		return;
 
-	read_lock_irqsave(&trig->leddev_list_lock, flags);
-	list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) {
 		if (oneshot)
 			led_blink_set_oneshot(led_cdev, delay_on, delay_off,
 					      invert);
 		else
 			led_blink_set(led_cdev, delay_on, delay_off);
 	}
-	read_unlock_irqrestore(&trig->leddev_list_lock, flags);
+	rcu_read_unlock();
 }
 
 void led_trigger_blink(struct led_trigger *trig,
diff --git a/include/linux/leds.h b/include/linux/leds.h
index a0b730be40ad..ba4861ec73d3 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -360,7 +360,7 @@ struct led_trigger {
 	struct led_hw_trigger_type *trigger_type;
 
 	/* LEDs under control by this trigger (for simple triggers) */
-	rwlock_t	  leddev_list_lock;
+	spinlock_t	  leddev_list_lock;
 	struct list_head  led_cdevs;
 
 	/* Link to next registered trigger */
-- 
cgit v1.2.3


From 4795448117824cc4900d696f17d1516c56371045 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Sat, 18 Sep 2021 14:53:44 +0200
Subject: PCI: ACPI: Drop acpi_pci_bus

The acpi_pci_bus structure was used primarily for running
acpi_pci_find_companion() during PCI device objects registration,
but after commit 375553a93201 ("PCI: Setup ACPI fwnode early and at
the same time with OF") that function is called by pci_setup_device()
via pci_set_acpi_fwnode(), which happens before calling
pci_device_add() on the new PCI device object, so its ACPI companion
has been set already when acpi_device_notify() runs and it will never
call ->find_companion() from acpi_pci_bus.

For this reason, modify acpi_device_notify() and
acpi_device_notify_remove() to call pci_acpi_setup() and
pci_acpi_cleanup(), respectively, directly on PCI device objects
and drop acpi_pci_bus altogether.

While at it, notice that pci_acpi_setup() and pci_acpi_cleanup()
can obtain the ACPI companion pointer, which is guaranteed to not
be NULL, from their callers and modify them to work that way so
as to reduce the number of redundant checks somewhat.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Ferry Toth <fntoth@gmail.com>
---
 drivers/acpi/glue.c      | 34 ++++++++++++++++++++++------------
 drivers/pci/pci-acpi.c   | 31 +++----------------------------
 include/linux/pci-acpi.h |  8 ++++++++
 3 files changed, 33 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 7a33a6d985f8..68768d6c8a47 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -17,6 +17,8 @@
 #include <linux/rwsem.h>
 #include <linux/acpi.h>
 #include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/pci-acpi.h>
 #include <linux/platform_device.h>
 
 #include "internal.h"
@@ -307,13 +309,17 @@ void acpi_device_notify(struct device *dev)
 	}
 	adev = ACPI_COMPANION(dev);
 
-	if (dev_is_platform(dev))
-		acpi_configure_pmsi_domain(dev);
+	if (dev_is_pci(dev)) {
+		pci_acpi_setup(dev, adev);
+	} else {
+		if (dev_is_platform(dev))
+			acpi_configure_pmsi_domain(dev);
 
-	if (type && type->setup)
-		type->setup(dev);
-	else if (adev->handler && adev->handler->bind)
-		adev->handler->bind(dev);
+		if (type && type->setup)
+			type->setup(dev);
+		else if (adev->handler && adev->handler->bind)
+			adev->handler->bind(dev);
+	}
 
 	acpi_handle_debug(ACPI_HANDLE(dev), "Bound to device %s\n",
 			  dev_name(dev));
@@ -327,16 +333,20 @@ err:
 void acpi_device_notify_remove(struct device *dev)
 {
 	struct acpi_device *adev = ACPI_COMPANION(dev);
-	struct acpi_bus_type *type;
 
 	if (!adev)
 		return;
 
-	type = acpi_get_bus_type(dev);
-	if (type && type->cleanup)
-		type->cleanup(dev);
-	else if (adev->handler && adev->handler->unbind)
-		adev->handler->unbind(dev);
+	if (dev_is_pci(dev)) {
+		pci_acpi_cleanup(dev, adev);
+	} else {
+		struct acpi_bus_type *type = acpi_get_bus_type(dev);
+
+		if (type && type->cleanup)
+			type->cleanup(dev);
+		else if (adev->handler && adev->handler->unbind)
+			adev->handler->unbind(dev);
+	}
 
 	acpi_unbind_one(dev);
 }
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index 0f40943a9a18..d6720ebb252d 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -1353,13 +1353,9 @@ static void pci_acpi_set_external_facing(struct pci_dev *dev)
 		dev->external_facing = 1;
 }
 
-static void pci_acpi_setup(struct device *dev)
+void pci_acpi_setup(struct device *dev, struct acpi_device *adev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct acpi_device *adev = ACPI_COMPANION(dev);
-
-	if (!adev)
-		return;
 
 	pci_acpi_optimize_delay(pci_dev, adev->handle);
 	pci_acpi_set_external_facing(pci_dev);
@@ -1383,14 +1379,10 @@ static void pci_acpi_setup(struct device *dev)
 	acpi_device_power_add_dependent(adev, dev);
 }
 
-static void pci_acpi_cleanup(struct device *dev)
+void pci_acpi_cleanup(struct device *dev, struct acpi_device *adev)
 {
-	struct acpi_device *adev = ACPI_COMPANION(dev);
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 
-	if (!adev)
-		return;
-
 	pci_acpi_remove_edr_notifier(pci_dev);
 	pci_acpi_remove_pm_notifier(adev);
 	if (adev->wakeup.flags.valid) {
@@ -1402,20 +1394,6 @@ static void pci_acpi_cleanup(struct device *dev)
 	}
 }
 
-static bool pci_acpi_bus_match(struct device *dev)
-{
-	return dev_is_pci(dev);
-}
-
-static struct acpi_bus_type acpi_pci_bus = {
-	.name = "PCI",
-	.match = pci_acpi_bus_match,
-	.find_companion = acpi_pci_find_companion,
-	.setup = pci_acpi_setup,
-	.cleanup = pci_acpi_cleanup,
-};
-
-
 static struct fwnode_handle *(*pci_msi_get_fwnode_cb)(struct device *dev);
 
 /**
@@ -1457,8 +1435,6 @@ struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus)
 
 static int __init acpi_pci_init(void)
 {
-	int ret;
-
 	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_MSI) {
 		pr_info("ACPI FADT declares the system doesn't support MSI, so disable it\n");
 		pci_no_msi();
@@ -1469,8 +1445,7 @@ static int __init acpi_pci_init(void)
 		pcie_no_aspm();
 	}
 
-	ret = register_acpi_bus_type(&acpi_pci_bus);
-	if (ret)
+	if (acpi_pci_disabled)
 		return 0;
 
 	pci_set_platform_pm(&acpi_pci_platform_pm);
diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h
index f16de399d2de..078225b514d4 100644
--- a/include/linux/pci-acpi.h
+++ b/include/linux/pci-acpi.h
@@ -84,6 +84,14 @@ extern struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
 void acpi_pci_add_bus(struct pci_bus *bus);
 void acpi_pci_remove_bus(struct pci_bus *bus);
 
+#ifdef CONFIG_PCI
+void pci_acpi_setup(struct device *dev, struct acpi_device *adev);
+void pci_acpi_cleanup(struct device *dev, struct acpi_device *adev);
+#else
+static inline void pci_acpi_setup(struct device *dev, struct acpi_device *adev) {}
+static inline void pci_acpi_cleanup(struct device *dev, struct acpi_device *adev) {}
+#endif
+
 #ifdef	CONFIG_ACPI_PCI_SLOT
 void acpi_pci_slot_init(void);
 void acpi_pci_slot_enumerate(struct pci_bus *bus);
-- 
cgit v1.2.3


From e765f13ed126fe7e41d1a6e3c60d754cd6c2af93 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 22 Sep 2021 19:34:30 +0200
Subject: nvdimm/pmem: move dax_attribute_group from dax to pmem

dax_attribute_group is only used by the pmem driver, and can avoid the
completely pointless lookup by the disk name if moved there.  This
leaves just a single caller of dax_get_by_host, so move dax_get_by_host
into the same ifdef block as that caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/r/20210922173431.2454024-3-hch@lst.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c   | 100 +++++++++-----------------------------------------
 drivers/nvdimm/pmem.c |  43 ++++++++++++++++++++++
 include/linux/dax.h   |   2 -
 3 files changed, 61 insertions(+), 84 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index fc89e91beea7..b882cf8106ea 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -63,6 +63,24 @@ static int dax_host_hash(const char *host)
 	return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
 }
 
+#ifdef CONFIG_BLOCK
+#include <linux/blkdev.h>
+
+int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
+		pgoff_t *pgoff)
+{
+	sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
+	phys_addr_t phys_off = (start_sect + sector) * 512;
+
+	if (pgoff)
+		*pgoff = PHYS_PFN(phys_off);
+	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
+		return -EINVAL;
+	return 0;
+}
+EXPORT_SYMBOL(bdev_dax_pgoff);
+
+#if IS_ENABLED(CONFIG_FS_DAX)
 /**
  * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
  * @host: alternate name for the device registered by a dax driver
@@ -94,24 +112,6 @@ static struct dax_device *dax_get_by_host(const char *host)
 	return found;
 }
 
-#ifdef CONFIG_BLOCK
-#include <linux/blkdev.h>
-
-int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
-		pgoff_t *pgoff)
-{
-	sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
-	phys_addr_t phys_off = (start_sect + sector) * 512;
-
-	if (pgoff)
-		*pgoff = PHYS_PFN(phys_off);
-	if (phys_off % PAGE_SIZE || size % PAGE_SIZE)
-		return -EINVAL;
-	return 0;
-}
-EXPORT_SYMBOL(bdev_dax_pgoff);
-
-#if IS_ENABLED(CONFIG_FS_DAX)
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
 	if (!blk_queue_dax(bdev->bd_disk->queue))
@@ -231,70 +231,6 @@ enum dax_device_flags {
 	DAXDEV_SYNC,
 };
 
-static ssize_t write_cache_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
-	ssize_t rc;
-
-	WARN_ON_ONCE(!dax_dev);
-	if (!dax_dev)
-		return -ENXIO;
-
-	rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev));
-	put_dax(dax_dev);
-	return rc;
-}
-
-static ssize_t write_cache_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t len)
-{
-	bool write_cache;
-	int rc = strtobool(buf, &write_cache);
-	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
-
-	WARN_ON_ONCE(!dax_dev);
-	if (!dax_dev)
-		return -ENXIO;
-
-	if (rc)
-		len = rc;
-	else
-		dax_write_cache(dax_dev, write_cache);
-
-	put_dax(dax_dev);
-	return len;
-}
-static DEVICE_ATTR_RW(write_cache);
-
-static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
-{
-	struct device *dev = container_of(kobj, typeof(*dev), kobj);
-	struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
-
-	WARN_ON_ONCE(!dax_dev);
-	if (!dax_dev)
-		return 0;
-
-#ifndef CONFIG_ARCH_HAS_PMEM_API
-	if (a == &dev_attr_write_cache.attr)
-		return 0;
-#endif
-	return a->mode;
-}
-
-static struct attribute *dax_attributes[] = {
-	&dev_attr_write_cache.attr,
-	NULL,
-};
-
-struct attribute_group dax_attribute_group = {
-	.name = "dax",
-	.attrs = dax_attributes,
-	.is_visible = dax_visible,
-};
-EXPORT_SYMBOL_GPL(dax_attribute_group);
-
 /**
  * dax_direct_access() - translate a device pgoff to an absolute pfn
  * @dax_dev: a dax_device instance representing the logical memory range
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 72de88ff0d30..bccbc95f6d70 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -328,6 +328,49 @@ static const struct dax_operations pmem_dax_ops = {
 	.zero_page_range = pmem_dax_zero_page_range,
 };
 
+static ssize_t write_cache_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct pmem_device *pmem = dev_to_disk(dev)->private_data;
+
+	return sprintf(buf, "%d\n", !!dax_write_cache_enabled(pmem->dax_dev));
+}
+
+static ssize_t write_cache_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct pmem_device *pmem = dev_to_disk(dev)->private_data;
+	bool write_cache;
+	int rc;
+
+	rc = strtobool(buf, &write_cache);
+	if (rc)
+		return rc;
+	dax_write_cache(pmem->dax_dev, write_cache);
+	return len;
+}
+static DEVICE_ATTR_RW(write_cache);
+
+static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+#ifndef CONFIG_ARCH_HAS_PMEM_API
+	if (a == &dev_attr_write_cache.attr)
+		return 0;
+#endif
+	return a->mode;
+}
+
+static struct attribute *dax_attributes[] = {
+	&dev_attr_write_cache.attr,
+	NULL,
+};
+
+static const struct attribute_group dax_attribute_group = {
+	.name		= "dax",
+	.attrs		= dax_attributes,
+	.is_visible	= dax_visible,
+};
+
 static const struct attribute_group *pmem_attribute_groups[] = {
 	&dax_attribute_group,
 	NULL,
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 2619d94c308d..8623caa67388 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,8 +38,6 @@ struct dax_operations {
 	int (*zero_page_range)(struct dax_device *, pgoff_t, size_t);
 };
 
-extern struct attribute_group dax_attribute_group;
-
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *alloc_dax(void *private, const char *host,
 		const struct dax_operations *ops, unsigned long flags);
-- 
cgit v1.2.3


From 7b4d7894c65bd1e83b2f021d7944a46bdd64836a Mon Sep 17 00:00:00 2001
From: Deepak Kumar Singh <deesin@codeaurora.org>
Date: Tue, 31 Aug 2021 20:00:27 +0530
Subject: soc: qcom: aoss: Expose send for generic usecase

Not all upcoming usecases will have an interface to allow the aoss
driver to hook onto. Expose the send api and create a get function to
enable drivers to send their own messages to aoss.

Signed-off-by: Chris Lew <clew@codeaurora.org>
Signed-off-by: Deepak Kumar Singh <deesin@codeaurora.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/1630420228-31075-2-git-send-email-deesin@codeaurora.org
---
 drivers/soc/qcom/qcom_aoss.c       | 54 +++++++++++++++++++++++++++++++++++++-
 include/linux/soc/qcom/qcom_aoss.h | 38 +++++++++++++++++++++++++++
 2 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/soc/qcom/qcom_aoss.h

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c
index 536c3e4114fb..bb336cc8df6f 100644
--- a/drivers/soc/qcom/qcom_aoss.c
+++ b/drivers/soc/qcom/qcom_aoss.c
@@ -8,10 +8,12 @@
 #include <linux/io.h>
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/thermal.h>
 #include <linux/slab.h>
+#include <linux/soc/qcom/qcom_aoss.h>
 
 #define QMP_DESC_MAGIC			0x0
 #define QMP_DESC_VERSION		0x4
@@ -223,11 +225,14 @@ static bool qmp_message_empty(struct qmp *qmp)
  *
  * Return: 0 on success, negative errno on failure
  */
-static int qmp_send(struct qmp *qmp, const void *data, size_t len)
+int qmp_send(struct qmp *qmp, const void *data, size_t len)
 {
 	long time_left;
 	int ret;
 
+	if (WARN_ON(IS_ERR_OR_NULL(qmp) || !data))
+		return -EINVAL;
+
 	if (WARN_ON(len + sizeof(u32) > qmp->size))
 		return -EINVAL;
 
@@ -261,6 +266,7 @@ static int qmp_send(struct qmp *qmp, const void *data, size_t len)
 
 	return ret;
 }
+EXPORT_SYMBOL(qmp_send);
 
 static int qmp_qdss_clk_prepare(struct clk_hw *hw)
 {
@@ -519,6 +525,51 @@ static void qmp_cooling_devices_remove(struct qmp *qmp)
 		thermal_cooling_device_unregister(qmp->cooling_devs[i].cdev);
 }
 
+/**
+ * qmp_get() - get a qmp handle from a device
+ * @dev: client device pointer
+ *
+ * Return: handle to qmp device on success, ERR_PTR() on failure
+ */
+struct qmp *qmp_get(struct device *dev)
+{
+	struct platform_device *pdev;
+	struct device_node *np;
+	struct qmp *qmp;
+
+	if (!dev || !dev->of_node)
+		return ERR_PTR(-EINVAL);
+
+	np = of_parse_phandle(dev->of_node, "qcom,qmp", 0);
+	if (!np)
+		return ERR_PTR(-ENODEV);
+
+	pdev = of_find_device_by_node(np);
+	of_node_put(np);
+	if (!pdev)
+		return ERR_PTR(-EINVAL);
+
+	qmp = platform_get_drvdata(pdev);
+
+	return qmp ? qmp : ERR_PTR(-EPROBE_DEFER);
+}
+EXPORT_SYMBOL(qmp_get);
+
+/**
+ * qmp_put() - release a qmp handle
+ * @qmp: qmp handle obtained from qmp_get()
+ */
+void qmp_put(struct qmp *qmp)
+{
+	/*
+	 * Match get_device() inside of_find_device_by_node() in
+	 * qmp_get()
+	 */
+	if (!IS_ERR_OR_NULL(qmp))
+		put_device(qmp->dev);
+}
+EXPORT_SYMBOL(qmp_put);
+
 static int qmp_probe(struct platform_device *pdev)
 {
 	struct resource *res;
@@ -615,6 +666,7 @@ static struct platform_driver qmp_driver = {
 	.driver = {
 		.name		= "qcom_aoss_qmp",
 		.of_match_table	= qmp_dt_match,
+		.suppress_bind_attrs = true,
 	},
 	.probe = qmp_probe,
 	.remove	= qmp_remove,
diff --git a/include/linux/soc/qcom/qcom_aoss.h b/include/linux/soc/qcom/qcom_aoss.h
new file mode 100644
index 000000000000..3c2a82e606f8
--- /dev/null
+++ b/include/linux/soc/qcom/qcom_aoss.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef __QCOM_AOSS_H__
+#define __QCOM_AOSS_H__
+
+#include <linux/err.h>
+#include <linux/device.h>
+
+struct qmp;
+
+#if IS_ENABLED(CONFIG_QCOM_AOSS_QMP)
+
+int qmp_send(struct qmp *qmp, const void *data, size_t len);
+struct qmp *qmp_get(struct device *dev);
+void qmp_put(struct qmp *qmp);
+
+#else
+
+static inline int qmp_send(struct qmp *qmp, const void *data, size_t len)
+{
+	return -ENODEV;
+}
+
+static inline struct qmp *qmp_get(struct device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+
+static inline void qmp_put(struct qmp *qmp)
+{
+}
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From 99139b80c1b3d73026ed8be2de42c52e2976ab64 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 27 Sep 2021 14:55:40 +0100
Subject: soc: qcom: apr: make code more reuseable

APR and other packet routers like GPR are pretty much same and
interact with other drivers in similar way.

Ex: GPR ports can be considered as APR services, only difference
is they are allocated dynamically.

Other difference is packet layout, which should not matter
with the apis abstracted. Apart from this the rest of the
functionality is pretty much identical across APR and GPR.

Make the apr code more reusable by abstracting it service level,
rather than device level so that we do not need to write
new drivers for other new packet routers like GPR.

This patch is in preparation to add GPR support to this driver.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210927135559.738-4-srinivas.kandagatla@linaro.org
---
 drivers/soc/qcom/apr.c       | 129 ++++++++++++++++++++++++++-----------------
 include/linux/soc/qcom/apr.h |  12 +++-
 2 files changed, 90 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
index 475a57b435b2..bfad71e540ad 100644
--- a/drivers/soc/qcom/apr.c
+++ b/drivers/soc/qcom/apr.c
@@ -15,13 +15,18 @@
 #include <linux/rpmsg.h>
 #include <linux/of.h>
 
-struct apr {
+enum {
+	PR_TYPE_APR = 0,
+};
+
+struct packet_router {
 	struct rpmsg_endpoint *ch;
 	struct device *dev;
 	spinlock_t svcs_lock;
 	spinlock_t rx_lock;
 	struct idr svcs_idr;
 	int dest_domain_id;
+	int type;
 	struct pdr_handle *pdr;
 	struct workqueue_struct *rxwq;
 	struct work_struct rx_work;
@@ -44,21 +49,21 @@ struct apr_rx_buf {
  */
 int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt)
 {
-	struct apr *apr = dev_get_drvdata(adev->dev.parent);
+	struct packet_router *apr = dev_get_drvdata(adev->dev.parent);
 	struct apr_hdr *hdr;
 	unsigned long flags;
 	int ret;
 
-	spin_lock_irqsave(&adev->lock, flags);
+	spin_lock_irqsave(&adev->svc.lock, flags);
 
 	hdr = &pkt->hdr;
 	hdr->src_domain = APR_DOMAIN_APPS;
-	hdr->src_svc = adev->svc_id;
+	hdr->src_svc = adev->svc.id;
 	hdr->dest_domain = adev->domain_id;
-	hdr->dest_svc = adev->svc_id;
+	hdr->dest_svc = adev->svc.id;
 
 	ret = rpmsg_trysend(apr->ch, pkt, hdr->pkt_size);
-	spin_unlock_irqrestore(&adev->lock, flags);
+	spin_unlock_irqrestore(&adev->svc.lock, flags);
 
 	return ret ? ret : hdr->pkt_size;
 }
@@ -74,7 +79,7 @@ static void apr_dev_release(struct device *dev)
 static int apr_callback(struct rpmsg_device *rpdev, void *buf,
 				  int len, void *priv, u32 addr)
 {
-	struct apr *apr = dev_get_drvdata(&rpdev->dev);
+	struct packet_router *apr = dev_get_drvdata(&rpdev->dev);
 	struct apr_rx_buf *abuf;
 	unsigned long flags;
 
@@ -100,11 +105,11 @@ static int apr_callback(struct rpmsg_device *rpdev, void *buf,
 	return 0;
 }
 
-
-static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf)
+static int apr_do_rx_callback(struct packet_router *apr, struct apr_rx_buf *abuf)
 {
 	uint16_t hdr_size, msg_type, ver, svc_id;
-	struct apr_device *svc = NULL;
+	struct pkt_router_svc *svc;
+	struct apr_device *adev;
 	struct apr_driver *adrv = NULL;
 	struct apr_resp_pkt resp;
 	struct apr_hdr *hdr;
@@ -145,12 +150,15 @@ static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf)
 	svc_id = hdr->dest_svc;
 	spin_lock_irqsave(&apr->svcs_lock, flags);
 	svc = idr_find(&apr->svcs_idr, svc_id);
-	if (svc && svc->dev.driver)
-		adrv = to_apr_driver(svc->dev.driver);
+	if (svc && svc->dev->driver) {
+		adev = svc_to_apr_device(svc);
+		adrv = to_apr_driver(adev->dev.driver);
+	}
 	spin_unlock_irqrestore(&apr->svcs_lock, flags);
 
-	if (!adrv) {
-		dev_err(apr->dev, "APR: service is not registered\n");
+	if (!adrv || !adev) {
+		dev_err(apr->dev, "APR: service is not registered (%d)\n",
+			svc_id);
 		return -EINVAL;
 	}
 
@@ -164,20 +172,26 @@ static int apr_do_rx_callback(struct apr *apr, struct apr_rx_buf *abuf)
 	if (resp.payload_size > 0)
 		resp.payload = buf + hdr_size;
 
-	adrv->callback(svc, &resp);
+	adrv->callback(adev, &resp);
 
 	return 0;
 }
 
 static void apr_rxwq(struct work_struct *work)
 {
-	struct apr *apr = container_of(work, struct apr, rx_work);
+	struct packet_router *apr = container_of(work, struct packet_router, rx_work);
 	struct apr_rx_buf *abuf, *b;
 	unsigned long flags;
 
 	if (!list_empty(&apr->rx_list)) {
 		list_for_each_entry_safe(abuf, b, &apr->rx_list, node) {
-			apr_do_rx_callback(apr, abuf);
+			switch (apr->type) {
+			case PR_TYPE_APR:
+				apr_do_rx_callback(apr, abuf);
+				break;
+			default:
+				break;
+			}
 			spin_lock_irqsave(&apr->rx_lock, flags);
 			list_del(&abuf->node);
 			spin_unlock_irqrestore(&apr->rx_lock, flags);
@@ -201,7 +215,7 @@ static int apr_device_match(struct device *dev, struct device_driver *drv)
 
 	while (id->domain_id != 0 || id->svc_id != 0) {
 		if (id->domain_id == adev->domain_id &&
-		    id->svc_id == adev->svc_id)
+		    id->svc_id == adev->svc.id)
 			return 1;
 		id++;
 	}
@@ -221,14 +235,14 @@ static void apr_device_remove(struct device *dev)
 {
 	struct apr_device *adev = to_apr_device(dev);
 	struct apr_driver *adrv;
-	struct apr *apr = dev_get_drvdata(adev->dev.parent);
+	struct packet_router *apr = dev_get_drvdata(adev->dev.parent);
 
 	if (dev->driver) {
 		adrv = to_apr_driver(dev->driver);
 		if (adrv->remove)
 			adrv->remove(adev);
 		spin_lock(&apr->svcs_lock);
-		idr_remove(&apr->svcs_idr, adev->svc_id);
+		idr_remove(&apr->svcs_idr, adev->svc.id);
 		spin_unlock(&apr->svcs_lock);
 	}
 }
@@ -255,28 +269,39 @@ struct bus_type aprbus = {
 EXPORT_SYMBOL_GPL(aprbus);
 
 static int apr_add_device(struct device *dev, struct device_node *np,
-			  const struct apr_device_id *id)
+			  u32 svc_id, u32 domain_id)
 {
-	struct apr *apr = dev_get_drvdata(dev);
+	struct packet_router *apr = dev_get_drvdata(dev);
 	struct apr_device *adev = NULL;
+	struct pkt_router_svc *svc;
 	int ret;
 
 	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
 	if (!adev)
 		return -ENOMEM;
 
-	spin_lock_init(&adev->lock);
+	adev->svc_id = svc_id;
+	svc = &adev->svc;
+
+	svc->id = svc_id;
+	svc->pr = apr;
+	svc->priv = adev;
+	svc->dev = dev;
+	spin_lock_init(&svc->lock);
+
+	adev->domain_id = domain_id;
 
-	adev->svc_id = id->svc_id;
-	adev->domain_id = id->domain_id;
-	adev->version = id->svc_version;
 	if (np)
 		snprintf(adev->name, APR_NAME_SIZE, "%pOFn", np);
-	else
-		strscpy(adev->name, id->name, APR_NAME_SIZE);
 
-	dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name,
-		     id->domain_id, id->svc_id);
+	switch (apr->type) {
+	case PR_TYPE_APR:
+		dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name,
+			     domain_id, svc_id);
+		break;
+	default:
+		break;
+	}
 
 	adev->dev.bus = &aprbus;
 	adev->dev.parent = dev;
@@ -285,8 +310,7 @@ static int apr_add_device(struct device *dev, struct device_node *np,
 	adev->dev.driver = NULL;
 
 	spin_lock(&apr->svcs_lock);
-	idr_alloc(&apr->svcs_idr, adev, id->svc_id,
-		  id->svc_id + 1, GFP_ATOMIC);
+	idr_alloc(&apr->svcs_idr, svc, svc_id, svc_id + 1, GFP_ATOMIC);
 	spin_unlock(&apr->svcs_lock);
 
 	of_property_read_string_index(np, "qcom,protection-domain",
@@ -306,7 +330,7 @@ static int apr_add_device(struct device *dev, struct device_node *np,
 static int of_apr_add_pd_lookups(struct device *dev)
 {
 	const char *service_name, *service_path;
-	struct apr *apr = dev_get_drvdata(dev);
+	struct packet_router *apr = dev_get_drvdata(dev);
 	struct device_node *node;
 	struct pdr_service *pds;
 	int ret;
@@ -336,13 +360,14 @@ static int of_apr_add_pd_lookups(struct device *dev)
 
 static void of_register_apr_devices(struct device *dev, const char *svc_path)
 {
-	struct apr *apr = dev_get_drvdata(dev);
+	struct packet_router *apr = dev_get_drvdata(dev);
 	struct device_node *node;
 	const char *service_path;
 	int ret;
 
 	for_each_child_of_node(dev->of_node, node) {
-		struct apr_device_id id = { {0} };
+		u32 svc_id;
+		u32 domain_id;
 
 		/*
 		 * This function is called with svc_path NULL during
@@ -372,13 +397,13 @@ static void of_register_apr_devices(struct device *dev, const char *svc_path)
 				continue;
 		}
 
-		if (of_property_read_u32(node, "reg", &id.svc_id))
+		if (of_property_read_u32(node, "reg", &svc_id))
 			continue;
 
-		id.domain_id = apr->dest_domain_id;
+		domain_id = apr->dest_domain_id;
 
-		if (apr_add_device(dev, node, &id))
-			dev_err(dev, "Failed to add apr %d svc\n", id.svc_id);
+		if (apr_add_device(dev, node, svc_id, domain_id))
+			dev_err(dev, "Failed to add apr %d svc\n", svc_id);
 	}
 }
 
@@ -398,7 +423,7 @@ static int apr_remove_device(struct device *dev, void *svc_path)
 
 static void apr_pd_status(int state, char *svc_path, void *priv)
 {
-	struct apr *apr = (struct apr *)priv;
+	struct packet_router *apr = (struct packet_router *)priv;
 
 	switch (state) {
 	case SERVREG_SERVICE_STATE_UP:
@@ -413,16 +438,20 @@ static void apr_pd_status(int state, char *svc_path, void *priv)
 static int apr_probe(struct rpmsg_device *rpdev)
 {
 	struct device *dev = &rpdev->dev;
-	struct apr *apr;
+	struct packet_router *apr;
 	int ret;
 
 	apr = devm_kzalloc(dev, sizeof(*apr), GFP_KERNEL);
 	if (!apr)
 		return -ENOMEM;
 
-	ret = of_property_read_u32(dev->of_node, "qcom,apr-domain", &apr->dest_domain_id);
+	ret = of_property_read_u32(dev->of_node, "qcom,domain", &apr->dest_domain_id);
+	if (ret) /* try deprecated apr-domain property */
+		ret = of_property_read_u32(dev->of_node, "qcom,apr-domain",
+					   &apr->dest_domain_id);
+	apr->type = PR_TYPE_APR;
 	if (ret) {
-		dev_err(dev, "APR Domain ID not specified in DT\n");
+		dev_err(dev, "Domain ID not specified in DT\n");
 		return ret;
 	}
 
@@ -465,7 +494,7 @@ destroy_wq:
 
 static void apr_remove(struct rpmsg_device *rpdev)
 {
-	struct apr *apr = dev_get_drvdata(&rpdev->dev);
+	struct packet_router *apr = dev_get_drvdata(&rpdev->dev);
 
 	pdr_handle_release(apr->pdr);
 	device_for_each_child(&rpdev->dev, NULL, apr_remove_device);
@@ -502,20 +531,20 @@ void apr_driver_unregister(struct apr_driver *drv)
 }
 EXPORT_SYMBOL_GPL(apr_driver_unregister);
 
-static const struct of_device_id apr_of_match[] = {
+static const struct of_device_id pkt_router_of_match[] = {
 	{ .compatible = "qcom,apr"},
 	{ .compatible = "qcom,apr-v2"},
 	{}
 };
-MODULE_DEVICE_TABLE(of, apr_of_match);
+MODULE_DEVICE_TABLE(of, pkt_router_of_match);
 
-static struct rpmsg_driver apr_driver = {
+static struct rpmsg_driver packet_router_driver = {
 	.probe = apr_probe,
 	.remove = apr_remove,
 	.callback = apr_callback,
 	.drv = {
 		.name = "qcom,apr",
-		.of_match_table = apr_of_match,
+		.of_match_table = pkt_router_of_match,
 	},
 };
 
@@ -525,7 +554,7 @@ static int __init apr_init(void)
 
 	ret = bus_register(&aprbus);
 	if (!ret)
-		ret = register_rpmsg_driver(&apr_driver);
+		ret = register_rpmsg_driver(&packet_router_driver);
 	else
 		bus_unregister(&aprbus);
 
@@ -535,7 +564,7 @@ static int __init apr_init(void)
 static void __exit apr_exit(void)
 {
 	bus_unregister(&aprbus);
-	unregister_rpmsg_driver(&apr_driver);
+	unregister_rpmsg_driver(&packet_router_driver);
 }
 
 subsys_initcall(apr_init);
diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
index 137f9f2ac4c3..7bca213a3f83 100644
--- a/include/linux/soc/qcom/apr.h
+++ b/include/linux/soc/qcom/apr.h
@@ -79,6 +79,15 @@ struct apr_resp_pkt {
 #define APR_SVC_MAJOR_VERSION(v)	((v >> 16) & 0xFF)
 #define APR_SVC_MINOR_VERSION(v)	(v & 0xFF)
 
+struct packet_router;
+struct pkt_router_svc {
+	struct device *dev;
+	struct packet_router *pr;
+	spinlock_t lock;
+	int id;
+	void *priv;
+};
+
 struct apr_device {
 	struct device	dev;
 	uint16_t	svc_id;
@@ -86,11 +95,12 @@ struct apr_device {
 	uint32_t	version;
 	char name[APR_NAME_SIZE];
 	const char *service_path;
-	spinlock_t	lock;
+	struct pkt_router_svc svc;
 	struct list_head node;
 };
 
 #define to_apr_device(d) container_of(d, struct apr_device, dev)
+#define svc_to_apr_device(d) container_of(d, struct apr_device, svc)
 
 struct apr_driver {
 	int	(*probe)(struct apr_device *sl);
-- 
cgit v1.2.3


From ec1471a898cca38af6b8956a83ebc1297214546f Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Mon, 27 Sep 2021 14:55:42 +0100
Subject: soc: qcom: apr: Add GPR support

Qualcomm Generic Packet router aka GPR is the IPC mechanism found
in AudioReach next generation signal processing framework to perform
command and response messages between various processors.

GPR has concepts of static and dynamic port, all static services like
APM (Audio Processing Manager), PRM (Proxy resource manager) have
fixed port numbers where as dynamic services like graphs have dynamic
port numbers which are allocated at runtime. All GPR packet messages
will have source and destination domain and port along with opcode
and payload.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20210927135559.738-6-srinivas.kandagatla@linaro.org
---
 drivers/soc/qcom/Kconfig     |   2 +-
 drivers/soc/qcom/apr.c       | 166 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/soc/qcom/apr.h |  58 +++++++++++++++
 3 files changed, 219 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig
index 79b568f82a1c..bfa2ab5772cf 100644
--- a/drivers/soc/qcom/Kconfig
+++ b/drivers/soc/qcom/Kconfig
@@ -199,7 +199,7 @@ config QCOM_WCNSS_CTRL
 	  firmware to a newly booted WCNSS chip.
 
 config QCOM_APR
-	tristate "Qualcomm APR Bus (Asynchronous Packet Router)"
+	tristate "Qualcomm APR/GPR Bus (Asynchronous/Generic Packet Router)"
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on RPMSG
 	depends on NET
diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c
index bfad71e540ad..8a9bfbcd4bb9 100644
--- a/drivers/soc/qcom/apr.c
+++ b/drivers/soc/qcom/apr.c
@@ -17,8 +17,13 @@
 
 enum {
 	PR_TYPE_APR = 0,
+	PR_TYPE_GPR,
 };
 
+/* Some random values tbh which does not collide with static modules */
+#define GPR_DYNAMIC_PORT_START	0x10000000
+#define GPR_DYNAMIC_PORT_END	0x20000000
+
 struct packet_router {
 	struct rpmsg_endpoint *ch;
 	struct device *dev;
@@ -69,6 +74,83 @@ int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt)
 }
 EXPORT_SYMBOL_GPL(apr_send_pkt);
 
+void gpr_free_port(gpr_port_t *port)
+{
+	struct packet_router *gpr = port->pr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gpr->svcs_lock, flags);
+	idr_remove(&gpr->svcs_idr, port->id);
+	spin_unlock_irqrestore(&gpr->svcs_lock, flags);
+
+	kfree(port);
+}
+EXPORT_SYMBOL_GPL(gpr_free_port);
+
+gpr_port_t *gpr_alloc_port(struct apr_device *gdev, struct device *dev,
+				gpr_port_cb cb,	void *priv)
+{
+	struct packet_router *pr = dev_get_drvdata(gdev->dev.parent);
+	gpr_port_t *port;
+	struct pkt_router_svc *svc;
+	int id;
+
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
+	if (!port)
+		return ERR_PTR(-ENOMEM);
+
+	svc = port;
+	svc->callback = cb;
+	svc->pr = pr;
+	svc->priv = priv;
+	svc->dev = dev;
+	spin_lock_init(&svc->lock);
+
+	spin_lock(&pr->svcs_lock);
+	id = idr_alloc_cyclic(&pr->svcs_idr, svc, GPR_DYNAMIC_PORT_START,
+			      GPR_DYNAMIC_PORT_END, GFP_ATOMIC);
+	if (id < 0) {
+		dev_err(dev, "Unable to allocate dynamic GPR src port\n");
+		kfree(port);
+		spin_unlock(&pr->svcs_lock);
+		return ERR_PTR(id);
+	}
+
+	svc->id = id;
+	spin_unlock(&pr->svcs_lock);
+
+	return port;
+}
+EXPORT_SYMBOL_GPL(gpr_alloc_port);
+
+static int pkt_router_send_svc_pkt(struct pkt_router_svc *svc, struct gpr_pkt *pkt)
+{
+	struct packet_router *pr = svc->pr;
+	struct gpr_hdr *hdr;
+	unsigned long flags;
+	int ret;
+
+	hdr = &pkt->hdr;
+
+	spin_lock_irqsave(&svc->lock, flags);
+	ret = rpmsg_trysend(pr->ch, pkt, hdr->pkt_size);
+	spin_unlock_irqrestore(&svc->lock, flags);
+
+	return ret ? ret : hdr->pkt_size;
+}
+
+int gpr_send_pkt(struct apr_device *gdev, struct gpr_pkt *pkt)
+{
+	return pkt_router_send_svc_pkt(&gdev->svc, pkt);
+}
+EXPORT_SYMBOL_GPL(gpr_send_pkt);
+
+int gpr_send_port_pkt(gpr_port_t *port, struct gpr_pkt *pkt)
+{
+	return pkt_router_send_svc_pkt(port, pkt);
+}
+EXPORT_SYMBOL_GPL(gpr_send_port_pkt);
+
 static void apr_dev_release(struct device *dev)
 {
 	struct apr_device *adev = to_apr_device(dev);
@@ -177,6 +259,59 @@ static int apr_do_rx_callback(struct packet_router *apr, struct apr_rx_buf *abuf
 	return 0;
 }
 
+static int gpr_do_rx_callback(struct packet_router *gpr, struct apr_rx_buf *abuf)
+{
+	uint16_t hdr_size, ver;
+	struct pkt_router_svc *svc = NULL;
+	struct gpr_resp_pkt resp;
+	struct gpr_hdr *hdr;
+	unsigned long flags;
+	void *buf = abuf->buf;
+	int len = abuf->len;
+
+	hdr = buf;
+	ver = hdr->version;
+	if (ver > GPR_PKT_VER + 1)
+		return -EINVAL;
+
+	hdr_size = hdr->hdr_size;
+	if (hdr_size < GPR_PKT_HEADER_WORD_SIZE) {
+		dev_err(gpr->dev, "GPR: Wrong hdr size:%d\n", hdr_size);
+		return -EINVAL;
+	}
+
+	if (hdr->pkt_size < GPR_PKT_HEADER_BYTE_SIZE || hdr->pkt_size != len) {
+		dev_err(gpr->dev, "GPR: Wrong packet size\n");
+		return -EINVAL;
+	}
+
+	resp.hdr = *hdr;
+	resp.payload_size = hdr->pkt_size - (hdr_size * 4);
+
+	/*
+	 * NOTE: hdr_size is not same as GPR_HDR_SIZE as remote can include
+	 * optional headers in to gpr_hdr which should be ignored
+	 */
+	if (resp.payload_size > 0)
+		resp.payload = buf + (hdr_size *  4);
+
+
+	spin_lock_irqsave(&gpr->svcs_lock, flags);
+	svc = idr_find(&gpr->svcs_idr, hdr->dest_port);
+	spin_unlock_irqrestore(&gpr->svcs_lock, flags);
+
+	if (!svc) {
+		dev_err(gpr->dev, "GPR: Port(%x) is not registered\n",
+			hdr->dest_port);
+		return -EINVAL;
+	}
+
+	if (svc->callback)
+		svc->callback(&resp, svc->priv, 0);
+
+	return 0;
+}
+
 static void apr_rxwq(struct work_struct *work)
 {
 	struct packet_router *apr = container_of(work, struct packet_router, rx_work);
@@ -189,6 +324,9 @@ static void apr_rxwq(struct work_struct *work)
 			case PR_TYPE_APR:
 				apr_do_rx_callback(apr, abuf);
 				break;
+			case PR_TYPE_GPR:
+				gpr_do_rx_callback(apr, abuf);
+				break;
 			default:
 				break;
 			}
@@ -227,8 +365,13 @@ static int apr_device_probe(struct device *dev)
 {
 	struct apr_device *adev = to_apr_device(dev);
 	struct apr_driver *adrv = to_apr_driver(dev->driver);
+	int ret;
 
-	return adrv->probe(adev);
+	ret = adrv->probe(adev);
+	if (!ret)
+		adev->svc.callback = adrv->gpr_callback;
+
+	return ret;
 }
 
 static void apr_device_remove(struct device *dev)
@@ -299,6 +442,10 @@ static int apr_add_device(struct device *dev, struct device_node *np,
 		dev_set_name(&adev->dev, "aprsvc:%s:%x:%x", adev->name,
 			     domain_id, svc_id);
 		break;
+	case PR_TYPE_GPR:
+		dev_set_name(&adev->dev, "gprsvc:%s:%x:%x", adev->name,
+			     domain_id, svc_id);
+		break;
 	default:
 		break;
 	}
@@ -316,7 +463,7 @@ static int apr_add_device(struct device *dev, struct device_node *np,
 	of_property_read_string_index(np, "qcom,protection-domain",
 				      1, &adev->service_path);
 
-	dev_info(dev, "Adding APR dev: %s\n", dev_name(&adev->dev));
+	dev_info(dev, "Adding APR/GPR dev: %s\n", dev_name(&adev->dev));
 
 	ret = device_register(&adev->dev);
 	if (ret) {
@@ -446,10 +593,16 @@ static int apr_probe(struct rpmsg_device *rpdev)
 		return -ENOMEM;
 
 	ret = of_property_read_u32(dev->of_node, "qcom,domain", &apr->dest_domain_id);
-	if (ret) /* try deprecated apr-domain property */
-		ret = of_property_read_u32(dev->of_node, "qcom,apr-domain",
-					   &apr->dest_domain_id);
-	apr->type = PR_TYPE_APR;
+
+	if (of_device_is_compatible(dev->of_node, "qcom,gpr")) {
+		apr->type = PR_TYPE_GPR;
+	} else {
+		if (ret) /* try deprecated apr-domain property */
+			ret = of_property_read_u32(dev->of_node, "qcom,apr-domain",
+						   &apr->dest_domain_id);
+		apr->type = PR_TYPE_APR;
+	}
+
 	if (ret) {
 		dev_err(dev, "Domain ID not specified in DT\n");
 		return ret;
@@ -534,6 +687,7 @@ EXPORT_SYMBOL_GPL(apr_driver_unregister);
 static const struct of_device_id pkt_router_of_match[] = {
 	{ .compatible = "qcom,apr"},
 	{ .compatible = "qcom,apr-v2"},
+	{ .compatible = "qcom,gpr"},
 	{}
 };
 MODULE_DEVICE_TABLE(of, pkt_router_of_match);
diff --git a/include/linux/soc/qcom/apr.h b/include/linux/soc/qcom/apr.h
index 7bca213a3f83..23c5b30f3511 100644
--- a/include/linux/soc/qcom/apr.h
+++ b/include/linux/soc/qcom/apr.h
@@ -7,6 +7,7 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <dt-bindings/soc/qcom,apr.h>
+#include <dt-bindings/soc/qcom,gpr.h>
 
 extern struct bus_type aprbus;
 
@@ -75,19 +76,65 @@ struct apr_resp_pkt {
 	int payload_size;
 };
 
+struct gpr_hdr {
+	uint32_t version:4;
+	uint32_t hdr_size:4;
+	uint32_t pkt_size:24;
+	uint32_t dest_domain:8;
+	uint32_t src_domain:8;
+	uint32_t reserved:16;
+	uint32_t src_port;
+	uint32_t dest_port;
+	uint32_t token;
+	uint32_t opcode;
+} __packed;
+
+struct gpr_pkt {
+	struct gpr_hdr hdr;
+	uint32_t payload[];
+};
+
+struct gpr_resp_pkt {
+	struct gpr_hdr hdr;
+	void *payload;
+	int payload_size;
+};
+
+#define GPR_HDR_SIZE			sizeof(struct gpr_hdr)
+#define GPR_PKT_VER			0x0
+#define GPR_PKT_HEADER_WORD_SIZE	((sizeof(struct gpr_pkt) + 3) >> 2)
+#define GPR_PKT_HEADER_BYTE_SIZE	(GPR_PKT_HEADER_WORD_SIZE << 2)
+
+#define GPR_BASIC_RSP_RESULT		0x02001005
+
+struct gpr_ibasic_rsp_result_t {
+	uint32_t opcode;
+	uint32_t status;
+};
+
+#define GPR_BASIC_EVT_ACCEPTED		0x02001006
+
+struct gpr_ibasic_rsp_accepted_t {
+	uint32_t opcode;
+};
+
 /* Bits 0 to 15 -- Minor version,  Bits 16 to 31 -- Major version */
 #define APR_SVC_MAJOR_VERSION(v)	((v >> 16) & 0xFF)
 #define APR_SVC_MINOR_VERSION(v)	(v & 0xFF)
 
+typedef int (*gpr_port_cb) (struct gpr_resp_pkt *d, void *priv, int op);
 struct packet_router;
 struct pkt_router_svc {
 	struct device *dev;
+	gpr_port_cb callback;
 	struct packet_router *pr;
 	spinlock_t lock;
 	int id;
 	void *priv;
 };
 
+typedef struct pkt_router_svc gpr_port_t;
+
 struct apr_device {
 	struct device	dev;
 	uint16_t	svc_id;
@@ -99,6 +146,8 @@ struct apr_device {
 	struct list_head node;
 };
 
+typedef struct apr_device gpr_device_t;
+
 #define to_apr_device(d) container_of(d, struct apr_device, dev)
 #define svc_to_apr_device(d) container_of(d, struct apr_device, svc)
 
@@ -107,10 +156,12 @@ struct apr_driver {
 	int	(*remove)(struct apr_device *sl);
 	int	(*callback)(struct apr_device *a,
 			    struct apr_resp_pkt *d);
+	int	(*gpr_callback)(struct gpr_resp_pkt *d, void *data, int op);
 	struct device_driver		driver;
 	const struct apr_device_id	*id_table;
 };
 
+typedef struct apr_driver gpr_driver_t;
 #define to_apr_driver(d) container_of(d, struct apr_driver, driver)
 
 /*
@@ -133,7 +184,14 @@ void apr_driver_unregister(struct apr_driver *drv);
 #define module_apr_driver(__apr_driver) \
 	module_driver(__apr_driver, apr_driver_register, \
 			apr_driver_unregister)
+#define module_gpr_driver(__gpr_driver) module_apr_driver(__gpr_driver)
 
 int apr_send_pkt(struct apr_device *adev, struct apr_pkt *pkt);
 
+gpr_port_t *gpr_alloc_port(gpr_device_t *gdev, struct device *dev,
+				gpr_port_cb cb, void *priv);
+void gpr_free_port(gpr_port_t *port);
+int gpr_send_port_pkt(gpr_port_t *port, struct gpr_pkt *pkt);
+int gpr_send_pkt(gpr_device_t *gdev, struct gpr_pkt *pkt);
+
 #endif /* __QCOM_APR_H_ */
-- 
cgit v1.2.3


From af3826db74d184bc9c2c9d3ff34548e5f317a6f3 Mon Sep 17 00:00:00 2001
From: Geetha sowjanya <gakula@marvell.com>
Date: Tue, 28 Sep 2021 11:25:26 +0530
Subject: octeontx2-pf: Use hardware register for CQE count

Current driver uses software CQ head pointer to poll on CQE
header in memory to determine if CQE is valid. Software needs
to make sure, that the reads of the CQE do not get re-ordered
so much that it ends up with an inconsistent view of the CQE.
To ensure that DMB barrier after read to first CQE cacheline
and before reading of the rest of the CQE is needed.
But having barrier for every CQE read will impact the performance,
instead use hardware CQ head and tail pointers to find the
valid number of CQEs.

Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/marvell/octeontx2/nic/otx2_common.c   |  3 +
 .../ethernet/marvell/octeontx2/nic/otx2_common.h   |  1 +
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 69 +++++++++++++++++++---
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h |  5 ++
 include/linux/soc/marvell/octeontx2/asm.h          | 14 +++++
 5 files changed, 85 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 78df173e6df2..4c3dbade8cfb 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1006,6 +1006,9 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf)
 			return err;
 	}
 
+	pfvf->cq_op_addr = (__force u64 *)otx2_get_regaddr(pfvf,
+							   NIX_LF_CQ_OP_STATUS);
+
 	/* Initialize work queue for receive buffer refill */
 	pfvf->refill_wrk = devm_kcalloc(pfvf->dev, pfvf->qset.cq_cnt,
 					sizeof(struct refill_work), GFP_KERNEL);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 0a792fce55f1..069d1b925102 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -343,6 +343,7 @@ struct otx2_nic {
 #define OTX2_FLAG_TC_MATCHALL_INGRESS_ENABLED	BIT_ULL(13)
 #define OTX2_FLAG_DMACFLTR_SUPPORT		BIT_ULL(14)
 	u64			flags;
+	u64			*cq_op_addr;
 
 	struct otx2_qset	qset;
 	struct otx2_hw		hw;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index f42b1d4e0c67..3f3ec8ffc4dd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -18,6 +18,31 @@
 
 #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx)))
 
+static int otx2_nix_cq_op_status(struct otx2_nic *pfvf,
+				 struct otx2_cq_queue *cq)
+{
+	u64 incr = (u64)(cq->cq_idx) << 32;
+	u64 status;
+
+	status = otx2_atomic64_fetch_add(incr, pfvf->cq_op_addr);
+
+	if (unlikely(status & BIT_ULL(CQ_OP_STAT_OP_ERR) ||
+		     status & BIT_ULL(CQ_OP_STAT_CQ_ERR))) {
+		dev_err(pfvf->dev, "CQ stopped due to error");
+		return -EINVAL;
+	}
+
+	cq->cq_tail = status & 0xFFFFF;
+	cq->cq_head = (status >> 20) & 0xFFFFF;
+	if (cq->cq_tail < cq->cq_head)
+		cq->pend_cqe = (cq->cqe_cnt - cq->cq_head) +
+				cq->cq_tail;
+	else
+		cq->pend_cqe = cq->cq_tail - cq->cq_head;
+
+	return 0;
+}
+
 static struct nix_cqe_hdr_s *otx2_get_next_cqe(struct otx2_cq_queue *cq)
 {
 	struct nix_cqe_hdr_s *cqe_hdr;
@@ -318,7 +343,14 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
 	struct nix_cqe_rx_s *cqe;
 	int processed_cqe = 0;
 
-	while (likely(processed_cqe < budget)) {
+	if (cq->pend_cqe >= budget)
+		goto process_cqe;
+
+	if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+		return 0;
+
+process_cqe:
+	while (likely(processed_cqe < budget) && cq->pend_cqe) {
 		cqe = (struct nix_cqe_rx_s *)CQE_ADDR(cq, cq->cq_head);
 		if (cqe->hdr.cqe_type == NIX_XQE_TYPE_INVALID ||
 		    !cqe->sg.seg_addr) {
@@ -334,6 +366,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
 		cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
 		cqe->sg.seg_addr = 0x00;
 		processed_cqe++;
+		cq->pend_cqe--;
 	}
 
 	/* Free CQEs to HW */
@@ -368,7 +401,14 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
 	struct nix_cqe_tx_s *cqe;
 	int processed_cqe = 0;
 
-	while (likely(processed_cqe < budget)) {
+	if (cq->pend_cqe >= budget)
+		goto process_cqe;
+
+	if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+		return 0;
+
+process_cqe:
+	while (likely(processed_cqe < budget) && cq->pend_cqe) {
 		cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq);
 		if (unlikely(!cqe)) {
 			if (!processed_cqe)
@@ -380,6 +420,7 @@ static int otx2_tx_napi_handler(struct otx2_nic *pfvf,
 
 		cqe->hdr.cqe_type = NIX_XQE_TYPE_INVALID;
 		processed_cqe++;
+		cq->pend_cqe--;
 	}
 
 	/* Free CQEs to HW */
@@ -936,10 +977,16 @@ void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
 	int processed_cqe = 0;
 	u64 iova, pa;
 
-	while ((cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq))) {
-		if (!cqe->sg.subdc)
-			continue;
+	if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+		return;
+
+	while (cq->pend_cqe) {
+		cqe = (struct nix_cqe_rx_s *)otx2_get_next_cqe(cq);
 		processed_cqe++;
+		cq->pend_cqe--;
+
+		if (!cqe)
+			continue;
 		if (cqe->sg.segs > 1) {
 			otx2_free_rcv_seg(pfvf, cqe, cq->cq_idx);
 			continue;
@@ -965,7 +1012,16 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
 
 	sq = &pfvf->qset.sq[cq->cint_idx];
 
-	while ((cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq))) {
+	if (otx2_nix_cq_op_status(pfvf, cq) || !cq->pend_cqe)
+		return;
+
+	while (cq->pend_cqe) {
+		cqe = (struct nix_cqe_tx_s *)otx2_get_next_cqe(cq);
+		processed_cqe++;
+		cq->pend_cqe--;
+
+		if (!cqe)
+			continue;
 		sg = &sq->sg[cqe->comp.sqe_id];
 		skb = (struct sk_buff *)sg->skb;
 		if (skb) {
@@ -973,7 +1029,6 @@ void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq)
 			dev_kfree_skb_any(skb);
 			sg->skb = (u64)NULL;
 		}
-		processed_cqe++;
 	}
 
 	/* Free CQEs to HW */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index 3ff1ad79c001..6a97631ff226 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -56,6 +56,9 @@
  */
 #define CQ_QCOUNT_DEFAULT	1
 
+#define CQ_OP_STAT_OP_ERR       63
+#define CQ_OP_STAT_CQ_ERR       46
+
 struct queue_stats {
 	u64	bytes;
 	u64	pkts;
@@ -122,6 +125,8 @@ struct otx2_cq_queue {
 	u16			pool_ptrs;
 	u32			cqe_cnt;
 	u32			cq_head;
+	u32			cq_tail;
+	u32			pend_cqe;
 	void			*cqe_base;
 	struct qmem		*cqe;
 	struct otx2_pool	*rbpool;
diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h
index fa1d6af0164e..0f79fd7f81a1 100644
--- a/include/linux/soc/marvell/octeontx2/asm.h
+++ b/include/linux/soc/marvell/octeontx2/asm.h
@@ -34,9 +34,23 @@
 			 : [rf] "+r"(val)		\
 			 : [rs] "r"(addr));		\
 })
+
+static inline u64 otx2_atomic64_fetch_add(u64 incr, u64 *ptr)
+{
+	u64 result;
+
+	asm volatile (".cpu  generic+lse\n"
+		      "ldadda %x[i], %x[r], [%[b]]"
+		      : [r] "=r" (result), "+m" (*ptr)
+		      : [i] "r" (incr), [b] "r" (ptr)
+		      : "memory");
+	return result;
+}
+
 #else
 #define otx2_lmt_flush(ioaddr)          ({ 0; })
 #define cn10k_lmt_flush(val, addr)	({ addr = val; })
+#define otx2_atomic64_fetch_add(incr, ptr)	({ incr; })
 #endif
 
 #endif /* __SOC_OTX2_ASM_H */
-- 
cgit v1.2.3


From 62b8963cd84df1fc04986cd9b27586acce758f36 Mon Sep 17 00:00:00 2001
From: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Date: Tue, 28 Sep 2021 14:00:45 +0300
Subject: ieee80211: Add new A-MPDU factor macro for HE 6 GHz peer caps

Add IEEE80211_HE_6GHZ_MAX_AMPDU_FACTOR as per IEEE Std 802.11ax-2021,
9.4.2.263 to use for peer max A-MPDU factor in 6 GHz band.

Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20210913175510.193005-1-jouni@codeaurora.org
---
 include/linux/ieee80211.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 694264503119..a1a7eda35cb5 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2084,6 +2084,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 
 #define IEEE80211_HE_VHT_MAX_AMPDU_FACTOR	20
 #define IEEE80211_HE_HT_MAX_AMPDU_FACTOR	16
+#define IEEE80211_HE_6GHZ_MAX_AMPDU_FACTOR	13
 
 /* 802.11ax HE PHY capabilities */
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
-- 
cgit v1.2.3


From 8de1e9b01b03d2805f58aa490ea5bf0df003bc80 Mon Sep 17 00:00:00 2001
From: Meir Lichtinger <meirl@nvidia.com>
Date: Wed, 22 Sep 2021 11:28:50 +0300
Subject: net/mlx5: Add uid field to UAR allocation structures

Add uid field to mlx5_ifc_alloc_uar_in_bits and
mlx5_ifc_dealloc_uar_out_bits structs.

This field will be used by FW to manage UAR according to uid.

Signed-off-by: Meir Lichtinger <meirl@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3638d09ba77..96f5fb2af811 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7569,7 +7569,7 @@ struct mlx5_ifc_dealloc_uar_out_bits {
 
 struct mlx5_ifc_dealloc_uar_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -8416,7 +8416,7 @@ struct mlx5_ifc_alloc_uar_out_bits {
 
 struct mlx5_ifc_alloc_uar_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From d2c8a1554c10d5e0443b1f97f480d7dacd55cf55 Mon Sep 17 00:00:00 2001
From: Meir Lichtinger <meirl@nvidia.com>
Date: Wed, 22 Sep 2021 11:28:51 +0300
Subject: IB/mlx5: Enable UAR to have DevX UID

UID field was added to alloc_uar and dealloc_uar PRM command, to specify
DevX UID for UAR. This change enables firmware validating user access to
its own UAR resources.

For the kernel allocated UARs the UID will stay 0 as of today.

Signed-off-by: Meir Lichtinger <meirl@nvidia.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/cmd.c              | 26 +++++++++++++
 drivers/infiniband/hw/mlx5/cmd.h              |  2 +
 drivers/infiniband/hw/mlx5/main.c             | 55 +++++++++++++++------------
 drivers/net/ethernet/mellanox/mlx5/core/uar.c | 14 +++----
 include/linux/mlx5/driver.h                   |  2 -
 5 files changed, 66 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index a8db8a051170..ff3742b0460a 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -206,3 +206,29 @@ out:
 	kfree(in);
 	return err;
 }
+
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid)
+{
+	u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
+	int err;
+
+	MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
+	MLX5_SET(alloc_uar_in, in, uid, uid);
+	err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
+	if (err)
+		return err;
+
+	*uarn = MLX5_GET(alloc_uar_out, out, uar);
+	return 0;
+}
+
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
+
+	MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
+	MLX5_SET(dealloc_uar_in, in, uar, uarn);
+	MLX5_SET(dealloc_uar_in, in, uid, uid);
+	return mlx5_cmd_exec_in(dev, dealloc_uar, in);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 66c96292ed43..ee46638db5de 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -57,4 +57,6 @@ int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
 int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		     u16 opmod, u8 port);
+int mlx5_cmd_uar_alloc(struct mlx5_core_dev *dev, u32 *uarn, u16 uid);
+int mlx5_cmd_uar_dealloc(struct mlx5_core_dev *dev, u32 uarn, u16 uid);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 8664bcf6d3f5..5ec8bd2f0b2f 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1643,7 +1643,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
 
 	bfregi = &context->bfregi;
 	for (i = 0; i < bfregi->num_static_sys_pages; i++) {
-		err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+		err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i],
+					 context->devx_uid);
 		if (err)
 			goto error;
 
@@ -1657,7 +1658,8 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
 
 error:
 	for (--i; i >= 0; i--)
-		if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+		if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+					 context->devx_uid))
 			mlx5_ib_warn(dev, "failed to free uar %d\n", i);
 
 	return err;
@@ -1673,7 +1675,8 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
 	for (i = 0; i < bfregi->num_sys_pages; i++)
 		if (i < bfregi->num_static_sys_pages ||
 		    bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
-			mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+			mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i],
+					     context->devx_uid);
 }
 
 int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
@@ -1891,6 +1894,13 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
 		return -EINVAL;
 
+	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+		err = mlx5_ib_devx_create(dev, true);
+		if (err < 0)
+			goto out_ctx;
+		context->devx_uid = err;
+	}
+
 	lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
 	lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
 	bfregi = &context->bfregi;
@@ -1903,7 +1913,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 	/* updates req->total_num_bfregs */
 	err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
 	if (err)
-		goto out_ctx;
+		goto out_devx;
 
 	mutex_init(&bfregi->lock);
 	bfregi->lib_uar_4k = lib_uar_4k;
@@ -1911,7 +1921,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 				GFP_KERNEL);
 	if (!bfregi->count) {
 		err = -ENOMEM;
-		goto out_ctx;
+		goto out_devx;
 	}
 
 	bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -1927,17 +1937,10 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 		goto out_sys_pages;
 
 uar_done:
-	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
-		err = mlx5_ib_devx_create(dev, true);
-		if (err < 0)
-			goto out_uars;
-		context->devx_uid = err;
-	}
-
 	err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
 					     context->devx_uid);
 	if (err)
-		goto out_devx;
+		goto out_uars;
 
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
@@ -1972,9 +1975,6 @@ uar_done:
 
 out_mdev:
 	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
-out_devx:
-	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
-		mlx5_ib_devx_destroy(dev, context->devx_uid);
 
 out_uars:
 	deallocate_uars(dev, context);
@@ -1985,6 +1985,10 @@ out_sys_pages:
 out_count:
 	kfree(bfregi->count);
 
+out_devx:
+	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+		mlx5_ib_devx_destroy(dev, context->devx_uid);
+
 out_ctx:
 	return err;
 }
@@ -2021,12 +2025,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	bfregi = &context->bfregi;
 	mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
 
-	if (context->devx_uid)
-		mlx5_ib_devx_destroy(dev, context->devx_uid);
-
 	deallocate_uars(dev, context);
 	kfree(bfregi->sys_pages);
 	kfree(bfregi->count);
+
+	if (context->devx_uid)
+		mlx5_ib_devx_destroy(dev, context->devx_uid);
 }
 
 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -2119,6 +2123,7 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
 	struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
 	struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
 	struct mlx5_var_table *var_table = &dev->var_table;
+	struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext);
 
 	switch (mentry->mmap_flag) {
 	case MLX5_IB_MMAP_TYPE_MEMIC:
@@ -2133,7 +2138,8 @@ static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
 		break;
 	case MLX5_IB_MMAP_TYPE_UAR_WC:
 	case MLX5_IB_MMAP_TYPE_UAR_NC:
-		mlx5_cmd_free_uar(dev->mdev, mentry->page_idx);
+		mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx,
+				     context->devx_uid);
 		kfree(mentry);
 		break;
 	default:
@@ -2211,7 +2217,8 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 		bfregi->count[bfreg_dyn_idx]++;
 		mutex_unlock(&bfregi->lock);
 
-		err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+		err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index,
+					 context->devx_uid);
 		if (err) {
 			mlx5_ib_warn(dev, "UAR alloc failed\n");
 			goto free_bfreg;
@@ -2240,7 +2247,7 @@ err:
 	if (!dyn_uar)
 		return err;
 
-	mlx5_cmd_free_uar(dev->mdev, idx);
+	mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid);
 
 free_bfreg:
 	mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
@@ -3489,7 +3496,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c,
 		return ERR_PTR(-ENOMEM);
 
 	dev = to_mdev(c->ibucontext.device);
-	err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
+	err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid);
 	if (err)
 		goto end;
 
@@ -3507,7 +3514,7 @@ alloc_uar_entry(struct mlx5_ib_ucontext *c,
 	return entry;
 
 err_insert:
-	mlx5_cmd_free_uar(dev->mdev, uar_index);
+	mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid);
 end:
 	kfree(entry);
 	return ERR_PTR(err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index da481a7c12f4..01e9c412977c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -36,7 +36,7 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 
-int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
+static int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
 	u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
@@ -44,13 +44,14 @@ int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 
 	MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
 	err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
-	if (!err)
-		*uarn = MLX5_GET(alloc_uar_out, out, uar);
-	return err;
+	if (err)
+		return err;
+
+	*uarn = MLX5_GET(alloc_uar_out, out, uar);
+	return 0;
 }
-EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
 
-int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
+static int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
 
@@ -58,7 +59,6 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 	MLX5_SET(dealloc_uar_in, in, uar, uarn);
 	return mlx5_cmd_exec_in(dev, dealloc_uar, in);
 }
-EXPORT_SYMBOL(mlx5_cmd_free_uar);
 
 static int uars_per_sys_page(struct mlx5_core_dev *mdev)
 {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373..1b8bae246b28 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1005,8 +1005,6 @@ void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
 bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
 
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
-int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);
-int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 void mlx5_health_flush(struct mlx5_core_dev *dev);
 void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From af9d82626c8f8547910e3e22c76ced3f5d5f5286 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Tue, 24 Aug 2021 14:20:51 +0200
Subject: PCI/ACPI: Remove OSC_PCI_SUPPORT_MASKS and OSC_PCI_CONTROL_MASKS

These masks are only used internally in the PCI Host Bridge _OSC
negotiation code, which already makes sure nothing outside of these masks
is set. Remove the masks and simplify the code.

Suggested-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20210824122054.29481-2-joro@8bytes.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
---
 drivers/acpi/pci_root.c | 9 +++------
 include/linux/acpi.h    | 2 --
 2 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index d7deedf3548e..0c3030a58219 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -199,18 +199,15 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root,
 	acpi_status status;
 	u32 result, capbuf[3];
 
-	support &= OSC_PCI_SUPPORT_MASKS;
 	support |= root->osc_support_set;
 
 	capbuf[OSC_QUERY_DWORD] = OSC_QUERY_ENABLE;
 	capbuf[OSC_SUPPORT_DWORD] = support;
-	if (control) {
-		*control &= OSC_PCI_CONTROL_MASKS;
+	if (control)
 		capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set;
-	} else {
+	else
 		/* Run _OSC query only with existing controls. */
 		capbuf[OSC_CONTROL_DWORD] = root->osc_control_set;
-	}
 
 	status = acpi_pci_run_osc(root->device->handle, capbuf, &result);
 	if (ACPI_SUCCESS(status)) {
@@ -357,7 +354,7 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 r
 	if (!mask)
 		return AE_BAD_PARAMETER;
 
-	ctrl = *mask & OSC_PCI_CONTROL_MASKS;
+	ctrl = *mask;
 	if ((ctrl & req) != req)
 		return AE_TYPE;
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 974d497a897d..eb59fd3052c0 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -577,7 +577,6 @@ extern u32 osc_sb_native_usb4_control;
 #define OSC_PCI_MSI_SUPPORT			0x00000010
 #define OSC_PCI_EDR_SUPPORT			0x00000080
 #define OSC_PCI_HPX_TYPE_3_SUPPORT		0x00000100
-#define OSC_PCI_SUPPORT_MASKS			0x0000019f
 
 /* PCI Host Bridge _OSC: Capabilities DWORD 3: Control Field */
 #define OSC_PCI_EXPRESS_NATIVE_HP_CONTROL	0x00000001
@@ -587,7 +586,6 @@ extern u32 osc_sb_native_usb4_control;
 #define OSC_PCI_EXPRESS_CAPABILITY_CONTROL	0x00000010
 #define OSC_PCI_EXPRESS_LTR_CONTROL		0x00000020
 #define OSC_PCI_EXPRESS_DPC_CONTROL		0x00000080
-#define OSC_PCI_CONTROL_MASKS			0x000000bf
 
 #define ACPI_GSB_ACCESS_ATTRIB_QUICK		0x00000002
 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV         0x00000004
-- 
cgit v1.2.3


From fec2432c9a7370788faab416b38589ac9f4350e5 Mon Sep 17 00:00:00 2001
From: Diana Craciun <diana.craciun@oss.nxp.com>
Date: Wed, 22 Sep 2021 14:05:29 +0300
Subject: bus/fsl-mc: Add generic implementation for open/reset/close commands

The open/reset/close commands format is similar for all objects.
Currently there are multiple implementations for these commands
scattered through various drivers. The code is cavsi-identical.
Create a generic implementation for the open/reset/close commands.
One of the consumer will be the VFIO driver which needs to
be able to reset a device.

Signed-off-by: Diana Craciun <diana.craciun@oss.nxp.com>
Reviewed-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Link: https://lore.kernel.org/r/20210922110530.24736-1-diana.craciun@oss.nxp.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/bus/fsl-mc/Makefile         |   3 +-
 drivers/bus/fsl-mc/fsl-mc-private.h |  39 ++++++++++++--
 drivers/bus/fsl-mc/obj-api.c        | 103 ++++++++++++++++++++++++++++++++++++
 include/linux/fsl/mc.h              |  14 +++++
 4 files changed, 154 insertions(+), 5 deletions(-)
 create mode 100644 drivers/bus/fsl-mc/obj-api.c

(limited to 'include/linux')

diff --git a/drivers/bus/fsl-mc/Makefile b/drivers/bus/fsl-mc/Makefile
index 4ae292a30e53..892946245527 100644
--- a/drivers/bus/fsl-mc/Makefile
+++ b/drivers/bus/fsl-mc/Makefile
@@ -15,7 +15,8 @@ mc-bus-driver-objs := fsl-mc-bus.o \
 		      dprc-driver.o \
 		      fsl-mc-allocator.o \
 		      fsl-mc-msi.o \
-		      dpmcp.o
+		      dpmcp.o \
+		      obj-api.o
 
 # MC userspace support
 obj-$(CONFIG_FSL_MC_UAPI_SUPPORT) += fsl-mc-uapi.o
diff --git a/drivers/bus/fsl-mc/fsl-mc-private.h b/drivers/bus/fsl-mc/fsl-mc-private.h
index 1958fa065360..b3520ea1b9f4 100644
--- a/drivers/bus/fsl-mc/fsl-mc-private.h
+++ b/drivers/bus/fsl-mc/fsl-mc-private.h
@@ -48,7 +48,6 @@ struct dpmng_rsp_get_version {
 
 /* DPMCP command IDs */
 #define DPMCP_CMDID_CLOSE		DPMCP_CMD(0x800)
-#define DPMCP_CMDID_OPEN		DPMCP_CMD(0x80b)
 #define DPMCP_CMDID_RESET		DPMCP_CMD(0x005)
 
 struct dpmcp_cmd_open {
@@ -91,7 +90,6 @@ int dpmcp_reset(struct fsl_mc_io *mc_io,
 
 /* DPRC command IDs */
 #define DPRC_CMDID_CLOSE                        DPRC_CMD(0x800)
-#define DPRC_CMDID_OPEN                         DPRC_CMD(0x805)
 #define DPRC_CMDID_GET_API_VERSION              DPRC_CMD(0xa05)
 
 #define DPRC_CMDID_GET_ATTR                     DPRC_CMD(0x004)
@@ -453,7 +451,6 @@ int dprc_get_connection(struct fsl_mc_io *mc_io,
 
 /* Command IDs */
 #define DPBP_CMDID_CLOSE		DPBP_CMD(0x800)
-#define DPBP_CMDID_OPEN			DPBP_CMD(0x804)
 
 #define DPBP_CMDID_ENABLE		DPBP_CMD(0x002)
 #define DPBP_CMDID_DISABLE		DPBP_CMD(0x003)
@@ -492,7 +489,6 @@ struct dpbp_rsp_get_attributes {
 
 /* Command IDs */
 #define DPCON_CMDID_CLOSE			DPCON_CMD(0x800)
-#define DPCON_CMDID_OPEN			DPCON_CMD(0x808)
 
 #define DPCON_CMDID_ENABLE			DPCON_CMD(0x002)
 #define DPCON_CMDID_DISABLE			DPCON_CMD(0x003)
@@ -524,6 +520,41 @@ struct dpcon_cmd_set_notification {
 	__le64 user_ctx;
 };
 
+/*
+ * Generic FSL MC API
+ */
+
+/* generic command versioning */
+#define OBJ_CMD_BASE_VERSION		1
+#define OBJ_CMD_ID_OFFSET		4
+
+#define OBJ_CMD(id)	(((id) << OBJ_CMD_ID_OFFSET) | OBJ_CMD_BASE_VERSION)
+
+/* open command codes */
+#define DPRTC_CMDID_OPEN		OBJ_CMD(0x810)
+#define DPNI_CMDID_OPEN		OBJ_CMD(0x801)
+#define DPSW_CMDID_OPEN		OBJ_CMD(0x802)
+#define DPIO_CMDID_OPEN		OBJ_CMD(0x803)
+#define DPBP_CMDID_OPEN		OBJ_CMD(0x804)
+#define DPRC_CMDID_OPEN		OBJ_CMD(0x805)
+#define DPDMUX_CMDID_OPEN		OBJ_CMD(0x806)
+#define DPCI_CMDID_OPEN		OBJ_CMD(0x807)
+#define DPCON_CMDID_OPEN		OBJ_CMD(0x808)
+#define DPSECI_CMDID_OPEN		OBJ_CMD(0x809)
+#define DPAIOP_CMDID_OPEN		OBJ_CMD(0x80a)
+#define DPMCP_CMDID_OPEN		OBJ_CMD(0x80b)
+#define DPMAC_CMDID_OPEN		OBJ_CMD(0x80c)
+#define DPDCEI_CMDID_OPEN		OBJ_CMD(0x80d)
+#define DPDMAI_CMDID_OPEN		OBJ_CMD(0x80e)
+#define DPDBG_CMDID_OPEN		OBJ_CMD(0x80f)
+
+/* Generic object command IDs */
+#define OBJ_CMDID_CLOSE		OBJ_CMD(0x800)
+#define OBJ_CMDID_RESET		OBJ_CMD(0x005)
+
+struct fsl_mc_obj_cmd_open {
+	__le32 obj_id;
+};
 
 /**
  * struct fsl_mc_resource_pool - Pool of MC resources of a given
diff --git a/drivers/bus/fsl-mc/obj-api.c b/drivers/bus/fsl-mc/obj-api.c
new file mode 100644
index 000000000000..06c1dd84e38d
--- /dev/null
+++ b/drivers/bus/fsl-mc/obj-api.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2021 NXP
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/fsl/mc.h>
+
+#include "fsl-mc-private.h"
+
+static int fsl_mc_get_open_cmd_id(const char *type)
+{
+	static const struct {
+		int cmd_id;
+		const char *type;
+	} dev_ids[] = {
+		{ DPRTC_CMDID_OPEN, "dprtc" },
+		{ DPRC_CMDID_OPEN, "dprc" },
+		{ DPNI_CMDID_OPEN, "dpni" },
+		{ DPIO_CMDID_OPEN, "dpio" },
+		{ DPSW_CMDID_OPEN, "dpsw" },
+		{ DPBP_CMDID_OPEN, "dpbp" },
+		{ DPCON_CMDID_OPEN, "dpcon" },
+		{ DPMCP_CMDID_OPEN, "dpmcp" },
+		{ DPMAC_CMDID_OPEN, "dpmac" },
+		{ DPSECI_CMDID_OPEN, "dpseci" },
+		{ DPDMUX_CMDID_OPEN, "dpdmux" },
+		{ DPDCEI_CMDID_OPEN, "dpdcei" },
+		{ DPAIOP_CMDID_OPEN, "dpaiop" },
+		{ DPCI_CMDID_OPEN, "dpci" },
+		{ DPDMAI_CMDID_OPEN, "dpdmai" },
+		{ DPDBG_CMDID_OPEN, "dpdbg" },
+		{ 0, NULL }
+	};
+	int i;
+
+	for (i = 0; dev_ids[i].type; i++)
+		if (!strcmp(dev_ids[i].type, type))
+			return dev_ids[i].cmd_id;
+
+	return -1;
+}
+
+int fsl_mc_obj_open(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    int obj_id,
+		    char *obj_type,
+		    u16 *token)
+{
+	struct fsl_mc_command cmd = { 0 };
+	struct fsl_mc_obj_cmd_open *cmd_params;
+	int err = 0;
+	int cmd_id = fsl_mc_get_open_cmd_id(obj_type);
+
+	if (cmd_id == -1)
+		return -ENODEV;
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(cmd_id, cmd_flags, 0);
+	cmd_params = (struct fsl_mc_obj_cmd_open *)cmd.params;
+	cmd_params->obj_id = cpu_to_le32(obj_id);
+
+	/* send command to mc*/
+	err = mc_send_command(mc_io, &cmd);
+	if (err)
+		return err;
+
+	/* retrieve response parameters */
+	*token = mc_cmd_hdr_read_token(&cmd);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(fsl_mc_obj_open);
+
+int fsl_mc_obj_close(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(OBJ_CMDID_CLOSE, cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(fsl_mc_obj_close);
+
+int fsl_mc_obj_reset(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(OBJ_CMDID_RESET, cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+EXPORT_SYMBOL_GPL(fsl_mc_obj_reset);
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index 30ece3ae6df7..e026f6c48b49 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -620,6 +620,20 @@ int dpcon_reset(struct fsl_mc_io *mc_io,
 		u32 cmd_flags,
 		u16 token);
 
+int fsl_mc_obj_open(struct fsl_mc_io *mc_io,
+		    u32 cmd_flags,
+		    int obj_id,
+		    char *obj_type,
+		    u16 *token);
+
+int fsl_mc_obj_close(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token);
+
+int fsl_mc_obj_reset(struct fsl_mc_io *mc_io,
+		     u32 cmd_flags,
+		     u16 token);
+
 /**
  * struct dpcon_attr - Structure representing DPCON attributes
  * @id: DPCON object ID
-- 
cgit v1.2.3


From 3d717fad5081b8e3bda76d86907fad95398cbde8 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 28 Sep 2021 16:09:45 -0700
Subject: bpf: Replace "want address" users of BPF_CAST_CALL with BPF_CALL_IMM

In order to keep ahead of cases in the kernel where Control Flow
Integrity (CFI) may trip over function call casts, enabling
-Wcast-function-type is helpful. To that end, BPF_CAST_CALL causes
various warnings and is one of the last places in the kernel triggering
this warning.

Most places using BPF_CAST_CALL actually just want a void * to perform
math on. It's not actually performing a call, so just use a different
helper to get the void *, by way of the new BPF_CALL_IMM() helper, which
can clean up a common copy/paste idiom as well.

This change results in no object code difference.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://github.com/KSPP/linux/issues/20
Link: https://lore.kernel.org/lkml/CAEf4Bzb46=-J5Fxc3mMZ8JQPtK1uoE0q6+g6WPz53Cvx=CBEhw@mail.gmail.com
Link: https://lore.kernel.org/bpf/20210928230946.4062144-2-keescook@chromium.org
---
 include/linux/filter.h |  6 +++++-
 kernel/bpf/hashtab.c   |  6 +++---
 kernel/bpf/verifier.c  | 26 +++++++++-----------------
 lib/test_bpf.c         |  2 +-
 4 files changed, 18 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4a93c12543ee..6c247663d4ce 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -365,13 +365,17 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
 #define BPF_CAST_CALL(x)					\
 		((u64 (*)(u64, u64, u64, u64, u64))(x))
 
+/* Convert function address to BPF immediate */
+
+#define BPF_CALL_IMM(x)	((void *)(x) - (void *)__bpf_call_base)
+
 #define BPF_EMIT_CALL(FUNC)					\
 	((struct bpf_insn) {					\
 		.code  = BPF_JMP | BPF_CALL,			\
 		.dst_reg = 0,					\
 		.src_reg = 0,					\
 		.off   = 0,					\
-		.imm   = ((FUNC) - __bpf_call_base) })
+		.imm   = BPF_CALL_IMM(FUNC) })
 
 /* Raw code statement block */
 
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 32471ba02708..3d8f9d6997d5 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -668,7 +668,7 @@ static int htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
 	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
 		     (void *(*)(struct bpf_map *map, void *key))NULL));
-	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
+	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
 				offsetof(struct htab_elem, key) +
@@ -709,7 +709,7 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
 
 	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
 		     (void *(*)(struct bpf_map *map, void *key))NULL));
-	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
+	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
 	*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
 			      offsetof(struct htab_elem, lru_node) +
@@ -2397,7 +2397,7 @@ static int htab_of_map_gen_lookup(struct bpf_map *map,
 
 	BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
 		     (void *(*)(struct bpf_map *map, void *key))NULL));
-	*insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
+	*insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem);
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
 				offsetof(struct htab_elem, key) +
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7a8351604f67..1433752db740 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1744,7 +1744,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
 
 	desc = &tab->descs[tab->nr_descs++];
 	desc->func_id = func_id;
-	desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
+	desc->imm = BPF_CALL_IMM(addr);
 	err = btf_distill_func_proto(&env->log, btf_vmlinux,
 				     func_proto, func_name,
 				     &desc->func_model);
@@ -12514,8 +12514,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 			if (!bpf_pseudo_call(insn))
 				continue;
 			subprog = insn->off;
-			insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
-				    __bpf_call_base;
+			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
 		}
 
 		/* we use the aux data to keep a list of the start addresses
@@ -12995,32 +12994,25 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 patch_map_ops_generic:
 			switch (insn->imm) {
 			case BPF_FUNC_map_lookup_elem:
-				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
-					    __bpf_call_base;
+				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
 				continue;
 			case BPF_FUNC_map_update_elem:
-				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
-					    __bpf_call_base;
+				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
 				continue;
 			case BPF_FUNC_map_delete_elem:
-				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
-					    __bpf_call_base;
+				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
 				continue;
 			case BPF_FUNC_map_push_elem:
-				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
-					    __bpf_call_base;
+				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
 				continue;
 			case BPF_FUNC_map_pop_elem:
-				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
-					    __bpf_call_base;
+				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
 				continue;
 			case BPF_FUNC_map_peek_elem:
-				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
-					    __bpf_call_base;
+				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
 				continue;
 			case BPF_FUNC_redirect_map:
-				insn->imm = BPF_CAST_CALL(ops->map_redirect) -
-					    __bpf_call_base;
+				insn->imm = BPF_CALL_IMM(ops->map_redirect);
 				continue;
 			}
 
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 08f438e6fe9e..21ea1ab253a1 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -12439,7 +12439,7 @@ static __init int prepare_tail_call_tests(struct bpf_array **pprogs)
 					err = -EFAULT;
 					goto out_err;
 				}
-				*insn = BPF_EMIT_CALL(BPF_CAST_CALL(addr));
+				*insn = BPF_EMIT_CALL(addr);
 				if ((long)__bpf_call_base + insn->imm != addr)
 					*insn = BPF_JMP_A(0); /* Skip: NOP */
 				break;
-- 
cgit v1.2.3


From 102acbacfd9a96d101abd96d1a7a5bf92b7c3e8e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 28 Sep 2021 16:09:46 -0700
Subject: bpf: Replace callers of BPF_CAST_CALL with proper function typedef

In order to keep ahead of cases in the kernel where Control Flow
Integrity (CFI) may trip over function call casts, enabling
-Wcast-function-type is helpful. To that end, BPF_CAST_CALL causes
various warnings and is one of the last places in the kernel
triggering this warning.

For actual function calls, replace BPF_CAST_CALL() with a typedef, which
captures the same details about the given function pointers.

This change results in no object code difference.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://github.com/KSPP/linux/issues/20
Link: https://lore.kernel.org/lkml/CAEf4Bzb46=-J5Fxc3mMZ8JQPtK1uoE0q6+g6WPz53Cvx=CBEhw@mail.gmail.com
Link: https://lore.kernel.org/bpf/20210928230946.4062144-3-keescook@chromium.org
---
 include/linux/bpf.h    | 4 +++-
 include/linux/filter.h | 5 -----
 kernel/bpf/arraymap.c  | 7 +++----
 kernel/bpf/hashtab.c   | 7 +++----
 kernel/bpf/helpers.c   | 5 ++---
 5 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b6c45a6cbbba..19735d59230a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -48,6 +48,7 @@ extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
 extern struct kobject *btf_kobj;
 
+typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
 					struct bpf_iter_aux_info *aux);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
@@ -142,7 +143,8 @@ struct bpf_map_ops {
 	int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
 					      struct bpf_func_state *caller,
 					      struct bpf_func_state *callee);
-	int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
+	int (*map_for_each_callback)(struct bpf_map *map,
+				     bpf_callback_t callback_fn,
 				     void *callback_ctx, u64 flags);
 
 	/* BTF name and id of struct allocated by map_alloc */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6c247663d4ce..47f80adbe744 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -360,11 +360,6 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
 		.off   = 0,					\
 		.imm   = TGT })
 
-/* Function call */
-
-#define BPF_CAST_CALL(x)					\
-		((u64 (*)(u64, u64, u64, u64, u64))(x))
-
 /* Convert function address to BPF immediate */
 
 #define BPF_CALL_IMM(x)	((void *)(x) - (void *)__bpf_call_base)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index cebd4fb06d19..5e1ccfae916b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -645,7 +645,7 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_array_map_info),
 };
 
-static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
+static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_fn,
 				   void *callback_ctx, u64 flags)
 {
 	u32 i, key, num_elems = 0;
@@ -668,9 +668,8 @@ static int bpf_for_each_array_elem(struct bpf_map *map, void *callback_fn,
 			val = array->value + array->elem_size * i;
 		num_elems++;
 		key = i;
-		ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
-					(u64)(long)&key, (u64)(long)val,
-					(u64)(long)callback_ctx, 0);
+		ret = callback_fn((u64)(long)map, (u64)(long)&key,
+				  (u64)(long)val, (u64)(long)callback_ctx, 0);
 		/* return value: 0 - continue, 1 - stop and return */
 		if (ret)
 			break;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 3d8f9d6997d5..d29af9988f37 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -2049,7 +2049,7 @@ static const struct bpf_iter_seq_info iter_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_hash_map_info),
 };
 
-static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
+static int bpf_for_each_hash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
 				  void *callback_ctx, u64 flags)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
@@ -2089,9 +2089,8 @@ static int bpf_for_each_hash_elem(struct bpf_map *map, void *callback_fn,
 				val = elem->key + roundup_key_size;
 			}
 			num_elems++;
-			ret = BPF_CAST_CALL(callback_fn)((u64)(long)map,
-					(u64)(long)key, (u64)(long)val,
-					(u64)(long)callback_ctx, 0);
+			ret = callback_fn((u64)(long)map, (u64)(long)key,
+					  (u64)(long)val, (u64)(long)callback_ctx, 0);
 			/* return value: 0 - continue, 1 - stop and return */
 			if (ret) {
 				rcu_read_unlock();
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 2c604ff8c7fb..1ffd469c217f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1056,7 +1056,7 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
 	struct bpf_hrtimer *t = container_of(hrtimer, struct bpf_hrtimer, timer);
 	struct bpf_map *map = t->map;
 	void *value = t->value;
-	void *callback_fn;
+	bpf_callback_t callback_fn;
 	void *key;
 	u32 idx;
 
@@ -1081,8 +1081,7 @@ static enum hrtimer_restart bpf_timer_cb(struct hrtimer *hrtimer)
 		key = value - round_up(map->key_size, 8);
 	}
 
-	BPF_CAST_CALL(callback_fn)((u64)(long)map, (u64)(long)key,
-				   (u64)(long)value, 0, 0);
+	callback_fn((u64)(long)map, (u64)(long)key, (u64)(long)value, 0, 0);
 	/* The verifier checked that return value is zero. */
 
 	this_cpu_write(hrtimer_running, NULL);
-- 
cgit v1.2.3


From 7c2dcfa295b149a58010632c7eb7e73bd0626a7a Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Tue, 28 Sep 2021 20:45:19 +0200
Subject: net: phy: micrel: Add support for LAN8804 PHY

The LAN8804 PHY has same features as that of LAN8814 PHY except that it
doesn't support 1588, SyncE or Q-USGMII.

This PHY is found inside the LAN966X switches.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 74 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/micrel_phy.h |  1 +
 2 files changed, 75 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 5c928f827173..c330a5a9f665 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -1537,6 +1537,65 @@ static int ksz886x_cable_test_get_status(struct phy_device *phydev,
 	return ret;
 }
 
+#define LAN_EXT_PAGE_ACCESS_CONTROL			0x16
+#define LAN_EXT_PAGE_ACCESS_ADDRESS_DATA		0x17
+#define LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC		0x4000
+
+#define LAN8804_ALIGN_SWAP				0x4a
+#define LAN8804_ALIGN_TX_A_B_SWAP			0x1
+#define LAN8804_ALIGN_TX_A_B_SWAP_MASK			GENMASK(2, 0)
+#define LAN8814_CLOCK_MANAGEMENT			0xd
+#define LAN8814_LINK_QUALITY				0x8e
+
+static int lanphy_read_page_reg(struct phy_device *phydev, int page, u32 addr)
+{
+	u32 data;
+
+	phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page);
+	phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr);
+	phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL,
+		  (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC));
+	data = phy_read(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA);
+
+	return data;
+}
+
+static int lanphy_write_page_reg(struct phy_device *phydev, int page, u16 addr,
+				 u16 val)
+{
+	phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL, page);
+	phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, addr);
+	phy_write(phydev, LAN_EXT_PAGE_ACCESS_CONTROL,
+		  (page | LAN_EXT_PAGE_ACCESS_CTRL_EP_FUNC));
+
+	val = phy_write(phydev, LAN_EXT_PAGE_ACCESS_ADDRESS_DATA, val);
+	if (val) {
+		phydev_err(phydev, "Error: phy_write has returned error %d\n",
+			   val);
+		return val;
+	}
+	return 0;
+}
+
+static int lan8804_config_init(struct phy_device *phydev)
+{
+	int val;
+
+	/* MDI-X setting for swap A,B transmit */
+	val = lanphy_read_page_reg(phydev, 2, LAN8804_ALIGN_SWAP);
+	val &= ~LAN8804_ALIGN_TX_A_B_SWAP_MASK;
+	val |= LAN8804_ALIGN_TX_A_B_SWAP;
+	lanphy_write_page_reg(phydev, 2, LAN8804_ALIGN_SWAP, val);
+
+	/* Make sure that the PHY will not stop generating the clock when the
+	 * link partner goes down
+	 */
+	lanphy_write_page_reg(phydev, 31, LAN8814_CLOCK_MANAGEMENT, 0x27e);
+	lanphy_read_page_reg(phydev, 1, LAN8814_LINK_QUALITY);
+
+	return 0;
+}
+
 static struct phy_driver ksphy_driver[] = {
 {
 	.phy_id		= PHY_ID_KS8737,
@@ -1718,6 +1777,20 @@ static struct phy_driver ksphy_driver[] = {
 	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= kszphy_resume,
+}, {
+	.phy_id		= PHY_ID_LAN8804,
+	.phy_id_mask	= MICREL_PHY_ID_MASK,
+	.name		= "Microchip LAN966X Gigabit PHY",
+	.config_init	= lan8804_config_init,
+	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
+	.soft_reset	= genphy_soft_reset,
+	.read_status	= ksz9031_read_status,
+	.get_sset_count	= kszphy_get_sset_count,
+	.get_strings	= kszphy_get_strings,
+	.get_stats	= kszphy_get_stats,
+	.suspend	= genphy_suspend,
+	.resume		= kszphy_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ9131,
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
@@ -1794,6 +1867,7 @@ static struct mdio_device_id __maybe_unused micrel_tbl[] = {
 	{ PHY_ID_KSZ8873MLL, MICREL_PHY_ID_MASK },
 	{ PHY_ID_KSZ886X, MICREL_PHY_ID_MASK },
 	{ PHY_ID_LAN8814, MICREL_PHY_ID_MASK },
+	{ PHY_ID_LAN8804, MICREL_PHY_ID_MASK },
 	{ }
 };
 
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 3d43c60b49fa..1f7c33b2f5a3 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -28,6 +28,7 @@
 #define PHY_ID_KSZ9031		0x00221620
 #define PHY_ID_KSZ9131		0x00221640
 #define PHY_ID_LAN8814		0x00221660
+#define PHY_ID_LAN8804		0x00221670
 
 #define PHY_ID_KSZ886X		0x00221430
 #define PHY_ID_KSZ8863		0x00221435
-- 
cgit v1.2.3


From e81e99bacc9f9347bda7808a949c1ce9fcc2bbf4 Mon Sep 17 00:00:00 2001
From: David Stevens <stevensd@chromium.org>
Date: Wed, 29 Sep 2021 11:32:59 +0900
Subject: swiotlb: Support aligned swiotlb buffers

Add an argument to swiotlb_tbl_map_single that specifies the desired
alignment of the allocated buffer. This is used by dma-iommu to ensure
the buffer is aligned to the iova granule size when using swiotlb with
untrusted sub-granule mappings. This addresses an issue where adjacent
slots could be exposed to the untrusted device if IO_TLB_SIZE < iova
granule < PAGE_SIZE.

Signed-off-by: David Stevens <stevensd@chromium.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210929023300.335969-7-stevensd@google.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c |  4 ++--
 drivers/xen/swiotlb-xen.c |  2 +-
 include/linux/swiotlb.h   |  3 ++-
 kernel/dma/swiotlb.c      | 13 ++++++++-----
 4 files changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 85a005b268f6..289c49ead01a 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -818,8 +818,8 @@ static dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
 		size_t padding_size;
 
 		aligned_size = iova_align(iovad, size);
-		phys = swiotlb_tbl_map_single(dev, phys, size,
-					      aligned_size, dir, attrs);
+		phys = swiotlb_tbl_map_single(dev, phys, size, aligned_size,
+					      iova_mask(iovad), dir, attrs);
 
 		if (phys == DMA_MAPPING_ERROR)
 			return DMA_MAPPING_ERROR;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e56a5faac395..cbdff8979980 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -380,7 +380,7 @@ static dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page,
 	 */
 	trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
 
-	map = swiotlb_tbl_map_single(dev, phys, size, size, dir, attrs);
+	map = swiotlb_tbl_map_single(dev, phys, size, size, 0, dir, attrs);
 	if (map == (phys_addr_t)DMA_MAPPING_ERROR)
 		return DMA_MAPPING_ERROR;
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index b0cb2a9973f4..569272871375 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -45,7 +45,8 @@ extern void __init swiotlb_update_mem_attributes(void);
 
 phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
 		size_t mapping_size, size_t alloc_size,
-		enum dma_data_direction dir, unsigned long attrs);
+		unsigned int alloc_aligned_mask, enum dma_data_direction dir,
+		unsigned long attrs);
 
 extern void swiotlb_tbl_unmap_single(struct device *hwdev,
 				     phys_addr_t tlb_addr,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 87c40517e822..019672b3da1d 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -459,7 +459,7 @@ static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
  * allocate a buffer from that IO TLB pool.
  */
 static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
-			      size_t alloc_size)
+			      size_t alloc_size, unsigned int alloc_align_mask)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned long boundary_mask = dma_get_seg_boundary(dev);
@@ -483,6 +483,7 @@ static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
 	stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
 	if (alloc_size >= PAGE_SIZE)
 		stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
+	stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
 
 	spin_lock_irqsave(&mem->lock, flags);
 	if (unlikely(nslots > mem->nslabs - mem->used))
@@ -541,7 +542,8 @@ found:
 
 phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		size_t mapping_size, size_t alloc_size,
-		enum dma_data_direction dir, unsigned long attrs)
+		unsigned int alloc_align_mask, enum dma_data_direction dir,
+		unsigned long attrs)
 {
 	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
 	unsigned int offset = swiotlb_align_offset(dev, orig_addr);
@@ -561,7 +563,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 		return (phys_addr_t)DMA_MAPPING_ERROR;
 	}
 
-	index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset);
+	index = swiotlb_find_slots(dev, orig_addr,
+				   alloc_size + offset, alloc_align_mask);
 	if (index == -1) {
 		if (!(attrs & DMA_ATTR_NO_WARN))
 			dev_warn_ratelimited(dev,
@@ -675,7 +678,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
 	trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
 			      swiotlb_force);
 
-	swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, dir,
+	swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
 			attrs);
 	if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
 		return DMA_MAPPING_ERROR;
@@ -759,7 +762,7 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
 	if (!mem)
 		return NULL;
 
-	index = swiotlb_find_slots(dev, 0, size);
+	index = swiotlb_find_slots(dev, 0, size, 0);
 	if (index == -1)
 		return NULL;
 
-- 
cgit v1.2.3


From e5af50a5df571c1d0268b02f924de49b742c990f Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Thu, 23 Sep 2021 18:06:55 -0700
Subject: arm64: kasan: mte: move GCR_EL1 switch to task switch when KASAN
 disabled

It is not necessary to write to GCR_EL1 on every kernel entry and
exit when HW tag-based KASAN is disabled because the kernel will not
execute any IRG instructions in that mode. Since accessing GCR_EL1
can be expensive on some microarchitectures, avoid doing so by moving
the access to task switch when HW tag-based KASAN is disabled.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Acked-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://linux-review.googlesource.com/id/I78e90d60612a94c24344526f476ac4ff216e10d2
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210924010655.2886918-1-pcc@google.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/entry.S | 10 +++++-----
 arch/arm64/kernel/mte.c   | 26 ++++++++++++++++++++++++++
 include/linux/kasan.h     |  9 +++++++--
 3 files changed, 38 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index bc6d5a970a13..2f69ae43941d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -168,9 +168,9 @@ alternative_else_nop_endif
 
 	.macro mte_set_kernel_gcr, tmp, tmp2
 #ifdef CONFIG_KASAN_HW_TAGS
-alternative_if_not ARM64_MTE
+alternative_cb	kasan_hw_tags_enable
 	b	1f
-alternative_else_nop_endif
+alternative_cb_end
 	mov	\tmp, KERNEL_GCR_EL1
 	msr_s	SYS_GCR_EL1, \tmp
 1:
@@ -178,10 +178,10 @@ alternative_else_nop_endif
 	.endm
 
 	.macro mte_set_user_gcr, tsk, tmp, tmp2
-#ifdef CONFIG_ARM64_MTE
-alternative_if_not ARM64_MTE
+#ifdef CONFIG_KASAN_HW_TAGS
+alternative_cb	kasan_hw_tags_enable
 	b	1f
-alternative_else_nop_endif
+alternative_cb_end
 	ldr	\tmp, [\tsk, #THREAD_MTE_CTRL]
 
 	mte_set_gcr \tmp, \tmp2
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index e5e801bc5312..0cdae086966e 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -179,6 +179,30 @@ static void mte_update_sctlr_user(struct task_struct *task)
 	task->thread.sctlr_user = sctlr;
 }
 
+static void mte_update_gcr_excl(struct task_struct *task)
+{
+	/*
+	 * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by
+	 * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled.
+	 */
+	if (kasan_hw_tags_enabled())
+		return;
+
+	write_sysreg_s(
+		((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
+		 SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND,
+		SYS_GCR_EL1);
+}
+
+void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
+				 __le32 *updptr, int nr_inst)
+{
+	BUG_ON(nr_inst != 1); /* Branch -> NOP */
+
+	if (kasan_hw_tags_enabled())
+		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
+}
+
 void mte_thread_init_user(void)
 {
 	if (!system_supports_mte())
@@ -198,6 +222,7 @@ void mte_thread_switch(struct task_struct *next)
 		return;
 
 	mte_update_sctlr_user(next);
+	mte_update_gcr_excl(next);
 
 	/*
 	 * Check if an async tag exception occurred at EL1.
@@ -243,6 +268,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 	if (task == current) {
 		preempt_disable();
 		mte_update_sctlr_user(task);
+		mte_update_gcr_excl(task);
 		update_sctlr_el1(task->thread.sctlr_user);
 		preempt_enable();
 	}
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index dd874a1ee862..de5f5913374d 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -89,7 +89,7 @@ static __always_inline bool kasan_enabled(void)
 	return static_branch_likely(&kasan_flag_enabled);
 }
 
-static inline bool kasan_has_integrated_init(void)
+static inline bool kasan_hw_tags_enabled(void)
 {
 	return kasan_enabled();
 }
@@ -104,7 +104,7 @@ static inline bool kasan_enabled(void)
 	return IS_ENABLED(CONFIG_KASAN);
 }
 
-static inline bool kasan_has_integrated_init(void)
+static inline bool kasan_hw_tags_enabled(void)
 {
 	return false;
 }
@@ -125,6 +125,11 @@ static __always_inline void kasan_free_pages(struct page *page,
 
 #endif /* CONFIG_KASAN_HW_TAGS */
 
+static inline bool kasan_has_integrated_init(void)
+{
+	return kasan_hw_tags_enabled();
+}
+
 #ifdef CONFIG_KASAN
 
 struct kasan_cache {
-- 
cgit v1.2.3


From 7101c83950e629b83f9d827f288063e52074a6ea Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Wed, 29 Sep 2021 19:23:50 -0700
Subject: platform/chrome: cros_usbpd_notify: Move ec_command()

cros_ec_command() can be used by other modules too. So, move it to a
common location and export it.

This patch does not introduce any functional changes.

Signed-off-by: Prashant Malani <pmalani@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Link: https://lore.kernel.org/r/20210930022403.3358070-3-pmalani@chromium.org
---
 drivers/platform/chrome/cros_ec_proto.c     | 45 +++++++++++++++++++++++++++++
 drivers/platform/chrome/cros_usbpd_notify.c | 44 ----------------------------
 include/linux/platform_data/cros_ec_proto.h |  3 ++
 3 files changed, 48 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index a34cf58c5ef7..67009b604630 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -910,3 +910,48 @@ int cros_ec_get_sensor_count(struct cros_ec_dev *ec)
 	return sensor_count;
 }
 EXPORT_SYMBOL_GPL(cros_ec_get_sensor_count);
+
+/**
+ * cros_ec_command - Send a command to the EC.
+ *
+ * @ec_dev: EC device
+ * @command: EC command
+ * @outdata: EC command output data
+ * @outsize: Size of outdata
+ * @indata: EC command input data
+ * @insize: Size of indata
+ *
+ * Return: >= 0 on success, negative error number on failure.
+ */
+int cros_ec_command(struct cros_ec_device *ec_dev,
+		    int command,
+		    uint8_t *outdata,
+		    int outsize,
+		    uint8_t *indata,
+		    int insize)
+{
+	struct cros_ec_command *msg;
+	int ret;
+
+	msg = kzalloc(sizeof(*msg) + max(insize, outsize), GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	msg->command = command;
+	msg->outsize = outsize;
+	msg->insize = insize;
+
+	if (outsize)
+		memcpy(msg->data, outdata, outsize);
+
+	ret = cros_ec_cmd_xfer_status(ec_dev, msg);
+	if (ret < 0)
+		goto error;
+
+	if (insize)
+		memcpy(indata, msg->data, insize);
+error:
+	kfree(msg);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cros_ec_command);
diff --git a/drivers/platform/chrome/cros_usbpd_notify.c b/drivers/platform/chrome/cros_usbpd_notify.c
index e718055f4313..39afdad897ce 100644
--- a/drivers/platform/chrome/cros_usbpd_notify.c
+++ b/drivers/platform/chrome/cros_usbpd_notify.c
@@ -53,50 +53,6 @@ void cros_usbpd_unregister_notify(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(cros_usbpd_unregister_notify);
 
-/**
- * cros_ec_command - Send a command to the EC.
- *
- * @ec_dev: EC device
- * @command: EC command
- * @outdata: EC command output data
- * @outsize: Size of outdata
- * @indata: EC command input data
- * @insize: Size of indata
- *
- * Return: >= 0 on success, negative error number on failure.
- */
-static int cros_ec_command(struct cros_ec_device *ec_dev,
-			   int command,
-			   uint8_t *outdata,
-			   int outsize,
-			   uint8_t *indata,
-			   int insize)
-{
-	struct cros_ec_command *msg;
-	int ret;
-
-	msg = kzalloc(sizeof(*msg) + max(insize, outsize), GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
-
-	msg->command = command;
-	msg->outsize = outsize;
-	msg->insize = insize;
-
-	if (outsize)
-		memcpy(msg->data, outdata, outsize);
-
-	ret = cros_ec_cmd_xfer_status(ec_dev, msg);
-	if (ret < 0)
-		goto error;
-
-	if (insize)
-		memcpy(indata, msg->data, insize);
-error:
-	kfree(msg);
-	return ret;
-}
-
 static void cros_usbpd_get_event_and_notify(struct device  *dev,
 					    struct cros_ec_device *ec_dev)
 {
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 55844ece0b32..20b17c43caeb 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -231,6 +231,9 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature);
 
 int cros_ec_get_sensor_count(struct cros_ec_dev *ec);
 
+int cros_ec_command(struct cros_ec_device *ec_dev, int command, uint8_t *outdata, int outsize,
+		    uint8_t *indata, int insize);
+
 /**
  * cros_ec_get_time_ns() - Return time in ns.
  *
-- 
cgit v1.2.3


From 5d122256f4e5900f7f8de5d8787af570314f6701 Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Wed, 29 Sep 2021 19:23:52 -0700
Subject: platform/chrome: cros_ec_proto: Make data pointers void

Convert the input and output data pointers for cros_ec_command() to
void pointers so that the callers don't have to cast their custom
structs to uint8_t *.

Signed-off-by: Prashant Malani <pmalani@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Link: https://lore.kernel.org/r/20210930022403.3358070-4-pmalani@chromium.org
---
 drivers/platform/chrome/cros_ec_proto.c     | 4 ++--
 drivers/platform/chrome/cros_usbpd_notify.c | 2 +-
 include/linux/platform_data/cros_ec_proto.h | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index 67009b604630..fd114b57bca2 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -925,9 +925,9 @@ EXPORT_SYMBOL_GPL(cros_ec_get_sensor_count);
  */
 int cros_ec_command(struct cros_ec_device *ec_dev,
 		    int command,
-		    uint8_t *outdata,
+		    void *outdata,
 		    int outsize,
-		    uint8_t *indata,
+		    void *indata,
 		    int insize)
 {
 	struct cros_ec_command *msg;
diff --git a/drivers/platform/chrome/cros_usbpd_notify.c b/drivers/platform/chrome/cros_usbpd_notify.c
index 39afdad897ce..860509474f05 100644
--- a/drivers/platform/chrome/cros_usbpd_notify.c
+++ b/drivers/platform/chrome/cros_usbpd_notify.c
@@ -72,7 +72,7 @@ static void cros_usbpd_get_event_and_notify(struct device  *dev,
 
 	/* Check for PD host events on EC. */
 	ret = cros_ec_command(ec_dev, EC_CMD_PD_HOST_EVENT_STATUS,
-			      NULL, 0, (uint8_t *)&host_event_status, sizeof(host_event_status));
+			      NULL, 0, &host_event_status, sizeof(host_event_status));
 	if (ret < 0) {
 		dev_warn(dev, "Can't get host event status (err: %d)\n", ret);
 		goto send_notify;
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 20b17c43caeb..f833473c5f44 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -231,8 +231,8 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature);
 
 int cros_ec_get_sensor_count(struct cros_ec_dev *ec);
 
-int cros_ec_command(struct cros_ec_device *ec_dev, int command, uint8_t *outdata, int outsize,
-		    uint8_t *indata, int insize);
+int cros_ec_command(struct cros_ec_device *ec_dev, int command, void *outdata, int outsize,
+		    void *indata, int insize);
 
 /**
  * cros_ec_get_time_ns() - Return time in ns.
-- 
cgit v1.2.3


From 4f1406396ed4d97518b8112327bdaf14fc9d4090 Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Wed, 29 Sep 2021 19:23:54 -0700
Subject: platform/chrome: cros_ec_proto: Add version for ec_command

Add a version parameter to cros_ec_command() for callers that may want
to specify which version of the host command they would like to use.

Signed-off-by: Prashant Malani <pmalani@chromium.org>
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Link: https://lore.kernel.org/r/20210930022403.3358070-5-pmalani@chromium.org
---
 drivers/platform/chrome/cros_ec_proto.c     | 3 +++
 drivers/platform/chrome/cros_usbpd_notify.c | 2 +-
 include/linux/platform_data/cros_ec_proto.h | 4 ++--
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index fd114b57bca2..a9f1867e5d8f 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -915,6 +915,7 @@ EXPORT_SYMBOL_GPL(cros_ec_get_sensor_count);
  * cros_ec_command - Send a command to the EC.
  *
  * @ec_dev: EC device
+ * @version: EC command version
  * @command: EC command
  * @outdata: EC command output data
  * @outsize: Size of outdata
@@ -924,6 +925,7 @@ EXPORT_SYMBOL_GPL(cros_ec_get_sensor_count);
  * Return: >= 0 on success, negative error number on failure.
  */
 int cros_ec_command(struct cros_ec_device *ec_dev,
+		    unsigned int version,
 		    int command,
 		    void *outdata,
 		    int outsize,
@@ -937,6 +939,7 @@ int cros_ec_command(struct cros_ec_device *ec_dev,
 	if (!msg)
 		return -ENOMEM;
 
+	msg->version = version;
 	msg->command = command;
 	msg->outsize = outsize;
 	msg->insize = insize;
diff --git a/drivers/platform/chrome/cros_usbpd_notify.c b/drivers/platform/chrome/cros_usbpd_notify.c
index 860509474f05..91ce6be91aac 100644
--- a/drivers/platform/chrome/cros_usbpd_notify.c
+++ b/drivers/platform/chrome/cros_usbpd_notify.c
@@ -71,7 +71,7 @@ static void cros_usbpd_get_event_and_notify(struct device  *dev,
 	}
 
 	/* Check for PD host events on EC. */
-	ret = cros_ec_command(ec_dev, EC_CMD_PD_HOST_EVENT_STATUS,
+	ret = cros_ec_command(ec_dev, 0, EC_CMD_PD_HOST_EVENT_STATUS,
 			      NULL, 0, &host_event_status, sizeof(host_event_status));
 	if (ret < 0) {
 		dev_warn(dev, "Can't get host event status (err: %d)\n", ret);
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index f833473c5f44..9d370816a419 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -231,8 +231,8 @@ bool cros_ec_check_features(struct cros_ec_dev *ec, int feature);
 
 int cros_ec_get_sensor_count(struct cros_ec_dev *ec);
 
-int cros_ec_command(struct cros_ec_device *ec_dev, int command, void *outdata, int outsize,
-		    void *indata, int insize);
+int cros_ec_command(struct cros_ec_device *ec_dev, unsigned int version, int command, void *outdata,
+		    int outsize, void *indata, int insize);
 
 /**
  * cros_ec_get_time_ns() - Return time in ns.
-- 
cgit v1.2.3


From 381cecc5d7b777ada7cdf12f5b0bf4caf43bf7aa Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 3 Sep 2021 09:51:38 +0200
Subject: KVM: Drop 'except' parameter from kvm_make_vcpus_request_mask()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both remaining callers of kvm_make_vcpus_request_mask() pass 'NULL' for
'except' parameter so it can just be dropped.

No functional change intended ©.

Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210903075141.403071-6-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c    | 2 +-
 arch/x86/kvm/x86.c       | 2 +-
 include/linux/kvm_host.h | 1 -
 virt/kvm/kvm_main.c      | 3 +--
 4 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 192062f65c97..b4b432a164ae 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1849,7 +1849,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 						    vp_bitmap, vcpu_bitmap);
 
 		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
-					    NULL, vcpu_mask, &hv_vcpu->tlb_flush);
+					    vcpu_mask, &hv_vcpu->tlb_flush);
 	}
 
 ret_success:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index aabd3a2ec1bc..7212ba654ba2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9247,7 +9247,7 @@ void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
 	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
 
 	kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
-				    NULL, vcpu_bitmap, cpus);
+				    vcpu_bitmap, cpus);
 
 	free_cpumask_var(cpus);
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0f18df7fe874..89e1a0069833 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -160,7 +160,6 @@ static inline bool is_error_page(struct page *page)
 #define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
 
 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
-				 struct kvm_vcpu *except,
 				 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 61101e7932f7..bcadbc0a70f2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -288,7 +288,6 @@ static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
 }
 
 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
-				 struct kvm_vcpu *except,
 				 unsigned long *vcpu_bitmap, cpumask_var_t tmp)
 {
 	struct kvm_vcpu *vcpu;
@@ -299,7 +298,7 @@ bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
 
 	for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
 		vcpu = kvm_get_vcpu(kvm, i);
-		if (!vcpu || vcpu == except)
+		if (!vcpu)
 			continue;
 		kvm_make_vcpu_request(kvm, vcpu, req, tmp, me);
 	}
-- 
cgit v1.2.3


From 620b2438abf98f09e19802cbc3bc2e98179cdbe2 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Fri, 3 Sep 2021 09:51:41 +0200
Subject: KVM: Make kvm_make_vcpus_request_mask() use pre-allocated
 cpu_kick_mask

kvm_make_vcpus_request_mask() already disables preemption so just like
kvm_make_all_cpus_request_except() it can be switched to using
pre-allocated per-cpu cpumasks. This allows for improvements for both
users of the function: in Hyper-V emulation code 'tlb_flush' can now be
dropped from 'struct kvm_vcpu_hv' and kvm_make_scan_ioapic_request_mask()
gets rid of dynamic allocation.

cpumask_available() checks in kvm_make_vcpu_request() and
kvm_kick_many_cpus() can now be dropped as they checks for an impossible
condition: kvm_init() makes sure per-cpu masks are allocated.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210903075141.403071-9-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 -
 arch/x86/kvm/hyperv.c           |  5 +----
 arch/x86/kvm/x86.c              |  9 +--------
 include/linux/kvm_host.h        |  2 +-
 virt/kvm/kvm_main.c             | 29 +++++++++--------------------
 5 files changed, 12 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8f48a7ec577..120ac07e4094 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -581,7 +581,6 @@ struct kvm_vcpu_hv {
 	struct kvm_hyperv_exit exit;
 	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
 	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
-	cpumask_t tlb_flush;
 	bool enforce_cpuid;
 	struct {
 		u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index b4b432a164ae..6f11cda2bfa4 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1754,7 +1754,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 	int i;
 	gpa_t gpa;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
 	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
@@ -1836,8 +1835,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 		}
 	}
 
-	cpumask_clear(&hv_vcpu->tlb_flush);
-
 	/*
 	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
 	 * analyze it here, flush TLB regardless of the specified address space.
@@ -1849,7 +1846,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool
 						    vp_bitmap, vcpu_bitmap);
 
 		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
-					    vcpu_mask, &hv_vcpu->tlb_flush);
+					    vcpu_mask);
 	}
 
 ret_success:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7212ba654ba2..03568cbbe8bd 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9242,14 +9242,7 @@ static void process_smi(struct kvm_vcpu *vcpu)
 void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
 				       unsigned long *vcpu_bitmap)
 {
-	cpumask_var_t cpus;
-
-	zalloc_cpumask_var(&cpus, GFP_ATOMIC);
-
-	kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC,
-				    vcpu_bitmap, cpus);
-
-	free_cpumask_var(cpus);
+	kvm_make_vcpus_request_mask(kvm, KVM_REQ_SCAN_IOAPIC, vcpu_bitmap);
 }
 
 void kvm_make_scan_ioapic_request(struct kvm *kvm)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 89e1a0069833..f1b96a2ebaa7 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -160,7 +160,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_ARCH_REQ(nr)           KVM_ARCH_REQ_FLAGS(nr, 0)
 
 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
-				 unsigned long *vcpu_bitmap, cpumask_var_t tmp);
+				 unsigned long *vcpu_bitmap);
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
 bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
 				      struct kvm_vcpu *except);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9cb0fd1723e6..18d245fe2118 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -237,15 +237,8 @@ static void ack_flush(void *_completed)
 {
 }
 
-static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
+static inline bool kvm_kick_many_cpus(struct cpumask *cpus, bool wait)
 {
-	const struct cpumask *cpus;
-
-	if (likely(cpumask_available(tmp)))
-		cpus = tmp;
-	else
-		cpus = cpu_online_mask;
-
 	if (cpumask_empty(cpus))
 		return false;
 
@@ -254,7 +247,7 @@ static inline bool kvm_kick_many_cpus(cpumask_var_t tmp, bool wait)
 }
 
 static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
-				  unsigned int req, cpumask_var_t tmp,
+				  unsigned int req, struct cpumask *tmp,
 				  int current_cpu)
 {
 	int cpu;
@@ -264,14 +257,6 @@ static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
 	if (!(req & KVM_REQUEST_NO_WAKEUP) && kvm_vcpu_wake_up(vcpu))
 		return;
 
-	/*
-	 * tmp can be "unavailable" if cpumasks are allocated off stack as
-	 * allocation of the mask is deliberately not fatal and is handled by
-	 * falling back to kicking all online CPUs.
-	 */
-	if (!cpumask_available(tmp))
-		return;
-
 	/*
 	 * Note, the vCPU could get migrated to a different pCPU at any point
 	 * after kvm_request_needs_ipi(), which could result in sending an IPI
@@ -290,22 +275,26 @@ static void kvm_make_vcpu_request(struct kvm *kvm, struct kvm_vcpu *vcpu,
 }
 
 bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
-				 unsigned long *vcpu_bitmap, cpumask_var_t tmp)
+				 unsigned long *vcpu_bitmap)
 {
 	struct kvm_vcpu *vcpu;
+	struct cpumask *cpus;
 	int i, me;
 	bool called;
 
 	me = get_cpu();
 
+	cpus = this_cpu_cpumask_var_ptr(cpu_kick_mask);
+	cpumask_clear(cpus);
+
 	for_each_set_bit(i, vcpu_bitmap, KVM_MAX_VCPUS) {
 		vcpu = kvm_get_vcpu(kvm, i);
 		if (!vcpu)
 			continue;
-		kvm_make_vcpu_request(kvm, vcpu, req, tmp, me);
+		kvm_make_vcpu_request(kvm, vcpu, req, cpus, me);
 	}
 
-	called = kvm_kick_many_cpus(tmp, !!(req & KVM_REQUEST_WAIT));
+	called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
 	put_cpu();
 
 	return called;
-- 
cgit v1.2.3


From a1c42ddedf35dbf5f25ea0982ed6e226eef7a78c Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Mon, 13 Sep 2021 15:57:44 +0200
Subject: kvm: rename KVM_MAX_VCPU_ID to KVM_MAX_VCPU_IDS

KVM_MAX_VCPU_ID is not specifying the highest allowed vcpu-id, but the
number of allowed vcpu-ids. This has already led to confusion, so
rename KVM_MAX_VCPU_ID to KVM_MAX_VCPU_IDS to make its semantics more
clear

Suggested-by: Eduardo Habkost <ehabkost@redhat.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20210913135745.13944-3-jgross@suse.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/devices/xics.rst            | 2 +-
 Documentation/virt/kvm/devices/xive.rst            | 2 +-
 arch/mips/kvm/mips.c                               | 2 +-
 arch/powerpc/include/asm/kvm_book3s.h              | 2 +-
 arch/powerpc/include/asm/kvm_host.h                | 4 ++--
 arch/powerpc/kvm/book3s_xive.c                     | 2 +-
 arch/powerpc/kvm/powerpc.c                         | 2 +-
 arch/x86/include/asm/kvm_host.h                    | 2 +-
 arch/x86/kvm/ioapic.c                              | 2 +-
 arch/x86/kvm/ioapic.h                              | 4 ++--
 arch/x86/kvm/x86.c                                 | 2 +-
 include/linux/kvm_host.h                           | 4 ++--
 tools/testing/selftests/kvm/kvm_create_max_vcpus.c | 2 +-
 virt/kvm/kvm_main.c                                | 2 +-
 14 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/virt/kvm/devices/xics.rst b/Documentation/virt/kvm/devices/xics.rst
index 2d6927e0b776..bf32c77174ab 100644
--- a/Documentation/virt/kvm/devices/xics.rst
+++ b/Documentation/virt/kvm/devices/xics.rst
@@ -22,7 +22,7 @@ Groups:
   Errors:
 
     =======  ==========================================
-    -EINVAL  Value greater than KVM_MAX_VCPU_ID.
+    -EINVAL  Value greater than KVM_MAX_VCPU_IDS.
     -EFAULT  Invalid user pointer for attr->addr.
     -EBUSY   A vcpu is already connected to the device.
     =======  ==========================================
diff --git a/Documentation/virt/kvm/devices/xive.rst b/Documentation/virt/kvm/devices/xive.rst
index 8bdf3dc38f01..8b5e7b40bdf8 100644
--- a/Documentation/virt/kvm/devices/xive.rst
+++ b/Documentation/virt/kvm/devices/xive.rst
@@ -91,7 +91,7 @@ the legacy interrupt mode, referred as XICS (POWER7/8).
     Errors:
 
       =======  ==========================================
-      -EINVAL  Value greater than KVM_MAX_VCPU_ID.
+      -EINVAL  Value greater than KVM_MAX_VCPU_IDS.
       -EFAULT  Invalid user pointer for attr->addr.
       -EBUSY   A vCPU is already connected to the device.
       =======  ==========================================
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 75c6f264c626..562aa878b266 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1073,7 +1073,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
-		r = KVM_MAX_VCPU_ID;
+		r = KVM_MAX_VCPU_IDS;
 		break;
 	case KVM_CAP_MIPS_FPU:
 		/* We don't handle systems with inconsistent cpu_has_fpu */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index caaa0f592d8e..3d31f2c59e43 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -434,7 +434,7 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
 #define SPLIT_HACK_OFFS			0xfb000000
 
 /*
- * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
+ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_IDS) space down to the
  * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
  * (but not its actual threading mode, which is not available) to avoid
  * collisions.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 080a7feb7731..59cb38b04ede 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -33,11 +33,11 @@
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #include <asm/kvm_book3s_asm.h>		/* for MAX_SMT_THREADS */
-#define KVM_MAX_VCPU_ID		(MAX_SMT_THREADS * KVM_MAX_VCORES)
+#define KVM_MAX_VCPU_IDS	(MAX_SMT_THREADS * KVM_MAX_VCORES)
 #define KVM_MAX_NESTED_GUESTS	KVMPPC_NR_LPIDS
 
 #else
-#define KVM_MAX_VCPU_ID		KVM_MAX_VCPUS
+#define KVM_MAX_VCPU_IDS	KVM_MAX_VCPUS
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index a18db9e16ea4..225008882958 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1928,7 +1928,7 @@ int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
 
 	pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
 
-	if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
+	if (!nr_servers || nr_servers > KVM_MAX_VCPU_IDS)
 		return -EINVAL;
 
 	mutex_lock(&xive->lock);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b4e6f70b97b9..8ab90ce8738f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -649,7 +649,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
-		r = KVM_MAX_VCPU_ID;
+		r = KVM_MAX_VCPU_IDS;
 		break;
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_PPC_GET_SMMU_INFO:
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 120ac07e4094..09c18e54e0a1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -50,7 +50,7 @@
  * so ratio of 4 should be enough.
  */
 #define KVM_VCPU_ID_RATIO 4
-#define KVM_MAX_VCPU_ID (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
+#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
 
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 4e0f52660842..816a82515dcd 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
 static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
 {
 	ioapic->rtc_status.pending_eoi = 0;
-	bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID);
+	bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_IDS);
 }
 
 static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 27e61ff3ac3e..e66e620c3bed 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -39,13 +39,13 @@ struct kvm_vcpu;
 
 struct dest_map {
 	/* vcpu bitmap where IRQ has been sent */
-	DECLARE_BITMAP(map, KVM_MAX_VCPU_ID);
+	DECLARE_BITMAP(map, KVM_MAX_VCPU_IDS);
 
 	/*
 	 * Vector sent to a given vcpu, only valid when
 	 * the vcpu's bit in map is set
 	 */
-	u8 vectors[KVM_MAX_VCPU_ID];
+	u8 vectors[KVM_MAX_VCPU_IDS];
 };
 
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 03568cbbe8bd..6ad2f55c78a5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4077,7 +4077,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		r = KVM_MAX_VCPUS;
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
-		r = KVM_MAX_VCPU_ID;
+		r = KVM_MAX_VCPU_IDS;
 		break;
 	case KVM_CAP_PV_MMU:	/* obsolete */
 		r = 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f1b96a2ebaa7..1f9e80ce4723 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -39,8 +39,8 @@
 #include <asm/kvm_host.h>
 #include <linux/kvm_dirty_ring.h>
 
-#ifndef KVM_MAX_VCPU_ID
-#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
+#ifndef KVM_MAX_VCPU_IDS
+#define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS
 #endif
 
 /*
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 0299cd81b8ba..f968dfd4ee88 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -53,7 +53,7 @@ int main(int argc, char *argv[])
 		kvm_max_vcpu_id = kvm_max_vcpus;
 
 	TEST_ASSERT(kvm_max_vcpu_id >= kvm_max_vcpus,
-		    "KVM_MAX_VCPU_ID (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
+		    "KVM_MAX_VCPU_IDS (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
 		    kvm_max_vcpu_id, kvm_max_vcpus);
 
 	test_vcpu_creation(0, kvm_max_vcpus);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 18d245fe2118..3f6d450355f0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3595,7 +3595,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
 	struct kvm_vcpu *vcpu;
 	struct page *page;
 
-	if (id >= KVM_MAX_VCPU_ID)
+	if (id >= KVM_MAX_VCPU_IDS)
 		return -EINVAL;
 
 	mutex_lock(&kvm->lock);
-- 
cgit v1.2.3


From 515a0c79e7963cc4556ca61516cc09d39e592712 Mon Sep 17 00:00:00 2001
From: "Longpeng(Mike)" <longpeng2@huawei.com>
Date: Fri, 27 Aug 2021 16:00:03 +0800
Subject: kvm: irqfd: avoid update unmodified entries of the routing

All of the irqfds would to be updated when update the irq
routing, it's too expensive if there're too many irqfds.

However we can reduce the cost by avoid some unnecessary
updates. For irqs of MSI type on X86, the update can be
saved if the msi values are not change.

The vfio migration could receives benefit from this optimi-
zaiton. The test VM has 128 vcpus and 8 VF (with 65 vectors
enabled), so the VM has more than 520 irqfds. We mesure the
cost of the vfio_msix_enable (in QEMU, it would set routing
for each irqfd) for each VF, and we can see the total cost
can be significantly reduced.

                Origin         Apply this Patch
1st             8              4
2nd             15             5
3rd             22             6
4th             24             6
5th             36             7
6th             44             7
7th             51             8
8th             58             8
Total           258ms          51ms

We're also tring to optimize the QEMU part [1], but it's still
worth to optimize the KVM to gain more benefits.

[1] https://lists.gnu.org/archive/html/qemu-devel/2021-08/msg04215.html

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Message-Id: <20210827080003.2689-1-longpeng2@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c       |  9 +++++++++
 include/linux/kvm_host.h |  2 ++
 virt/kvm/eventfd.c       | 15 ++++++++++++++-
 3 files changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 25bfc12c0d08..ee1b9e168d46 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12062,6 +12062,15 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
 	return static_call(kvm_x86_update_pi_irte)(kvm, host_irq, guest_irq, set);
 }
 
+bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *old,
+				  struct kvm_kernel_irq_routing_entry *new)
+{
+	if (new->type != KVM_IRQ_ROUTING_MSI)
+		return true;
+
+	return !!memcmp(&old->msi, &new->msi, sizeof(new->msi));
+}
+
 bool kvm_vector_hashing_enabled(void)
 {
 	return vector_hashing;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1f9e80ce4723..3f87d6ad20bf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1764,6 +1764,8 @@ void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *);
 void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *);
 int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
 				  uint32_t guest_irq, bool set);
+bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *,
+				  struct kvm_kernel_irq_routing_entry *);
 #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */
 
 #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index e996989cd580..2ad013b8bde9 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -281,6 +281,13 @@ int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
 {
 	return 0;
 }
+
+bool __attribute__((weak)) kvm_arch_irqfd_route_changed(
+				struct kvm_kernel_irq_routing_entry *old,
+				struct kvm_kernel_irq_routing_entry *new)
+{
+	return true;
+}
 #endif
 
 static int
@@ -615,10 +622,16 @@ void kvm_irq_routing_update(struct kvm *kvm)
 	spin_lock_irq(&kvm->irqfds.lock);
 
 	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+		/* Under irqfds.lock, so can read irq_entry safely */
+		struct kvm_kernel_irq_routing_entry old = irqfd->irq_entry;
+#endif
+
 		irqfd_update(kvm, irqfd);
 
 #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
-		if (irqfd->producer) {
+		if (irqfd->producer &&
+		    kvm_arch_irqfd_route_changed(&old, &irqfd->irq_entry)) {
 			int ret = kvm_arch_update_irqfd_routing(
 					irqfd->kvm, irqfd->producer->irq,
 					irqfd->gsi, 1);
-- 
cgit v1.2.3


From bcf9033e5449bdcaa9bed46467a7141a8049dadb Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 14 Sep 2021 14:10:33 +0200
Subject: sched: move CPU field back into thread_info if THREAD_INFO_IN_TASK=y

THREAD_INFO_IN_TASK moved the CPU field out of thread_info, but this
causes some issues on architectures that define raw_smp_processor_id()
in terms of this field, due to the fact that #include'ing linux/sched.h
to get at struct task_struct is problematic in terms of circular
dependencies.

Given that thread_info and task_struct are the same data structure
anyway when THREAD_INFO_IN_TASK=y, let's move it back so that having
access to the type definition of struct thread_info is sufficient to
reference the CPU number of the current task.

Note that this requires THREAD_INFO_IN_TASK's definition of the
task_thread_info() helper to be updated, as task_cpu() takes a
pointer-to-const, whereas task_thread_info() (which is used to generate
lvalues as well), needs a non-const pointer. So make it a macro instead.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/arm64/kernel/asm-offsets.c   |  1 -
 arch/arm64/kernel/head.S          |  2 +-
 arch/powerpc/kernel/asm-offsets.c |  2 +-
 arch/powerpc/kernel/smp.c         |  2 +-
 include/linux/sched.h             | 13 +------------
 kernel/sched/sched.h              |  4 ----
 6 files changed, 4 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index cee9f3e9f906..0bfc048221af 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -27,7 +27,6 @@
 int main(void)
 {
   DEFINE(TSK_ACTIVE_MM,		offsetof(struct task_struct, active_mm));
-  DEFINE(TSK_CPU,		offsetof(struct task_struct, cpu));
   BLANK();
   DEFINE(TSK_TI_CPU,		offsetof(struct task_struct, thread_info.cpu));
   DEFINE(TSK_TI_FLAGS,		offsetof(struct task_struct, thread_info.flags));
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 17962452e31d..6a98f1a38c29 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -412,7 +412,7 @@ SYM_FUNC_END(__create_page_tables)
 	scs_load \tsk
 
 	adr_l	\tmp1, __per_cpu_offset
-	ldr	w\tmp2, [\tsk, #TSK_CPU]
+	ldr	w\tmp2, [\tsk, #TSK_TI_CPU]
 	ldr	\tmp1, [\tmp1, \tmp2, lsl #3]
 	set_this_cpu_offset \tmp1
 	.endm
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index e563d3222d69..e37e4546034e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -93,7 +93,7 @@ int main(void)
 #endif /* CONFIG_PPC64 */
 	OFFSET(TASK_STACK, task_struct, stack);
 #ifdef CONFIG_SMP
-	OFFSET(TASK_CPU, task_struct, cpu);
+	OFFSET(TASK_CPU, task_struct, thread_info.cpu);
 #endif
 
 #ifdef CONFIG_LIVEPATCH
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9cc7d3dbf439..512d875b45e0 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1223,7 +1223,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
 	paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
 				 THREAD_SIZE - STACK_FRAME_OVERHEAD;
 #endif
-	idle->cpu = cpu;
+	task_thread_info(idle)->cpu = cpu;
 	secondary_current = current_set[cpu] = idle;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 39039ce8ac4c..8699594e3f99 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -750,10 +750,6 @@ struct task_struct {
 #ifdef CONFIG_SMP
 	int				on_cpu;
 	struct __call_single_node	wake_entry;
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-	/* Current CPU: */
-	unsigned int			cpu;
-#endif
 	unsigned int			wakee_flips;
 	unsigned long			wakee_flip_decay_ts;
 	struct task_struct		*last_wakee;
@@ -1886,10 +1882,7 @@ extern struct thread_info init_thread_info;
 extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
-static inline struct thread_info *task_thread_info(struct task_struct *task)
-{
-	return &task->thread_info;
-}
+# define task_thread_info(task)	(&(task)->thread_info)
 #elif !defined(__HAVE_THREAD_FUNCTIONS)
 # define task_thread_info(task)	((struct thread_info *)(task)->stack)
 #endif
@@ -2114,11 +2107,7 @@ static __always_inline bool need_resched(void)
 
 static inline unsigned int task_cpu(const struct task_struct *p)
 {
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-	return READ_ONCE(p->cpu);
-#else
 	return READ_ONCE(task_thread_info(p)->cpu);
-#endif
 }
 
 extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3d3e5793e117..79fcbad11450 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1926,11 +1926,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 	 * per-task data have been completed by this moment.
 	 */
 	smp_wmb();
-#ifdef CONFIG_THREAD_INFO_IN_TASK
-	WRITE_ONCE(p->cpu, cpu);
-#else
 	WRITE_ONCE(task_thread_info(p)->cpu, cpu);
-#endif
 	p->wake_cpu = cpu;
 #endif
 }
-- 
cgit v1.2.3


From 38a68934aa72459217986cf6b461d87f7602648a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 24 Sep 2021 17:56:51 +0200
Subject: vfio: Move vfio_iommu_group_get() to vfio_register_group_dev()

We don't need to hold a reference to the group in the driver as well as
obtain a reference to the same group as the first thing
vfio_register_group_dev() does.

Since the drivers never use the group move this all into the core code.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-2-hch@lst.de
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/fsl-mc/vfio_fsl_mc.c            | 17 ++-----------
 drivers/vfio/pci/vfio_pci_core.c             | 13 ++--------
 drivers/vfio/platform/vfio_platform_common.c | 13 +---------
 drivers/vfio/vfio.c                          | 36 +++++++++++-----------------
 include/linux/vfio.h                         |  3 ---
 5 files changed, 19 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 0ead91bfa838..9e838fed5603 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -505,22 +505,13 @@ static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev)
 
 static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
 {
-	struct iommu_group *group;
 	struct vfio_fsl_mc_device *vdev;
 	struct device *dev = &mc_dev->dev;
 	int ret;
 
-	group = vfio_iommu_group_get(dev);
-	if (!group) {
-		dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n");
-		return -EINVAL;
-	}
-
 	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
-	if (!vdev) {
-		ret = -ENOMEM;
-		goto out_group_put;
-	}
+	if (!vdev)
+		return -ENOMEM;
 
 	vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops);
 	vdev->mc_dev = mc_dev;
@@ -556,8 +547,6 @@ out_device:
 out_uninit:
 	vfio_uninit_group_dev(&vdev->vdev);
 	kfree(vdev);
-out_group_put:
-	vfio_iommu_group_put(group, dev);
 	return ret;
 }
 
@@ -574,8 +563,6 @@ static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
 
 	vfio_uninit_group_dev(&vdev->vdev);
 	kfree(vdev);
-	vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
-
 	return 0;
 }
 
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 68198e0f2a63..43bab0ca3e68 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1806,7 +1806,6 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device);
 int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
 {
 	struct pci_dev *pdev = vdev->pdev;
-	struct iommu_group *group;
 	int ret;
 
 	if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
@@ -1825,10 +1824,6 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
 		return -EBUSY;
 	}
 
-	group = vfio_iommu_group_get(&pdev->dev);
-	if (!group)
-		return -EINVAL;
-
 	if (pci_is_root_bus(pdev->bus)) {
 		ret = vfio_assign_device_set(&vdev->vdev, vdev);
 	} else if (!pci_probe_reset_slot(pdev->slot)) {
@@ -1842,10 +1837,10 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
 	}
 
 	if (ret)
-		goto out_group_put;
+		return ret;
 	ret = vfio_pci_vf_init(vdev);
 	if (ret)
-		goto out_group_put;
+		return ret;
 	ret = vfio_pci_vga_init(vdev);
 	if (ret)
 		goto out_vf;
@@ -1876,8 +1871,6 @@ out_power:
 		vfio_pci_set_power_state(vdev, PCI_D0);
 out_vf:
 	vfio_pci_vf_uninit(vdev);
-out_group_put:
-	vfio_iommu_group_put(group, &pdev->dev);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_register_device);
@@ -1893,8 +1886,6 @@ void vfio_pci_core_unregister_device(struct vfio_pci_core_device *vdev)
 	vfio_pci_vf_uninit(vdev);
 	vfio_pci_vga_uninit(vdev);
 
-	vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
-
 	if (!disable_idle_d3)
 		vfio_pci_set_power_state(vdev, PCI_D0);
 }
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 6af7ce7d619c..256f55b84e70 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -642,7 +642,6 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev,
 int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 			       struct device *dev)
 {
-	struct iommu_group *group;
 	int ret;
 
 	vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops);
@@ -663,24 +662,15 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev,
 		goto out_uninit;
 	}
 
-	group = vfio_iommu_group_get(dev);
-	if (!group) {
-		dev_err(dev, "No IOMMU group for device %s\n", vdev->name);
-		ret = -EINVAL;
-		goto put_reset;
-	}
-
 	ret = vfio_register_group_dev(&vdev->vdev);
 	if (ret)
-		goto put_iommu;
+		goto put_reset;
 
 	mutex_init(&vdev->igate);
 
 	pm_runtime_enable(dev);
 	return 0;
 
-put_iommu:
-	vfio_iommu_group_put(group, dev);
 put_reset:
 	vfio_platform_put_reset(vdev);
 out_uninit:
@@ -696,7 +686,6 @@ void vfio_platform_remove_common(struct vfio_platform_device *vdev)
 	pm_runtime_disable(vdev->device);
 	vfio_platform_put_reset(vdev);
 	vfio_uninit_group_dev(&vdev->vdev);
-	vfio_iommu_group_put(vdev->vdev.dev->iommu_group, vdev->vdev.dev);
 }
 EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
 
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 3c034fe14ccb..b483b61b7c22 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -169,15 +169,7 @@ static void vfio_release_device_set(struct vfio_device *device)
 	xa_unlock(&vfio_device_set_xa);
 }
 
-/*
- * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
- * and remove functions, any use cases other than acquiring the first
- * reference for the purpose of calling vfio_register_group_dev() or removing
- * that symmetric reference after vfio_unregister_group_dev() should use the raw
- * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
- * removes the device from the dummy group and cannot be nested.
- */
-struct iommu_group *vfio_iommu_group_get(struct device *dev)
+static struct iommu_group *vfio_iommu_group_get(struct device *dev)
 {
 	struct iommu_group *group;
 	int __maybe_unused ret;
@@ -220,18 +212,6 @@ struct iommu_group *vfio_iommu_group_get(struct device *dev)
 
 	return group;
 }
-EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
-
-void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
-{
-#ifdef CONFIG_VFIO_NOIOMMU
-	if (iommu_group_get_iommudata(group) == &noiommu)
-		iommu_group_remove_device(dev);
-#endif
-
-	iommu_group_put(group);
-}
-EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
 
 #ifdef CONFIG_VFIO_NOIOMMU
 static void *vfio_noiommu_open(unsigned long arg)
@@ -841,7 +821,7 @@ int vfio_register_group_dev(struct vfio_device *device)
 	if (!device->dev_set)
 		vfio_assign_device_set(device, device);
 
-	iommu_group = iommu_group_get(device->dev);
+	iommu_group = vfio_iommu_group_get(device->dev);
 	if (!iommu_group)
 		return -EINVAL;
 
@@ -849,6 +829,10 @@ int vfio_register_group_dev(struct vfio_device *device)
 	if (!group) {
 		group = vfio_create_group(iommu_group);
 		if (IS_ERR(group)) {
+#ifdef CONFIG_VFIO_NOIOMMU
+			if (iommu_group_get_iommudata(iommu_group) == &noiommu)
+				iommu_group_remove_device(device->dev);
+#endif
 			iommu_group_put(iommu_group);
 			return PTR_ERR(group);
 		}
@@ -865,6 +849,10 @@ int vfio_register_group_dev(struct vfio_device *device)
 		dev_WARN(device->dev, "Device already exists on group %d\n",
 			 iommu_group_id(iommu_group));
 		vfio_device_put(existing_device);
+#ifdef CONFIG_VFIO_NOIOMMU
+		if (iommu_group_get_iommudata(iommu_group) == &noiommu)
+			iommu_group_remove_device(device->dev);
+#endif
 		vfio_group_put(group);
 		return -EBUSY;
 	}
@@ -1010,6 +998,10 @@ void vfio_unregister_group_dev(struct vfio_device *device)
 	if (list_empty(&group->device_list))
 		wait_event(group->container_q, !group->container);
 
+#ifdef CONFIG_VFIO_NOIOMMU
+	if (iommu_group_get_iommudata(group->iommu_group) == &noiommu)
+		iommu_group_remove_device(device->dev);
+#endif
 	/* Matches the get in vfio_register_group_dev() */
 	vfio_group_put(group);
 }
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index b53a9557884a..f7083c2fd0d0 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -71,9 +71,6 @@ struct vfio_device_ops {
 	int	(*match)(struct vfio_device *vdev, char *buf);
 };
 
-extern struct iommu_group *vfio_iommu_group_get(struct device *dev);
-extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
-
 void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
 			 const struct vfio_device_ops *ops);
 void vfio_uninit_group_dev(struct vfio_device *device);
-- 
cgit v1.2.3


From c68ea0d00ad82428154aed890ec9f793e460fa1c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Sep 2021 17:56:57 +0200
Subject: vfio: simplify iommu group allocation for mediated devices

Reuse the logic in vfio_noiommu_group_alloc to allocate a fake
single-device iommu group for mediated devices by factoring out a common
function, and replacing the noiommu boolean field in struct vfio_group
with an enum to distinguish the three different kinds of groups.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-8-hch@lst.de
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/crypto/vfio_ap_ops.c |  2 +-
 drivers/vfio/mdev/mdev_driver.c   | 45 ++-----------------
 drivers/vfio/mdev/vfio_mdev.c     |  2 +-
 drivers/vfio/vfio.c               | 92 ++++++++++++++++++++++++++++-----------
 include/linux/vfio.h              |  1 +
 samples/vfio-mdev/mbochs.c        |  2 +-
 samples/vfio-mdev/mdpy.c          |  2 +-
 samples/vfio-mdev/mtty.c          |  2 +-
 8 files changed, 76 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 118939a7729a..24755d1aedd5 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -351,7 +351,7 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev)
 	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
 	mutex_unlock(&matrix_dev->lock);
 
-	ret = vfio_register_group_dev(&matrix_mdev->vdev);
+	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
 	if (ret)
 		goto err_list;
 	dev_set_drvdata(&mdev->dev, matrix_mdev);
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
index e2cb1ff56f6c..7927ed4f1711 100644
--- a/drivers/vfio/mdev/mdev_driver.c
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -13,60 +13,23 @@
 
 #include "mdev_private.h"
 
-static int mdev_attach_iommu(struct mdev_device *mdev)
-{
-	int ret;
-	struct iommu_group *group;
-
-	group = iommu_group_alloc();
-	if (IS_ERR(group))
-		return PTR_ERR(group);
-
-	ret = iommu_group_add_device(group, &mdev->dev);
-	if (!ret)
-		dev_info(&mdev->dev, "MDEV: group_id = %d\n",
-			 iommu_group_id(group));
-
-	iommu_group_put(group);
-	return ret;
-}
-
-static void mdev_detach_iommu(struct mdev_device *mdev)
-{
-	iommu_group_remove_device(&mdev->dev);
-	dev_info(&mdev->dev, "MDEV: detaching iommu\n");
-}
-
 static int mdev_probe(struct device *dev)
 {
 	struct mdev_driver *drv =
 		container_of(dev->driver, struct mdev_driver, driver);
-	struct mdev_device *mdev = to_mdev_device(dev);
-	int ret;
 
-	ret = mdev_attach_iommu(mdev);
-	if (ret)
-		return ret;
-
-	if (drv->probe) {
-		ret = drv->probe(mdev);
-		if (ret)
-			mdev_detach_iommu(mdev);
-	}
-
-	return ret;
+	if (!drv->probe)
+		return 0;
+	return drv->probe(to_mdev_device(dev));
 }
 
 static void mdev_remove(struct device *dev)
 {
 	struct mdev_driver *drv =
 		container_of(dev->driver, struct mdev_driver, driver);
-	struct mdev_device *mdev = to_mdev_device(dev);
 
 	if (drv->remove)
-		drv->remove(mdev);
-
-	mdev_detach_iommu(mdev);
+		drv->remove(to_mdev_device(dev));
 }
 
 static int mdev_match(struct device *dev, struct device_driver *drv)
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
index 7a9883048216..a90e24b0c851 100644
--- a/drivers/vfio/mdev/vfio_mdev.c
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -119,7 +119,7 @@ static int vfio_mdev_probe(struct mdev_device *mdev)
 		return -ENOMEM;
 
 	vfio_init_group_dev(vdev, &mdev->dev, &vfio_mdev_dev_ops);
-	ret = vfio_register_group_dev(vdev);
+	ret = vfio_register_emulated_iommu_dev(vdev);
 	if (ret)
 		goto out_uninit;
 
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 23eaebd2e28c..2508c8c39840 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -67,6 +67,30 @@ struct vfio_unbound_dev {
 	struct list_head		unbound_next;
 };
 
+enum vfio_group_type {
+	/*
+	 * Physical device with IOMMU backing.
+	 */
+	VFIO_IOMMU,
+
+	/*
+	 * Virtual device without IOMMU backing. The VFIO core fakes up an
+	 * iommu_group as the iommu_group sysfs interface is part of the
+	 * userspace ABI.  The user of these devices must not be able to
+	 * directly trigger unmediated DMA.
+	 */
+	VFIO_EMULATED_IOMMU,
+
+	/*
+	 * Physical device without IOMMU backing. The VFIO core fakes up an
+	 * iommu_group as the iommu_group sysfs interface is part of the
+	 * userspace ABI.  Users can trigger unmediated DMA by the device,
+	 * usage is highly dangerous, requires an explicit opt-in and will
+	 * taint the kernel.
+	 */
+	VFIO_NO_IOMMU,
+};
+
 struct vfio_group {
 	struct kref			kref;
 	int				minor;
@@ -83,7 +107,7 @@ struct vfio_group {
 	struct mutex			unbound_lock;
 	atomic_t			opened;
 	wait_queue_head_t		container_q;
-	bool				noiommu;
+	enum vfio_group_type		type;
 	unsigned int			dev_counter;
 	struct kvm			*kvm;
 	struct blocking_notifier_head	notifier;
@@ -336,7 +360,7 @@ static void vfio_group_unlock_and_free(struct vfio_group *group)
  * Group objects - create, release, get, put, search
  */
 static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-		bool noiommu)
+		enum vfio_group_type type)
 {
 	struct vfio_group *group, *tmp;
 	struct device *dev;
@@ -355,7 +379,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 	atomic_set(&group->opened, 0);
 	init_waitqueue_head(&group->container_q);
 	group->iommu_group = iommu_group;
-	group->noiommu = noiommu;
+	group->type = type;
 	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
 
 	group->nb.notifier_call = vfio_iommu_group_notifier;
@@ -391,8 +415,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 	}
 
 	dev = device_create(vfio.class, NULL,
-			    MKDEV(MAJOR(vfio.group_devt), minor),
-			    group, "%s%d", group->noiommu ? "noiommu-" : "",
+			    MKDEV(MAJOR(vfio.group_devt), minor), group, "%s%d",
+			    group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
 			    iommu_group_id(iommu_group));
 	if (IS_ERR(dev)) {
 		vfio_free_group_minor(minor);
@@ -778,8 +802,8 @@ void vfio_uninit_group_dev(struct vfio_device *device)
 }
 EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
 
-#ifdef CONFIG_VFIO_NOIOMMU
-static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev)
+static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
+		enum vfio_group_type type)
 {
 	struct iommu_group *iommu_group;
 	struct vfio_group *group;
@@ -794,7 +818,7 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev)
 	if (ret)
 		goto out_put_group;
 
-	group = vfio_create_group(iommu_group, true);
+	group = vfio_create_group(iommu_group, type);
 	if (IS_ERR(group)) {
 		ret = PTR_ERR(group);
 		goto out_remove_device;
@@ -808,7 +832,6 @@ out_put_group:
 	iommu_group_put(iommu_group);
 	return ERR_PTR(ret);
 }
-#endif
 
 static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 {
@@ -824,7 +847,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 		 * bus.  Taint the kernel because we're about to give a DMA
 		 * capable device to a user without IOMMU protection.
 		 */
-		group = vfio_noiommu_group_alloc(dev);
+		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
 		if (!IS_ERR(group)) {
 			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
 			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
@@ -841,7 +864,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 		goto out_put;
 
 	/* a newly created vfio_group keeps the reference. */
-	group = vfio_create_group(iommu_group, false);
+	group = vfio_create_group(iommu_group, VFIO_IOMMU);
 	if (IS_ERR(group))
 		goto out_put;
 	return group;
@@ -851,10 +874,13 @@ out_put:
 	return group;
 }
 
-int vfio_register_group_dev(struct vfio_device *device)
+static int __vfio_register_dev(struct vfio_device *device,
+		struct vfio_group *group)
 {
 	struct vfio_device *existing_device;
-	struct vfio_group *group;
+
+	if (IS_ERR(group))
+		return PTR_ERR(group);
 
 	/*
 	 * If the driver doesn't specify a set then the device is added to a
@@ -863,16 +889,13 @@ int vfio_register_group_dev(struct vfio_device *device)
 	if (!device->dev_set)
 		vfio_assign_device_set(device, device);
 
-	group = vfio_group_find_or_alloc(device->dev);
-	if (IS_ERR(group))
-		return PTR_ERR(group);
-
 	existing_device = vfio_group_get_device(group, device->dev);
 	if (existing_device) {
 		dev_WARN(device->dev, "Device already exists on group %d\n",
 			 iommu_group_id(group->iommu_group));
 		vfio_device_put(existing_device);
-		if (group->noiommu)
+		if (group->type == VFIO_NO_IOMMU ||
+		    group->type == VFIO_EMULATED_IOMMU)
 			iommu_group_remove_device(device->dev);
 		vfio_group_put(group);
 		return -EBUSY;
@@ -891,8 +914,25 @@ int vfio_register_group_dev(struct vfio_device *device)
 
 	return 0;
 }
+
+int vfio_register_group_dev(struct vfio_device *device)
+{
+	return __vfio_register_dev(device,
+		vfio_group_find_or_alloc(device->dev));
+}
 EXPORT_SYMBOL_GPL(vfio_register_group_dev);
 
+/*
+ * Register a virtual device without IOMMU backing.  The user of this
+ * device must not be able to directly trigger unmediated DMA.
+ */
+int vfio_register_emulated_iommu_dev(struct vfio_device *device)
+{
+	return __vfio_register_dev(device,
+		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
+}
+EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
+
 /**
  * Get a reference to the vfio_device for a device.  Even if the
  * caller thinks they own the device, they could be racing with a
@@ -1019,7 +1059,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
 	if (list_empty(&group->device_list))
 		wait_event(group->container_q, !group->container);
 
-	if (group->noiommu)
+	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
 		iommu_group_remove_device(device->dev);
 
 	/* Matches the get in vfio_register_group_dev() */
@@ -1368,7 +1408,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 	if (atomic_read(&group->container_users))
 		return -EINVAL;
 
-	if (group->noiommu && !capable(CAP_SYS_RAWIO))
+	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
 		return -EPERM;
 
 	f = fdget(container_fd);
@@ -1388,7 +1428,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 
 	/* Real groups and fake groups cannot mix */
 	if (!list_empty(&container->group_list) &&
-	    container->noiommu != group->noiommu) {
+	    container->noiommu != (group->type == VFIO_NO_IOMMU)) {
 		ret = -EPERM;
 		goto unlock_out;
 	}
@@ -1402,7 +1442,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
 	}
 
 	group->container = container;
-	container->noiommu = group->noiommu;
+	container->noiommu = (group->type == VFIO_NO_IOMMU);
 	list_add(&group->container_next, &container->group_list);
 
 	/* Get a reference on the container and mark a user within the group */
@@ -1426,7 +1466,7 @@ static int vfio_group_add_container_user(struct vfio_group *group)
 	if (!atomic_inc_not_zero(&group->container_users))
 		return -EINVAL;
 
-	if (group->noiommu) {
+	if (group->type == VFIO_NO_IOMMU) {
 		atomic_dec(&group->container_users);
 		return -EPERM;
 	}
@@ -1451,7 +1491,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 	    !group->container->iommu_driver || !vfio_group_viable(group))
 		return -EINVAL;
 
-	if (group->noiommu && !capable(CAP_SYS_RAWIO))
+	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
 		return -EPERM;
 
 	device = vfio_device_get_from_name(group, buf);
@@ -1498,7 +1538,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
 
 	fd_install(fdno, filep);
 
-	if (group->noiommu)
+	if (group->type == VFIO_NO_IOMMU)
 		dev_warn(device->dev, "vfio-noiommu device opened by user "
 			 "(%s:%d)\n", current->comm, task_pid_nr(current));
 	return fdno;
@@ -1594,7 +1634,7 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
 	if (!group)
 		return -ENODEV;
 
-	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
+	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
 		vfio_group_put(group);
 		return -EPERM;
 	}
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index f7083c2fd0d0..bbe293008626 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -75,6 +75,7 @@ void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
 			 const struct vfio_device_ops *ops);
 void vfio_uninit_group_dev(struct vfio_device *device);
 int vfio_register_group_dev(struct vfio_device *device);
+int vfio_register_emulated_iommu_dev(struct vfio_device *device);
 void vfio_unregister_group_dev(struct vfio_device *device);
 extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
 extern void vfio_device_put(struct vfio_device *device);
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index c313ab4d1f4e..cd41bec5fdeb 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -553,7 +553,7 @@ static int mbochs_probe(struct mdev_device *mdev)
 	mbochs_create_config_space(mdev_state);
 	mbochs_reset(mdev_state);
 
-	ret = vfio_register_group_dev(&mdev_state->vdev);
+	ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
 	if (ret)
 		goto err_mem;
 	dev_set_drvdata(&mdev->dev, mdev_state);
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index 8d1a80a0722a..fe5d43e797b6 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -258,7 +258,7 @@ static int mdpy_probe(struct mdev_device *mdev)
 
 	mdpy_count++;
 
-	ret = vfio_register_group_dev(&mdev_state->vdev);
+	ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
 	if (ret)
 		goto err_mem;
 	dev_set_drvdata(&mdev->dev, mdev_state);
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 5983cdb16e3d..a0e1a469bd47 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -741,7 +741,7 @@ static int mtty_probe(struct mdev_device *mdev)
 
 	mtty_create_config_space(mdev_state);
 
-	ret = vfio_register_group_dev(&mdev_state->vdev);
+	ret = vfio_register_emulated_iommu_dev(&mdev_state->vdev);
 	if (ret)
 		goto err_vconfig;
 	dev_set_drvdata(&mdev->dev, mdev_state);
-- 
cgit v1.2.3


From 67462037872d5ca57dc4674cccff191947b9b43e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Sep 2021 17:56:58 +0200
Subject: vfio: remove unused method from vfio_iommu_driver_ops

The read, write and mmap methods are never implemented, so remove them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-9-hch@lst.de
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c  | 50 --------------------------------------------------
 include/linux/vfio.h |  5 -----
 2 files changed, 55 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 2508c8c39840..2c1c7316aa19 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1276,62 +1276,12 @@ static int vfio_fops_release(struct inode *inode, struct file *filep)
 	return 0;
 }
 
-/*
- * Once an iommu driver is set, we optionally pass read/write/mmap
- * on to the driver, allowing management interfaces beyond ioctl.
- */
-static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
-			      size_t count, loff_t *ppos)
-{
-	struct vfio_container *container = filep->private_data;
-	struct vfio_iommu_driver *driver;
-	ssize_t ret = -EINVAL;
-
-	driver = container->iommu_driver;
-	if (likely(driver && driver->ops->read))
-		ret = driver->ops->read(container->iommu_data,
-					buf, count, ppos);
-
-	return ret;
-}
-
-static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
-			       size_t count, loff_t *ppos)
-{
-	struct vfio_container *container = filep->private_data;
-	struct vfio_iommu_driver *driver;
-	ssize_t ret = -EINVAL;
-
-	driver = container->iommu_driver;
-	if (likely(driver && driver->ops->write))
-		ret = driver->ops->write(container->iommu_data,
-					 buf, count, ppos);
-
-	return ret;
-}
-
-static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
-{
-	struct vfio_container *container = filep->private_data;
-	struct vfio_iommu_driver *driver;
-	int ret = -EINVAL;
-
-	driver = container->iommu_driver;
-	if (likely(driver && driver->ops->mmap))
-		ret = driver->ops->mmap(container->iommu_data, vma);
-
-	return ret;
-}
-
 static const struct file_operations vfio_fops = {
 	.owner		= THIS_MODULE,
 	.open		= vfio_fops_open,
 	.release	= vfio_fops_release,
-	.read		= vfio_fops_read,
-	.write		= vfio_fops_write,
 	.unlocked_ioctl	= vfio_fops_unl_ioctl,
 	.compat_ioctl	= compat_ptr_ioctl,
-	.mmap		= vfio_fops_mmap,
 };
 
 /**
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index bbe293008626..7a57a0077f96 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -95,13 +95,8 @@ struct vfio_iommu_driver_ops {
 	struct module	*owner;
 	void		*(*open)(unsigned long arg);
 	void		(*release)(void *iommu_data);
-	ssize_t		(*read)(void *iommu_data, char __user *buf,
-				size_t count, loff_t *ppos);
-	ssize_t		(*write)(void *iommu_data, const char __user *buf,
-				 size_t count, loff_t *size);
 	long		(*ioctl)(void *iommu_data, unsigned int cmd,
 				 unsigned long arg);
-	int		(*mmap)(void *iommu_data, struct vm_area_struct *vma);
 	int		(*attach_group)(void *iommu_data,
 					struct iommu_group *group);
 	void		(*detach_group)(void *iommu_data,
-- 
cgit v1.2.3


From 8cc02d22d7e1596ed687c4ff967c32056c2bef3e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Sep 2021 17:56:59 +0200
Subject: vfio: move the vfio_iommu_driver_ops interface out of <linux/vfio.h>

Create a new private drivers/vfio/vfio.h header for the interface between
the VFIO core and the iommu drivers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-10-hch@lst.de
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio.c                 |  1 +
 drivers/vfio/vfio.h                 | 47 +++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio_iommu_spapr_tce.c |  1 +
 drivers/vfio/vfio_iommu_type1.c     |  1 +
 include/linux/vfio.h                | 44 ----------------------------------
 5 files changed, 50 insertions(+), 44 deletions(-)
 create mode 100644 drivers/vfio/vfio.h

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 2c1c7316aa19..6589e296ef34 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -32,6 +32,7 @@
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/sched/signal.h>
+#include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
 #define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
new file mode 100644
index 000000000000..a78de649eb2f
--- /dev/null
+++ b/drivers/vfio/vfio.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ */
+
+/* events for the backend driver notify callback */
+enum vfio_iommu_notify_type {
+	VFIO_IOMMU_CONTAINER_CLOSE = 0,
+};
+
+/**
+ * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
+ */
+struct vfio_iommu_driver_ops {
+	char		*name;
+	struct module	*owner;
+	void		*(*open)(unsigned long arg);
+	void		(*release)(void *iommu_data);
+	long		(*ioctl)(void *iommu_data, unsigned int cmd,
+				 unsigned long arg);
+	int		(*attach_group)(void *iommu_data,
+					struct iommu_group *group);
+	void		(*detach_group)(void *iommu_data,
+					struct iommu_group *group);
+	int		(*pin_pages)(void *iommu_data,
+				     struct iommu_group *group,
+				     unsigned long *user_pfn,
+				     int npage, int prot,
+				     unsigned long *phys_pfn);
+	int		(*unpin_pages)(void *iommu_data,
+				       unsigned long *user_pfn, int npage);
+	int		(*register_notifier)(void *iommu_data,
+					     unsigned long *events,
+					     struct notifier_block *nb);
+	int		(*unregister_notifier)(void *iommu_data,
+					       struct notifier_block *nb);
+	int		(*dma_rw)(void *iommu_data, dma_addr_t user_iova,
+				  void *data, size_t count, bool write);
+	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
+						   struct iommu_group *group);
+	void		(*notify)(void *iommu_data,
+				  enum vfio_iommu_notify_type event);
+};
+
+int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
+void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index fe888b5dcc00..3efd09faeca4 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -20,6 +20,7 @@
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/mm.h>
+#include "vfio.h"
 
 #include <asm/iommu.h>
 #include <asm/tce.h>
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 0e9217687f5c..2e51e4390c15 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -40,6 +40,7 @@
 #include <linux/notifier.h>
 #include <linux/dma-iommu.h>
 #include <linux/irqdomain.h>
+#include "vfio.h"
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 7a57a0077f96..76191d7abed1 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -82,50 +82,6 @@ extern void vfio_device_put(struct vfio_device *device);
 
 int vfio_assign_device_set(struct vfio_device *device, void *set_id);
 
-/* events for the backend driver notify callback */
-enum vfio_iommu_notify_type {
-	VFIO_IOMMU_CONTAINER_CLOSE = 0,
-};
-
-/**
- * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
- */
-struct vfio_iommu_driver_ops {
-	char		*name;
-	struct module	*owner;
-	void		*(*open)(unsigned long arg);
-	void		(*release)(void *iommu_data);
-	long		(*ioctl)(void *iommu_data, unsigned int cmd,
-				 unsigned long arg);
-	int		(*attach_group)(void *iommu_data,
-					struct iommu_group *group);
-	void		(*detach_group)(void *iommu_data,
-					struct iommu_group *group);
-	int		(*pin_pages)(void *iommu_data,
-				     struct iommu_group *group,
-				     unsigned long *user_pfn,
-				     int npage, int prot,
-				     unsigned long *phys_pfn);
-	int		(*unpin_pages)(void *iommu_data,
-				       unsigned long *user_pfn, int npage);
-	int		(*register_notifier)(void *iommu_data,
-					     unsigned long *events,
-					     struct notifier_block *nb);
-	int		(*unregister_notifier)(void *iommu_data,
-					       struct notifier_block *nb);
-	int		(*dma_rw)(void *iommu_data, dma_addr_t user_iova,
-				  void *data, size_t count, bool write);
-	struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
-						   struct iommu_group *group);
-	void		(*notify)(void *iommu_data,
-				  enum vfio_iommu_notify_type event);
-};
-
-extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
-
-extern void vfio_unregister_iommu_driver(
-				const struct vfio_iommu_driver_ops *ops);
-
 /*
  * External user API
  */
-- 
cgit v1.2.3


From fda49d97f2c4faf0b42e8796d3e6c868d992f3af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Sep 2021 17:57:00 +0200
Subject: vfio: remove the unused mdev iommu hook

The iommu_device field in struct mdev_device has never been used
since it was added more than 2 years ago.

This is a manual revert of commit 7bd50f0cd2
("vfio/type1: Add domain at(de)taching group helpers").

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210924155705.4258-11-hch@lst.de
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_type1.c | 133 ++++++++--------------------------------
 include/linux/mdev.h            |  20 ------
 2 files changed, 26 insertions(+), 127 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2e51e4390c15..42a6be1fb726 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -114,7 +114,6 @@ struct vfio_batch {
 struct vfio_iommu_group {
 	struct iommu_group	*iommu_group;
 	struct list_head	next;
-	bool			mdev_group;	/* An mdev group */
 	bool			pinned_page_dirty_scope;
 };
 
@@ -1935,61 +1934,6 @@ static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
 	return ret;
 }
 
-static int vfio_mdev_attach_domain(struct device *dev, void *data)
-{
-	struct mdev_device *mdev = to_mdev_device(dev);
-	struct iommu_domain *domain = data;
-	struct device *iommu_device;
-
-	iommu_device = mdev_get_iommu_device(mdev);
-	if (iommu_device) {
-		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			return iommu_aux_attach_device(domain, iommu_device);
-		else
-			return iommu_attach_device(domain, iommu_device);
-	}
-
-	return -EINVAL;
-}
-
-static int vfio_mdev_detach_domain(struct device *dev, void *data)
-{
-	struct mdev_device *mdev = to_mdev_device(dev);
-	struct iommu_domain *domain = data;
-	struct device *iommu_device;
-
-	iommu_device = mdev_get_iommu_device(mdev);
-	if (iommu_device) {
-		if (iommu_dev_feature_enabled(iommu_device, IOMMU_DEV_FEAT_AUX))
-			iommu_aux_detach_device(domain, iommu_device);
-		else
-			iommu_detach_device(domain, iommu_device);
-	}
-
-	return 0;
-}
-
-static int vfio_iommu_attach_group(struct vfio_domain *domain,
-				   struct vfio_iommu_group *group)
-{
-	if (group->mdev_group)
-		return iommu_group_for_each_dev(group->iommu_group,
-						domain->domain,
-						vfio_mdev_attach_domain);
-	else
-		return iommu_attach_group(domain->domain, group->iommu_group);
-}
-
-static void vfio_iommu_detach_group(struct vfio_domain *domain,
-				    struct vfio_iommu_group *group)
-{
-	if (group->mdev_group)
-		iommu_group_for_each_dev(group->iommu_group, domain->domain,
-					 vfio_mdev_detach_domain);
-	else
-		iommu_detach_group(domain->domain, group->iommu_group);
-}
-
 static bool vfio_bus_is_mdev(struct bus_type *bus)
 {
 	struct bus_type *mdev_bus;
@@ -2004,20 +1948,6 @@ static bool vfio_bus_is_mdev(struct bus_type *bus)
 	return ret;
 }
 
-static int vfio_mdev_iommu_device(struct device *dev, void *data)
-{
-	struct mdev_device *mdev = to_mdev_device(dev);
-	struct device **old = data, *new;
-
-	new = mdev_get_iommu_device(mdev);
-	if (!new || (*old && *old != new))
-		return -EINVAL;
-
-	*old = new;
-
-	return 0;
-}
-
 /*
  * This is a helper function to insert an address range to iova list.
  * The list is initially created with a single entry corresponding to
@@ -2278,38 +2208,25 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 		goto out_free;
 
 	if (vfio_bus_is_mdev(bus)) {
-		struct device *iommu_device = NULL;
-
-		group->mdev_group = true;
-
-		/* Determine the isolation type */
-		ret = iommu_group_for_each_dev(iommu_group, &iommu_device,
-					       vfio_mdev_iommu_device);
-		if (ret || !iommu_device) {
-			if (!iommu->external_domain) {
-				INIT_LIST_HEAD(&domain->group_list);
-				iommu->external_domain = domain;
-				vfio_update_pgsize_bitmap(iommu);
-			} else {
-				kfree(domain);
-			}
-
-			list_add(&group->next,
-				 &iommu->external_domain->group_list);
-			/*
-			 * Non-iommu backed group cannot dirty memory directly,
-			 * it can only use interfaces that provide dirty
-			 * tracking.
-			 * The iommu scope can only be promoted with the
-			 * addition of a dirty tracking group.
-			 */
-			group->pinned_page_dirty_scope = true;
-			mutex_unlock(&iommu->lock);
-
-			return 0;
+		if (!iommu->external_domain) {
+			INIT_LIST_HEAD(&domain->group_list);
+			iommu->external_domain = domain;
+			vfio_update_pgsize_bitmap(iommu);
+		} else {
+			kfree(domain);
 		}
 
-		bus = iommu_device->bus;
+		list_add(&group->next, &iommu->external_domain->group_list);
+		/*
+		 * Non-iommu backed group cannot dirty memory directly, it can
+		 * only use interfaces that provide dirty tracking.
+		 * The iommu scope can only be promoted with the addition of a
+		 * dirty tracking group.
+		 */
+		group->pinned_page_dirty_scope = true;
+		mutex_unlock(&iommu->lock);
+
+		return 0;
 	}
 
 	domain->domain = iommu_domain_alloc(bus);
@@ -2324,7 +2241,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 			goto out_domain;
 	}
 
-	ret = vfio_iommu_attach_group(domain, group);
+	ret = iommu_attach_group(domain->domain, group->iommu_group);
 	if (ret)
 		goto out_domain;
 
@@ -2391,15 +2308,17 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
 	list_for_each_entry(d, &iommu->domain_list, next) {
 		if (d->domain->ops == domain->domain->ops &&
 		    d->prot == domain->prot) {
-			vfio_iommu_detach_group(domain, group);
-			if (!vfio_iommu_attach_group(d, group)) {
+			iommu_detach_group(domain->domain, group->iommu_group);
+			if (!iommu_attach_group(d->domain,
+						group->iommu_group)) {
 				list_add(&group->next, &d->group_list);
 				iommu_domain_free(domain->domain);
 				kfree(domain);
 				goto done;
 			}
 
-			ret = vfio_iommu_attach_group(domain, group);
+			ret = iommu_attach_group(domain->domain,
+						 group->iommu_group);
 			if (ret)
 				goto out_domain;
 		}
@@ -2436,7 +2355,7 @@ done:
 	return 0;
 
 out_detach:
-	vfio_iommu_detach_group(domain, group);
+	iommu_detach_group(domain->domain, group->iommu_group);
 out_domain:
 	iommu_domain_free(domain->domain);
 	vfio_iommu_iova_free(&iova_copy);
@@ -2601,7 +2520,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
 		if (!group)
 			continue;
 
-		vfio_iommu_detach_group(domain, group);
+		iommu_detach_group(domain->domain, group->iommu_group);
 		update_dirty_scope = !group->pinned_page_dirty_scope;
 		list_del(&group->next);
 		kfree(group);
@@ -2689,7 +2608,7 @@ static void vfio_release_domain(struct vfio_domain *domain, bool external)
 	list_for_each_entry_safe(group, group_tmp,
 				 &domain->group_list, next) {
 		if (!external)
-			vfio_iommu_detach_group(domain, group);
+			iommu_detach_group(domain->domain, group->iommu_group);
 		list_del(&group->next);
 		kfree(group);
 	}
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 68427e8fadeb..15d03f6532d0 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -18,7 +18,6 @@ struct mdev_device {
 	void *driver_data;
 	struct list_head next;
 	struct mdev_type *type;
-	struct device *iommu_device;
 	bool active;
 };
 
@@ -27,25 +26,6 @@ static inline struct mdev_device *to_mdev_device(struct device *dev)
 	return container_of(dev, struct mdev_device, dev);
 }
 
-/*
- * Called by the parent device driver to set the device which represents
- * this mdev in iommu protection scope. By default, the iommu device is
- * NULL, that indicates using vendor defined isolation.
- *
- * @dev: the mediated device that iommu will isolate.
- * @iommu_device: a pci device which represents the iommu for @dev.
- */
-static inline void mdev_set_iommu_device(struct mdev_device *mdev,
-					 struct device *iommu_device)
-{
-	mdev->iommu_device = iommu_device;
-}
-
-static inline struct device *mdev_get_iommu_device(struct mdev_device *mdev)
-{
-	return mdev->iommu_device;
-}
-
 unsigned int mdev_get_type_group_id(struct mdev_device *mdev);
 unsigned int mtype_get_type_group_id(struct mdev_type *mtype);
 struct device *mtype_get_parent_dev(struct mdev_type *mtype);
-- 
cgit v1.2.3


From 02afb8d6048d6526619e6e2dcdc95ce9c2bdb52f Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punitagrawal@gmail.com>
Date: Tue, 14 Sep 2021 23:38:57 +0900
Subject: kprobe: Simplify prepare_kprobe() by dropping redundant version

The function prepare_kprobe() is called during kprobe registration and
is responsible for ensuring any architecture related preparation for
the kprobe is done before returning.

One of two versions of prepare_kprobe() is chosen depending on the
availability of KPROBE_ON_FTRACE in the kernel configuration.

Simplify the code by dropping the version when KPROBE_ON_FTRACE is not
selected - instead relying on kprobe_ftrace() to return false when
KPROBE_ON_FTRACE is not set.

No functional change.

Link: https://lkml.kernel.org/r/163163033696.489837.9264661820279300788.stgit@devnote2

Signed-off-by: Punit Agrawal <punitagrawal@gmail.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h |  5 +++++
 kernel/kprobes.c        | 23 +++++++++--------------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e4f3bfe08757..0b75549b2815 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -354,6 +354,11 @@ static inline void wait_for_kprobe_optimizer(void) { }
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				  struct ftrace_ops *ops, struct ftrace_regs *fregs);
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
+#else
+static inline int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	return -EINVAL;
+}
 #endif
 
 int arch_check_ftrace_location(struct kprobe *p);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 26fc9904c3b1..cfa9d3c263eb 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1033,15 +1033,6 @@ static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
 static int kprobe_ipmodify_enabled;
 static int kprobe_ftrace_enabled;
 
-/* Must ensure p->addr is really on ftrace */
-static int prepare_kprobe(struct kprobe *p)
-{
-	if (!kprobe_ftrace(p))
-		return arch_prepare_kprobe(p);
-
-	return arch_prepare_kprobe_ftrace(p);
-}
-
 /* Caller must lock kprobe_mutex */
 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
 			       int *cnt)
@@ -1113,11 +1104,6 @@ static int disarm_kprobe_ftrace(struct kprobe *p)
 		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
 }
 #else	/* !CONFIG_KPROBES_ON_FTRACE */
-static inline int prepare_kprobe(struct kprobe *p)
-{
-	return arch_prepare_kprobe(p);
-}
-
 static inline int arm_kprobe_ftrace(struct kprobe *p)
 {
 	return -ENODEV;
@@ -1129,6 +1115,15 @@ static inline int disarm_kprobe_ftrace(struct kprobe *p)
 }
 #endif
 
+static int prepare_kprobe(struct kprobe *p)
+{
+	/* Must ensure p->addr is really on ftrace */
+	if (kprobe_ftrace(p))
+		return arch_prepare_kprobe_ftrace(p);
+
+	return arch_prepare_kprobe(p);
+}
+
 /* Arm a kprobe with text_mutex */
 static int arm_kprobe(struct kprobe *kp)
 {
-- 
cgit v1.2.3


From 4402deae8993fb0e25a19bb999b38df13e25a7e0 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punitagrawal@gmail.com>
Date: Tue, 14 Sep 2021 23:39:16 +0900
Subject: kprobes: Make arch_check_ftrace_location static

arch_check_ftrace_location() was introduced as a weak function in
commit f7f242ff004499 ("kprobes: introduce weak
arch_check_ftrace_location() helper function") to allow architectures
to handle kprobes call site on their own.

Recently, the only architecture (csky) to implement
arch_check_ftrace_location() was migrated to using the common
version.

As a result, further cleanup the code to drop the weak attribute and
rename the function to remove the architecture specific
implementation.

Link: https://lkml.kernel.org/r/163163035673.489837.2367816318195254104.stgit@devnote2

Signed-off-by: Punit Agrawal <punitagrawal@gmail.com>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h | 2 --
 kernel/kprobes.c        | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0b75549b2815..8a9412bb0d5e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -361,8 +361,6 @@ static inline int arch_prepare_kprobe_ftrace(struct kprobe *p)
 }
 #endif
 
-int arch_check_ftrace_location(struct kprobe *p);
-
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index cfa9d3c263eb..30199bfcc74a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1524,7 +1524,7 @@ static inline int warn_kprobe_rereg(struct kprobe *p)
 	return ret;
 }
 
-int __weak arch_check_ftrace_location(struct kprobe *p)
+static int check_ftrace_location(struct kprobe *p)
 {
 	unsigned long ftrace_addr;
 
@@ -1547,7 +1547,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
 {
 	int ret;
 
-	ret = arch_check_ftrace_location(p);
+	ret = check_ftrace_location(p);
 	if (ret)
 		return ret;
 	jump_label_lock();
-- 
cgit v1.2.3


From 223a76b268c9cfa265d454879ae09e2c9c808f87 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:39:34 +0900
Subject: kprobes: Fix coding style issues

Fix coding style issues reported by checkpatch.pl and update
comments to quote variable names and add "()" to function
name.
One TODO comment in __disarm_kprobe() is removed because
it has been done by following commit.

Link: https://lkml.kernel.org/r/163163037468.489837.4282347782492003960.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h |  40 ++++----
 kernel/kprobes.c        | 236 +++++++++++++++++++++++++-----------------------
 2 files changed, 145 insertions(+), 131 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 8a9412bb0d5e..756d3d23ce37 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -3,7 +3,6 @@
 #define _LINUX_KPROBES_H
 /*
  *  Kernel Probes (KProbes)
- *  include/linux/kprobes.h
  *
  * Copyright (C) IBM Corporation, 2002, 2004
  *
@@ -39,7 +38,7 @@
 #define KPROBE_REENTER		0x00000004
 #define KPROBE_HIT_SSDONE	0x00000008
 
-#else /* CONFIG_KPROBES */
+#else /* !CONFIG_KPROBES */
 #include <asm-generic/kprobes.h>
 typedef int kprobe_opcode_t;
 struct arch_specific_insn {
@@ -228,7 +227,7 @@ static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance
 	return READ_ONCE(ri->rph->rp);
 }
 
-#else /* CONFIG_KRETPROBES */
+#else /* !CONFIG_KRETPROBES */
 static inline void arch_prepare_kretprobe(struct kretprobe *rp,
 					struct pt_regs *regs)
 {
@@ -239,11 +238,15 @@ static inline int arch_trampoline_kprobe(struct kprobe *p)
 }
 #endif /* CONFIG_KRETPROBES */
 
+/* Markers of '_kprobe_blacklist' section */
+extern unsigned long __start_kprobe_blacklist[];
+extern unsigned long __stop_kprobe_blacklist[];
+
 extern struct kretprobe_blackpoint kretprobe_blacklist[];
 
 #ifdef CONFIG_KPROBES_SANITY_TEST
 extern int init_test_probes(void);
-#else
+#else /* !CONFIG_KPROBES_SANITY_TEST */
 static inline int init_test_probes(void)
 {
 	return 0;
@@ -303,7 +306,7 @@ static inline bool is_kprobe_##__name##_slot(unsigned long addr)	\
 #define KPROBE_OPTINSN_PAGE_SYM		"kprobe_optinsn_page"
 int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
 			     unsigned long *value, char *type, char *sym);
-#else /* __ARCH_WANT_KPROBES_INSN_SLOT */
+#else /* !__ARCH_WANT_KPROBES_INSN_SLOT */
 #define DEFINE_INSN_CACHE_OPS(__name)					\
 static inline bool is_kprobe_##__name##_slot(unsigned long addr)	\
 {									\
@@ -345,11 +348,12 @@ extern int sysctl_kprobes_optimization;
 extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 					     int write, void *buffer,
 					     size_t *length, loff_t *ppos);
-#endif
+#endif /* CONFIG_SYSCTL */
 extern void wait_for_kprobe_optimizer(void);
-#else
+#else /* !CONFIG_OPTPROBES */
 static inline void wait_for_kprobe_optimizer(void) { }
 #endif /* CONFIG_OPTPROBES */
+
 #ifdef CONFIG_KPROBES_ON_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				  struct ftrace_ops *ops, struct ftrace_regs *fregs);
@@ -359,7 +363,7 @@ static inline int arch_prepare_kprobe_ftrace(struct kprobe *p)
 {
 	return -EINVAL;
 }
-#endif
+#endif /* CONFIG_KPROBES_ON_FTRACE */
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
@@ -367,7 +371,7 @@ struct kprobe *get_kprobe(void *addr);
 /* kprobe_running() will just return the current_kprobe on this CPU */
 static inline struct kprobe *kprobe_running(void)
 {
-	return (__this_cpu_read(current_kprobe));
+	return __this_cpu_read(current_kprobe);
 }
 
 static inline void reset_current_kprobe(void)
@@ -431,11 +435,11 @@ static inline struct kprobe *kprobe_running(void)
 }
 static inline int register_kprobe(struct kprobe *p)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 static inline int register_kprobes(struct kprobe **kps, int num)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 static inline void unregister_kprobe(struct kprobe *p)
 {
@@ -445,11 +449,11 @@ static inline void unregister_kprobes(struct kprobe **kps, int num)
 }
 static inline int register_kretprobe(struct kretprobe *rp)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 static inline int register_kretprobes(struct kretprobe **rps, int num)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 static inline void unregister_kretprobe(struct kretprobe *rp)
 {
@@ -465,11 +469,11 @@ static inline void kprobe_free_init_mem(void)
 }
 static inline int disable_kprobe(struct kprobe *kp)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 static inline int enable_kprobe(struct kprobe *kp)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline bool within_kprobe_blacklist(unsigned long addr)
@@ -482,6 +486,7 @@ static inline int kprobe_get_kallsym(unsigned int symnum, unsigned long *value,
 	return -ERANGE;
 }
 #endif /* CONFIG_KPROBES */
+
 static inline int disable_kretprobe(struct kretprobe *rp)
 {
 	return disable_kprobe(&rp->kp);
@@ -496,13 +501,14 @@ static inline bool is_kprobe_insn_slot(unsigned long addr)
 {
 	return false;
 }
-#endif
+#endif /* !CONFIG_KPROBES */
+
 #ifndef CONFIG_OPTPROBES
 static inline bool is_kprobe_optinsn_slot(unsigned long addr)
 {
 	return false;
 }
-#endif
+#endif /* !CONFIG_OPTPROBES */
 
 /* Returns true if kprobes handled the fault */
 static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7663c8a51889..ad39eeaa4371 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  *  Kernel Probes (KProbes)
- *  kernel/kprobes.c
  *
  * Copyright (C) IBM Corporation, 2002, 2004
  *
@@ -52,18 +51,18 @@
 
 static int kprobes_initialized;
 /* kprobe_table can be accessed by
- * - Normal hlist traversal and RCU add/del under kprobe_mutex is held.
+ * - Normal hlist traversal and RCU add/del under 'kprobe_mutex' is held.
  * Or
  * - RCU hlist traversal under disabling preempt (breakpoint handlers)
  */
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 
-/* NOTE: change this value only with kprobe_mutex held */
+/* NOTE: change this value only with 'kprobe_mutex' held */
 static bool kprobes_all_disarmed;
 
-/* This protects kprobe_table and optimizing_list */
+/* This protects 'kprobe_table' and 'optimizing_list' */
 static DEFINE_MUTEX(kprobe_mutex);
-static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
 
 kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
 					unsigned int __unused)
@@ -71,12 +70,15 @@ kprobe_opcode_t * __weak kprobe_lookup_name(const char *name,
 	return ((kprobe_opcode_t *)(kallsyms_lookup_name(name)));
 }
 
-/* Blacklist -- list of struct kprobe_blacklist_entry */
+/*
+ * Blacklist -- list of 'struct kprobe_blacklist_entry' to store info where
+ * kprobes can not probe.
+ */
 static LIST_HEAD(kprobe_blacklist);
 
 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT
 /*
- * kprobe->ainsn.insn points to the copy of the instruction to be
+ * 'kprobe::ainsn.insn' points to the copy of the instruction to be
  * single-stepped. x86_64, POWER4 and above have no-exec support and
  * stepping on the instruction on a vmalloced/kmalloced/data page
  * is a recipe for disaster
@@ -107,6 +109,12 @@ enum kprobe_slot_state {
 
 void __weak *alloc_insn_page(void)
 {
+	/*
+	 * Use module_alloc() so this page is within +/- 2GB of where the
+	 * kernel image and loaded module images reside. This is required
+	 * for most of the architectures.
+	 * (e.g. x86-64 needs this to handle the %rip-relative fixups.)
+	 */
 	return module_alloc(PAGE_SIZE);
 }
 
@@ -142,6 +150,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
 	list_for_each_entry_rcu(kip, &c->pages, list) {
 		if (kip->nused < slots_per_page(c)) {
 			int i;
+
 			for (i = 0; i < slots_per_page(c); i++) {
 				if (kip->slot_used[i] == SLOT_CLEAN) {
 					kip->slot_used[i] = SLOT_USED;
@@ -167,11 +176,6 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
 	if (!kip)
 		goto out;
 
-	/*
-	 * Use module_alloc so this page is within +/- 2GB of where the
-	 * kernel image and loaded module images reside. This is required
-	 * so x86_64 can correctly handle the %rip-relative fixups.
-	 */
 	kip->insns = c->alloc();
 	if (!kip->insns) {
 		kfree(kip);
@@ -233,6 +237,7 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c)
 
 	list_for_each_entry_safe(kip, next, &c->pages, list) {
 		int i;
+
 		if (kip->ngarbage == 0)
 			continue;
 		kip->ngarbage = 0;	/* we will collect all garbages */
@@ -313,7 +318,7 @@ int kprobe_cache_get_kallsym(struct kprobe_insn_cache *c, unsigned int *symnum,
 	list_for_each_entry_rcu(kip, &c->pages, list) {
 		if ((*symnum)--)
 			continue;
-		strlcpy(sym, c->sym, KSYM_NAME_LEN);
+		strscpy(sym, c->sym, KSYM_NAME_LEN);
 		*type = 't';
 		*value = (unsigned long)kip->insns;
 		ret = 0;
@@ -361,9 +366,9 @@ static inline void reset_kprobe_instance(void)
 
 /*
  * This routine is called either:
- * 	- under the kprobe_mutex - during kprobe_[un]register()
- * 				OR
- * 	- with preemption disabled - from arch/xxx/kernel/kprobes.c
+ *	- under the 'kprobe_mutex' - during kprobe_[un]register().
+ *				OR
+ *	- with preemption disabled - from architecture specific code.
  */
 struct kprobe *get_kprobe(void *addr)
 {
@@ -383,22 +388,20 @@ NOKPROBE_SYMBOL(get_kprobe);
 
 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
-/* Return true if the kprobe is an aggregator */
+/* Return true if 'p' is an aggregator */
 static inline int kprobe_aggrprobe(struct kprobe *p)
 {
 	return p->pre_handler == aggr_pre_handler;
 }
 
-/* Return true(!0) if the kprobe is unused */
+/* Return true if 'p' is unused */
 static inline int kprobe_unused(struct kprobe *p)
 {
 	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
 	       list_empty(&p->list);
 }
 
-/*
- * Keep all fields in the kprobe consistent
- */
+/* Keep all fields in the kprobe consistent. */
 static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	memcpy(&p->opcode, &ap->opcode, sizeof(kprobe_opcode_t));
@@ -406,11 +409,11 @@ static inline void copy_kprobe(struct kprobe *ap, struct kprobe *p)
 }
 
 #ifdef CONFIG_OPTPROBES
-/* NOTE: change this value only with kprobe_mutex held */
+/* NOTE: This is protected by 'kprobe_mutex'. */
 static bool kprobes_allow_optimization;
 
 /*
- * Call all pre_handler on the list, but ignores its return value.
+ * Call all 'kprobe::pre_handler' on the list, but ignores its return value.
  * This must be called from arch-dep optimized caller.
  */
 void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
@@ -438,7 +441,7 @@ static void free_aggr_kprobe(struct kprobe *p)
 	kfree(op);
 }
 
-/* Return true(!0) if the kprobe is ready for optimization. */
+/* Return true if the kprobe is ready for optimization. */
 static inline int kprobe_optready(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
@@ -451,7 +454,7 @@ static inline int kprobe_optready(struct kprobe *p)
 	return 0;
 }
 
-/* Return true(!0) if the kprobe is disarmed. Note: p must be on hash list */
+/* Return true if the kprobe is disarmed. Note: p must be on hash list */
 static inline int kprobe_disarmed(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
@@ -465,7 +468,7 @@ static inline int kprobe_disarmed(struct kprobe *p)
 	return kprobe_disabled(p) && list_empty(&op->list);
 }
 
-/* Return true(!0) if the probe is queued on (un)optimizing lists */
+/* Return true if the probe is queued on (un)optimizing lists */
 static int kprobe_queued(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
@@ -480,7 +483,7 @@ static int kprobe_queued(struct kprobe *p)
 
 /*
  * Return an optimized kprobe whose optimizing code replaces
- * instructions including addr (exclude breakpoint).
+ * instructions including 'addr' (exclude breakpoint).
  */
 static struct kprobe *get_optimized_kprobe(unsigned long addr)
 {
@@ -501,7 +504,7 @@ static struct kprobe *get_optimized_kprobe(unsigned long addr)
 	return NULL;
 }
 
-/* Optimization staging list, protected by kprobe_mutex */
+/* Optimization staging list, protected by 'kprobe_mutex' */
 static LIST_HEAD(optimizing_list);
 static LIST_HEAD(unoptimizing_list);
 static LIST_HEAD(freeing_list);
@@ -512,20 +515,20 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
 
 /*
  * Optimize (replace a breakpoint with a jump) kprobes listed on
- * optimizing_list.
+ * 'optimizing_list'.
  */
 static void do_optimize_kprobes(void)
 {
 	lockdep_assert_held(&text_mutex);
 	/*
-	 * The optimization/unoptimization refers online_cpus via
-	 * stop_machine() and cpu-hotplug modifies online_cpus.
-	 * And same time, text_mutex will be held in cpu-hotplug and here.
-	 * This combination can cause a deadlock (cpu-hotplug try to lock
-	 * text_mutex but stop_machine can not be done because online_cpus
-	 * has been changed)
-	 * To avoid this deadlock, caller must have locked cpu hotplug
-	 * for preventing cpu-hotplug outside of text_mutex locking.
+	 * The optimization/unoptimization refers 'online_cpus' via
+	 * stop_machine() and cpu-hotplug modifies the 'online_cpus'.
+	 * And same time, 'text_mutex' will be held in cpu-hotplug and here.
+	 * This combination can cause a deadlock (cpu-hotplug tries to lock
+	 * 'text_mutex' but stop_machine() can not be done because
+	 * the 'online_cpus' has been changed)
+	 * To avoid this deadlock, caller must have locked cpu-hotplug
+	 * for preventing cpu-hotplug outside of 'text_mutex' locking.
 	 */
 	lockdep_assert_cpus_held();
 
@@ -539,7 +542,7 @@ static void do_optimize_kprobes(void)
 
 /*
  * Unoptimize (replace a jump with a breakpoint and remove the breakpoint
- * if need) kprobes listed on unoptimizing_list.
+ * if need) kprobes listed on 'unoptimizing_list'.
  */
 static void do_unoptimize_kprobes(void)
 {
@@ -554,7 +557,7 @@ static void do_unoptimize_kprobes(void)
 		return;
 
 	arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
-	/* Loop free_list for disarming */
+	/* Loop on 'freeing_list' for disarming */
 	list_for_each_entry_safe(op, tmp, &freeing_list, list) {
 		/* Switching from detour code to origin */
 		op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
@@ -565,7 +568,7 @@ static void do_unoptimize_kprobes(void)
 			/*
 			 * Remove unused probes from hash list. After waiting
 			 * for synchronization, these probes are reclaimed.
-			 * (reclaiming is done by do_free_cleaned_kprobes.)
+			 * (reclaiming is done by do_free_cleaned_kprobes().)
 			 */
 			hlist_del_rcu(&op->kp.hlist);
 		} else
@@ -573,7 +576,7 @@ static void do_unoptimize_kprobes(void)
 	}
 }
 
-/* Reclaim all kprobes on the free_list */
+/* Reclaim all kprobes on the 'freeing_list' */
 static void do_free_cleaned_kprobes(void)
 {
 	struct optimized_kprobe *op, *tmp;
@@ -645,9 +648,9 @@ void wait_for_kprobe_optimizer(void)
 	while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
 		mutex_unlock(&kprobe_mutex);
 
-		/* this will also make optimizing_work execute immmediately */
+		/* This will also make 'optimizing_work' execute immmediately */
 		flush_delayed_work(&optimizing_work);
-		/* @optimizing_work might not have been queued yet, relax */
+		/* 'optimizing_work' might not have been queued yet, relax */
 		cpu_relax();
 
 		mutex_lock(&kprobe_mutex);
@@ -678,7 +681,7 @@ static void optimize_kprobe(struct kprobe *p)
 	    (kprobe_disabled(p) || kprobes_all_disarmed))
 		return;
 
-	/* kprobes with post_handler can not be optimized */
+	/* kprobes with 'post_handler' can not be optimized */
 	if (p->post_handler)
 		return;
 
@@ -698,7 +701,10 @@ static void optimize_kprobe(struct kprobe *p)
 	}
 	op->kp.flags |= KPROBE_FLAG_OPTIMIZED;
 
-	/* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */
+	/*
+	 * On the 'unoptimizing_list' and 'optimizing_list',
+	 * 'op' must have OPTIMIZED flag
+	 */
 	if (WARN_ON_ONCE(!list_empty(&op->list)))
 		return;
 
@@ -768,7 +774,7 @@ static int reuse_unused_kprobe(struct kprobe *ap)
 	WARN_ON_ONCE(list_empty(&op->list));
 	/* Enable the probe again */
 	ap->flags &= ~KPROBE_FLAG_DISABLED;
-	/* Optimize it again (remove from op->list) */
+	/* Optimize it again. (remove from 'op->list') */
 	if (!kprobe_optready(ap))
 		return -EINVAL;
 
@@ -818,7 +824,7 @@ static void prepare_optimized_kprobe(struct kprobe *p)
 	__prepare_optimized_kprobe(op, p);
 }
 
-/* Allocate new optimized_kprobe and try to prepare optimized instructions */
+/* Allocate new optimized_kprobe and try to prepare optimized instructions. */
 static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
@@ -837,19 +843,19 @@ static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
 
 /*
- * Prepare an optimized_kprobe and optimize it
- * NOTE: p must be a normal registered kprobe
+ * Prepare an optimized_kprobe and optimize it.
+ * NOTE: 'p' must be a normal registered kprobe.
  */
 static void try_to_optimize_kprobe(struct kprobe *p)
 {
 	struct kprobe *ap;
 	struct optimized_kprobe *op;
 
-	/* Impossible to optimize ftrace-based kprobe */
+	/* Impossible to optimize ftrace-based kprobe. */
 	if (kprobe_ftrace(p))
 		return;
 
-	/* For preparing optimization, jump_label_text_reserved() is called */
+	/* For preparing optimization, jump_label_text_reserved() is called. */
 	cpus_read_lock();
 	jump_label_lock();
 	mutex_lock(&text_mutex);
@@ -860,14 +866,14 @@ static void try_to_optimize_kprobe(struct kprobe *p)
 
 	op = container_of(ap, struct optimized_kprobe, kp);
 	if (!arch_prepared_optinsn(&op->optinsn)) {
-		/* If failed to setup optimizing, fallback to kprobe */
+		/* If failed to setup optimizing, fallback to kprobe. */
 		arch_remove_optimized_kprobe(op);
 		kfree(op);
 		goto out;
 	}
 
 	init_aggr_kprobe(ap, p);
-	optimize_kprobe(ap);	/* This just kicks optimizer thread */
+	optimize_kprobe(ap);	/* This just kicks optimizer thread. */
 
 out:
 	mutex_unlock(&text_mutex);
@@ -882,7 +888,7 @@ static void optimize_all_kprobes(void)
 	unsigned int i;
 
 	mutex_lock(&kprobe_mutex);
-	/* If optimization is already allowed, just return */
+	/* If optimization is already allowed, just return. */
 	if (kprobes_allow_optimization)
 		goto out;
 
@@ -908,7 +914,7 @@ static void unoptimize_all_kprobes(void)
 	unsigned int i;
 
 	mutex_lock(&kprobe_mutex);
-	/* If optimization is already prohibited, just return */
+	/* If optimization is already prohibited, just return. */
 	if (!kprobes_allow_optimization) {
 		mutex_unlock(&kprobe_mutex);
 		return;
@@ -926,7 +932,7 @@ static void unoptimize_all_kprobes(void)
 	cpus_read_unlock();
 	mutex_unlock(&kprobe_mutex);
 
-	/* Wait for unoptimizing completion */
+	/* Wait for unoptimizing completion. */
 	wait_for_kprobe_optimizer();
 	pr_info("kprobe jump-optimization is disabled. All kprobes are based on software breakpoint.\n");
 }
@@ -953,12 +959,12 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 }
 #endif /* CONFIG_SYSCTL */
 
-/* Put a breakpoint for a probe. Must be called with text_mutex locked */
+/* Put a breakpoint for a probe. Must be called with 'text_mutex' locked. */
 static void __arm_kprobe(struct kprobe *p)
 {
 	struct kprobe *_p;
 
-	/* Check collision with other optimized kprobes */
+	/* Find the overlapping optimized kprobes. */
 	_p = get_optimized_kprobe((unsigned long)p->addr);
 	if (unlikely(_p))
 		/* Fallback to unoptimized kprobe */
@@ -968,7 +974,7 @@ static void __arm_kprobe(struct kprobe *p)
 	optimize_kprobe(p);	/* Try to optimize (add kprobe to a list) */
 }
 
-/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
+/* Remove the breakpoint of a probe. Must be called with 'text_mutex' locked. */
 static void __disarm_kprobe(struct kprobe *p, bool reopt)
 {
 	struct kprobe *_p;
@@ -978,12 +984,17 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt)
 
 	if (!kprobe_queued(p)) {
 		arch_disarm_kprobe(p);
-		/* If another kprobe was blocked, optimize it. */
+		/* If another kprobe was blocked, re-optimize it. */
 		_p = get_optimized_kprobe((unsigned long)p->addr);
 		if (unlikely(_p) && reopt)
 			optimize_kprobe(_p);
 	}
-	/* TODO: reoptimize others after unoptimized this probe */
+	/*
+	 * TODO: Since unoptimization and real disarming will be done by
+	 * the worker thread, we can not check whether another probe are
+	 * unoptimized because of this probe here. It should be re-optimized
+	 * by the worker thread.
+	 */
 }
 
 #else /* !CONFIG_OPTPROBES */
@@ -1036,7 +1047,7 @@ static struct ftrace_ops kprobe_ipmodify_ops __read_mostly = {
 static int kprobe_ipmodify_enabled;
 static int kprobe_ftrace_enabled;
 
-/* Caller must lock kprobe_mutex */
+/* Caller must lock 'kprobe_mutex' */
 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
 			       int *cnt)
 {
@@ -1073,7 +1084,7 @@ static int arm_kprobe_ftrace(struct kprobe *p)
 		ipmodify ? &kprobe_ipmodify_enabled : &kprobe_ftrace_enabled);
 }
 
-/* Caller must lock kprobe_mutex */
+/* Caller must lock 'kprobe_mutex'. */
 static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
 				  int *cnt)
 {
@@ -1122,7 +1133,7 @@ static int prepare_kprobe(struct kprobe *p)
 	return arch_prepare_kprobe(p);
 }
 
-/* Arm a kprobe with text_mutex */
+/* Arm a kprobe with 'text_mutex'. */
 static int arm_kprobe(struct kprobe *kp)
 {
 	if (unlikely(kprobe_ftrace(kp)))
@@ -1137,7 +1148,7 @@ static int arm_kprobe(struct kprobe *kp)
 	return 0;
 }
 
-/* Disarm a kprobe with text_mutex */
+/* Disarm a kprobe with 'text_mutex'. */
 static int disarm_kprobe(struct kprobe *kp, bool reopt)
 {
 	if (unlikely(kprobe_ftrace(kp)))
@@ -1187,17 +1198,17 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 }
 NOKPROBE_SYMBOL(aggr_post_handler);
 
-/* Walks the list and increments nmissed count for multiprobe case */
+/* Walks the list and increments 'nmissed' if 'p' has child probes. */
 void kprobes_inc_nmissed_count(struct kprobe *p)
 {
 	struct kprobe *kp;
+
 	if (!kprobe_aggrprobe(p)) {
 		p->nmissed++;
 	} else {
 		list_for_each_entry_rcu(kp, &p->list, list)
 			kp->nmissed++;
 	}
-	return;
 }
 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
 
@@ -1215,9 +1226,9 @@ static void recycle_rp_inst(struct kretprobe_instance *ri)
 {
 	struct kretprobe *rp = get_kretprobe(ri);
 
-	if (likely(rp)) {
+	if (likely(rp))
 		freelist_add(&ri->freelist, &rp->freelist);
-	} else
+	else
 		call_rcu(&ri->rcu, free_rp_inst_rcu);
 }
 NOKPROBE_SYMBOL(recycle_rp_inst);
@@ -1243,8 +1254,8 @@ void kprobe_busy_end(void)
 }
 
 /*
- * This function is called from finish_task_switch when task tk becomes dead,
- * so that we can recycle any function-return probe instances associated
+ * This function is called from finish_task_switch() when task 'tk' becomes
+ * dead, so that we can recycle any kretprobe instances associated
  * with this task. These left over instances represent probed functions
  * that have been called but will never return.
  */
@@ -1292,7 +1303,7 @@ static inline void free_rp_inst(struct kretprobe *rp)
 	}
 }
 
-/* Add the new probe to ap->list */
+/* Add the new probe to 'ap->list'. */
 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	if (p->post_handler)
@@ -1306,12 +1317,12 @@ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 }
 
 /*
- * Fill in the required fields of the "manager kprobe". Replace the
- * earlier kprobe in the hlist with the manager kprobe
+ * Fill in the required fields of the aggregator kprobe. Replace the
+ * earlier kprobe in the hlist with the aggregator kprobe.
  */
 static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
-	/* Copy p's insn slot to ap */
+	/* Copy the insn slot of 'p' to 'ap'. */
 	copy_kprobe(p, ap);
 	flush_insn_slot(ap);
 	ap->addr = p->addr;
@@ -1329,8 +1340,7 @@ static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 }
 
 /*
- * This is the second or subsequent kprobe at the address - handle
- * the intricacies
+ * This registers the second or subsequent kprobe at the same address.
  */
 static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 {
@@ -1344,7 +1354,7 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 	mutex_lock(&text_mutex);
 
 	if (!kprobe_aggrprobe(orig_p)) {
-		/* If orig_p is not an aggr_kprobe, create new aggr_kprobe. */
+		/* If 'orig_p' is not an 'aggr_kprobe', create new one. */
 		ap = alloc_aggr_kprobe(orig_p);
 		if (!ap) {
 			ret = -ENOMEM;
@@ -1369,8 +1379,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 		if (ret)
 			/*
 			 * Even if fail to allocate new slot, don't need to
-			 * free aggr_probe. It will be used next time, or
-			 * freed by unregister_kprobe.
+			 * free the 'ap'. It will be used next time, or
+			 * freed by unregister_kprobe().
 			 */
 			goto out;
 
@@ -1385,7 +1395,7 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
 			    | KPROBE_FLAG_DISABLED;
 	}
 
-	/* Copy ap's insn slot to p */
+	/* Copy the insn slot of 'p' to 'ap'. */
 	copy_kprobe(ap, p);
 	ret = add_new_kprobe(ap, p);
 
@@ -1411,7 +1421,7 @@ out:
 
 bool __weak arch_within_kprobe_blacklist(unsigned long addr)
 {
-	/* The __kprobes marked functions and entry code must not be probed */
+	/* The '__kprobes' functions and entry code must not be probed. */
 	return addr >= (unsigned long)__kprobes_text_start &&
 	       addr < (unsigned long)__kprobes_text_end;
 }
@@ -1423,8 +1433,8 @@ static bool __within_kprobe_blacklist(unsigned long addr)
 	if (arch_within_kprobe_blacklist(addr))
 		return true;
 	/*
-	 * If there exists a kprobe_blacklist, verify and
-	 * fail any probe registration in the prohibited area
+	 * If 'kprobe_blacklist' is defined, check the address and
+	 * reject any probe registration in the prohibited area.
 	 */
 	list_for_each_entry(ent, &kprobe_blacklist, list) {
 		if (addr >= ent->start_addr && addr < ent->end_addr)
@@ -1454,7 +1464,7 @@ bool within_kprobe_blacklist(unsigned long addr)
 }
 
 /*
- * If we have a symbol_name argument, look it up and add the offset field
+ * If 'symbol_name' is specified, look it up and add the 'offset'
  * to it. This way, we can specify a relative address to a symbol.
  * This returns encoded errors if it fails to look up symbol or invalid
  * combination of parameters.
@@ -1484,7 +1494,10 @@ static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
 	return _kprobe_addr(p->addr, p->symbol_name, p->offset);
 }
 
-/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+/*
+ * Check the 'p' is valid and return the aggregator kprobe
+ * at the same address.
+ */
 static struct kprobe *__get_valid_kprobe(struct kprobe *p)
 {
 	struct kprobe *ap, *list_p;
@@ -1561,7 +1574,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
 		goto out;
 	}
 
-	/* Check if are we probing a module */
+	/* Check if 'p' is probing a module. */
 	*probed_mod = __module_text_address((unsigned long) p->addr);
 	if (*probed_mod) {
 		/*
@@ -1574,7 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
 		}
 
 		/*
-		 * If the module freed .init.text, we couldn't insert
+		 * If the module freed '.init.text', we couldn't insert
 		 * kprobes in there.
 		 */
 		if (within_module_init((unsigned long)p->addr, *probed_mod) &&
@@ -1621,7 +1634,7 @@ int register_kprobe(struct kprobe *p)
 
 	old_p = get_kprobe(p->addr);
 	if (old_p) {
-		/* Since this may unoptimize old_p, locking text_mutex. */
+		/* Since this may unoptimize 'old_p', locking 'text_mutex'. */
 		ret = register_aggr_kprobe(old_p, p);
 		goto out;
 	}
@@ -1660,7 +1673,7 @@ out:
 }
 EXPORT_SYMBOL_GPL(register_kprobe);
 
-/* Check if all probes on the aggrprobe are disabled */
+/* Check if all probes on the 'ap' are disabled. */
 static int aggr_kprobe_disabled(struct kprobe *ap)
 {
 	struct kprobe *kp;
@@ -1670,15 +1683,15 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
 	list_for_each_entry(kp, &ap->list, list)
 		if (!kprobe_disabled(kp))
 			/*
-			 * There is an active probe on the list.
-			 * We can't disable this ap.
+			 * Since there is an active probe on the list,
+			 * we can't disable this 'ap'.
 			 */
 			return 0;
 
 	return 1;
 }
 
-/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
+/* Disable one kprobe: Make sure called under 'kprobe_mutex' is locked. */
 static struct kprobe *__disable_kprobe(struct kprobe *p)
 {
 	struct kprobe *orig_p;
@@ -1697,7 +1710,7 @@ static struct kprobe *__disable_kprobe(struct kprobe *p)
 		/* Try to disarm and disable this/parent probe */
 		if (p == orig_p || aggr_kprobe_disabled(orig_p)) {
 			/*
-			 * If kprobes_all_disarmed is set, orig_p
+			 * If 'kprobes_all_disarmed' is set, 'orig_p'
 			 * should have already been disarmed, so
 			 * skip unneed disarming process.
 			 */
@@ -1984,7 +1997,7 @@ int register_kretprobe(struct kretprobe *rp)
 	if (ret)
 		return ret;
 
-	/* If only rp->kp.addr is specified, check reregistering kprobes */
+	/* If only 'rp->kp.addr' is specified, check reregistering kprobes */
 	if (rp->kp.addr && warn_kprobe_rereg(&rp->kp))
 		return -EINVAL;
 
@@ -2089,13 +2102,13 @@ EXPORT_SYMBOL_GPL(unregister_kretprobes);
 #else /* CONFIG_KRETPROBES */
 int register_kretprobe(struct kretprobe *rp)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
 
 int register_kretprobes(struct kretprobe **rps, int num)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL_GPL(register_kretprobes);
 
@@ -2144,7 +2157,7 @@ static void kill_kprobe(struct kprobe *p)
 	/*
 	 * The module is going away. We should disarm the kprobe which
 	 * is using ftrace, because ftrace framework is still available at
-	 * MODULE_STATE_GOING notification.
+	 * 'MODULE_STATE_GOING' notification.
 	 */
 	if (kprobe_ftrace(p) && !kprobe_disabled(p) && !kprobes_all_disarmed)
 		disarm_kprobe_ftrace(p);
@@ -2317,13 +2330,13 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
 			return ret;
 	}
 
-	/* Symbols in __kprobes_text are blacklisted */
+	/* Symbols in '__kprobes_text' are blacklisted */
 	ret = kprobe_add_area_blacklist((unsigned long)__kprobes_text_start,
 					(unsigned long)__kprobes_text_end);
 	if (ret)
 		return ret;
 
-	/* Symbols in noinstr section are blacklisted */
+	/* Symbols in 'noinstr' section are blacklisted */
 	ret = kprobe_add_area_blacklist((unsigned long)__noinstr_text_start,
 					(unsigned long)__noinstr_text_end);
 
@@ -2395,9 +2408,9 @@ static int kprobes_module_callback(struct notifier_block *nb,
 		return NOTIFY_DONE;
 
 	/*
-	 * When MODULE_STATE_GOING was notified, both of module .text and
-	 * .init.text sections would be freed. When MODULE_STATE_LIVE was
-	 * notified, only .init.text section would be freed. We need to
+	 * When 'MODULE_STATE_GOING' was notified, both of module '.text' and
+	 * '.init.text' sections would be freed. When 'MODULE_STATE_LIVE' was
+	 * notified, only '.init.text' section would be freed. We need to
 	 * disable kprobes which have been inserted in the sections.
 	 */
 	mutex_lock(&kprobe_mutex);
@@ -2414,9 +2427,9 @@ static int kprobes_module_callback(struct notifier_block *nb,
 				 *
 				 * Note, this will also move any optimized probes
 				 * that are pending to be removed from their
-				 * corresponding lists to the freeing_list and
+				 * corresponding lists to the 'freeing_list' and
 				 * will not be touched by the delayed
-				 * kprobe_optimizer work handler.
+				 * kprobe_optimizer() work handler.
 				 */
 				kill_kprobe(p);
 			}
@@ -2432,10 +2445,6 @@ static struct notifier_block kprobe_module_nb = {
 	.priority = 0
 };
 
-/* Markers of _kprobe_blacklist section */
-extern unsigned long __start_kprobe_blacklist[];
-extern unsigned long __stop_kprobe_blacklist[];
-
 void kprobe_free_init_mem(void)
 {
 	void *start = (void *)(&__init_begin);
@@ -2446,7 +2455,7 @@ void kprobe_free_init_mem(void)
 
 	mutex_lock(&kprobe_mutex);
 
-	/* Kill all kprobes on initmem */
+	/* Kill all kprobes on initmem because the target code has been freed. */
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry(p, head, hlist) {
@@ -2469,9 +2478,8 @@ static int __init init_kprobes(void)
 
 	err = populate_kprobe_blacklist(__start_kprobe_blacklist,
 					__stop_kprobe_blacklist);
-	if (err) {
+	if (err)
 		pr_err("Failed to populate blacklist (error %d), kprobes not restricted, be careful using them!\n", err);
-	}
 
 	if (kretprobe_blacklist_size) {
 		/* lookup the function address from its name */
@@ -2488,7 +2496,7 @@ static int __init init_kprobes(void)
 	kprobes_all_disarmed = false;
 
 #if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT)
-	/* Init kprobe_optinsn_slots for allocation */
+	/* Init 'kprobe_optinsn_slots' for allocation */
 	kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
 #endif
 
@@ -2622,7 +2630,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
 		list_entry(v, struct kprobe_blacklist_entry, list);
 
 	/*
-	 * If /proc/kallsyms is not showing kernel address, we won't
+	 * If '/proc/kallsyms' is not showing kernel address, we won't
 	 * show them here either.
 	 */
 	if (!kallsyms_show_value(m->file->f_cred))
-- 
cgit v1.2.3


From dfc05b55c3c6b15dbd889e9901ecb3fb695421bd Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:39:46 +0900
Subject: kprobes: Use IS_ENABLED() instead of kprobes_built_in()

Use IS_ENABLED(CONFIG_KPROBES) instead of kprobes_built_in().
This inline function is introduced only for avoiding #ifdef.
But since now we have IS_ENABLED(), it is no longer needed.

Link: https://lkml.kernel.org/r/163163038581.489837.2805250706507372658.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 756d3d23ce37..9c28fbb18e74 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -180,14 +180,6 @@ struct kprobe_blacklist_entry {
 DECLARE_PER_CPU(struct kprobe *, current_kprobe);
 DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-/*
- * For #ifdef avoidance:
- */
-static inline int kprobes_built_in(void)
-{
-	return 1;
-}
-
 extern void kprobe_busy_begin(void);
 extern void kprobe_busy_end(void);
 
@@ -417,10 +409,6 @@ int arch_kprobe_get_kallsym(unsigned int *symnum, unsigned long *value,
 			    char *type, char *sym);
 #else /* !CONFIG_KPROBES: */
 
-static inline int kprobes_built_in(void)
-{
-	return 0;
-}
 static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	return 0;
@@ -514,7 +502,7 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
 static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
 					      unsigned int trap)
 {
-	if (!kprobes_built_in())
+	if (!IS_ENABLED(CONFIG_KPROBES))
 		return false;
 	if (user_mode(regs))
 		return false;
-- 
cgit v1.2.3


From c42421e205fc2570a4d019184ea7d6c382c93f4c Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:40:07 +0900
Subject: kprobes: treewide: Use 'kprobe_opcode_t *' for the code address in
 get_optimized_kprobe()

Since get_optimized_kprobe() is only used inside kprobes,
it doesn't need to use 'unsigned long' type for 'addr' parameter.
Make it use 'kprobe_opcode_t *' for the 'addr' parameter and
subsequent call of arch_within_optimized_kprobe() also should use
'kprobe_opcode_t *'.

Note that MAX_OPTIMIZED_LENGTH and RELATIVEJUMP_SIZE are defined
by byte-size, but the size of 'kprobe_opcode_t' depends on the
architecture. Therefore, we must be careful when calculating
addresses using those macros.

Link: https://lkml.kernel.org/r/163163040680.489837.12133032364499833736.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arm/probes/kprobes/opt-arm.c |  7 ++++---
 arch/powerpc/kernel/optprobes.c   |  6 +++---
 arch/x86/kernel/kprobes/opt.c     |  6 +++---
 include/linux/kprobes.h           |  2 +-
 kernel/kprobes.c                  | 10 +++++-----
 5 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index c78180172120..dbef34ed933f 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -347,10 +347,11 @@ void arch_unoptimize_kprobes(struct list_head *oplist,
 }
 
 int arch_within_optimized_kprobe(struct optimized_kprobe *op,
-				unsigned long addr)
+				 kprobe_opcode_t *addr)
 {
-	return ((unsigned long)op->kp.addr <= addr &&
-		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
+	return (op->kp.addr <= addr &&
+		op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr);
+
 }
 
 void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index c79899abcec8..325ba544883c 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -301,8 +301,8 @@ void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_li
 	}
 }
 
-int arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr)
+int arch_within_optimized_kprobe(struct optimized_kprobe *op, kprobe_opcode_t *addr)
 {
-	return ((unsigned long)op->kp.addr <= addr &&
-		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
+	return (op->kp.addr <= addr &&
+		op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr);
 }
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 71425ebba98a..b4a54a52aa59 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -367,10 +367,10 @@ int arch_check_optimized_kprobe(struct optimized_kprobe *op)
 
 /* Check the addr is within the optimized instructions. */
 int arch_within_optimized_kprobe(struct optimized_kprobe *op,
-				 unsigned long addr)
+				 kprobe_opcode_t *addr)
 {
-	return ((unsigned long)op->kp.addr <= addr &&
-		(unsigned long)op->kp.addr + op->optinsn.size > addr);
+	return (op->kp.addr <= addr &&
+		op->kp.addr + op->optinsn.size > addr);
 }
 
 /* Free optimized instruction slot */
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 9c28fbb18e74..6a5995f334a0 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -329,7 +329,7 @@ extern void arch_unoptimize_kprobes(struct list_head *oplist,
 				    struct list_head *done_list);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
 extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
-					unsigned long addr);
+					kprobe_opcode_t *addr);
 
 extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ec3d97fd8c6b..b6f1dcf4bff3 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -485,15 +485,15 @@ static int kprobe_queued(struct kprobe *p)
  * Return an optimized kprobe whose optimizing code replaces
  * instructions including 'addr' (exclude breakpoint).
  */
-static struct kprobe *get_optimized_kprobe(unsigned long addr)
+static struct kprobe *get_optimized_kprobe(kprobe_opcode_t *addr)
 {
 	int i;
 	struct kprobe *p = NULL;
 	struct optimized_kprobe *op;
 
 	/* Don't check i == 0, since that is a breakpoint case. */
-	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH; i++)
-		p = get_kprobe((void *)(addr - i));
+	for (i = 1; !p && i < MAX_OPTIMIZED_LENGTH / sizeof(kprobe_opcode_t); i++)
+		p = get_kprobe(addr - i);
 
 	if (p && kprobe_optready(p)) {
 		op = container_of(p, struct optimized_kprobe, kp);
@@ -967,7 +967,7 @@ static void __arm_kprobe(struct kprobe *p)
 	lockdep_assert_held(&text_mutex);
 
 	/* Find the overlapping optimized kprobes. */
-	_p = get_optimized_kprobe((unsigned long)p->addr);
+	_p = get_optimized_kprobe(p->addr);
 	if (unlikely(_p))
 		/* Fallback to unoptimized kprobe */
 		unoptimize_kprobe(_p, true);
@@ -989,7 +989,7 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt)
 	if (!kprobe_queued(p)) {
 		arch_disarm_kprobe(p);
 		/* If another kprobe was blocked, re-optimize it. */
-		_p = get_optimized_kprobe((unsigned long)p->addr);
+		_p = get_optimized_kprobe(p->addr);
 		if (unlikely(_p) && reopt)
 			optimize_kprobe(_p);
 	}
-- 
cgit v1.2.3


From 29e8077ae2beea6a85ad2d0bae9c550bd5d05ed9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:40:16 +0900
Subject: kprobes: Use bool type for functions which returns boolean value

Use the 'bool' type instead of 'int' for the functions which
returns a boolean value, because this makes clear that those
functions don't return any error code.

Link: https://lkml.kernel.org/r/163163041649.489837.17311187321419747536.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h     |  8 ++++----
 kernel/kprobes.c            | 26 +++++++++++++-------------
 kernel/trace/trace_kprobe.c |  2 +-
 3 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 6a5995f334a0..0ba3f9e316d4 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -104,25 +104,25 @@ struct kprobe {
 #define KPROBE_FLAG_FTRACE	8 /* probe is using ftrace */
 
 /* Has this kprobe gone ? */
-static inline int kprobe_gone(struct kprobe *p)
+static inline bool kprobe_gone(struct kprobe *p)
 {
 	return p->flags & KPROBE_FLAG_GONE;
 }
 
 /* Is this kprobe disabled ? */
-static inline int kprobe_disabled(struct kprobe *p)
+static inline bool kprobe_disabled(struct kprobe *p)
 {
 	return p->flags & (KPROBE_FLAG_DISABLED | KPROBE_FLAG_GONE);
 }
 
 /* Is this kprobe really running optimized path ? */
-static inline int kprobe_optimized(struct kprobe *p)
+static inline bool kprobe_optimized(struct kprobe *p)
 {
 	return p->flags & KPROBE_FLAG_OPTIMIZED;
 }
 
 /* Is this kprobe uses ftrace ? */
-static inline int kprobe_ftrace(struct kprobe *p)
+static inline bool kprobe_ftrace(struct kprobe *p)
 {
 	return p->flags & KPROBE_FLAG_FTRACE;
 }
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index b6f1dcf4bff3..8021bccb7770 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -198,8 +198,8 @@ out:
 	return slot;
 }
 
-/* Return 1 if all garbages are collected, otherwise 0. */
-static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
+/* Return true if all garbages are collected, otherwise false. */
+static bool collect_one_slot(struct kprobe_insn_page *kip, int idx)
 {
 	kip->slot_used[idx] = SLOT_CLEAN;
 	kip->nused--;
@@ -223,9 +223,9 @@ static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
 			kip->cache->free(kip->insns);
 			kfree(kip);
 		}
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
 }
 
 static int collect_garbage_slots(struct kprobe_insn_cache *c)
@@ -389,13 +389,13 @@ NOKPROBE_SYMBOL(get_kprobe);
 static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
 /* Return true if 'p' is an aggregator */
-static inline int kprobe_aggrprobe(struct kprobe *p)
+static inline bool kprobe_aggrprobe(struct kprobe *p)
 {
 	return p->pre_handler == aggr_pre_handler;
 }
 
 /* Return true if 'p' is unused */
-static inline int kprobe_unused(struct kprobe *p)
+static inline bool kprobe_unused(struct kprobe *p)
 {
 	return kprobe_aggrprobe(p) && kprobe_disabled(p) &&
 	       list_empty(&p->list);
@@ -455,7 +455,7 @@ static inline int kprobe_optready(struct kprobe *p)
 }
 
 /* Return true if the kprobe is disarmed. Note: p must be on hash list */
-static inline int kprobe_disarmed(struct kprobe *p)
+static inline bool kprobe_disarmed(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
@@ -469,16 +469,16 @@ static inline int kprobe_disarmed(struct kprobe *p)
 }
 
 /* Return true if the probe is queued on (un)optimizing lists */
-static int kprobe_queued(struct kprobe *p)
+static bool kprobe_queued(struct kprobe *p)
 {
 	struct optimized_kprobe *op;
 
 	if (kprobe_aggrprobe(p)) {
 		op = container_of(p, struct optimized_kprobe, kp);
 		if (!list_empty(&op->list))
-			return 1;
+			return true;
 	}
-	return 0;
+	return false;
 }
 
 /*
@@ -1678,7 +1678,7 @@ out:
 EXPORT_SYMBOL_GPL(register_kprobe);
 
 /* Check if all probes on the 'ap' are disabled. */
-static int aggr_kprobe_disabled(struct kprobe *ap)
+static bool aggr_kprobe_disabled(struct kprobe *ap)
 {
 	struct kprobe *kp;
 
@@ -1690,9 +1690,9 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
 			 * Since there is an active probe on the list,
 			 * we can't disable this 'ap'.
 			 */
-			return 0;
+			return false;
 
-	return 1;
+	return true;
 }
 
 static struct kprobe *__disable_kprobe(struct kprobe *p)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3a64ba4bbad6..0e1e7ce5f7ed 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -97,7 +97,7 @@ static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk
 
 static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
 {
-	return !!(kprobe_gone(&tk->rp.kp));
+	return kprobe_gone(&tk->rp.kp);
 }
 
 static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
-- 
cgit v1.2.3


From f2ec8d9a3b8c0f22cd6a2b4f5a2d9aee5206e3b7 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:40:36 +0900
Subject: kprobes: treewide: Replace arch_deref_entry_point() with
 dereference_symbol_descriptor()

~15 years ago kprobes grew the 'arch_deref_entry_point()' __weak function:

  3d7e33825d87: ("jprobes: make jprobes a little safer for users")

But this is just open-coded dereference_symbol_descriptor() in essence, and
its obscure nature was causing bugs.

Just use the real thing and remove arch_deref_entry_point().

Link: https://lkml.kernel.org/r/163163043630.489837.7924988885652708696.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/ia64/kernel/kprobes.c    |  5 -----
 arch/powerpc/kernel/kprobes.c | 11 -----------
 include/linux/kprobes.h       |  1 -
 kernel/kprobes.c              |  7 +------
 lib/error-inject.c            |  3 ++-
 5 files changed, 3 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index d4048518a1d7..0f8573bbf520 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -891,11 +891,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 	return ret;
 }
 
-unsigned long arch_deref_entry_point(void *entry)
-{
-	return ((struct fnptr *)entry)->ip;
-}
-
 static struct kprobe trampoline_p = {
 	.pre_handler = trampoline_probe_handler
 };
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 7a7cd6bda53e..d422e297978b 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -542,17 +542,6 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 }
 NOKPROBE_SYMBOL(kprobe_fault_handler);
 
-unsigned long arch_deref_entry_point(void *entry)
-{
-#ifdef PPC64_ELF_ABI_v1
-	if (!kernel_text_address((unsigned long)entry))
-		return ppc_global_function_entry(entry);
-	else
-#endif
-		return (unsigned long)entry;
-}
-NOKPROBE_SYMBOL(arch_deref_entry_point);
-
 static struct kprobe trampoline_p = {
 	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0ba3f9e316d4..2ed61fcbc89c 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -381,7 +381,6 @@ int register_kprobe(struct kprobe *p);
 void unregister_kprobe(struct kprobe *p);
 int register_kprobes(struct kprobe **kps, int num);
 void unregister_kprobes(struct kprobe **kps, int num);
-unsigned long arch_deref_entry_point(void *);
 
 int register_kretprobe(struct kretprobe *rp);
 void unregister_kretprobe(struct kretprobe *rp);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8021bccb7770..550042d9a6ef 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1861,11 +1861,6 @@ static struct notifier_block kprobe_exceptions_nb = {
 	.priority = 0x7fffffff /* we need to be notified first */
 };
 
-unsigned long __weak arch_deref_entry_point(void *entry)
-{
-	return (unsigned long)entry;
-}
-
 #ifdef CONFIG_KRETPROBES
 
 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
@@ -2327,7 +2322,7 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
 	int ret;
 
 	for (iter = start; iter < end; iter++) {
-		entry = arch_deref_entry_point((void *)*iter);
+		entry = (unsigned long)dereference_symbol_descriptor((void *)*iter);
 		ret = kprobe_add_ksym_blacklist(entry);
 		if (ret == -EINVAL)
 			continue;
diff --git a/lib/error-inject.c b/lib/error-inject.c
index c73651b15b76..2ff5ef689d72 100644
--- a/lib/error-inject.c
+++ b/lib/error-inject.c
@@ -8,6 +8,7 @@
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <asm/sections.h>
 
 /* Whitelist of symbols that can be overridden for error injection. */
 static LIST_HEAD(error_injection_list);
@@ -64,7 +65,7 @@ static void populate_error_injection_list(struct error_injection_entry *start,
 
 	mutex_lock(&ei_mutex);
 	for (iter = start; iter < end; iter++) {
-		entry = arch_deref_entry_point((void *)iter->addr);
+		entry = (unsigned long)dereference_symbol_descriptor((void *)iter->addr);
 
 		if (!kernel_text_address(entry) ||
 		    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
-- 
cgit v1.2.3


From 96fed8ac2bb64ab45497fdd8e3d390165b7a9be8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:40:45 +0900
Subject: kprobes: treewide: Remove trampoline_address from
 kretprobe_trampoline_handler()

The __kretprobe_trampoline_handler() callback, called from low level
arch kprobes methods, has the 'trampoline_address' parameter, which is
entirely superfluous as it basically just replicates:

  dereference_kernel_function_descriptor(kretprobe_trampoline)

In fact we had bugs in arch code where it wasn't replicated correctly.

So remove this superfluous parameter and use kretprobe_trampoline_addr()
instead.

Link: https://lkml.kernel.org/r/163163044546.489837.13505751885476015002.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arc/kernel/kprobes.c          |  2 +-
 arch/arm/probes/kprobes/core.c     |  3 +--
 arch/arm64/kernel/probes/kprobes.c |  3 +--
 arch/csky/kernel/probes/kprobes.c  |  2 +-
 arch/ia64/kernel/kprobes.c         |  5 ++---
 arch/mips/kernel/kprobes.c         |  3 +--
 arch/parisc/kernel/kprobes.c       |  4 ++--
 arch/powerpc/kernel/kprobes.c      |  2 +-
 arch/riscv/kernel/probes/kprobes.c |  2 +-
 arch/s390/kernel/kprobes.c         |  2 +-
 arch/sh/kernel/kprobes.c           |  2 +-
 arch/sparc/kernel/kprobes.c        |  2 +-
 arch/x86/include/asm/kprobes.h     |  1 -
 arch/x86/kernel/kprobes/core.c     |  2 +-
 include/linux/kprobes.h            | 18 +++++++++++++-----
 kernel/kprobes.c                   |  3 +--
 16 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 5f0415fc7328..3cee75c87f97 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -381,7 +381,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 static int __kprobes trampoline_probe_handler(struct kprobe *p,
 					      struct pt_regs *regs)
 {
-	regs->ret = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+	regs->ret = __kretprobe_trampoline_handler(regs, NULL);
 
 	/* By returning a non zero value, we are telling the kprobe handler
 	 * that we don't want the post_handler to run
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index a59e38de4a03..08098ed6f035 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -392,8 +392,7 @@ void __naked __kprobes kretprobe_trampoline(void)
 /* Called from kretprobe_trampoline */
 static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 {
-	return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline,
-						    (void *)regs->ARM_fp);
+	return (void *)kretprobe_trampoline_handler(regs, (void *)regs->ARM_fp);
 }
 
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index ce429cbacd35..f627a12984a8 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -401,8 +401,7 @@ int __init arch_populate_kprobe_blacklist(void)
 
 void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
 {
-	return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline,
-					(void *)kernel_stack_pointer(regs));
+	return (void *)kretprobe_trampoline_handler(regs, (void *)kernel_stack_pointer(regs));
 }
 
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 632407bf45d5..784c5aba7f66 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -386,7 +386,7 @@ int __init arch_populate_kprobe_blacklist(void)
 
 void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
 {
-	return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+	return (void *)kretprobe_trampoline_handler(regs, NULL);
 }
 
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 0f8573bbf520..44c84c20b626 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -392,14 +392,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p,
 	__this_cpu_write(current_kprobe, p);
 }
 
-static void kretprobe_trampoline(void)
+void kretprobe_trampoline(void)
 {
 }
 
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	regs->cr_iip = __kretprobe_trampoline_handler(regs,
-		dereference_function_descriptor(kretprobe_trampoline), NULL);
+	regs->cr_iip = __kretprobe_trampoline_handler(regs, NULL);
 	/*
 	 * By returning a non-zero value, we are telling
 	 * kprobe_handler() that we don't want the post_handler
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index b0934a0d7aed..b33bd2498651 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -485,8 +485,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 static int __kprobes trampoline_probe_handler(struct kprobe *p,
 						struct pt_regs *regs)
 {
-	instruction_pointer(regs) = __kretprobe_trampoline_handler(regs,
-						kretprobe_trampoline, NULL);
+	instruction_pointer(regs) = __kretprobe_trampoline_handler(regs, NULL);
 	/*
 	 * By returning a non-zero value, we are telling
 	 * kprobe_handler() that we don't want the post_handler
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
index 6d21a515eea5..4a35ac6e2ca2 100644
--- a/arch/parisc/kernel/kprobes.c
+++ b/arch/parisc/kernel/kprobes.c
@@ -175,7 +175,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
 	return 1;
 }
 
-static inline void kretprobe_trampoline(void)
+void kretprobe_trampoline(void)
 {
 	asm volatile("nop");
 	asm volatile("nop");
@@ -193,7 +193,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 {
 	unsigned long orig_ret_address;
 
-	orig_ret_address = __kretprobe_trampoline_handler(regs, trampoline_p.addr, NULL);
+	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
 	instruction_pointer_set(regs, orig_ret_address);
 
 	return 1;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index d422e297978b..43c77142a262 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -417,7 +417,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	unsigned long orig_ret_address;
 
-	orig_ret_address = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
 	/*
 	 * We get here through one of two paths:
 	 * 1. by taking a trap -> kprobe_handler() -> here
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index cab6f874358e..62d477cf11da 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -347,7 +347,7 @@ int __init arch_populate_kprobe_blacklist(void)
 
 void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
 {
-	return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+	return (void *)kretprobe_trampoline_handler(regs, NULL);
 }
 
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 952d44b0610b..5fa86e54f129 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -343,7 +343,7 @@ static void __used kretprobe_trampoline_holder(void)
  */
 static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	regs->psw.addr = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+	regs->psw.addr = __kretprobe_trampoline_handler(regs, NULL);
 	/*
 	 * By returning a non-zero value, we are telling
 	 * kprobe_handler() that we don't want the post_handler
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 1c7f358ef0be..8e76a35e6e33 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -303,7 +303,7 @@ static void __used kretprobe_trampoline_holder(void)
  */
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-	regs->pc = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+	regs->pc = __kretprobe_trampoline_handler(regs, NULL);
 
 	return 1;
 }
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index 4c05a4ee6a0e..401534236c2e 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -451,7 +451,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 {
 	unsigned long orig_ret_address = 0;
 
-	orig_ret_address = __kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+	orig_ret_address = __kretprobe_trampoline_handler(regs, NULL);
 	regs->tpc = orig_ret_address;
 	regs->tnpc = orig_ret_address + 4;
 
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index bd7f5886a789..71ea2eab43d5 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -49,7 +49,6 @@ extern __visible kprobe_opcode_t optprobe_template_end[];
 extern const int kretprobe_blacklist_size;
 
 void arch_remove_kprobe(struct kprobe *p);
-asmlinkage void kretprobe_trampoline(void);
 
 extern void arch_kprobe_override_function(struct pt_regs *regs);
 
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index b6e046e4b289..0c59ef5971de 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1064,7 +1064,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
 	regs->ip = (unsigned long)&kretprobe_trampoline;
 	regs->orig_ax = ~0UL;
 
-	return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, &regs->sp);
+	return (void *)kretprobe_trampoline_handler(regs, &regs->sp);
 }
 NOKPROBE_SYMBOL(trampoline_handler);
 
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 2ed61fcbc89c..96f5df93e36e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -188,15 +188,23 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				   struct pt_regs *regs);
 extern int arch_trampoline_kprobe(struct kprobe *p);
 
+void kretprobe_trampoline(void);
+/*
+ * Since some architecture uses structured function pointer,
+ * use dereference_function_descriptor() to get real function address.
+ */
+static nokprobe_inline void *kretprobe_trampoline_addr(void)
+{
+	return dereference_kernel_function_descriptor(kretprobe_trampoline);
+}
+
 /* If the trampoline handler called from a kprobe, use this version */
 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
-				void *trampoline_address,
-				void *frame_pointer);
+					     void *frame_pointer);
 
 static nokprobe_inline
 unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
-				void *trampoline_address,
-				void *frame_pointer)
+					   void *frame_pointer)
 {
 	unsigned long ret;
 	/*
@@ -205,7 +213,7 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
 	 * be running at this point.
 	 */
 	kprobe_busy_begin();
-	ret = __kretprobe_trampoline_handler(regs, trampoline_address, frame_pointer);
+	ret = __kretprobe_trampoline_handler(regs, frame_pointer);
 	kprobe_busy_end();
 
 	return ret;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 550042d9a6ef..6ed755111eea 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1864,7 +1864,6 @@ static struct notifier_block kprobe_exceptions_nb = {
 #ifdef CONFIG_KRETPROBES
 
 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
-					     void *trampoline_address,
 					     void *frame_pointer)
 {
 	kprobe_opcode_t *correct_ret_addr = NULL;
@@ -1879,7 +1878,7 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
 
 		BUG_ON(ri->fp != frame_pointer);
 
-		if (ri->ret_addr != trampoline_address) {
+		if (ri->ret_addr != kretprobe_trampoline_addr()) {
 			correct_ret_addr = ri->ret_addr;
 			/*
 			 * This is the real return address. Any other
-- 
cgit v1.2.3


From adf8a61a940c49fea6fab9c3865f2b69b8ceef28 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:40:54 +0900
Subject: kprobes: treewide: Make it harder to refer kretprobe_trampoline
 directly

Since now there is kretprobe_trampoline_addr() for referring the
address of kretprobe trampoline code, we don't need to access
kretprobe_trampoline directly.

Make it harder to refer by renaming it to __kretprobe_trampoline().

Link: https://lkml.kernel.org/r/163163045446.489837.14510577516938803097.stgit@devnote2

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arc/include/asm/kprobes.h                |  2 +-
 arch/arc/kernel/kprobes.c                     | 11 ++++++-----
 arch/arm/probes/kprobes/core.c                |  6 +++---
 arch/arm64/include/asm/kprobes.h              |  2 +-
 arch/arm64/kernel/probes/kprobes.c            |  2 +-
 arch/arm64/kernel/probes/kprobes_trampoline.S |  4 ++--
 arch/csky/include/asm/kprobes.h               |  2 +-
 arch/csky/kernel/probes/kprobes.c             |  2 +-
 arch/csky/kernel/probes/kprobes_trampoline.S  |  4 ++--
 arch/ia64/kernel/kprobes.c                    |  8 ++++----
 arch/mips/kernel/kprobes.c                    | 12 ++++++------
 arch/parisc/kernel/kprobes.c                  |  4 ++--
 arch/powerpc/include/asm/kprobes.h            |  2 +-
 arch/powerpc/kernel/kprobes.c                 | 16 ++++++++--------
 arch/powerpc/kernel/optprobes.c               |  2 +-
 arch/powerpc/kernel/stacktrace.c              |  2 +-
 arch/riscv/include/asm/kprobes.h              |  2 +-
 arch/riscv/kernel/probes/kprobes.c            |  2 +-
 arch/riscv/kernel/probes/kprobes_trampoline.S |  4 ++--
 arch/s390/include/asm/kprobes.h               |  2 +-
 arch/s390/kernel/kprobes.c                    | 10 +++++-----
 arch/s390/kernel/stacktrace.c                 |  2 +-
 arch/sh/include/asm/kprobes.h                 |  2 +-
 arch/sh/kernel/kprobes.c                      | 10 +++++-----
 arch/sparc/include/asm/kprobes.h              |  2 +-
 arch/sparc/kernel/kprobes.c                   | 10 +++++-----
 arch/x86/kernel/kprobes/core.c                | 18 +++++++++---------
 include/linux/kprobes.h                       |  4 ++--
 kernel/trace/trace_output.c                   |  2 +-
 29 files changed, 76 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h
index 2134721dce44..de1566e32cb8 100644
--- a/arch/arc/include/asm/kprobes.h
+++ b/arch/arc/include/asm/kprobes.h
@@ -46,7 +46,7 @@ struct kprobe_ctlblk {
 };
 
 int kprobe_fault_handler(struct pt_regs *regs, unsigned long cause);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 void trap_is_kprobe(unsigned long address, struct pt_regs *regs);
 #else
 #define trap_is_kprobe(address, regs)
diff --git a/arch/arc/kernel/kprobes.c b/arch/arc/kernel/kprobes.c
index 3cee75c87f97..e71d64119d71 100644
--- a/arch/arc/kernel/kprobes.c
+++ b/arch/arc/kernel/kprobes.c
@@ -363,8 +363,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 
 static void __used kretprobe_trampoline_holder(void)
 {
-	__asm__ __volatile__(".global kretprobe_trampoline\n"
-			     "kretprobe_trampoline:\n" "nop\n");
+	__asm__ __volatile__(".global __kretprobe_trampoline\n"
+			     "__kretprobe_trampoline:\n"
+			     "nop\n");
 }
 
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
@@ -375,7 +376,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	ri->fp = NULL;
 
 	/* Replace the return addr with trampoline addr */
-	regs->blink = (unsigned long)&kretprobe_trampoline;
+	regs->blink = (unsigned long)&__kretprobe_trampoline;
 }
 
 static int __kprobes trampoline_probe_handler(struct kprobe *p,
@@ -390,7 +391,7 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 }
 
 static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
 };
 
@@ -402,7 +403,7 @@ int __init arch_init_kprobes(void)
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 {
-	if (p->addr == (kprobe_opcode_t *) &kretprobe_trampoline)
+	if (p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline)
 		return 1;
 
 	return 0;
diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c
index 08098ed6f035..67ce7eb8f285 100644
--- a/arch/arm/probes/kprobes/core.c
+++ b/arch/arm/probes/kprobes/core.c
@@ -373,7 +373,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
  * for kretprobe handlers which should normally be interested in r0 only
  * anyway.
  */
-void __naked __kprobes kretprobe_trampoline(void)
+void __naked __kprobes __kretprobe_trampoline(void)
 {
 	__asm__ __volatile__ (
 		"stmdb	sp!, {r0 - r11}		\n\t"
@@ -389,7 +389,7 @@ void __naked __kprobes kretprobe_trampoline(void)
 		: : : "memory");
 }
 
-/* Called from kretprobe_trampoline */
+/* Called from __kretprobe_trampoline */
 static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
 {
 	return (void *)kretprobe_trampoline_handler(regs, (void *)regs->ARM_fp);
@@ -402,7 +402,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	ri->fp = (void *)regs->ARM_fp;
 
 	/* Replace the return addr with trampoline addr. */
-	regs->ARM_lr = (unsigned long)&kretprobe_trampoline;
+	regs->ARM_lr = (unsigned long)&__kretprobe_trampoline;
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 5d38ff4a4806..05cd82eeca13 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -39,7 +39,7 @@ void arch_remove_kprobe(struct kprobe *);
 int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
 int kprobe_exceptions_notify(struct notifier_block *self,
 			     unsigned long val, void *data);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
 
 #endif /* CONFIG_KPROBES */
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index f627a12984a8..e7ad6da980e8 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -411,7 +411,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	ri->fp = (void *)kernel_stack_pointer(regs);
 
 	/* replace return addr (x30) with trampoline */
-	regs->regs[30] = (long)&kretprobe_trampoline;
+	regs->regs[30] = (long)&__kretprobe_trampoline;
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S
index 288a84e253cc..520ee8711db1 100644
--- a/arch/arm64/kernel/probes/kprobes_trampoline.S
+++ b/arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -61,7 +61,7 @@
 	ldp x28, x29, [sp, #S_X28]
 	.endm
 
-SYM_CODE_START(kretprobe_trampoline)
+SYM_CODE_START(__kretprobe_trampoline)
 	sub sp, sp, #PT_REGS_SIZE
 
 	save_all_base_regs
@@ -79,4 +79,4 @@ SYM_CODE_START(kretprobe_trampoline)
 	add sp, sp, #PT_REGS_SIZE
 	ret
 
-SYM_CODE_END(kretprobe_trampoline)
+SYM_CODE_END(__kretprobe_trampoline)
diff --git a/arch/csky/include/asm/kprobes.h b/arch/csky/include/asm/kprobes.h
index b647bbde4d6d..55267cbf5204 100644
--- a/arch/csky/include/asm/kprobes.h
+++ b/arch/csky/include/asm/kprobes.h
@@ -41,7 +41,7 @@ void arch_remove_kprobe(struct kprobe *p);
 int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr);
 int kprobe_breakpoint_handler(struct pt_regs *regs);
 int kprobe_single_step_handler(struct pt_regs *regs);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
 
 #endif /* CONFIG_KPROBES */
diff --git a/arch/csky/kernel/probes/kprobes.c b/arch/csky/kernel/probes/kprobes.c
index 784c5aba7f66..42920f25e73c 100644
--- a/arch/csky/kernel/probes/kprobes.c
+++ b/arch/csky/kernel/probes/kprobes.c
@@ -394,7 +394,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 {
 	ri->ret_addr = (kprobe_opcode_t *)regs->lr;
 	ri->fp = NULL;
-	regs->lr = (unsigned long) &kretprobe_trampoline;
+	regs->lr = (unsigned long) &__kretprobe_trampoline;
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/csky/kernel/probes/kprobes_trampoline.S b/arch/csky/kernel/probes/kprobes_trampoline.S
index b1fe3af24f03..ba48ad04a847 100644
--- a/arch/csky/kernel/probes/kprobes_trampoline.S
+++ b/arch/csky/kernel/probes/kprobes_trampoline.S
@@ -4,7 +4,7 @@
 
 #include <abi/entry.h>
 
-ENTRY(kretprobe_trampoline)
+ENTRY(__kretprobe_trampoline)
 	SAVE_REGS_FTRACE
 
 	mov	a0, sp /* pt_regs */
@@ -16,4 +16,4 @@ ENTRY(kretprobe_trampoline)
 
 	RESTORE_REGS_FTRACE
 	rts
-ENDPROC(kretprobe_trampoline)
+ENDPROC(__kretprobe_trampoline)
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 44c84c20b626..1a7bab1c5d7c 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -392,7 +392,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p,
 	__this_cpu_write(current_kprobe, p);
 }
 
-void kretprobe_trampoline(void)
+void __kretprobe_trampoline(void)
 {
 }
 
@@ -414,7 +414,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	ri->fp = NULL;
 
 	/* Replace the return addr with trampoline addr */
-	regs->b0 = (unsigned long)dereference_function_descriptor(kretprobe_trampoline);
+	regs->b0 = (unsigned long)dereference_function_descriptor(__kretprobe_trampoline);
 }
 
 /* Check the instruction in the slot is break */
@@ -897,14 +897,14 @@ static struct kprobe trampoline_p = {
 int __init arch_init_kprobes(void)
 {
 	trampoline_p.addr =
-		dereference_function_descriptor(kretprobe_trampoline);
+		dereference_function_descriptor(__kretprobe_trampoline);
 	return register_kprobe(&trampoline_p);
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 {
 	if (p->addr ==
-		dereference_function_descriptor(kretprobe_trampoline))
+		dereference_function_descriptor(__kretprobe_trampoline))
 		return 1;
 
 	return 0;
diff --git a/arch/mips/kernel/kprobes.c b/arch/mips/kernel/kprobes.c
index b33bd2498651..6c7f3b143fdc 100644
--- a/arch/mips/kernel/kprobes.c
+++ b/arch/mips/kernel/kprobes.c
@@ -460,14 +460,14 @@ static void __used kretprobe_trampoline_holder(void)
 		/* Keep the assembler from reordering and placing JR here. */
 		".set noreorder\n\t"
 		"nop\n\t"
-		".global kretprobe_trampoline\n"
-		"kretprobe_trampoline:\n\t"
+		".global __kretprobe_trampoline\n"
+		"__kretprobe_trampoline:\n\t"
 		"nop\n\t"
 		".set pop"
 		: : : "memory");
 }
 
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				      struct pt_regs *regs)
@@ -476,7 +476,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	ri->fp = NULL;
 
 	/* Replace the return addr with trampoline addr */
-	regs->regs[31] = (unsigned long)kretprobe_trampoline;
+	regs->regs[31] = (unsigned long)__kretprobe_trampoline;
 }
 
 /*
@@ -496,14 +496,14 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 {
-	if (p->addr == (kprobe_opcode_t *)kretprobe_trampoline)
+	if (p->addr == (kprobe_opcode_t *)__kretprobe_trampoline)
 		return 1;
 
 	return 0;
 }
 
 static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *)kretprobe_trampoline,
+	.addr = (kprobe_opcode_t *)__kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
 };
 
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
index 4a35ac6e2ca2..e2bdb5a5f93e 100644
--- a/arch/parisc/kernel/kprobes.c
+++ b/arch/parisc/kernel/kprobes.c
@@ -175,7 +175,7 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
 	return 1;
 }
 
-void kretprobe_trampoline(void)
+void __kretprobe_trampoline(void)
 {
 	asm volatile("nop");
 	asm volatile("nop");
@@ -217,6 +217,6 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 int __init arch_init_kprobes(void)
 {
 	trampoline_p.addr = (kprobe_opcode_t *)
-		dereference_function_descriptor(kretprobe_trampoline);
+		dereference_function_descriptor(__kretprobe_trampoline);
 	return register_kprobe(&trampoline_p);
 }
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index 4fc0e15e23a5..bab364152b29 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -51,7 +51,7 @@ extern kprobe_opcode_t optprobe_template_end[];
 #define flush_insn_slot(p)	do { } while (0)
 #define kretprobe_blacklist_size 0
 
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 extern void arch_remove_kprobe(struct kprobe *p);
 
 /* Architecture specific copy of original instruction */
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 43c77142a262..86d77ff056a6 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -237,7 +237,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 	ri->fp = NULL;
 
 	/* Replace the return addr with trampoline addr */
-	regs->link = (unsigned long)kretprobe_trampoline;
+	regs->link = (unsigned long)__kretprobe_trampoline;
 }
 NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
@@ -403,12 +403,12 @@ NOKPROBE_SYMBOL(kprobe_handler);
  * 	- When the probed function returns, this probe
  * 		causes the handlers to fire
  */
-asm(".global kretprobe_trampoline\n"
-	".type kretprobe_trampoline, @function\n"
-	"kretprobe_trampoline:\n"
+asm(".global __kretprobe_trampoline\n"
+	".type __kretprobe_trampoline, @function\n"
+	"__kretprobe_trampoline:\n"
 	"nop\n"
 	"blr\n"
-	".size kretprobe_trampoline, .-kretprobe_trampoline\n");
+	".size __kretprobe_trampoline, .-__kretprobe_trampoline\n");
 
 /*
  * Called when the probe at kretprobe trampoline is hit
@@ -427,7 +427,7 @@ static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 	 * as it is used to determine the return address from the trap.
 	 * For (2), since nip is not honoured with optprobes, we instead setup
 	 * the link register properly so that the subsequent 'blr' in
-	 * kretprobe_trampoline jumps back to the right instruction.
+	 * __kretprobe_trampoline jumps back to the right instruction.
 	 *
 	 * For nip, we should set the address to the previous instruction since
 	 * we end up emulating it in kprobe_handler(), which increments the nip
@@ -543,7 +543,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 NOKPROBE_SYMBOL(kprobe_fault_handler);
 
 static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
 };
 
@@ -554,7 +554,7 @@ int __init arch_init_kprobes(void)
 
 int arch_trampoline_kprobe(struct kprobe *p)
 {
-	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+	if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline)
 		return 1;
 
 	return 0;
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 325ba544883c..ce1903064031 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -56,7 +56,7 @@ static unsigned long can_optimize(struct kprobe *p)
 	 * has a 'nop' instruction, which can be emulated.
 	 * So further checks can be skipped.
 	 */
-	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+	if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline)
 		return addr + sizeof(kprobe_opcode_t);
 
 	/*
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 9e4a4a7af380..a2443d61728e 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -155,7 +155,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
 		 * Mark stacktraces with kretprobed functions on them
 		 * as unreliable.
 		 */
-		if (ip == (unsigned long)kretprobe_trampoline)
+		if (ip == (unsigned long)__kretprobe_trampoline)
 			return -EINVAL;
 #endif
 
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index 9ea9b5ec3113..217ef89f22b9 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -40,7 +40,7 @@ void arch_remove_kprobe(struct kprobe *p);
 int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr);
 bool kprobe_breakpoint_handler(struct pt_regs *regs);
 bool kprobe_single_step_handler(struct pt_regs *regs);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
 
 #endif /* CONFIG_KPROBES */
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index 62d477cf11da..e6e950b7cf32 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -355,7 +355,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 {
 	ri->ret_addr = (kprobe_opcode_t *)regs->ra;
 	ri->fp = NULL;
-	regs->ra = (unsigned long) &kretprobe_trampoline;
+	regs->ra = (unsigned long) &__kretprobe_trampoline;
 }
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/kprobes_trampoline.S
index 6e85d021e2a2..7bdb09ded39b 100644
--- a/arch/riscv/kernel/probes/kprobes_trampoline.S
+++ b/arch/riscv/kernel/probes/kprobes_trampoline.S
@@ -75,7 +75,7 @@
 	REG_L x31, PT_T6(sp)
 	.endm
 
-ENTRY(kretprobe_trampoline)
+ENTRY(__kretprobe_trampoline)
 	addi sp, sp, -(PT_SIZE_ON_STACK)
 	save_all_base_regs
 
@@ -90,4 +90,4 @@ ENTRY(kretprobe_trampoline)
 	addi sp, sp, PT_SIZE_ON_STACK
 
 	ret
-ENDPROC(kretprobe_trampoline)
+ENDPROC(__kretprobe_trampoline)
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 09cdb632a490..5eb722c984e4 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -70,7 +70,7 @@ struct kprobe_ctlblk {
 };
 
 void arch_remove_kprobe(struct kprobe *p);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 
 int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 int kprobe_exceptions_notify(struct notifier_block *self,
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 5fa86e54f129..c505c0ee5f47 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -242,7 +242,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 	ri->fp = NULL;
 
 	/* Replace the return addr with trampoline addr */
-	regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
+	regs->gprs[14] = (unsigned long) &__kretprobe_trampoline;
 }
 NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
@@ -334,8 +334,8 @@ NOKPROBE_SYMBOL(kprobe_handler);
  */
 static void __used kretprobe_trampoline_holder(void)
 {
-	asm volatile(".global kretprobe_trampoline\n"
-		     "kretprobe_trampoline: bcr 0,0\n");
+	asm volatile(".global __kretprobe_trampoline\n"
+		     "__kretprobe_trampoline: bcr 0,0\n");
 }
 
 /*
@@ -509,7 +509,7 @@ int kprobe_exceptions_notify(struct notifier_block *self,
 NOKPROBE_SYMBOL(kprobe_exceptions_notify);
 
 static struct kprobe trampoline = {
-	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
 };
 
@@ -520,6 +520,6 @@ int __init arch_init_kprobes(void)
 
 int arch_trampoline_kprobe(struct kprobe *p)
 {
-	return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;
+	return p->addr == (kprobe_opcode_t *) &__kretprobe_trampoline;
 }
 NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 101477b3e263..b7bb1981e9ee 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -46,7 +46,7 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
 		 * Mark stacktraces with kretprobed functions on them
 		 * as unreliable.
 		 */
-		if (state.ip == (unsigned long)kretprobe_trampoline)
+		if (state.ip == (unsigned long)__kretprobe_trampoline)
 			return -EINVAL;
 #endif
 
diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h
index 6171682f7798..eeba83e0a7d2 100644
--- a/arch/sh/include/asm/kprobes.h
+++ b/arch/sh/include/asm/kprobes.h
@@ -26,7 +26,7 @@ typedef insn_size_t kprobe_opcode_t;
 struct kprobe;
 
 void arch_remove_kprobe(struct kprobe *);
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
diff --git a/arch/sh/kernel/kprobes.c b/arch/sh/kernel/kprobes.c
index 8e76a35e6e33..aed1ea8e2c2f 100644
--- a/arch/sh/kernel/kprobes.c
+++ b/arch/sh/kernel/kprobes.c
@@ -207,7 +207,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 	ri->fp = NULL;
 
 	/* Replace the return addr with trampoline addr */
-	regs->pr = (unsigned long)kretprobe_trampoline;
+	regs->pr = (unsigned long)__kretprobe_trampoline;
 }
 
 static int __kprobes kprobe_handler(struct pt_regs *regs)
@@ -293,13 +293,13 @@ no_kprobe:
  */
 static void __used kretprobe_trampoline_holder(void)
 {
-	asm volatile (".globl kretprobe_trampoline\n"
-		      "kretprobe_trampoline:\n\t"
+	asm volatile (".globl __kretprobe_trampoline\n"
+		      "__kretprobe_trampoline:\n\t"
 		      "nop\n");
 }
 
 /*
- * Called when we hit the probe point at kretprobe_trampoline
+ * Called when we hit the probe point at __kretprobe_trampoline
  */
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
@@ -442,7 +442,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
 }
 
 static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *)&kretprobe_trampoline,
+	.addr = (kprobe_opcode_t *)&__kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
 };
 
diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h
index bfcaa6326c20..06c2bc767ef7 100644
--- a/arch/sparc/include/asm/kprobes.h
+++ b/arch/sparc/include/asm/kprobes.h
@@ -24,7 +24,7 @@ do { 	flushi(&(p)->ainsn.insn[0]);	\
 	flushi(&(p)->ainsn.insn[1]);	\
 } while (0)
 
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
diff --git a/arch/sparc/kernel/kprobes.c b/arch/sparc/kernel/kprobes.c
index 401534236c2e..535c7b35cb59 100644
--- a/arch/sparc/kernel/kprobes.c
+++ b/arch/sparc/kernel/kprobes.c
@@ -440,7 +440,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
 
 	/* Replace the return addr with trampoline addr */
 	regs->u_regs[UREG_RETPC] =
-		((unsigned long)kretprobe_trampoline) - 8;
+		((unsigned long)__kretprobe_trampoline) - 8;
 }
 
 /*
@@ -465,13 +465,13 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
 
 static void __used kretprobe_trampoline_holder(void)
 {
-	asm volatile(".global kretprobe_trampoline\n"
-		     "kretprobe_trampoline:\n"
+	asm volatile(".global __kretprobe_trampoline\n"
+		     "__kretprobe_trampoline:\n"
 		     "\tnop\n"
 		     "\tnop\n");
 }
 static struct kprobe trampoline_p = {
-	.addr = (kprobe_opcode_t *) &kretprobe_trampoline,
+	.addr = (kprobe_opcode_t *) &__kretprobe_trampoline,
 	.pre_handler = trampoline_probe_handler
 };
 
@@ -482,7 +482,7 @@ int __init arch_init_kprobes(void)
 
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 {
-	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+	if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline)
 		return 1;
 
 	return 0;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 0c59ef5971de..79cd23dba5b5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -809,7 +809,7 @@ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
 	ri->fp = sara;
 
 	/* Replace the return addr with trampoline addr */
-	*sara = (unsigned long) &kretprobe_trampoline;
+	*sara = (unsigned long) &__kretprobe_trampoline;
 }
 NOKPROBE_SYMBOL(arch_prepare_kretprobe);
 
@@ -1019,9 +1019,9 @@ NOKPROBE_SYMBOL(kprobe_int3_handler);
  */
 asm(
 	".text\n"
-	".global kretprobe_trampoline\n"
-	".type kretprobe_trampoline, @function\n"
-	"kretprobe_trampoline:\n"
+	".global __kretprobe_trampoline\n"
+	".type __kretprobe_trampoline, @function\n"
+	"__kretprobe_trampoline:\n"
 	/* We don't bother saving the ss register */
 #ifdef CONFIG_X86_64
 	"	pushq %rsp\n"
@@ -1045,14 +1045,14 @@ asm(
 	"	popfl\n"
 #endif
 	"	ret\n"
-	".size kretprobe_trampoline, .-kretprobe_trampoline\n"
+	".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"
 );
-NOKPROBE_SYMBOL(kretprobe_trampoline);
-STACK_FRAME_NON_STANDARD(kretprobe_trampoline);
+NOKPROBE_SYMBOL(__kretprobe_trampoline);
+STACK_FRAME_NON_STANDARD(__kretprobe_trampoline);
 
 
 /*
- * Called from kretprobe_trampoline
+ * Called from __kretprobe_trampoline
  */
 __used __visible void *trampoline_handler(struct pt_regs *regs)
 {
@@ -1061,7 +1061,7 @@ __used __visible void *trampoline_handler(struct pt_regs *regs)
 #ifdef CONFIG_X86_32
 	regs->gs = 0;
 #endif
-	regs->ip = (unsigned long)&kretprobe_trampoline;
+	regs->ip = (unsigned long)&__kretprobe_trampoline;
 	regs->orig_ax = ~0UL;
 
 	return (void *)kretprobe_trampoline_handler(regs, &regs->sp);
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 96f5df93e36e..b6b2370f4a4c 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -188,14 +188,14 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				   struct pt_regs *regs);
 extern int arch_trampoline_kprobe(struct kprobe *p);
 
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
 /*
  * Since some architecture uses structured function pointer,
  * use dereference_function_descriptor() to get real function address.
  */
 static nokprobe_inline void *kretprobe_trampoline_addr(void)
 {
-	return dereference_kernel_function_descriptor(kretprobe_trampoline);
+	return dereference_kernel_function_descriptor(__kretprobe_trampoline);
 }
 
 /* If the trampoline handler called from a kprobe, use this version */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index c2ca40e8595b..5a5949c659d0 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -349,7 +349,7 @@ EXPORT_SYMBOL_GPL(trace_output_call);
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
-	static const char tramp_name[] = "kretprobe_trampoline";
+	static const char tramp_name[] = "__kretprobe_trampoline";
 	int size = sizeof(tramp_name);
 
 	if (strncmp(tramp_name, name, size) == 0)
-- 
cgit v1.2.3


From 03bac0df2886882c43e6d0bfff9dee84a184fc7e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:41:04 +0900
Subject: kprobes: Add kretprobe_find_ret_addr() for searching return address

Introduce kretprobe_find_ret_addr() and is_kretprobe_trampoline().
These APIs will be used by the ORC stack unwinder and ftrace, so that
they can check whether the given address points kretprobe trampoline
code and query the correct return address in that case.

Link: https://lkml.kernel.org/r/163163046461.489837.1044778356430293962.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h |  22 ++++++++++
 kernel/kprobes.c        | 109 ++++++++++++++++++++++++++++++++++++------------
 2 files changed, 105 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b6b2370f4a4c..6d47a9da1e0a 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -505,6 +505,28 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
 }
 #endif /* !CONFIG_OPTPROBES */
 
+#ifdef CONFIG_KRETPROBES
+static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr)
+{
+	return (void *)addr == kretprobe_trampoline_addr();
+}
+
+unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
+				      struct llist_node **cur);
+#else
+static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr)
+{
+	return false;
+}
+
+static nokprobe_inline
+unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
+				      struct llist_node **cur)
+{
+	return 0;
+}
+#endif
+
 /* Returns true if kprobes handled the fault */
 static nokprobe_inline bool kprobe_page_fault(struct pt_regs *regs,
 					      unsigned int trap)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 6ed755111eea..833f07f33115 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1863,45 +1863,87 @@ static struct notifier_block kprobe_exceptions_nb = {
 
 #ifdef CONFIG_KRETPROBES
 
-unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
-					     void *frame_pointer)
+/* This assumes the 'tsk' is the current task or the is not running. */
+static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
+						  struct llist_node **cur)
 {
-	kprobe_opcode_t *correct_ret_addr = NULL;
 	struct kretprobe_instance *ri = NULL;
-	struct llist_node *first, *node;
-	struct kretprobe *rp;
+	struct llist_node *node = *cur;
+
+	if (!node)
+		node = tsk->kretprobe_instances.first;
+	else
+		node = node->next;
 
-	/* Find all nodes for this frame. */
-	first = node = current->kretprobe_instances.first;
 	while (node) {
 		ri = container_of(node, struct kretprobe_instance, llist);
-
-		BUG_ON(ri->fp != frame_pointer);
-
 		if (ri->ret_addr != kretprobe_trampoline_addr()) {
-			correct_ret_addr = ri->ret_addr;
-			/*
-			 * This is the real return address. Any other
-			 * instances associated with this task are for
-			 * other calls deeper on the call stack
-			 */
-			goto found;
+			*cur = node;
+			return ri->ret_addr;
 		}
-
 		node = node->next;
 	}
-	pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
-	BUG_ON(1);
+	return NULL;
+}
+NOKPROBE_SYMBOL(__kretprobe_find_ret_addr);
 
-found:
-	/* Unlink all nodes for this frame. */
-	current->kretprobe_instances.first = node->next;
-	node->next = NULL;
+/**
+ * kretprobe_find_ret_addr -- Find correct return address modified by kretprobe
+ * @tsk: Target task
+ * @fp: A frame pointer
+ * @cur: a storage of the loop cursor llist_node pointer for next call
+ *
+ * Find the correct return address modified by a kretprobe on @tsk in unsigned
+ * long type. If it finds the return address, this returns that address value,
+ * or this returns 0.
+ * The @tsk must be 'current' or a task which is not running. @fp is a hint
+ * to get the currect return address - which is compared with the
+ * kretprobe_instance::fp field. The @cur is a loop cursor for searching the
+ * kretprobe return addresses on the @tsk. The '*@cur' should be NULL at the
+ * first call, but '@cur' itself must NOT NULL.
+ */
+unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
+				      struct llist_node **cur)
+{
+	struct kretprobe_instance *ri = NULL;
+	kprobe_opcode_t *ret;
+
+	if (WARN_ON_ONCE(!cur))
+		return 0;
+
+	do {
+		ret = __kretprobe_find_ret_addr(tsk, cur);
+		if (!ret)
+			break;
+		ri = container_of(*cur, struct kretprobe_instance, llist);
+	} while (ri->fp != fp);
 
-	/* Run them..  */
+	return (unsigned long)ret;
+}
+NOKPROBE_SYMBOL(kretprobe_find_ret_addr);
+
+unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
+					     void *frame_pointer)
+{
+	kprobe_opcode_t *correct_ret_addr = NULL;
+	struct kretprobe_instance *ri = NULL;
+	struct llist_node *first, *node = NULL;
+	struct kretprobe *rp;
+
+	/* Find correct address and all nodes for this frame. */
+	correct_ret_addr = __kretprobe_find_ret_addr(current, &node);
+	if (!correct_ret_addr) {
+		pr_err("kretprobe: Return address not found, not execute handler. Maybe there is a bug in the kernel.\n");
+		BUG_ON(1);
+	}
+
+	/* Run the user handler of the nodes. */
+	first = current->kretprobe_instances.first;
 	while (first) {
 		ri = container_of(first, struct kretprobe_instance, llist);
-		first = first->next;
+
+		if (WARN_ON_ONCE(ri->fp != frame_pointer))
+			break;
 
 		rp = get_kretprobe(ri);
 		if (rp && rp->handler) {
@@ -1912,6 +1954,21 @@ found:
 			rp->handler(ri, regs);
 			__this_cpu_write(current_kprobe, prev);
 		}
+		if (first == node)
+			break;
+
+		first = first->next;
+	}
+
+	/* Unlink all nodes for this frame. */
+	first = current->kretprobe_instances.first;
+	current->kretprobe_instances.first = node->next;
+	node->next = NULL;
+
+	/* Recycle free instances. */
+	while (first) {
+		ri = container_of(first, struct kretprobe_instance, llist);
+		first = first->next;
 
 		recycle_rp_inst(ri);
 	}
-- 
cgit v1.2.3


From e028c4f7ac7ca8c96126fe46c54ab3d56ffe6a66 Mon Sep 17 00:00:00 2001
From: Josh Poimboeuf <jpoimboe@redhat.com>
Date: Tue, 14 Sep 2021 23:41:13 +0900
Subject: objtool: Add frame-pointer-specific function ignore

Add a CONFIG_FRAME_POINTER-specific version of
STACK_FRAME_NON_STANDARD() for the case where a function is
intentionally missing frame pointer setup, but otherwise needs
objtool/ORC coverage when frame pointers are disabled.

Link: https://lkml.kernel.org/r/163163047364.489837.17377799909553689661.stgit@devnote2

Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/objtool.h       | 12 ++++++++++++
 tools/include/linux/objtool.h | 12 ++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/objtool.h b/include/linux/objtool.h
index 7e72d975cb76..aca52db2f3f3 100644
--- a/include/linux/objtool.h
+++ b/include/linux/objtool.h
@@ -66,6 +66,17 @@ struct unwind_hint {
 	static void __used __section(".discard.func_stack_frame_non_standard") \
 		*__func_stack_frame_non_standard_##func = func
 
+/*
+ * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore
+ * for the case where a function is intentionally missing frame pointer setup,
+ * but otherwise needs objtool/ORC coverage when frame pointers are disabled.
+ */
+#ifdef CONFIG_FRAME_POINTER
+#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func)
+#else
+#define STACK_FRAME_NON_STANDARD_FP(func)
+#endif
+
 #else /* __ASSEMBLY__ */
 
 /*
@@ -127,6 +138,7 @@ struct unwind_hint {
 #define UNWIND_HINT(sp_reg, sp_offset, type, end)	\
 	"\n\t"
 #define STACK_FRAME_NON_STANDARD(func)
+#define STACK_FRAME_NON_STANDARD_FP(func)
 #else
 #define ANNOTATE_INTRA_FUNCTION_CALL
 .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index 7e72d975cb76..aca52db2f3f3 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -66,6 +66,17 @@ struct unwind_hint {
 	static void __used __section(".discard.func_stack_frame_non_standard") \
 		*__func_stack_frame_non_standard_##func = func
 
+/*
+ * STACK_FRAME_NON_STANDARD_FP() is a frame-pointer-specific function ignore
+ * for the case where a function is intentionally missing frame pointer setup,
+ * but otherwise needs objtool/ORC coverage when frame pointers are disabled.
+ */
+#ifdef CONFIG_FRAME_POINTER
+#define STACK_FRAME_NON_STANDARD_FP(func) STACK_FRAME_NON_STANDARD(func)
+#else
+#define STACK_FRAME_NON_STANDARD_FP(func)
+#endif
+
 #else /* __ASSEMBLY__ */
 
 /*
@@ -127,6 +138,7 @@ struct unwind_hint {
 #define UNWIND_HINT(sp_reg, sp_offset, type, end)	\
 	"\n\t"
 #define STACK_FRAME_NON_STANDARD(func)
+#define STACK_FRAME_NON_STANDARD_FP(func)
 #else
 #define ANNOTATE_INTRA_FUNCTION_CALL
 .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
-- 
cgit v1.2.3


From bf094cffea2a6503ce84062f9f0243bef77c58f9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Tue, 14 Sep 2021 23:42:51 +0900
Subject: x86/kprobes: Fixup return address in generic trampoline handler

In x86, the fake return address on the stack saved by
__kretprobe_trampoline() will be replaced with the real return
address after returning from trampoline_handler(). Before fixing
the return address, the real return address can be found in the
'current->kretprobe_instances'.

However, since there is a window between updating the
'current->kretprobe_instances' and fixing the address on the stack,
if an interrupt happens at that timing and the interrupt handler
does stacktrace, it may fail to unwind because it can not get
the correct return address from 'current->kretprobe_instances'.

This will eliminate that window by fixing the return address
right before updating 'current->kretprobe_instances'.

Link: https://lkml.kernel.org/r/163163057094.489837.9044470370440745866.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Tested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/x86/kernel/kprobes/core.c | 18 ++++++++++++++++--
 include/linux/kprobes.h        |  3 +++
 kernel/kprobes.c               | 11 +++++++++++
 3 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 7e1111c19605..fce99e249d61 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -1065,6 +1065,16 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline);
  */
 STACK_FRAME_NON_STANDARD_FP(__kretprobe_trampoline);
 
+/* This is called from kretprobe_trampoline_handler(). */
+void arch_kretprobe_fixup_return(struct pt_regs *regs,
+				 kprobe_opcode_t *correct_ret_addr)
+{
+	unsigned long *frame_pointer = &regs->sp + 1;
+
+	/* Replace fake return address with real one. */
+	*frame_pointer = (unsigned long)correct_ret_addr;
+}
+
 /*
  * Called from __kretprobe_trampoline
  */
@@ -1082,8 +1092,12 @@ __used __visible void trampoline_handler(struct pt_regs *regs)
 	regs->sp += sizeof(long);
 	frame_pointer = &regs->sp + 1;
 
-	/* Replace fake return address with real one. */
-	*frame_pointer = kretprobe_trampoline_handler(regs, frame_pointer);
+	/*
+	 * The return address at 'frame_pointer' is recovered by the
+	 * arch_kretprobe_fixup_return() which called from the
+	 * kretprobe_trampoline_handler().
+	 */
+	kretprobe_trampoline_handler(regs, frame_pointer);
 
 	/*
 	 * Copy FLAGS to 'pt_regs::sp' so that __kretprobe_trapmoline()
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 6d47a9da1e0a..e974caf39d3e 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -188,6 +188,9 @@ extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				   struct pt_regs *regs);
 extern int arch_trampoline_kprobe(struct kprobe *p);
 
+void arch_kretprobe_fixup_return(struct pt_regs *regs,
+				 kprobe_opcode_t *correct_ret_addr);
+
 void __kretprobe_trampoline(void);
 /*
  * Since some architecture uses structured function pointer,
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index ebc587b9a346..b62af9fc3607 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1922,6 +1922,15 @@ unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
 }
 NOKPROBE_SYMBOL(kretprobe_find_ret_addr);
 
+void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
+					kprobe_opcode_t *correct_ret_addr)
+{
+	/*
+	 * Do nothing by default. Please fill this to update the fake return
+	 * address on the stack with the correct one on each arch if possible.
+	 */
+}
+
 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
 					     void *frame_pointer)
 {
@@ -1967,6 +1976,8 @@ unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
 		first = first->next;
 	}
 
+	arch_kretprobe_fixup_return(regs, correct_ret_addr);
+
 	/* Unlink all nodes for this frame. */
 	first = current->kretprobe_instances.first;
 	current->kretprobe_instances.first = node->next;
-- 
cgit v1.2.3


From 78b497f2e62d8c7514de5f83c80837bbb120e93e Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Fri, 3 Sep 2021 15:08:05 +0200
Subject: kvm: use kvfree() in kvm_arch_free_vm()

By switching from kfree() to kvfree() in kvm_arch_free_vm() Arm64 can
use the common variant. This can be accomplished by adding another
macro __KVM_HAVE_ARCH_VM_FREE, which will be used only by x86 for now.

Further simplification can be achieved by adding __kvm_arch_free_vm()
doing the common part.

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
Message-Id: <20210903130808.30142-5-jgross@suse.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/include/asm/kvm_host.h | 1 -
 arch/arm64/kvm/arm.c              | 8 --------
 arch/x86/include/asm/kvm_host.h   | 2 ++
 arch/x86/kvm/x86.c                | 2 +-
 include/linux/kvm_host.h          | 9 ++++++++-
 5 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f8be56d5342b..369c30e28301 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -771,7 +771,6 @@ int kvm_set_ipa_limit(void);
 
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
 
 int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index fe102cd2e518..7838e9fb693e 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -297,14 +297,6 @@ struct kvm *kvm_arch_alloc_vm(void)
 	return vzalloc(sizeof(struct kvm));
 }
 
-void kvm_arch_free_vm(struct kvm *kvm)
-{
-	if (!has_vhe())
-		kfree(kvm);
-	else
-		vfree(kvm);
-}
-
 int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
 {
 	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8470d4673068..1b280292bdff 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1540,6 +1540,8 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
 {
 	return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 }
+
+#define __KVM_HAVE_ARCH_VM_FREE
 void kvm_arch_free_vm(struct kvm *kvm);
 
 #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 79535fe83a04..03091a2e0822 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11130,7 +11130,7 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 void kvm_arch_free_vm(struct kvm *kvm)
 {
 	kfree(to_kvm_hv(kvm)->hv_pa_pg);
-	vfree(kvm);
+	__kvm_arch_free_vm(kvm);
 }
 
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3f87d6ad20bf..60a35d9fe259 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1081,10 +1081,17 @@ static inline struct kvm *kvm_arch_alloc_vm(void)
 {
 	return kzalloc(sizeof(struct kvm), GFP_KERNEL);
 }
+#endif
+
+static inline void __kvm_arch_free_vm(struct kvm *kvm)
+{
+	kvfree(kvm);
+}
 
+#ifndef __KVM_HAVE_ARCH_VM_FREE
 static inline void kvm_arch_free_vm(struct kvm *kvm)
 {
-	kfree(kvm);
+	__kvm_arch_free_vm(kvm);
 }
 #endif
 
-- 
cgit v1.2.3


From 874f670e6088d3bff3972ecd44c1cb00610f9183 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 23 Sep 2021 18:54:35 +0200
Subject: sched: Clean up the might_sleep() underscore zoo

__might_sleep() vs. ___might_sleep() is hard to distinguish. Aside of that
the three underscore variant is exposed to provide a checkpoint for
rescheduling points which are distinct from blocking points.

They are semantically a preemption point which means that scheduling is
state preserving. A real blocking operation, e.g. mutex_lock(), wait*(),
which cannot preserve a task state which is not equal to RUNNING.

While technically blocking on a "sleeping" spinlock in RT enabled kernels
falls into the voluntary scheduling category because it has to wait until
the contended spin/rw lock becomes available, the RT lock substitution code
can semantically be mapped to a voluntary preemption because the RT lock
substitution code and the scheduler are providing mechanisms to preserve
the task state and to take regular non-lock related wakeups into account.

Rename ___might_sleep() to __might_resched() to make the distinction of
these functions clear.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210923165357.928693482@linutronix.de
---
 include/linux/kernel.h       | 6 +++---
 include/linux/sched.h        | 8 ++++----
 kernel/locking/spinlock_rt.c | 6 +++---
 kernel/sched/core.c          | 6 +++---
 4 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2776423a587e..5e4ae54da73e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -111,7 +111,7 @@ static __always_inline void might_resched(void)
 #endif /* CONFIG_PREEMPT_* */
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-extern void ___might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_resched(const char *file, int line, int preempt_offset);
 extern void __might_sleep(const char *file, int line, int preempt_offset);
 extern void __cant_sleep(const char *file, int line, int preempt_offset);
 extern void __cant_migrate(const char *file, int line);
@@ -168,8 +168,8 @@ extern void __cant_migrate(const char *file, int line);
  */
 # define non_block_end() WARN_ON(current->non_block_count-- == 0)
 #else
-  static inline void ___might_sleep(const char *file, int line,
-				   int preempt_offset) { }
+  static inline void __might_resched(const char *file, int line,
+				     int preempt_offset) { }
   static inline void __might_sleep(const char *file, int line,
 				   int preempt_offset) { }
 # define might_sleep() do { might_resched(); } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e12b524426b0..b38f002334d5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2038,7 +2038,7 @@ static inline int _cond_resched(void) { return 0; }
 #endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
 
 #define cond_resched() ({			\
-	___might_sleep(__FILE__, __LINE__, 0);	\
+	__might_resched(__FILE__, __LINE__, 0);	\
 	_cond_resched();			\
 })
 
@@ -2046,9 +2046,9 @@ extern int __cond_resched_lock(spinlock_t *lock);
 extern int __cond_resched_rwlock_read(rwlock_t *lock);
 extern int __cond_resched_rwlock_write(rwlock_t *lock);
 
-#define cond_resched_lock(lock) ({				\
-	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
-	__cond_resched_lock(lock);				\
+#define cond_resched_lock(lock) ({					\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	__cond_resched_lock(lock);					\
 })
 
 #define cond_resched_rwlock_read(lock) ({			\
diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c
index d2912e44d61f..c5289240cfb4 100644
--- a/kernel/locking/spinlock_rt.c
+++ b/kernel/locking/spinlock_rt.c
@@ -32,7 +32,7 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
 
 static __always_inline void __rt_spin_lock(spinlock_t *lock)
 {
-	___might_sleep(__FILE__, __LINE__, 0);
+	__might_resched(__FILE__, __LINE__, 0);
 	rtlock_lock(&lock->lock);
 	rcu_read_lock();
 	migrate_disable();
@@ -210,7 +210,7 @@ EXPORT_SYMBOL(rt_write_trylock);
 
 void __sched rt_read_lock(rwlock_t *rwlock)
 {
-	___might_sleep(__FILE__, __LINE__, 0);
+	__might_resched(__FILE__, __LINE__, 0);
 	rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_);
 	rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
 	rcu_read_lock();
@@ -220,7 +220,7 @@ EXPORT_SYMBOL(rt_read_lock);
 
 void __sched rt_write_lock(rwlock_t *rwlock)
 {
-	___might_sleep(__FILE__, __LINE__, 0);
+	__might_resched(__FILE__, __LINE__, 0);
 	rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_);
 	rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT);
 	rcu_read_lock();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1bba4128a3e6..c3943aa2f60c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9489,11 +9489,11 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 			(void *)current->task_state_change,
 			(void *)current->task_state_change);
 
-	___might_sleep(file, line, preempt_offset);
+	__might_resched(file, line, preempt_offset);
 }
 EXPORT_SYMBOL(__might_sleep);
 
-void ___might_sleep(const char *file, int line, int preempt_offset)
+void __might_resched(const char *file, int line, int preempt_offset)
 {
 	/* Ratelimiting timestamp: */
 	static unsigned long prev_jiffy;
@@ -9538,7 +9538,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
-EXPORT_SYMBOL(___might_sleep);
+EXPORT_SYMBOL(__might_resched);
 
 void __cant_sleep(const char *file, int line, int preempt_offset)
 {
-- 
cgit v1.2.3


From 7b5ff4bb9adc53cfbf7ac9ba7820ccf0cd7c070a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 23 Sep 2021 18:54:37 +0200
Subject: sched: Make cond_resched_*lock() variants consistent vs.
 might_sleep()

Commit 3427445afd26 ("sched: Exclude cond_resched() from nested sleep
test") removed the task state check of __might_sleep() for
cond_resched_lock() because cond_resched_lock() is not a voluntary
scheduling point which blocks. It's a preemption point which requires the
lock holder to release the spin lock.

The same rationale applies to cond_resched_rwlock_read/write(), but those
were not touched.

Make it consistent and use the non-state checking __might_resched() there
as well.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210923165357.991262778@linutronix.de
---
 include/linux/sched.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b38f002334d5..7a989f2487f8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2051,14 +2051,14 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
 	__cond_resched_lock(lock);					\
 })
 
-#define cond_resched_rwlock_read(lock) ({			\
-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
-	__cond_resched_rwlock_read(lock);			\
+#define cond_resched_rwlock_read(lock) ({				\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	__cond_resched_rwlock_read(lock);				\
 })
 
-#define cond_resched_rwlock_write(lock) ({			\
-	__might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
-	__cond_resched_rwlock_write(lock);			\
+#define cond_resched_rwlock_write(lock) ({				\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
+	__cond_resched_rwlock_write(lock);				\
 })
 
 static inline void cond_resched_rcu(void)
-- 
cgit v1.2.3


From 42a387566c567603bafa1ec0c5b71c35cba83e86 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 23 Sep 2021 18:54:38 +0200
Subject: sched: Remove preempt_offset argument from __might_sleep()

All callers hand in 0 and never will hand in anything else.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210923165358.054321586@linutronix.de
---
 include/linux/kernel.h | 7 +++----
 kernel/sched/core.c    | 4 ++--
 mm/memory.c            | 2 +-
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5e4ae54da73e..f95ee786e4ef 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -112,7 +112,7 @@ static __always_inline void might_resched(void)
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 extern void __might_resched(const char *file, int line, int preempt_offset);
-extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_sleep(const char *file, int line);
 extern void __cant_sleep(const char *file, int line, int preempt_offset);
 extern void __cant_migrate(const char *file, int line);
 
@@ -129,7 +129,7 @@ extern void __cant_migrate(const char *file, int line);
  * supposed to.
  */
 # define might_sleep() \
-	do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+	do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0)
 /**
  * cant_sleep - annotation for functions that cannot sleep
  *
@@ -170,8 +170,7 @@ extern void __cant_migrate(const char *file, int line);
 #else
   static inline void __might_resched(const char *file, int line,
 				     int preempt_offset) { }
-  static inline void __might_sleep(const char *file, int line,
-				   int preempt_offset) { }
+static inline void __might_sleep(const char *file, int line) { }
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
 # define cant_migrate()		do { } while (0)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c3943aa2f60c..2d790df62ec9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9475,7 +9475,7 @@ static inline int preempt_count_equals(int preempt_offset)
 	return (nested == preempt_offset);
 }
 
-void __might_sleep(const char *file, int line, int preempt_offset)
+void __might_sleep(const char *file, int line)
 {
 	unsigned int state = get_current_state();
 	/*
@@ -9489,7 +9489,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
 			(void *)current->task_state_change,
 			(void *)current->task_state_change);
 
-	__might_resched(file, line, preempt_offset);
+	__might_resched(file, line, 0);
 }
 EXPORT_SYMBOL(__might_sleep);
 
diff --git a/mm/memory.c b/mm/memory.c
index 25fc46e87214..1cd1792c00f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5255,7 +5255,7 @@ void __might_fault(const char *file, int line)
 		return;
 	if (pagefault_disabled())
 		return;
-	__might_sleep(file, line, 0);
+	__might_sleep(file, line);
 #if defined(CONFIG_DEBUG_ATOMIC_SLEEP)
 	if (current->mm)
 		might_lock_read(&current->mm->mmap_lock);
-- 
cgit v1.2.3


From 50e081b96e35e43b65591f40f7376204decd1cb5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 23 Sep 2021 18:54:43 +0200
Subject: sched: Make RCU nest depth distinct in __might_resched()

For !RT kernels RCU nest depth in __might_resched() is always expected to
be 0, but on RT kernels it can be non zero while the preempt count is
expected to be always 0.

Instead of playing magic games in interpreting the 'preempt_offset'
argument, rename it to 'offsets' and use the lower 8 bits for the expected
preempt count, allow to hand in the expected RCU nest depth in the upper
bits and adopt the __might_resched() code and related checks and printks.

The affected call sites are updated in subsequent steps.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210923165358.243232823@linutronix.de
---
 include/linux/kernel.h |  4 ++--
 include/linux/sched.h  |  3 +++
 kernel/sched/core.c    | 28 ++++++++++++++++------------
 3 files changed, 21 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index f95ee786e4ef..e8696e4a45aa 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -111,7 +111,7 @@ static __always_inline void might_resched(void)
 #endif /* CONFIG_PREEMPT_* */
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-extern void __might_resched(const char *file, int line, int preempt_offset);
+extern void __might_resched(const char *file, int line, unsigned int offsets);
 extern void __might_sleep(const char *file, int line);
 extern void __cant_sleep(const char *file, int line, int preempt_offset);
 extern void __cant_migrate(const char *file, int line);
@@ -169,7 +169,7 @@ extern void __cant_migrate(const char *file, int line);
 # define non_block_end() WARN_ON(current->non_block_count-- == 0)
 #else
   static inline void __might_resched(const char *file, int line,
-				     int preempt_offset) { }
+				     unsigned int offsets) { }
 static inline void __might_sleep(const char *file, int line) { }
 # define might_sleep() do { might_resched(); } while (0)
 # define cant_sleep() do { } while (0)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7a989f2487f8..b448c7460577 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2046,6 +2046,9 @@ extern int __cond_resched_lock(spinlock_t *lock);
 extern int __cond_resched_rwlock_read(rwlock_t *lock);
 extern int __cond_resched_rwlock_write(rwlock_t *lock);
 
+#define MIGHT_RESCHED_RCU_SHIFT		8
+#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
+
 #define cond_resched_lock(lock) ({					\
 	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
 	__cond_resched_lock(lock);					\
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0a27cb8f72a9..8d3fa0768e5b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9468,12 +9468,6 @@ void __init sched_init(void)
 }
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-static inline int preempt_count_equals(int preempt_offset)
-{
-	int nested = preempt_count() + rcu_preempt_depth();
-
-	return (nested == preempt_offset);
-}
 
 void __might_sleep(const char *file, int line)
 {
@@ -9505,7 +9499,16 @@ static void print_preempt_disable_ip(int preempt_offset, unsigned long ip)
 	print_ip_sym(KERN_ERR, ip);
 }
 
-void __might_resched(const char *file, int line, int preempt_offset)
+static inline bool resched_offsets_ok(unsigned int offsets)
+{
+	unsigned int nested = preempt_count();
+
+	nested += rcu_preempt_depth() << MIGHT_RESCHED_RCU_SHIFT;
+
+	return nested == offsets;
+}
+
+void __might_resched(const char *file, int line, unsigned int offsets)
 {
 	/* Ratelimiting timestamp: */
 	static unsigned long prev_jiffy;
@@ -9515,7 +9518,7 @@ void __might_resched(const char *file, int line, int preempt_offset)
 	/* WARN_ON_ONCE() by default, no rate limit required: */
 	rcu_sleep_check();
 
-	if ((preempt_count_equals(preempt_offset) && !irqs_disabled() &&
+	if ((resched_offsets_ok(offsets) && !irqs_disabled() &&
 	     !is_idle_task(current) && !current->non_block_count) ||
 	    system_state == SYSTEM_BOOTING || system_state > SYSTEM_RUNNING ||
 	    oops_in_progress)
@@ -9534,11 +9537,11 @@ void __might_resched(const char *file, int line, int preempt_offset)
 	       in_atomic(), irqs_disabled(), current->non_block_count,
 	       current->pid, current->comm);
 	pr_err("preempt_count: %x, expected: %x\n", preempt_count(),
-	       preempt_offset);
+	       offsets & MIGHT_RESCHED_PREEMPT_MASK);
 
 	if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
-		pr_err("RCU nest depth: %d, expected: 0\n",
-		       rcu_preempt_depth());
+		pr_err("RCU nest depth: %d, expected: %u\n",
+		       rcu_preempt_depth(), offsets >> MIGHT_RESCHED_RCU_SHIFT);
 	}
 
 	if (task_stack_end_corrupted(current))
@@ -9548,7 +9551,8 @@ void __might_resched(const char *file, int line, int preempt_offset)
 	if (irqs_disabled())
 		print_irqtrace_events(current);
 
-	print_preempt_disable_ip(preempt_offset, preempt_disable_ip);
+	print_preempt_disable_ip(offsets & MIGHT_RESCHED_PREEMPT_MASK,
+				 preempt_disable_ip);
 
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
-- 
cgit v1.2.3


From 3e9cc688e56cc2abb9b6067f57c8397f6c96d42c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 23 Sep 2021 18:54:44 +0200
Subject: sched: Make cond_resched_lock() variants RT aware

The __might_resched() checks in the cond_resched_lock() variants use
PREEMPT_LOCK_OFFSET for preempt count offset checking which takes the
preemption disable by the spin_lock() which is still held at that point
into account.

On PREEMPT_RT enabled kernels spin/rw_lock held sections stay preemptible
which means PREEMPT_LOCK_OFFSET is 0, but that still triggers the
__might_resched() check because that takes RCU read side nesting into
account.

On RT enabled kernels spin/read/write_lock() issue rcu_read_lock() to
resemble the !RT semantics, which means in cond_resched_lock() the might
resched check will see preempt_count() == 0 and rcu_preempt_depth() == 1.

Introduce PREEMPT_LOCK_SCHED_OFFSET for those might resched checks and map
them depending on CONFIG_PREEMPT_RT.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210923165358.305969211@linutronix.de
---
 include/linux/preempt.h |  5 +++--
 include/linux/sched.h   | 34 +++++++++++++++++++++++++---------
 2 files changed, 28 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4d244e295e85..031898b38d06 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -122,9 +122,10 @@
  * The preempt_count offset after spin_lock()
  */
 #if !defined(CONFIG_PREEMPT_RT)
-#define PREEMPT_LOCK_OFFSET	PREEMPT_DISABLE_OFFSET
+#define PREEMPT_LOCK_OFFSET		PREEMPT_DISABLE_OFFSET
 #else
-#define PREEMPT_LOCK_OFFSET	0
+/* Locks on RT do not disable preemption */
+#define PREEMPT_LOCK_OFFSET		0
 #endif
 
 /*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b448c7460577..21b7cd00bf1d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2049,19 +2049,35 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock);
 #define MIGHT_RESCHED_RCU_SHIFT		8
 #define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)
 
-#define cond_resched_lock(lock) ({					\
-	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
-	__cond_resched_lock(lock);					\
+#ifndef CONFIG_PREEMPT_RT
+/*
+ * Non RT kernels have an elevated preempt count due to the held lock,
+ * but are not allowed to be inside a RCU read side critical section
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
+#else
+/*
+ * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
+ * cond_resched*lock() has to take that into account because it checks for
+ * preempt_count() and rcu_preempt_depth().
+ */
+# define PREEMPT_LOCK_RESCHED_OFFSETS	\
+	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
+#endif
+
+#define cond_resched_lock(lock) ({						\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
+	__cond_resched_lock(lock);						\
 })
 
-#define cond_resched_rwlock_read(lock) ({				\
-	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
-	__cond_resched_rwlock_read(lock);				\
+#define cond_resched_rwlock_read(lock) ({					\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
+	__cond_resched_rwlock_read(lock);					\
 })
 
-#define cond_resched_rwlock_write(lock) ({				\
-	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);	\
-	__cond_resched_rwlock_write(lock);				\
+#define cond_resched_rwlock_write(lock) ({					\
+	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
+	__cond_resched_rwlock_write(lock);					\
 })
 
 static inline void cond_resched_rcu(void)
-- 
cgit v1.2.3


From f792565326825ed806626da50c6f9a928f1079c1 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Wed, 29 Sep 2021 12:43:13 -0700
Subject: perf/core: fix userpage->time_enabled of inactive events

Users of rdpmc rely on the mmapped user page to calculate accurate
time_enabled. Currently, userpage->time_enabled is only updated when the
event is added to the pmu. As a result, inactive event (due to counter
multiplexing) does not have accurate userpage->time_enabled. This can
be reproduced with something like:

   /* open 20 task perf_event "cycles", to create multiplexing */

   fd = perf_event_open();  /* open task perf_event "cycles" */
   userpage = mmap(fd);     /* use mmap and rdmpc */

   while (true) {
     time_enabled_mmap = xxx; /* use logic in perf_event_mmap_page */
     time_enabled_read = read(fd).time_enabled;
     if (time_enabled_mmap > time_enabled_read)
         BUG();
   }

Fix this by updating userpage for inactive events in merge_sched_in.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reported-and-tested-by: Lucian Grijincu <lucian@fb.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210929194313.2398474-1-songliubraving@fb.com
---
 include/linux/perf_event.h |  4 +++-
 kernel/events/core.c       | 34 ++++++++++++++++++++++++++++++----
 2 files changed, 33 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fe156a8170aa..9b60bb89d86a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -683,7 +683,9 @@ struct perf_event {
 	/*
 	 * timestamp shadows the actual context timing but it can
 	 * be safely used in NMI interrupt context. It reflects the
-	 * context time as it was when the event was last scheduled in.
+	 * context time as it was when the event was last scheduled in,
+	 * or when ctx_sched_in failed to schedule the event because we
+	 * run out of PMC.
 	 *
 	 * ctx_time already accounts for ctx->timestamp. Therefore to
 	 * compute ctx_time for a sample, simply add perf_clock().
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0c000cb01eeb..f23ca260307f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3707,6 +3707,29 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
 	return 0;
 }
 
+static inline bool event_update_userpage(struct perf_event *event)
+{
+	if (likely(!atomic_read(&event->mmap_count)))
+		return false;
+
+	perf_event_update_time(event);
+	perf_set_shadow_time(event, event->ctx);
+	perf_event_update_userpage(event);
+
+	return true;
+}
+
+static inline void group_update_userpage(struct perf_event *group_event)
+{
+	struct perf_event *event;
+
+	if (!event_update_userpage(group_event))
+		return;
+
+	for_each_sibling_event(event, group_event)
+		event_update_userpage(event);
+}
+
 static int merge_sched_in(struct perf_event *event, void *data)
 {
 	struct perf_event_context *ctx = event->ctx;
@@ -3725,14 +3748,15 @@ static int merge_sched_in(struct perf_event *event, void *data)
 	}
 
 	if (event->state == PERF_EVENT_STATE_INACTIVE) {
+		*can_add_hw = 0;
 		if (event->attr.pinned) {
 			perf_cgroup_event_disable(event, ctx);
 			perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+		} else {
+			ctx->rotate_necessary = 1;
+			perf_mux_hrtimer_restart(cpuctx);
+			group_update_userpage(event);
 		}
-
-		*can_add_hw = 0;
-		ctx->rotate_necessary = 1;
-		perf_mux_hrtimer_restart(cpuctx);
 	}
 
 	return 0;
@@ -6324,6 +6348,8 @@ accounting:
 
 		ring_buffer_attach(event, rb);
 
+		perf_event_update_time(event);
+		perf_set_shadow_time(event, event->ctx);
 		perf_event_init_userpage(event);
 		perf_event_update_userpage(event);
 	} else {
-- 
cgit v1.2.3


From 83d40a61046f73103b4e5d8f1310261487ff63b0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 20 Sep 2021 15:31:11 +0200
Subject: sched: Always inline is_percpu_thread()

  vmlinux.o: warning: objtool: check_preemption_disabled()+0x81: call to is_percpu_thread() leaves .noinstr.text section

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210928084218.063371959@infradead.org
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 39039ce8ac4c..c1a927ddec64 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1720,7 +1720,7 @@ extern struct pid *cad_pid;
 #define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
 #define used_math()				tsk_used_math(current)
 
-static inline bool is_percpu_thread(void)
+static __always_inline bool is_percpu_thread(void)
 {
 #ifdef CONFIG_SMP
 	return (current->flags & PF_NO_SETAFFINITY) &&
-- 
cgit v1.2.3


From 74e78adc6ccf6c3b53939788cf0c49f54db70731 Mon Sep 17 00:00:00 2001
From: Sai Krishna Potthuri <lakshmi.sai.krishna.potthuri@xilinx.com>
Date: Fri, 24 Sep 2021 15:37:08 +0530
Subject: firmware: xilinx: Add OSPI Mux selection support

Add OSPI Mux selection API support to select the AXI interface to OSPI.

Signed-off-by: Sai Krishna Potthuri <lakshmi.sai.krishna.potthuri@xilinx.com>
Link: https://lore.kernel.org/r/1632478031-12242-2-git-send-email-lakshmi.sai.krishna.potthuri@xilinx.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/firmware/xilinx/zynqmp.c     | 17 +++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h | 12 ++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index a3cadbaf3cba..1436e03ff4f7 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -647,6 +647,23 @@ int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type)
 }
 EXPORT_SYMBOL_GPL(zynqmp_pm_sd_dll_reset);
 
+/**
+ * zynqmp_pm_ospi_mux_select() - OSPI Mux selection
+ *
+ * @dev_id:	Device Id of the OSPI device.
+ * @select:	OSPI Mux select value.
+ *
+ * This function select the OSPI Mux.
+ *
+ * Return:	Returns status, either success or error+reason
+ */
+int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select)
+{
+	return zynqmp_pm_invoke_fn(PM_IOCTL, dev_id, IOCTL_OSPI_MUX_SELECT,
+				   select, 0, NULL);
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_ospi_mux_select);
+
 /**
  * zynqmp_pm_write_ggs() - PM API for writing global general storage (ggs)
  * @index:	GGS register index
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 56b426fe020c..4c70a6e2141e 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -123,6 +123,7 @@ enum pm_ioctl_id {
 	IOCTL_READ_PGGS = 15,
 	/* Set healthy bit value */
 	IOCTL_SET_BOOT_HEALTH_STATUS = 17,
+	IOCTL_OSPI_MUX_SELECT = 21,
 };
 
 enum pm_query_id {
@@ -351,6 +352,11 @@ enum zynqmp_pm_shutdown_subtype {
 	ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM = 2,
 };
 
+enum ospi_mux_select_type {
+	PM_OSPI_MUX_SEL_DMA = 0,
+	PM_OSPI_MUX_SEL_LINEAR = 1,
+};
+
 /**
  * struct zynqmp_pm_query_data - PM query data
  * @qid:	query ID
@@ -387,6 +393,7 @@ int zynqmp_pm_set_pll_frac_data(u32 clk_id, u32 data);
 int zynqmp_pm_get_pll_frac_data(u32 clk_id, u32 *data);
 int zynqmp_pm_set_sd_tapdelay(u32 node_id, u32 type, u32 value);
 int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type);
+int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select);
 int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset,
 			   const enum zynqmp_pm_reset_action assert_flag);
 int zynqmp_pm_reset_get_status(const enum zynqmp_pm_reset reset, u32 *status);
@@ -508,6 +515,11 @@ static inline int zynqmp_pm_sd_dll_reset(u32 node_id, u32 type)
 	return -ENODEV;
 }
 
+static inline int zynqmp_pm_ospi_mux_select(u32 dev_id, u32 select)
+{
+	return -ENODEV;
+}
+
 static inline int zynqmp_pm_reset_assert(const enum zynqmp_pm_reset reset,
 					 const enum zynqmp_pm_reset_action assert_flag)
 {
-- 
cgit v1.2.3


From 42f355ef59a2f98fa4affb4265d3ba3e2d86baf1 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 19 May 2021 16:00:20 -0400
Subject: audit: replace magic audit syscall class numbers with macros

Replace audit syscall class magic numbers with macros.

This required putting the macros into new header file
include/linux/audit_arch.h since the syscall macros were
included for both 64 bit and 32 bit in any compat code, causing
redefinition warnings.

Link: https://lore.kernel.org/r/2300b1083a32aade7ae7efb95826e8f3f260b1df.1621363275.git.rgb@redhat.com
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
[PM: renamed header to audit_arch.h after consulting with Richard]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 MAINTAINERS                        |  1 +
 arch/alpha/kernel/audit.c          |  8 ++++----
 arch/ia64/kernel/audit.c           |  8 ++++----
 arch/parisc/kernel/audit.c         |  8 ++++----
 arch/parisc/kernel/compat_audit.c  |  9 +++++----
 arch/powerpc/kernel/audit.c        | 10 +++++-----
 arch/powerpc/kernel/compat_audit.c | 11 ++++++-----
 arch/s390/kernel/audit.c           | 10 +++++-----
 arch/s390/kernel/compat_audit.c    | 11 ++++++-----
 arch/sparc/kernel/audit.c          | 10 +++++-----
 arch/sparc/kernel/compat_audit.c   | 11 ++++++-----
 arch/x86/ia32/audit.c              | 11 ++++++-----
 arch/x86/kernel/audit_64.c         |  8 ++++----
 include/linux/audit.h              |  1 +
 include/linux/audit_arch.h         | 23 +++++++++++++++++++++++
 kernel/auditsc.c                   | 12 ++++++------
 lib/audit.c                        | 10 +++++-----
 lib/compat_audit.c                 | 11 ++++++-----
 18 files changed, 102 insertions(+), 71 deletions(-)
 create mode 100644 include/linux/audit_arch.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index eeb4c70b3d5b..94c88bc0502f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3113,6 +3113,7 @@ W:	https://github.com/linux-audit
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pcmoore/audit.git
 F:	include/asm-generic/audit_*.h
 F:	include/linux/audit.h
+F:	include/linux/audit_arch.h
 F:	include/uapi/linux/audit.h
 F:	kernel/audit*
 F:	lib/*audit.c
diff --git a/arch/alpha/kernel/audit.c b/arch/alpha/kernel/audit.c
index 96a9d18ff4c4..81cbd804e375 100644
--- a/arch/alpha/kernel/audit.c
+++ b/arch/alpha/kernel/audit.c
@@ -37,13 +37,13 @@ int audit_classify_syscall(int abi, unsigned syscall)
 {
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c
index 5192ca899fe6..dba6a74c9ab3 100644
--- a/arch/ia64/kernel/audit.c
+++ b/arch/ia64/kernel/audit.c
@@ -38,13 +38,13 @@ int audit_classify_syscall(int abi, unsigned syscall)
 {
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/arch/parisc/kernel/audit.c b/arch/parisc/kernel/audit.c
index 9eb47b2225d2..14244e83db75 100644
--- a/arch/parisc/kernel/audit.c
+++ b/arch/parisc/kernel/audit.c
@@ -47,13 +47,13 @@ int audit_classify_syscall(int abi, unsigned syscall)
 #endif
 	switch (syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/arch/parisc/kernel/compat_audit.c b/arch/parisc/kernel/compat_audit.c
index 20c39c9d86a9..1991b99f92ba 100644
--- a/arch/parisc/kernel/compat_audit.c
+++ b/arch/parisc/kernel/compat_audit.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/audit_arch.h>
 #include <asm/unistd.h>
 
 unsigned int parisc32_dir_class[] = {
@@ -30,12 +31,12 @@ int parisc32_classify_syscall(unsigned syscall)
 {
 	switch (syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 1;
+		return AUDITSC_COMPAT;
 	}
 }
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index a2dddd7f3d09..6eb18ef77dff 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -47,15 +47,15 @@ int audit_classify_syscall(int abi, unsigned syscall)
 #endif
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index 55c6ccda0a85..216a54f85a12 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #undef __powerpc64__
+#include <linux/audit_arch.h>
 #include <asm/unistd.h>
 
 unsigned ppc32_dir_class[] = {
@@ -31,14 +32,14 @@ int ppc32_classify_syscall(unsigned syscall)
 {
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 1;
+		return AUDITSC_COMPAT;
 	}
 }
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
index d395c6c9944c..7e331e1831d4 100644
--- a/arch/s390/kernel/audit.c
+++ b/arch/s390/kernel/audit.c
@@ -47,15 +47,15 @@ int audit_classify_syscall(int abi, unsigned syscall)
 #endif
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
index 444fb1f66944..acacc96c57cb 100644
--- a/arch/s390/kernel/compat_audit.c
+++ b/arch/s390/kernel/compat_audit.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #undef __s390x__
+#include <linux/audit_arch.h>
 #include <asm/unistd.h>
 #include "audit.h"
 
@@ -32,14 +33,14 @@ int s390_classify_syscall(unsigned syscall)
 {
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 1;
+		return AUDITSC_COMPAT;
 	}
 }
diff --git a/arch/sparc/kernel/audit.c b/arch/sparc/kernel/audit.c
index a6e91bf34d48..50fab35bdaba 100644
--- a/arch/sparc/kernel/audit.c
+++ b/arch/sparc/kernel/audit.c
@@ -48,15 +48,15 @@ int audit_classify_syscall(int abi, unsigned int syscall)
 #endif
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/arch/sparc/kernel/compat_audit.c b/arch/sparc/kernel/compat_audit.c
index 10eeb4f15b20..12515bd655c3 100644
--- a/arch/sparc/kernel/compat_audit.c
+++ b/arch/sparc/kernel/compat_audit.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #define __32bit_syscall_numbers__
+#include <linux/audit_arch.h>
 #include <asm/unistd.h>
 #include "kernel.h"
 
@@ -32,14 +33,14 @@ int sparc32_classify_syscall(unsigned int syscall)
 {
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 1;
+		return AUDITSC_COMPAT;
 	}
 }
diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c
index 6efe6cb3768a..a5fc3b1385e0 100644
--- a/arch/x86/ia32/audit.c
+++ b/arch/x86/ia32/audit.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/audit_arch.h>
 #include <asm/unistd_32.h>
 #include <asm/audit.h>
 
@@ -31,15 +32,15 @@ int ia32_classify_syscall(unsigned syscall)
 {
 	switch (syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
 	case __NR_execveat:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 1;
+		return AUDITSC_COMPAT;
 	}
 }
diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c
index 83d9cad4e68b..2a6cc9c9c881 100644
--- a/arch/x86/kernel/audit_64.c
+++ b/arch/x86/kernel/audit_64.c
@@ -47,14 +47,14 @@ int audit_classify_syscall(int abi, unsigned syscall)
 #endif
 	switch(syscall) {
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 	case __NR_execve:
 	case __NR_execveat:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 82b7c1116a85..5fbeeeb6b726 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -11,6 +11,7 @@
 
 #include <linux/sched.h>
 #include <linux/ptrace.h>
+#include <linux/audit_arch.h>
 #include <uapi/linux/audit.h>
 #include <uapi/linux/netfilter/nf_tables.h>
 
diff --git a/include/linux/audit_arch.h b/include/linux/audit_arch.h
new file mode 100644
index 000000000000..d4a506faabb0
--- /dev/null
+++ b/include/linux/audit_arch.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* audit_arch.h -- Arch layer specific support for audit
+ *
+ * Copyright 2021 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * Author: Richard Guy Briggs <rgb@redhat.com>
+ */
+#ifndef _LINUX_AUDIT_ARCH_H_
+#define _LINUX_AUDIT_ARCH_H_
+
+enum auditsc_class_t {
+	AUDITSC_NATIVE = 0,
+	AUDITSC_COMPAT,
+	AUDITSC_OPEN,
+	AUDITSC_OPENAT,
+	AUDITSC_SOCKETCALL,
+	AUDITSC_EXECVE,
+
+	AUDITSC_NVALS /* count */
+};
+
+#endif
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 1af4e7d0c16e..3f9108101598 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -153,7 +153,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
 	n = ctx->major;
 
 	switch (audit_classify_syscall(ctx->arch, n)) {
-	case 0:	/* native */
+	case AUDITSC_NATIVE:
 		if ((mask & AUDIT_PERM_WRITE) &&
 		     audit_match_class(AUDIT_CLASS_WRITE, n))
 			return 1;
@@ -164,7 +164,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
 		     audit_match_class(AUDIT_CLASS_CHATTR, n))
 			return 1;
 		return 0;
-	case 1: /* 32bit on biarch */
+	case AUDITSC_COMPAT: /* 32bit on biarch */
 		if ((mask & AUDIT_PERM_WRITE) &&
 		     audit_match_class(AUDIT_CLASS_WRITE_32, n))
 			return 1;
@@ -175,13 +175,13 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
 		     audit_match_class(AUDIT_CLASS_CHATTR_32, n))
 			return 1;
 		return 0;
-	case 2: /* open */
+	case AUDITSC_OPEN:
 		return mask & ACC_MODE(ctx->argv[1]);
-	case 3: /* openat */
+	case AUDITSC_OPENAT:
 		return mask & ACC_MODE(ctx->argv[2]);
-	case 4: /* socketcall */
+	case AUDITSC_SOCKETCALL:
 		return ((mask & AUDIT_PERM_WRITE) && ctx->argv[0] == SYS_BIND);
-	case 5: /* execve */
+	case AUDITSC_EXECVE:
 		return mask & AUDIT_PERM_EXEC;
 	default:
 		return 0;
diff --git a/lib/audit.c b/lib/audit.c
index 5004bff928a7..3ec1a94d8d64 100644
--- a/lib/audit.c
+++ b/lib/audit.c
@@ -45,23 +45,23 @@ int audit_classify_syscall(int abi, unsigned syscall)
 	switch(syscall) {
 #ifdef __NR_open
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 #endif
 #ifdef __NR_openat
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 #endif
 #ifdef __NR_socketcall
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 #endif
 #ifdef __NR_execveat
 	case __NR_execveat:
 #endif
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 0;
+		return AUDITSC_NATIVE;
 	}
 }
 
diff --git a/lib/compat_audit.c b/lib/compat_audit.c
index 77eabad69b4a..d6567d9e8b99 100644
--- a/lib/compat_audit.c
+++ b/lib/compat_audit.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/init.h>
 #include <linux/types.h>
+#include <linux/audit_arch.h>
 #include <asm/unistd32.h>
 
 unsigned compat_dir_class[] = {
@@ -33,19 +34,19 @@ int audit_classify_compat_syscall(int abi, unsigned syscall)
 	switch (syscall) {
 #ifdef __NR_open
 	case __NR_open:
-		return 2;
+		return AUDITSC_OPEN;
 #endif
 #ifdef __NR_openat
 	case __NR_openat:
-		return 3;
+		return AUDITSC_OPENAT;
 #endif
 #ifdef __NR_socketcall
 	case __NR_socketcall:
-		return 4;
+		return AUDITSC_SOCKETCALL;
 #endif
 	case __NR_execve:
-		return 5;
+		return AUDITSC_EXECVE;
 	default:
-		return 1;
+		return AUDITSC_COMPAT;
 	}
 }
-- 
cgit v1.2.3


From 1c30e3af8a79260cdba833a719209b01e6b92300 Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 19 May 2021 16:00:21 -0400
Subject: audit: add support for the openat2 syscall

The openat2(2) syscall was added in kernel v5.6 with commit
fddb5d430ad9 ("open: introduce openat2(2) syscall").

Add the openat2(2) syscall to the audit syscall classifier.

Link: https://github.com/linux-audit/audit-kernel/issues/67
Link: https://lore.kernel.org/r/f5f1a4d8699613f8c02ce762807228c841c2e26f.1621363275.git.rgb@redhat.com
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
[PM: merge fuzz due to previous header rename, commit line wraps]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 arch/alpha/kernel/audit.c          | 2 ++
 arch/ia64/kernel/audit.c           | 2 ++
 arch/parisc/kernel/audit.c         | 2 ++
 arch/parisc/kernel/compat_audit.c  | 2 ++
 arch/powerpc/kernel/audit.c        | 2 ++
 arch/powerpc/kernel/compat_audit.c | 2 ++
 arch/s390/kernel/audit.c           | 2 ++
 arch/s390/kernel/compat_audit.c    | 2 ++
 arch/sparc/kernel/audit.c          | 2 ++
 arch/sparc/kernel/compat_audit.c   | 2 ++
 arch/x86/ia32/audit.c              | 2 ++
 arch/x86/kernel/audit_64.c         | 2 ++
 include/linux/audit_arch.h         | 1 +
 kernel/auditsc.c                   | 3 +++
 lib/audit.c                        | 4 ++++
 lib/compat_audit.c                 | 4 ++++
 16 files changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/audit.c b/arch/alpha/kernel/audit.c
index 81cbd804e375..3ab04709784a 100644
--- a/arch/alpha/kernel/audit.c
+++ b/arch/alpha/kernel/audit.c
@@ -42,6 +42,8 @@ int audit_classify_syscall(int abi, unsigned syscall)
 		return AUDITSC_OPENAT;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c
index dba6a74c9ab3..ec61f20ca61f 100644
--- a/arch/ia64/kernel/audit.c
+++ b/arch/ia64/kernel/audit.c
@@ -43,6 +43,8 @@ int audit_classify_syscall(int abi, unsigned syscall)
 		return AUDITSC_OPENAT;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/arch/parisc/kernel/audit.c b/arch/parisc/kernel/audit.c
index 14244e83db75..f420b5552140 100644
--- a/arch/parisc/kernel/audit.c
+++ b/arch/parisc/kernel/audit.c
@@ -52,6 +52,8 @@ int audit_classify_syscall(int abi, unsigned syscall)
 		return AUDITSC_OPENAT;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/arch/parisc/kernel/compat_audit.c b/arch/parisc/kernel/compat_audit.c
index 1991b99f92ba..539b16891bdf 100644
--- a/arch/parisc/kernel/compat_audit.c
+++ b/arch/parisc/kernel/compat_audit.c
@@ -36,6 +36,8 @@ int parisc32_classify_syscall(unsigned syscall)
 		return AUDITSC_OPENAT;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_COMPAT;
 	}
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index 6eb18ef77dff..1bcfca5fdf67 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -54,6 +54,8 @@ int audit_classify_syscall(int abi, unsigned syscall)
 		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index 216a54f85a12..d92ffe4e5dc1 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -39,6 +39,8 @@ int ppc32_classify_syscall(unsigned syscall)
 		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_COMPAT;
 	}
diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c
index 7e331e1831d4..02051a596b87 100644
--- a/arch/s390/kernel/audit.c
+++ b/arch/s390/kernel/audit.c
@@ -54,6 +54,8 @@ int audit_classify_syscall(int abi, unsigned syscall)
 		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c
index acacc96c57cb..a7c46e8310f0 100644
--- a/arch/s390/kernel/compat_audit.c
+++ b/arch/s390/kernel/compat_audit.c
@@ -40,6 +40,8 @@ int s390_classify_syscall(unsigned syscall)
 		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_COMPAT;
 	}
diff --git a/arch/sparc/kernel/audit.c b/arch/sparc/kernel/audit.c
index 50fab35bdaba..b092274eca79 100644
--- a/arch/sparc/kernel/audit.c
+++ b/arch/sparc/kernel/audit.c
@@ -55,6 +55,8 @@ int audit_classify_syscall(int abi, unsigned int syscall)
 		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/arch/sparc/kernel/compat_audit.c b/arch/sparc/kernel/compat_audit.c
index 12515bd655c3..f1ea0005a729 100644
--- a/arch/sparc/kernel/compat_audit.c
+++ b/arch/sparc/kernel/compat_audit.c
@@ -40,6 +40,8 @@ int sparc32_classify_syscall(unsigned int syscall)
 		return AUDITSC_SOCKETCALL;
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_COMPAT;
 	}
diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c
index a5fc3b1385e0..59e19549e759 100644
--- a/arch/x86/ia32/audit.c
+++ b/arch/x86/ia32/audit.c
@@ -40,6 +40,8 @@ int ia32_classify_syscall(unsigned syscall)
 	case __NR_execve:
 	case __NR_execveat:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_COMPAT;
 	}
diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c
index 2a6cc9c9c881..44c3601cfdc4 100644
--- a/arch/x86/kernel/audit_64.c
+++ b/arch/x86/kernel/audit_64.c
@@ -53,6 +53,8 @@ int audit_classify_syscall(int abi, unsigned syscall)
 	case __NR_execve:
 	case __NR_execveat:
 		return AUDITSC_EXECVE;
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/include/linux/audit_arch.h b/include/linux/audit_arch.h
index d4a506faabb0..8fdb1afe251a 100644
--- a/include/linux/audit_arch.h
+++ b/include/linux/audit_arch.h
@@ -16,6 +16,7 @@ enum auditsc_class_t {
 	AUDITSC_OPENAT,
 	AUDITSC_SOCKETCALL,
 	AUDITSC_EXECVE,
+	AUDITSC_OPENAT2,
 
 	AUDITSC_NVALS /* count */
 };
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 3f9108101598..8c4335a35274 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -63,6 +63,7 @@
 #include <linux/fsnotify_backend.h>
 #include <uapi/linux/limits.h>
 #include <uapi/linux/netfilter/nf_tables.h>
+#include <uapi/linux/openat2.h>
 
 #include "audit.h"
 
@@ -183,6 +184,8 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
 		return ((mask & AUDIT_PERM_WRITE) && ctx->argv[0] == SYS_BIND);
 	case AUDITSC_EXECVE:
 		return mask & AUDIT_PERM_EXEC;
+	case AUDITSC_OPENAT2:
+		return mask & ACC_MODE((u32)((struct open_how *)ctx->argv[2])->flags);
 	default:
 		return 0;
 	}
diff --git a/lib/audit.c b/lib/audit.c
index 3ec1a94d8d64..738bda22dd39 100644
--- a/lib/audit.c
+++ b/lib/audit.c
@@ -60,6 +60,10 @@ int audit_classify_syscall(int abi, unsigned syscall)
 #endif
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+#ifdef __NR_openat2
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
+#endif
 	default:
 		return AUDITSC_NATIVE;
 	}
diff --git a/lib/compat_audit.c b/lib/compat_audit.c
index d6567d9e8b99..3d6b8996f027 100644
--- a/lib/compat_audit.c
+++ b/lib/compat_audit.c
@@ -46,6 +46,10 @@ int audit_classify_compat_syscall(int abi, unsigned syscall)
 #endif
 	case __NR_execve:
 		return AUDITSC_EXECVE;
+#ifdef __NR_openat2
+	case __NR_openat2:
+		return AUDITSC_OPENAT2;
+#endif
 	default:
 		return AUDITSC_COMPAT;
 	}
-- 
cgit v1.2.3


From e8c0722927e8fc112d78cc0435d336fcd7e3507c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 1 Oct 2021 18:15:27 +0300
Subject: net: mscc: ocelot: write full VLAN TCI in the injection header

The VLAN TCI contains more than the VLAN ID, it also has the VLAN PCP
and Drop Eligibility Indicator.

If the ocelot driver is going to write the VLAN header inside the DSA
tag, it could just as well write the entire TCI.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 2 +-
 include/linux/dsa/ocelot.h         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 559177e6ded4..05c456dbdd72 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -916,7 +916,7 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
 	ocelot_ifh_set_bypass(ifh, 1);
 	ocelot_ifh_set_dest(ifh, BIT_ULL(port));
 	ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C);
-	ocelot_ifh_set_vid(ifh, skb_vlan_tag_get(skb));
+	ocelot_ifh_set_vlan_tci(ifh, skb_vlan_tag_get(skb));
 	ocelot_ifh_set_rew_op(ifh, rew_op);
 
 	for (i = 0; i < OCELOT_TAG_LEN / 4; i++)
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 435777a0073c..0fe101e8e190 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -210,9 +210,9 @@ static inline void ocelot_ifh_set_tag_type(void *injection, u64 tag_type)
 	packing(injection, &tag_type, 16, 16, OCELOT_TAG_LEN, PACK, 0);
 }
 
-static inline void ocelot_ifh_set_vid(void *injection, u64 vid)
+static inline void ocelot_ifh_set_vlan_tci(void *injection, u64 vlan_tci)
 {
-	packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0);
+	packing(injection, &vlan_tci, 15, 0, OCELOT_TAG_LEN, PACK, 0);
 }
 
 #endif
-- 
cgit v1.2.3


From f3956ebb3bf06ab2266ad5ee2214aed46405810c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 1 Oct 2021 14:32:23 -0700
Subject: ethernet: use eth_hw_addr_set() instead of ether_addr_copy()

Convert Ethernet from ether_addr_copy() to eth_hw_addr_set():

  @@
  expression dev, np;
  @@
  - ether_addr_copy(dev->dev_addr, np)
  + eth_hw_addr_set(dev, np)

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/agere/et131x.c                        | 4 ++--
 drivers/net/ethernet/alacritech/slicoss.c                  | 2 +-
 drivers/net/ethernet/amazon/ena/ena_netdev.c               | 2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c            | 2 +-
 drivers/net/ethernet/broadcom/bgmac-bcma.c                 | 2 +-
 drivers/net/ethernet/broadcom/bgmac.c                      | 2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c              | 2 +-
 drivers/net/ethernet/broadcom/genet/bcmgenet.c             | 4 ++--
 drivers/net/ethernet/brocade/bna/bnad.c                    | 4 ++--
 drivers/net/ethernet/cavium/liquidio/lio_core.c            | 2 +-
 drivers/net/ethernet/cavium/liquidio/lio_main.c            | 2 +-
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c         | 2 +-
 drivers/net/ethernet/cavium/thunder/nicvf_main.c           | 3 +--
 drivers/net/ethernet/emulex/benet/be_main.c                | 2 +-
 drivers/net/ethernet/ethoc.c                               | 2 +-
 drivers/net/ethernet/ezchip/nps_enet.c                     | 2 +-
 drivers/net/ethernet/faraday/ftgmac100.c                   | 4 ++--
 drivers/net/ethernet/google/gve/gve_adminq.c               | 2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c            | 4 ++--
 drivers/net/ethernet/ibm/ibmveth.c                         | 2 +-
 drivers/net/ethernet/ibm/ibmvnic.c                         | 5 ++---
 drivers/net/ethernet/intel/fm10k/fm10k_netdev.c            | 2 +-
 drivers/net/ethernet/intel/fm10k/fm10k_pci.c               | 4 ++--
 drivers/net/ethernet/intel/i40e/i40e_main.c                | 4 ++--
 drivers/net/ethernet/intel/iavf/iavf_main.c                | 2 +-
 drivers/net/ethernet/intel/iavf/iavf_virtchnl.c            | 4 ++--
 drivers/net/ethernet/intel/ice/ice_main.c                  | 4 ++--
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c          | 6 +++---
 drivers/net/ethernet/korina.c                              | 2 +-
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c            | 4 ++--
 drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c             | 2 +-
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c   | 2 +-
 drivers/net/ethernet/marvell/prestera/prestera_main.c      | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c          | 2 +-
 drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c | 2 +-
 drivers/net/ethernet/microchip/enc28j60.c                  | 4 ++--
 drivers/net/ethernet/microchip/lan743x_main.c              | 4 ++--
 drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c      | 2 +-
 drivers/net/ethernet/microsoft/mana/mana_en.c              | 2 +-
 drivers/net/ethernet/mscc/ocelot_net.c                     | 2 +-
 drivers/net/ethernet/netronome/nfp/abm/main.c              | 2 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_main.c          | 2 +-
 drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c        | 2 +-
 drivers/net/ethernet/ni/nixge.c                            | 2 +-
 drivers/net/ethernet/qlogic/qede/qede_filter.c             | 4 ++--
 drivers/net/ethernet/qlogic/qede/qede_main.c               | 2 +-
 drivers/net/ethernet/qualcomm/emac/emac.c                  | 2 +-
 drivers/net/ethernet/sfc/ef10_sriov.c                      | 2 +-
 drivers/net/ethernet/sfc/efx.c                             | 2 +-
 drivers/net/ethernet/sfc/efx_common.c                      | 4 ++--
 drivers/net/ethernet/sfc/falcon/efx.c                      | 6 +++---
 drivers/net/ethernet/socionext/netsec.c                    | 2 +-
 drivers/net/ethernet/ti/am65-cpsw-nuss.c                   | 2 +-
 drivers/net/ethernet/ti/cpsw_new.c                         | 4 ++--
 drivers/net/ethernet/ti/davinci_emac.c                     | 2 +-
 drivers/net/ethernet/ti/netcp_core.c                       | 2 +-
 include/linux/etherdevice.h                                | 2 +-
 57 files changed, 77 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 920633161174..f4edc616388c 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -3863,7 +3863,7 @@ static int et131x_change_mtu(struct net_device *netdev, int new_mtu)
 
 	et131x_init_send(adapter);
 	et131x_hwaddr_init(adapter);
-	ether_addr_copy(netdev->dev_addr, adapter->addr);
+	eth_hw_addr_set(netdev, adapter->addr);
 
 	/* Init the device with the new settings */
 	et131x_adapter_setup(adapter);
@@ -3966,7 +3966,7 @@ static int et131x_pci_setup(struct pci_dev *pdev,
 
 	netif_napi_add(netdev, &adapter->napi, et131x_poll, 64);
 
-	ether_addr_copy(netdev->dev_addr, adapter->addr);
+	eth_hw_addr_set(netdev, adapter->addr);
 
 	rc = -ENOMEM;
 
diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
index 696517eae77f..82f4f2608102 100644
--- a/drivers/net/ethernet/alacritech/slicoss.c
+++ b/drivers/net/ethernet/alacritech/slicoss.c
@@ -1660,7 +1660,7 @@ static int slic_read_eeprom(struct slic_device *sdev)
 		goto free_eeprom;
 	}
 	/* set mac address */
-	ether_addr_copy(sdev->netdev->dev_addr, mac[devfn]);
+	eth_hw_addr_set(sdev->netdev, mac[devfn]);
 free_eeprom:
 	dma_free_coherent(&sdev->pdev->dev, SLIC_EEPROM_SIZE, eeprom, paddr);
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 0e43000614ab..7d5d885d85d5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -4073,7 +4073,7 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter,
 		ether_addr_copy(adapter->mac_addr, netdev->dev_addr);
 	} else {
 		ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr);
-		ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
+		eth_hw_addr_set(netdev, adapter->mac_addr);
 	}
 
 	/* Set offload features */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 6c049864dac0..694aa70bcafe 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -332,7 +332,7 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
 	{
 		static u8 mac_addr_permanent[] = AQ_CFG_MAC_ADDR_PERMANENT;
 
-		ether_addr_copy(self->ndev->dev_addr, mac_addr_permanent);
+		eth_hw_addr_set(self->ndev, mac_addr_permanent);
 	}
 #endif
 
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 9513cfb5ba58..28759062d68d 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -150,7 +150,7 @@ static int bgmac_probe(struct bcma_device *core)
 			err = -ENOTSUPP;
 			goto err;
 		}
-		ether_addr_copy(bgmac->net_dev->dev_addr, mac);
+		eth_hw_addr_set(bgmac->net_dev, mac);
 	}
 
 	/* On BCM4706 we need common core to access PHY */
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index fe4d99abd548..d2c7834850cc 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1241,7 +1241,7 @@ static int bgmac_set_mac_address(struct net_device *net_dev, void *addr)
 	if (ret < 0)
 		return ret;
 
-	ether_addr_copy(net_dev->dev_addr, sa->sa_data);
+	eth_hw_addr_set(net_dev, sa->sa_data);
 	bgmac_write_mac_address(bgmac, net_dev->dev_addr);
 
 	eth_commit_mac_addr_change(net_dev, addr);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index 9401936b74fa..8eb28e088582 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -475,7 +475,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
 	dev->features |= pf_dev->features;
 	bnxt_vf_rep_eth_addr_gen(bp->pf.mac_addr, vf_rep->vf_idx,
 				 dev->perm_addr);
-	ether_addr_copy(dev->dev_addr, dev->perm_addr);
+	eth_hw_addr_set(dev, dev->perm_addr);
 	/* Set VF-Rep's max-mtu to the corresponding VF's max-mtu */
 	if (!bnxt_hwrm_vfr_qcfg(bp, vf_rep, &max_mtu))
 		dev->max_mtu = max_mtu;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 6a8234bc9428..02fe98cbabb0 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3633,7 +3633,7 @@ static int bcmgenet_set_mac_addr(struct net_device *dev, void *p)
 	if (netif_running(dev))
 		return -EBUSY;
 
-	ether_addr_copy(dev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(dev, addr->sa_data);
 
 	return 0;
 }
@@ -4082,7 +4082,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 		bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
 
 	if (pd && !IS_ERR_OR_NULL(pd->mac_address))
-		ether_addr_copy(dev->dev_addr, pd->mac_address);
+		eth_hw_addr_set(dev, pd->mac_address);
 	else
 		if (!device_get_mac_address(&pdev->dev, dev->dev_addr, ETH_ALEN))
 			if (has_acpi_companion(&pdev->dev))
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index ba47777d9cff..b1947fd9a07c 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -875,7 +875,7 @@ bnad_set_netdev_perm_addr(struct bnad *bnad)
 
 	ether_addr_copy(netdev->perm_addr, bnad->perm_addr);
 	if (is_zero_ether_addr(netdev->dev_addr))
-		ether_addr_copy(netdev->dev_addr, bnad->perm_addr);
+		eth_hw_addr_set(netdev, bnad->perm_addr);
 }
 
 /* Control Path Handlers */
@@ -3249,7 +3249,7 @@ bnad_set_mac_address(struct net_device *netdev, void *addr)
 
 	err = bnad_mac_addr_set_locked(bnad, sa->sa_data);
 	if (!err)
-		ether_addr_copy(netdev->dev_addr, sa->sa_data);
+		eth_hw_addr_set(netdev, sa->sa_data);
 
 	spin_unlock_irqrestore(&bnad->bna_lock, flags);
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 2a0d64e5797c..ec7928b54e4a 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -411,7 +411,7 @@ void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac)
 
 	if (!ether_addr_equal(netdev->dev_addr, mac)) {
 		macaddr_changed = true;
-		ether_addr_copy(netdev->dev_addr, mac);
+		eth_hw_addr_set(netdev, mac);
 		ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, mac);
 		call_netdevice_notifiers(NETDEV_CHANGEADDR, netdev);
 	}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index dafc79bd34f4..5d865ba7aed4 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -3634,7 +3634,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 		/* Copy MAC Address to OS network device structure */
 
-		ether_addr_copy(netdev->dev_addr, mac);
+		eth_hw_addr_set(netdev, mac);
 
 		/* By default all interfaces on a single Octeon uses the same
 		 * tx and rx queues
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index f6396ac64006..8a969a9d4b63 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -2148,7 +2148,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 			mac[j] = *((u8 *)(((u8 *)&lio->linfo.hw_addr) + 2 + j));
 
 		/* Copy MAC Address to OS network device structure */
-		ether_addr_copy(netdev->dev_addr, mac);
+		eth_hw_addr_set(netdev, mac);
 
 		if (liquidio_setup_io_queues(octeon_dev, i,
 					     lio->linfo.num_txpciq,
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 2b87565781a0..5ef704c8d839 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -221,8 +221,7 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
 		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
 		nic->node = mbx.nic_cfg.node_id;
 		if (!nic->set_mac_pending)
-			ether_addr_copy(nic->netdev->dev_addr,
-					mbx.nic_cfg.mac_addr);
+			eth_hw_addr_set(nic->netdev, mbx.nic_cfg.mac_addr);
 		nic->sqs_mode = mbx.nic_cfg.sqs_mode;
 		nic->loopback_supported = mbx.nic_cfg.loopback_supported;
 		nic->link_up = false;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 0fb36d50c42b..ef60e2da05a4 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -369,7 +369,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p)
 	/* Remember currently programmed MAC */
 	ether_addr_copy(adapter->dev_mac, addr->sa_data);
 done:
-	ether_addr_copy(netdev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(netdev, addr->sa_data);
 	dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
 	return 0;
 err:
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index c5bd27db708a..7eb7d28a489d 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1144,7 +1144,7 @@ static int ethoc_probe(struct platform_device *pdev)
 
 	/* Allow the platform setup code to pass in a MAC address. */
 	if (pdata) {
-		ether_addr_copy(netdev->dev_addr, pdata->hwaddr);
+		eth_hw_addr_set(netdev, pdata->hwaddr);
 		priv->phy_id = pdata->phy_id;
 	} else {
 		of_get_mac_address(pdev->dev.of_node, netdev->dev_addr);
diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c
index f9a288a6ec8c..f5935eb5a791 100644
--- a/drivers/net/ethernet/ezchip/nps_enet.c
+++ b/drivers/net/ethernet/ezchip/nps_enet.c
@@ -421,7 +421,7 @@ static s32 nps_enet_set_mac_address(struct net_device *ndev, void *p)
 
 	res = eth_mac_addr(ndev, p);
 	if (!res) {
-		ether_addr_copy(ndev->dev_addr, addr->sa_data);
+		eth_hw_addr_set(ndev, addr->sa_data);
 		nps_enet_set_hw_mac_address(ndev);
 	}
 
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index ff76e401a014..ab9267225573 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -186,7 +186,7 @@ static void ftgmac100_initial_mac(struct ftgmac100 *priv)
 
 	addr = device_get_mac_address(priv->dev, mac, ETH_ALEN);
 	if (addr) {
-		ether_addr_copy(priv->netdev->dev_addr, mac);
+		eth_hw_addr_set(priv->netdev, mac);
 		dev_info(priv->dev, "Read MAC address %pM from device tree\n",
 			 mac);
 		return;
@@ -203,7 +203,7 @@ static void ftgmac100_initial_mac(struct ftgmac100 *priv)
 	mac[5] = l & 0xff;
 
 	if (is_valid_ether_addr(mac)) {
-		ether_addr_copy(priv->netdev->dev_addr, mac);
+		eth_hw_addr_set(priv->netdev, mac);
 		dev_info(priv->dev, "Read MAC address %pM from chip\n", mac);
 	} else {
 		eth_hw_addr_random(priv->netdev);
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c
index f089d33dd48e..af2c1d1535f5 100644
--- a/drivers/net/ethernet/google/gve/gve_adminq.c
+++ b/drivers/net/ethernet/google/gve/gve_adminq.c
@@ -733,7 +733,7 @@ int gve_adminq_describe_device(struct gve_priv *priv)
 	}
 	priv->dev->max_mtu = mtu;
 	priv->num_event_counters = be16_to_cpu(descriptor->counters);
-	ether_addr_copy(priv->dev->dev_addr, descriptor->mac);
+	eth_hw_addr_set(priv->dev, descriptor->mac);
 	mac = descriptor->mac;
 	dev_info(&priv->pdev->dev, "MAC addr: %pM\n", mac);
 	priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 468b8f07bf47..fea1be4c02ed 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2287,7 +2287,7 @@ static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
 		return ret;
 	}
 
-	ether_addr_copy(netdev->dev_addr, mac_addr->sa_data);
+	eth_hw_addr_set(netdev, mac_addr->sa_data);
 
 	return 0;
 }
@@ -4933,7 +4933,7 @@ static int hns3_init_mac_addr(struct net_device *netdev)
 		dev_warn(priv->dev, "using random MAC address %pM\n",
 			 netdev->dev_addr);
 	} else if (!ether_addr_equal(netdev->dev_addr, mac_addr_temp)) {
-		ether_addr_copy(netdev->dev_addr, mac_addr_temp);
+		eth_hw_addr_set(netdev, mac_addr_temp);
 		ether_addr_copy(netdev->perm_addr, mac_addr_temp);
 	} else {
 		return 0;
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 42d374cef664..836617fb3f40 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1613,7 +1613,7 @@ static int ibmveth_set_mac_addr(struct net_device *dev, void *p)
 		return rc;
 	}
 
-	ether_addr_copy(dev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(dev, addr->sa_data);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 21efd76dd427..9d61167ba767 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -4766,8 +4766,7 @@ static int handle_change_mac_rsp(union ibmvnic_crq *crq,
 	/* crq->change_mac_addr.mac_addr is the requested one
 	 * crq->change_mac_addr_rsp.mac_addr is the returned valid one.
 	 */
-	ether_addr_copy(netdev->dev_addr,
-			&crq->change_mac_addr_rsp.mac_addr[0]);
+	eth_hw_addr_set(netdev, &crq->change_mac_addr_rsp.mac_addr[0]);
 	ether_addr_copy(adapter->mac_addr,
 			&crq->change_mac_addr_rsp.mac_addr[0]);
 out:
@@ -5723,7 +5722,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
 	bitmap_set(adapter->map_ids, 0, 1);
 
 	ether_addr_copy(adapter->mac_addr, mac_addr_p);
-	ether_addr_copy(netdev->dev_addr, adapter->mac_addr);
+	eth_hw_addr_set(netdev, adapter->mac_addr);
 	netdev->irq = dev->irq;
 	netdev->netdev_ops = &ibmvnic_netdev_ops;
 	netdev->ethtool_ops = &ibmvnic_ethtool_ops;
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 2fb52bd6fc0e..2cca9e84e31e 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -990,7 +990,7 @@ static int fm10k_set_mac(struct net_device *dev, void *p)
 	}
 
 	if (!err) {
-		ether_addr_copy(dev->dev_addr, addr->sa_data);
+		eth_hw_addr_set(dev, addr->sa_data);
 		ether_addr_copy(hw->mac.addr, addr->sa_data);
 		dev->addr_assign_type &= ~NET_ADDR_RANDOM;
 	}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index adfa2768f024..b473cb7d7c57 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -300,7 +300,7 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface)
 		if (is_valid_ether_addr(hw->mac.perm_addr)) {
 			ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
 			ether_addr_copy(netdev->perm_addr, hw->mac.perm_addr);
-			ether_addr_copy(netdev->dev_addr, hw->mac.perm_addr);
+			eth_hw_addr_set(netdev, hw->mac.perm_addr);
 			netdev->addr_assign_type &= ~NET_ADDR_RANDOM;
 		}
 
@@ -2045,7 +2045,7 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
 		netdev->addr_assign_type |= NET_ADDR_RANDOM;
 	}
 
-	ether_addr_copy(netdev->dev_addr, hw->mac.addr);
+	eth_hw_addr_set(netdev, hw->mac.addr);
 	ether_addr_copy(netdev->perm_addr, hw->mac.addr);
 
 	if (!is_valid_ether_addr(netdev->perm_addr)) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2f20980dd9a5..f3a1d72538fb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1587,7 +1587,7 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
 	 */
 	spin_lock_bh(&vsi->mac_filter_hash_lock);
 	i40e_del_mac_filter(vsi, netdev->dev_addr);
-	ether_addr_copy(netdev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(netdev, addr->sa_data);
 	i40e_add_mac_filter(vsi, netdev->dev_addr);
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
@@ -13424,7 +13424,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	i40e_add_mac_filter(vsi, broadcast);
 	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
-	ether_addr_copy(netdev->dev_addr, mac_addr);
+	eth_hw_addr_set(netdev, mac_addr);
 	ether_addr_copy(netdev->perm_addr, mac_addr);
 
 	/* i40iw_net_event() reads 16 bytes from neigh->primary_key */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 23762a7ef740..7f812abe5abe 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1847,7 +1847,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
 		eth_hw_addr_random(netdev);
 		ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
 	} else {
-		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		eth_hw_addr_set(netdev, adapter->hw.mac.addr);
 		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
 	}
 
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 3c735968e1b8..8eb8d4663a81 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1685,7 +1685,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 		if (!v_retval)
 			iavf_mac_add_ok(adapter);
 		if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr))
-			ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+			eth_hw_addr_set(netdev, adapter->hw.mac.addr);
 		break;
 	case VIRTCHNL_OP_GET_STATS: {
 		struct iavf_eth_stats *stats =
@@ -1716,7 +1716,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 			ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr);
 		} else {
 			/* refresh current mac address if changed */
-			ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+			eth_hw_addr_set(netdev, adapter->hw.mac.addr);
 			ether_addr_copy(netdev->perm_addr,
 					adapter->hw.mac.addr);
 		}
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 909e5cd98054..57e24aeffbc5 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3143,7 +3143,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 	if (vsi->type == ICE_VSI_PF) {
 		SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back));
 		ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr);
-		ether_addr_copy(netdev->dev_addr, mac_addr);
+		eth_hw_addr_set(netdev, mac_addr);
 		ether_addr_copy(netdev->perm_addr, mac_addr);
 	}
 
@@ -5172,7 +5172,7 @@ err_update_filters:
 		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
 			   mac);
 		netif_addr_lock_bh(netdev);
-		ether_addr_copy(netdev->dev_addr, old_mac);
+		eth_hw_addr_set(netdev, old_mac);
 		netif_addr_unlock_bh(netdev);
 		return err;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index c714e1ecd308..d81811ab4ec4 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2540,7 +2540,7 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter)
 	}
 
 	if (is_valid_ether_addr(adapter->hw.mac.addr)) {
-		ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr);
+		eth_hw_addr_set(netdev, adapter->hw.mac.addr);
 		ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
 	}
 
@@ -3054,7 +3054,7 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter)
 		else if (is_zero_ether_addr(adapter->hw.mac.addr))
 			dev_info(&pdev->dev,
 				 "MAC address not assigned by administrator.\n");
-		ether_addr_copy(netdev->dev_addr, hw->mac.addr);
+		eth_hw_addr_set(netdev, hw->mac.addr);
 	}
 
 	if (!is_valid_ether_addr(netdev->dev_addr)) {
@@ -4231,7 +4231,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p)
 
 	ether_addr_copy(hw->mac.addr, addr->sa_data);
 	ether_addr_copy(hw->mac.perm_addr, addr->sa_data);
-	ether_addr_copy(netdev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(netdev, addr->sa_data);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 3e9f324f1061..097516af4325 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1297,7 +1297,7 @@ static int korina_probe(struct platform_device *pdev)
 	lp = netdev_priv(dev);
 
 	if (mac_addr)
-		ether_addr_copy(dev->dev_addr, mac_addr);
+		eth_hw_addr_set(dev, mac_addr);
 	else if (of_get_mac_address(pdev->dev.of_node, dev->dev_addr) < 0)
 		eth_hw_addr_random(dev);
 
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index d5c92e43f89e..94ea6dd91b74 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6083,7 +6083,7 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
 
 	if (fwnode_get_mac_address(fwnode, fw_mac_addr, ETH_ALEN)) {
 		*mac_from = "firmware node";
-		ether_addr_copy(dev->dev_addr, fw_mac_addr);
+		eth_hw_addr_set(dev, fw_mac_addr);
 		return;
 	}
 
@@ -6091,7 +6091,7 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
 		mvpp21_get_mac_address(port, hw_mac_addr);
 		if (is_valid_ether_addr(hw_mac_addr)) {
 			*mac_from = "hardware";
-			ether_addr_copy(dev->dev_addr, hw_mac_addr);
+			eth_hw_addr_set(dev, hw_mac_addr);
 			return;
 		}
 	}
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
index 93575800ca92..75ba57bd1d46 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_prs.c
@@ -2347,7 +2347,7 @@ int mvpp2_prs_update_mac_da(struct net_device *dev, const u8 *da)
 		return err;
 
 	/* Set addr in the device */
-	ether_addr_copy(dev->dev_addr, da);
+	eth_hw_addr_set(dev, da);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index 0aa88cea1676..9826a9012737 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -188,7 +188,7 @@ static int otx2_hw_get_mac_addr(struct otx2_nic *pfvf,
 		return PTR_ERR(msghdr);
 	}
 	rsp = (struct nix_get_mac_addr_rsp *)msghdr;
-	ether_addr_copy(netdev->dev_addr, rsp->mac_addr);
+	eth_hw_addr_set(netdev, rsp->mac_addr);
 	mutex_unlock(&pfvf->mbox.lock);
 
 	return 0;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c
index 78a7a00bce22..b667f560b931 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_main.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c
@@ -137,7 +137,7 @@ static int prestera_port_set_mac_address(struct net_device *dev, void *p)
 	if (err)
 		return err;
 
-	ether_addr_copy(dev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(dev, addr->sa_data);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 40fc7eb8b2b9..005eb3c92506 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3228,7 +3228,7 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr)
 		return -EADDRNOTAVAIL;
 
 	netif_addr_lock_bh(netdev);
-	ether_addr_copy(netdev->dev_addr, saddr->sa_data);
+	eth_hw_addr_set(netdev, saddr->sa_data);
 	netif_addr_unlock_bh(netdev);
 
 	mlx5e_nic_set_rx_mode(priv);
diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
index 6704f5c1aa32..b990782c1eb1 100644
--- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
+++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
@@ -75,7 +75,7 @@ static void mlxbf_gige_initial_mac(struct mlxbf_gige *priv)
 	u64_to_ether_addr(local_mac, mac);
 
 	if (is_valid_ether_addr(mac)) {
-		ether_addr_copy(priv->netdev->dev_addr, mac);
+		eth_hw_addr_set(priv->netdev, mac);
 	} else {
 		/* Provide a random MAC if for some reason the device has
 		 * not been configured with a valid MAC address already.
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 09cdc2f2e7ff..bf77e8adffbf 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -517,7 +517,7 @@ static int enc28j60_set_mac_address(struct net_device *dev, void *addr)
 	if (!is_valid_ether_addr(address->sa_data))
 		return -EADDRNOTAVAIL;
 
-	ether_addr_copy(dev->dev_addr, address->sa_data);
+	eth_hw_addr_set(dev, address->sa_data);
 	return enc28j60_set_hw_macaddr(dev);
 }
 
@@ -1573,7 +1573,7 @@ static int enc28j60_probe(struct spi_device *spi)
 	}
 
 	if (device_get_mac_address(&spi->dev, macaddr, sizeof(macaddr)))
-		ether_addr_copy(dev->dev_addr, macaddr);
+		eth_hw_addr_set(dev, macaddr);
 	else
 		eth_hw_addr_random(dev);
 	enc28j60_set_hw_macaddr(dev);
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 9e8561cdc32a..03d02403c19e 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -816,7 +816,7 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter)
 			eth_random_addr(adapter->mac_address);
 	}
 	lan743x_mac_set_address(adapter, adapter->mac_address);
-	ether_addr_copy(netdev->dev_addr, adapter->mac_address);
+	eth_hw_addr_set(netdev, adapter->mac_address);
 
 	return 0;
 }
@@ -2645,7 +2645,7 @@ static int lan743x_netdev_set_mac_address(struct net_device *netdev,
 	ret = eth_prepare_mac_addr_change(netdev, sock_addr);
 	if (ret)
 		return ret;
-	ether_addr_copy(netdev->dev_addr, sock_addr->sa_data);
+	eth_hw_addr_set(netdev, sock_addr->sa_data);
 	lan743x_mac_set_address(adapter, sock_addr->sa_data);
 	lan743x_rfe_update_mac_address(adapter);
 	return 0;
diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
index cb68eaaac881..b21ebaa32d7e 100644
--- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
+++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c
@@ -162,7 +162,7 @@ static int sparx5_set_mac_address(struct net_device *dev, void *p)
 	sparx5_mact_learn(sparx5, PGID_CPU, addr->sa_data, port->pvid);
 
 	/* Record the address */
-	ether_addr_copy(dev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(dev, addr->sa_data);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 1b21030308e5..9a871192ca96 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1608,7 +1608,7 @@ static int mana_init_port(struct net_device *ndev)
 	if (apc->num_queues > apc->max_queues)
 		apc->num_queues = apc->max_queues;
 
-	ether_addr_copy(ndev->dev_addr, apc->mac_addr);
+	eth_hw_addr_set(ndev, apc->mac_addr);
 
 	return 0;
 
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 8ef3868d7d68..2f2312715439 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -605,7 +605,7 @@ static int ocelot_port_set_mac_address(struct net_device *dev, void *p)
 	/* Then forget the previous one. */
 	ocelot_mact_forget(ocelot, dev->dev_addr, ocelot_port->pvid_vlan.vid);
 
-	ether_addr_copy(dev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(dev, addr->sa_data);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/abm/main.c b/drivers/net/ethernet/netronome/nfp/abm/main.c
index 605a1617b195..5d3df28c648f 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/main.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/main.c
@@ -305,7 +305,7 @@ nfp_abm_vnic_set_mac(struct nfp_pf *pf, struct nfp_abm *abm, struct nfp_net *nn,
 		return;
 	}
 
-	ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
+	eth_hw_addr_set(nn->dp.netdev, mac_addr);
 	ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
index 5fbb7c613ff1..751f76cd4f79 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c
@@ -55,7 +55,7 @@ nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev,
 		return;
 	}
 
-	ether_addr_copy(netdev->dev_addr, eth_port->mac_addr);
+	eth_hw_addr_set(netdev, eth_port->mac_addr);
 	ether_addr_copy(netdev->perm_addr, eth_port->mac_addr);
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index c0e2f4394aef..87f2268b16d6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -58,7 +58,7 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn)
 		return;
 	}
 
-	ether_addr_copy(nn->dp.netdev->dev_addr, mac_addr);
+	eth_hw_addr_set(nn->dp.netdev, mac_addr);
 	ether_addr_copy(nn->dp.netdev->perm_addr, mac_addr);
 }
 
diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 346145d3180e..cfeb7620ae20 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -1283,7 +1283,7 @@ static int nixge_probe(struct platform_device *pdev)
 
 	mac_addr = nixge_get_nvmem_address(&pdev->dev);
 	if (mac_addr && is_valid_ether_addr(mac_addr)) {
-		ether_addr_copy(ndev->dev_addr, mac_addr);
+		eth_hw_addr_set(ndev, mac_addr);
 		kfree(mac_addr);
 	} else {
 		eth_hw_addr_random(ndev);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index f99b085b56a5..03c51dd37e1f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -557,7 +557,7 @@ void qede_force_mac(void *dev, u8 *mac, bool forced)
 		return;
 	}
 
-	ether_addr_copy(edev->ndev->dev_addr, mac);
+	eth_hw_addr_set(edev->ndev, mac);
 	__qede_unlock(edev);
 }
 
@@ -1101,7 +1101,7 @@ int qede_set_mac_addr(struct net_device *ndev, void *p)
 			goto out;
 	}
 
-	ether_addr_copy(ndev->dev_addr, addr->sa_data);
+	eth_hw_addr_set(ndev, addr->sa_data);
 	DP_INFO(edev, "Setting device MAC to %pM\n", addr->sa_data);
 
 	if (edev->state != QEDE_STATE_OPEN) {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index ee4c3bd28a93..75adb71adf18 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -836,7 +836,7 @@ static void qede_init_ndev(struct qede_dev *edev)
 	ndev->max_mtu = QEDE_MAX_JUMBO_PACKET_SIZE;
 
 	/* Set network device HW mac */
-	ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac);
+	eth_hw_addr_set(edev->ndev, edev->dev_info.common.hw_mac);
 
 	ndev->mtu = edev->dev_info.common.mtu;
 }
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 9015a38eaced..fbfabfc5cc51 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -550,7 +550,7 @@ static int emac_probe_resources(struct platform_device *pdev,
 
 	/* get mac address */
 	if (device_get_mac_address(&pdev->dev, maddr, ETH_ALEN))
-		ether_addr_copy(netdev->dev_addr, maddr);
+		eth_hw_addr_set(netdev, maddr);
 	else
 		eth_hw_addr_random(netdev);
 
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 752d6406f07e..06d23c708a5f 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -523,7 +523,7 @@ int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac)
 			goto fail;
 
 		if (vf->efx)
-			ether_addr_copy(vf->efx->net_dev->dev_addr, mac);
+			eth_hw_addr_set(vf->efx->net_dev, mac);
 	}
 
 	ether_addr_copy(vf->mac, mac);
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 43ef4f529028..6960a2fe2b53 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -136,7 +136,7 @@ static int efx_probe_port(struct efx_nic *efx)
 		return rc;
 
 	/* Initialise MAC address to permanent address */
-	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
+	eth_hw_addr_set(efx->net_dev, efx->net_dev->perm_addr);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
index 896b59253197..f187631b2c5c 100644
--- a/drivers/net/ethernet/sfc/efx_common.c
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -181,11 +181,11 @@ int efx_set_mac_address(struct net_device *net_dev, void *data)
 
 	/* save old address */
 	ether_addr_copy(old_addr, net_dev->dev_addr);
-	ether_addr_copy(net_dev->dev_addr, new_addr);
+	eth_hw_addr_set(net_dev, new_addr);
 	if (efx->type->set_mac_address) {
 		rc = efx->type->set_mac_address(efx);
 		if (rc) {
-			ether_addr_copy(net_dev->dev_addr, old_addr);
+			eth_hw_addr_set(net_dev, old_addr);
 			return rc;
 		}
 	}
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index 423bdf81200f..c68837a951f4 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -1044,7 +1044,7 @@ static int ef4_probe_port(struct ef4_nic *efx)
 		return rc;
 
 	/* Initialise MAC address to permanent address */
-	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
+	eth_hw_addr_set(efx->net_dev, efx->net_dev->perm_addr);
 
 	return 0;
 }
@@ -2162,11 +2162,11 @@ static int ef4_set_mac_address(struct net_device *net_dev, void *data)
 
 	/* save old address */
 	ether_addr_copy(old_addr, net_dev->dev_addr);
-	ether_addr_copy(net_dev->dev_addr, new_addr);
+	eth_hw_addr_set(net_dev, new_addr);
 	if (efx->type->set_mac_address) {
 		rc = efx->type->set_mac_address(efx);
 		if (rc) {
-			ether_addr_copy(net_dev->dev_addr, old_addr);
+			eth_hw_addr_set(net_dev, old_addr);
 			return rc;
 		}
 	}
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f80a2aef9972..c7e56dc0a494 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -2036,7 +2036,7 @@ static int netsec_probe(struct platform_device *pdev)
 
 	mac = device_get_mac_address(&pdev->dev, macbuf, sizeof(macbuf));
 	if (mac)
-		ether_addr_copy(ndev->dev_addr, mac);
+		eth_hw_addr_set(ndev, mac);
 
 	if (priv->eeprom_base &&
 	    (!mac || !is_valid_ether_addr(ndev->dev_addr))) {
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 0de5f4a4fe08..6904bfaa5777 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1970,7 +1970,7 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
 	ndev_priv->msg_enable = AM65_CPSW_DEBUG;
 	SET_NETDEV_DEV(port->ndev, dev);
 
-	ether_addr_copy(port->ndev->dev_addr, port->slave.mac_addr);
+	eth_hw_addr_set(port->ndev, port->slave.mac_addr);
 
 	port->ndev->min_mtu = AM65_CPSW_MIN_PACKET_SIZE;
 	port->ndev->max_mtu = AM65_CPSW_MAX_PACKET_SIZE;
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 1530532748a8..279e261e4720 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -1000,7 +1000,7 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
 			   flags, vid);
 
 	ether_addr_copy(priv->mac_addr, addr->sa_data);
-	ether_addr_copy(ndev->dev_addr, priv->mac_addr);
+	eth_hw_addr_set(ndev, priv->mac_addr);
 	cpsw_set_slave_mac(&cpsw->slaves[slave_no], priv);
 
 	pm_runtime_put(cpsw->dev);
@@ -1401,7 +1401,7 @@ static int cpsw_create_ports(struct cpsw_common *cpsw)
 			dev_info(cpsw->dev, "Random MACID = %pM\n",
 				 priv->mac_addr);
 		}
-		ether_addr_copy(ndev->dev_addr, slave_data->mac_addr);
+		eth_hw_addr_set(ndev, slave_data->mac_addr);
 		ether_addr_copy(priv->mac_addr, slave_data->mac_addr);
 
 		cpsw->slaves[i].ndev = ndev;
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index e8291d848839..e4b4624be2ae 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1899,7 +1899,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
 
 	rc = davinci_emac_try_get_mac(pdev, res_ctrl ? 0 : 1, priv->mac_addr);
 	if (!rc)
-		ether_addr_copy(ndev->dev_addr, priv->mac_addr);
+		eth_hw_addr_set(ndev, priv->mac_addr);
 
 	if (!is_valid_ether_addr(priv->mac_addr)) {
 		/* Use random MAC if still none obtained. */
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index eda2961c0fe2..a4cd44a39e3d 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -2028,7 +2028,7 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
 
 		emac_arch_get_mac_addr(efuse_mac_addr, efuse, efuse_mac);
 		if (is_valid_ether_addr(efuse_mac_addr))
-			ether_addr_copy(ndev->dev_addr, efuse_mac_addr);
+			eth_hw_addr_set(ndev, efuse_mac_addr);
 		else
 			eth_random_addr(ndev->dev_addr);
 
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 928c411bd509..e7b2e5fd8d24 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -323,7 +323,7 @@ static inline void eth_hw_addr_inherit(struct net_device *dst,
 				       struct net_device *src)
 {
 	dst->addr_assign_type = src->addr_assign_type;
-	ether_addr_copy(dst->dev_addr, src->dev_addr);
+	eth_hw_addr_set(dst, src->dev_addr);
 }
 
 /**
-- 
cgit v1.2.3


From d0c27c9211fef3ce8083cc3ad2ca3067d211edc9 Mon Sep 17 00:00:00 2001
From: Henrik Grimler <henrik@grimler.se>
Date: Wed, 29 Sep 2021 20:14:18 +0200
Subject: power: supply: max17042_battery: fix typo in MAX17042_IAvg_empty

Datasheet gives the name IAvg_empty, not LAvg_empty.

Signed-off-by: Henrik Grimler <henrik@grimler.se>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/max17042_battery.c | 2 +-
 include/linux/power/max17042_battery.h  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index 5809ba997093..32f331480487 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -786,7 +786,7 @@ static inline void max17042_override_por_values(struct max17042_chip *chip)
 	if ((chip->chip_type == MAXIM_DEVICE_TYPE_MAX17042) ||
 	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17047) ||
 	    (chip->chip_type == MAXIM_DEVICE_TYPE_MAX17050)) {
-		max17042_override_por(map, MAX17042_LAvg_empty, config->lavg_empty);
+		max17042_override_por(map, MAX17042_IAvg_empty, config->iavg_empty);
 		max17042_override_por(map, MAX17042_TempNom, config->temp_nom);
 		max17042_override_por(map, MAX17042_TempLim, config->temp_lim);
 		max17042_override_por(map, MAX17042_FCTC, config->fctc);
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index dd24756a8af7..c417abd2ab70 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -78,7 +78,7 @@ enum max17042_register {
 	MAX17042_T_empty	= 0x34,
 
 	MAX17042_FullCAP0       = 0x35,
-	MAX17042_LAvg_empty	= 0x36,
+	MAX17042_IAvg_empty	= 0x36,
 	MAX17042_FCTC		= 0x37,
 	MAX17042_RCOMP0		= 0x38,
 	MAX17042_TempCo		= 0x39,
@@ -221,7 +221,7 @@ struct max17042_config_data {
 	u16	fullcap;	/* 0x10 */
 	u16	fullcapnom;	/* 0x23 */
 	u16	socempty;	/* 0x33 */
-	u16	lavg_empty;	/* 0x36 */
+	u16	iavg_empty;	/* 0x36 */
 	u16	dqacc;		/* 0x45 */
 	u16	dpacc;		/* 0x46 */
 	u16	qrtbl00;	/* 0x12 */
-- 
cgit v1.2.3


From dae9a6cab8009e526570e7477ce858dcdfeb256e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 30 Sep 2021 17:06:21 -0400
Subject: NFSD: Have legacy NFSD WRITE decoders use xdr_stream_subsegment()

Refactor.

Now that the NFSv2 and NFSv3 XDR decoders have been converted to
use xdr_streams, the WRITE decoder functions can use
xdr_stream_subsegment() to extract the WRITE payload into its own
xdr_buf, just as the NFSv4 WRITE XDR decoder currently does.

That makes it possible to pass the first kvec, pages array + length,
page_base, and total payload length via a single function parameter.

The payload's page_base is not yet assigned or used, but will be in
subsequent patches.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/nfs3proc.c         |  3 +--
 fs/nfsd/nfs3xdr.c          | 12 ++----------
 fs/nfsd/nfs4proc.c         |  3 +--
 fs/nfsd/nfsproc.c          |  3 +--
 fs/nfsd/nfsxdr.c           |  9 +--------
 fs/nfsd/xdr.h              |  2 +-
 fs/nfsd/xdr3.h             |  2 +-
 include/linux/sunrpc/svc.h |  3 +--
 net/sunrpc/svc.c           | 11 ++++++-----
 9 files changed, 15 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 17715a6c7a40..4418517f6f12 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -201,8 +201,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
 
 	fh_copy(&resp->fh, &argp->fh);
 	resp->committed = argp->stable;
-	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
-				      &argp->first, cnt);
+	nvecs = svc_fill_write_vector(rqstp, &argp->payload);
 	if (!nvecs) {
 		resp->status = nfserr_io;
 		goto out;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 3d37923afb06..267e56f218af 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -621,9 +621,6 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	u32 max_blocksize = svc_max_payload(rqstp);
-	struct kvec *head = rqstp->rq_arg.head;
-	struct kvec *tail = rqstp->rq_arg.tail;
-	size_t remaining;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
 		return 0;
@@ -641,17 +638,12 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 	/* request sanity */
 	if (args->count != args->len)
 		return 0;
-	remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
-	remaining -= xdr_stream_pos(xdr);
-	if (remaining < xdr_align_size(args->len))
-		return 0;
 	if (args->count > max_blocksize) {
 		args->count = max_blocksize;
 		args->len = max_blocksize;
 	}
-
-	args->first.iov_base = xdr->p;
-	args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
+	if (!xdr_stream_subsegment(xdr, &args->payload, args->count))
+		return 0;
 
 	return 1;
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3dc40c1d32bc..5895bbeba373 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1033,8 +1033,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 
 	write->wr_how_written = write->wr_stable_how;
 
-	nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages,
-				      write->wr_payload.head, write->wr_buflen);
+	nvecs = svc_fill_write_vector(rqstp, &write->wr_payload);
 	WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
 
 	status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 90fcd6178823..eea5b59b6a6c 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -234,8 +234,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
 		SVCFH_fmt(&argp->fh),
 		argp->len, argp->offset);
 
-	nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages,
-				      &argp->first, cnt);
+	nvecs = svc_fill_write_vector(rqstp, &argp->payload);
 	if (!nvecs) {
 		resp->status = nfserr_io;
 		goto out;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 082449c7d0db..ddcc18adfeb1 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -325,10 +325,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 {
 	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_writeargs *args = rqstp->rq_argp;
-	struct kvec *head = rqstp->rq_arg.head;
-	struct kvec *tail = rqstp->rq_arg.tail;
 	u32 beginoffset, totalcount;
-	size_t remaining;
 
 	if (!svcxdr_decode_fhandle(xdr, &args->fh))
 		return 0;
@@ -346,12 +343,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 		return 0;
 	if (args->len > NFSSVC_MAXBLKSIZE_V2)
 		return 0;
-	remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
-	remaining -= xdr_stream_pos(xdr);
-	if (remaining < xdr_align_size(args->len))
+	if (!xdr_stream_subsegment(xdr, &args->payload, args->len))
 		return 0;
-	args->first.iov_base = xdr->p;
-	args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
 
 	return 1;
 }
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index f45b4bc93f52..80fd6d7f3404 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -33,7 +33,7 @@ struct nfsd_writeargs {
 	svc_fh			fh;
 	__u32			offset;
 	int			len;
-	struct kvec		first;
+	struct xdr_buf		payload;
 };
 
 struct nfsd_createargs {
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 933008382bbe..712c117300cb 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -40,7 +40,7 @@ struct nfsd3_writeargs {
 	__u32			count;
 	int			stable;
 	__u32			len;
-	struct kvec		first;
+	struct xdr_buf		payload;
 };
 
 struct nfsd3_createargs {
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 064c96157d1f..6263410c948a 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -532,8 +532,7 @@ int		   svc_encode_result_payload(struct svc_rqst *rqstp,
 					     unsigned int offset,
 					     unsigned int length);
 unsigned int	   svc_fill_write_vector(struct svc_rqst *rqstp,
-					 struct page **pages,
-					 struct kvec *first, size_t total);
+					 struct xdr_buf *payload);
 char		  *svc_fill_symlink_pathname(struct svc_rqst *rqstp,
 					     struct kvec *first, void *p,
 					     size_t total);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index a3bbe5ce4570..08ca797bb8a4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1676,16 +1676,17 @@ EXPORT_SYMBOL_GPL(svc_encode_result_payload);
 /**
  * svc_fill_write_vector - Construct data argument for VFS write call
  * @rqstp: svc_rqst to operate on
- * @pages: list of pages containing data payload
- * @first: buffer containing first section of write payload
- * @total: total number of bytes of write payload
+ * @payload: xdr_buf containing only the write data payload
  *
  * Fills in rqstp::rq_vec, and returns the number of elements.
  */
-unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, struct page **pages,
-				   struct kvec *first, size_t total)
+unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
+				   struct xdr_buf *payload)
 {
+	struct page **pages = payload->pages;
+	struct kvec *first = payload->head;
 	struct kvec *vec = rqstp->rq_vec;
+	size_t total = payload->len;
 	unsigned int i;
 
 	/* Some types of transport can present the write payload
-- 
cgit v1.2.3


From ca05cbae2a0468e5d78e9b4605936a8bf5da328b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat, 10 Jul 2021 18:07:14 -0400
Subject: NFS: Fix up nfs_ctx_key_to_expire()

If the cached credential exists but doesn't have any expiration callback
then exit early.
Fix up atomicity issues when replacing the credential with a new one
since the existing code could lead to refcount leaks.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/inode.c         |  4 ++--
 fs/nfs/write.c         | 41 ++++++++++++++++++++++++++++-------------
 include/linux/nfs_fs.h |  2 +-
 3 files changed, 31 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 853213b3a209..4f45281c47cf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1024,7 +1024,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry,
 		ctx->cred = get_cred(filp->f_cred);
 	else
 		ctx->cred = get_current_cred();
-	ctx->ll_cred = NULL;
+	rcu_assign_pointer(ctx->ll_cred, NULL);
 	ctx->state = NULL;
 	ctx->mode = f_mode;
 	ctx->flags = 0;
@@ -1063,7 +1063,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 	put_cred(ctx->cred);
 	dput(ctx->dentry);
 	nfs_sb_deactive(sb);
-	put_rpccred(ctx->ll_cred);
+	put_rpccred(rcu_dereference_protected(ctx->ll_cred, 1));
 	kfree(ctx->mdsthreshold);
 	kfree_rcu(ctx, rcu_head);
 }
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index eae9bf114041..773ea2c8504d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1246,7 +1246,7 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode)
 	struct nfs_open_context *ctx = nfs_file_open_context(filp);
 
 	if (nfs_ctx_key_to_expire(ctx, inode) &&
-	    !ctx->ll_cred)
+	    !rcu_access_pointer(ctx->ll_cred))
 		/* Already expired! */
 		return -EACCES;
 	return 0;
@@ -1258,23 +1258,38 @@ nfs_key_timeout_notify(struct file *filp, struct inode *inode)
 bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
 {
 	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
-	struct rpc_cred *cred = ctx->ll_cred;
+	struct rpc_cred *cred, *new, *old = NULL;
 	struct auth_cred acred = {
 		.cred = ctx->cred,
 	};
+	bool ret = false;
 
-	if (cred && !cred->cr_ops->crmatch(&acred, cred, 0)) {
-		put_rpccred(cred);
-		ctx->ll_cred = NULL;
-		cred = NULL;
-	}
-	if (!cred)
-		cred = auth->au_ops->lookup_cred(auth, &acred, 0);
-	if (!cred || IS_ERR(cred))
+	rcu_read_lock();
+	cred = rcu_dereference(ctx->ll_cred);
+	if (cred && !(cred->cr_ops->crkey_timeout &&
+		      cred->cr_ops->crkey_timeout(cred)))
+		goto out;
+	rcu_read_unlock();
+
+	new = auth->au_ops->lookup_cred(auth, &acred, 0);
+	if (new == cred) {
+		put_rpccred(new);
 		return true;
-	ctx->ll_cred = cred;
-	return !!(cred->cr_ops->crkey_timeout &&
-		  cred->cr_ops->crkey_timeout(cred));
+	}
+	if (IS_ERR_OR_NULL(new)) {
+		new = NULL;
+		ret = true;
+	} else if (new->cr_ops->crkey_timeout &&
+		   new->cr_ops->crkey_timeout(new))
+		ret = true;
+
+	rcu_read_lock();
+	old = rcu_dereference_protected(xchg(&ctx->ll_cred,
+					     RCU_INITIALIZER(new)), 1);
+out:
+	rcu_read_unlock();
+	put_rpccred(old);
+	return ret;
 }
 
 /*
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index b9a8b925db43..9b75448ce0df 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -81,7 +81,7 @@ struct nfs_open_context {
 	fl_owner_t flock_owner;
 	struct dentry *dentry;
 	const struct cred *cred;
-	struct rpc_cred *ll_cred;	/* low-level cred - use to check for expiry */
+	struct rpc_cred __rcu *ll_cred;	/* low-level cred - use to check for expiry */
 	struct nfs4_state *state;
 	fmode_t mode;
 
-- 
cgit v1.2.3


From b9f8713f42af11ae6d7f63075334ba5298436be6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 12 Jul 2021 12:24:15 -0400
Subject: SUNRPC: Remove unnecessary memory barriers

The only check for RPC_TASK_RUNNING is the one in rpc_make_runnable(),
which happens under the same spin lock held when we call
rpc_clear_running().

Ditto, the last check for RPC_TASK_QUEUED in rpc_execute() is performed
under the same lock as the one held when we call rpc_clear_queued().

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index a237b8dbf608..db964bb63912 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -150,25 +150,13 @@ struct rpc_task_setup {
 #define RPC_TASK_MSG_PIN_WAIT	5
 #define RPC_TASK_SIGNALLED	6
 
-#define RPC_IS_RUNNING(t)	test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
-#define rpc_set_running(t)	set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
 #define rpc_test_and_set_running(t) \
 				test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
-#define rpc_clear_running(t)	\
-	do { \
-		smp_mb__before_atomic(); \
-		clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \
-		smp_mb__after_atomic(); \
-	} while (0)
+#define rpc_clear_running(t)	clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
 
 #define RPC_IS_QUEUED(t)	test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)
 #define rpc_set_queued(t)	set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)
-#define rpc_clear_queued(t)	\
-	do { \
-		smp_mb__before_atomic(); \
-		clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate); \
-		smp_mb__after_atomic(); \
-	} while (0)
+#define rpc_clear_queued(t)	clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)
 
 #define RPC_IS_ACTIVATED(t)	test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
 
-- 
cgit v1.2.3


From ff81dfb5d721fff87bd516c558847f6effb70031 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 28 Sep 2021 14:33:44 -0400
Subject: NFS: Further optimisations for 'ls -l'

If a user is doing 'ls -l', we have a heuristic in GETATTR that tells
the readdir code to try to use READDIRPLUS in order to refresh the inode
attributes. In certain cirumstances, we also try to invalidate the
remaining directory entries in order to ensure this refresh.

If there are multiple readers of the directory, we probably should avoid
invalidating the page cache, since the heuristic breaks down in that
situation anyway.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Tested-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfs/dir.c           | 16 +++++++++++-----
 include/linux/nfs_fs.h |  5 ++---
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index fa4d33687d2b..33cfff8ea551 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -78,6 +78,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
 		ctx->attr_gencount = nfsi->attr_gencount;
 		ctx->dir_cookie = 0;
 		ctx->dup_cookie = 0;
+		ctx->page_index = 0;
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -85,6 +86,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
 					      NFS_INO_INVALID_DATA |
 						      NFS_INO_REVAL_FORCED);
 		list_add(&ctx->list, &nfsi->open_files);
+		clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
 		spin_unlock(&dir->i_lock);
 		return ctx;
 	}
@@ -627,8 +629,7 @@ void nfs_force_use_readdirplus(struct inode *dir)
 	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
 	    !list_empty(&nfsi->open_files)) {
 		set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
-		invalidate_mapping_pages(dir->i_mapping,
-			nfsi->page_index + 1, -1);
+		set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
 	}
 }
 
@@ -938,10 +939,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 			       sizeof(nfsi->cookieverf));
 	}
 	res = nfs_readdir_search_array(desc);
-	if (res == 0) {
-		nfsi->page_index = desc->page_index;
+	if (res == 0)
 		return 0;
-	}
 	nfs_readdir_page_unlock_and_put_cached(desc);
 	return res;
 }
@@ -1080,6 +1079,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_dir_context *dir_ctx = file->private_data;
 	struct nfs_readdir_descriptor *desc;
+	pgoff_t page_index;
 	int res;
 
 	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
@@ -1110,10 +1110,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->dir_cookie = dir_ctx->dir_cookie;
 	desc->dup_cookie = dir_ctx->dup_cookie;
 	desc->duped = dir_ctx->duped;
+	page_index = dir_ctx->page_index;
 	desc->attr_gencount = dir_ctx->attr_gencount;
 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
 	spin_unlock(&file->f_lock);
 
+	if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
+	    list_is_singular(&nfsi->open_files))
+		invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
+
 	do {
 		res = readdir_search_pagecache(desc);
 
@@ -1150,6 +1155,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	dir_ctx->dup_cookie = desc->dup_cookie;
 	dir_ctx->duped = desc->duped;
 	dir_ctx->attr_gencount = desc->attr_gencount;
+	dir_ctx->page_index = desc->page_index;
 	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
 	spin_unlock(&file->f_lock);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9b75448ce0df..ca547cc5458c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -103,6 +103,7 @@ struct nfs_open_dir_context {
 	__be32	verf[NFS_DIR_VERIFIER_SIZE];
 	__u64 dir_cookie;
 	__u64 dup_cookie;
+	pgoff_t page_index;
 	signed char duped;
 };
 
@@ -181,9 +182,6 @@ struct nfs_inode {
 	struct rw_semaphore	rmdir_sem;
 	struct mutex		commit_mutex;
 
-	/* track last access to cached pages */
-	unsigned long		page_index;
-
 #if IS_ENABLED(CONFIG_NFS_V4)
 	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
@@ -272,6 +270,7 @@ struct nfs4_copy_state {
 #define NFS_INO_INVALIDATING	(3)		/* inode is being invalidated */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
+#define NFS_INO_FORCE_READDIR	(7)		/* force readdirplus */
 #define NFS_INO_LAYOUTCOMMIT	(9)		/* layoutcommit required */
 #define NFS_INO_LAYOUTCOMMITTING (10)		/* layoutcommit inflight */
 #define NFS_INO_LAYOUTSTATS	(11)		/* layoutstats inflight */
-- 
cgit v1.2.3


From 3734b9f2cee01d9dde2fbbda742ba6dd6ba10a29 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 27 Sep 2021 01:40:24 +0300
Subject: opp: Change type of dev_pm_opp_attach_genpd(names) argument

Elements of the 'names' array are not changed by the code, constify them
for consistency.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 6 +++---
 include/linux/pm_opp.h | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 04b4691a8aac..3057beabd370 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -2348,12 +2348,12 @@ static void _opp_detach_genpd(struct opp_table *opp_table)
  * "required-opps" are added in DT.
  */
 struct opp_table *dev_pm_opp_attach_genpd(struct device *dev,
-		const char **names, struct device ***virt_devs)
+		const char * const *names, struct device ***virt_devs)
 {
 	struct opp_table *opp_table;
 	struct device *virt_dev;
 	int index = 0, ret = -EINVAL;
-	const char **name = names;
+	const char * const *name = names;
 
 	opp_table = _add_opp_table(dev, false);
 	if (IS_ERR(opp_table))
@@ -2457,7 +2457,7 @@ static void devm_pm_opp_detach_genpd(void *data)
  *
  * Return: 0 on success and errorno otherwise.
  */
-int devm_pm_opp_attach_genpd(struct device *dev, const char **names,
+int devm_pm_opp_attach_genpd(struct device *dev, const char * const *names,
 			     struct device ***virt_devs)
 {
 	struct opp_table *opp_table;
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 84150a22fd7c..7f1f066fc377 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -156,9 +156,9 @@ int devm_pm_opp_set_clkname(struct device *dev, const char *name);
 struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
 void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table);
 int devm_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data));
-struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs);
+struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs);
 void dev_pm_opp_detach_genpd(struct opp_table *opp_table);
-int devm_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs);
+int devm_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs);
 struct dev_pm_opp *dev_pm_opp_xlate_required_opp(struct opp_table *src_table, struct opp_table *dst_table, struct dev_pm_opp *src_opp);
 int dev_pm_opp_xlate_performance_state(struct opp_table *src_table, struct opp_table *dst_table, unsigned int pstate);
 int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq);
@@ -376,7 +376,7 @@ static inline int devm_pm_opp_set_clkname(struct device *dev, const char *name)
 	return -EOPNOTSUPP;
 }
 
-static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char **names, struct device ***virt_devs)
+static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, const char * const *names, struct device ***virt_devs)
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
@@ -384,7 +384,7 @@ static inline struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, cons
 static inline void dev_pm_opp_detach_genpd(struct opp_table *opp_table) {}
 
 static inline int devm_pm_opp_attach_genpd(struct device *dev,
-					   const char **names,
+					   const char * const *names,
 					   struct device ***virt_devs)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3


From 46b49b12f3fc5e1347dba37d4639e2165f447871 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 8 Sep 2021 17:58:33 -0500
Subject: arch/cc: Introduce a function to check for confidential computing
 features

In preparation for other confidential computing technologies, introduce
a generic helper function, cc_platform_has(), that can be used to
check for specific active confidential computing attributes, like
memory encryption. This is intended to eliminate having to add multiple
technology-specific checks to the code (e.g. if (sev_active() ||
tdx_active() || ... ).

 [ bp: s/_CC_PLATFORM_H/_LINUX_CC_PLATFORM_H/g ]

Co-developed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Co-developed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210928191009.32551-3-bp@alien8.de
---
 arch/Kconfig                |  3 ++
 include/linux/cc_platform.h | 88 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 91 insertions(+)
 create mode 100644 include/linux/cc_platform.h

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 8df1c7102643..d1e69d6e8498 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1234,6 +1234,9 @@ config RELR
 config ARCH_HAS_MEM_ENCRYPT
 	bool
 
+config ARCH_HAS_CC_PLATFORM
+	bool
+
 config HAVE_SPARSE_SYSCALL_NR
        bool
        help
diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h
new file mode 100644
index 000000000000..a075b70b9a70
--- /dev/null
+++ b/include/linux/cc_platform.h
@@ -0,0 +1,88 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Confidential Computing Platform Capability checks
+ *
+ * Copyright (C) 2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ */
+
+#ifndef _LINUX_CC_PLATFORM_H
+#define _LINUX_CC_PLATFORM_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+/**
+ * enum cc_attr - Confidential computing attributes
+ *
+ * These attributes represent confidential computing features that are
+ * currently active.
+ */
+enum cc_attr {
+	/**
+	 * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active
+	 *
+	 * The platform/OS is running with active memory encryption. This
+	 * includes running either as a bare-metal system or a hypervisor
+	 * and actively using memory encryption or as a guest/virtual machine
+	 * and actively using memory encryption.
+	 *
+	 * Examples include SME, SEV and SEV-ES.
+	 */
+	CC_ATTR_MEM_ENCRYPT,
+
+	/**
+	 * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active
+	 *
+	 * The platform/OS is running as a bare-metal system or a hypervisor
+	 * and actively using memory encryption.
+	 *
+	 * Examples include SME.
+	 */
+	CC_ATTR_HOST_MEM_ENCRYPT,
+
+	/**
+	 * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active
+	 *
+	 * The platform/OS is running as a guest/virtual machine and actively
+	 * using memory encryption.
+	 *
+	 * Examples include SEV and SEV-ES.
+	 */
+	CC_ATTR_GUEST_MEM_ENCRYPT,
+
+	/**
+	 * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active
+	 *
+	 * The platform/OS is running as a guest/virtual machine and actively
+	 * using memory encryption and register state encryption.
+	 *
+	 * Examples include SEV-ES.
+	 */
+	CC_ATTR_GUEST_STATE_ENCRYPT,
+};
+
+#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+
+/**
+ * cc_platform_has() - Checks if the specified cc_attr attribute is active
+ * @attr: Confidential computing attribute to check
+ *
+ * The cc_platform_has() function will return an indicator as to whether the
+ * specified Confidential Computing attribute is currently active.
+ *
+ * Context: Any context
+ * Return:
+ * * TRUE  - Specified Confidential Computing attribute is active
+ * * FALSE - Specified Confidential Computing attribute is not active
+ */
+bool cc_platform_has(enum cc_attr attr);
+
+#else	/* !CONFIG_ARCH_HAS_CC_PLATFORM */
+
+static inline bool cc_platform_has(enum cc_attr attr) { return false; }
+
+#endif	/* CONFIG_ARCH_HAS_CC_PLATFORM */
+
+#endif	/* _LINUX_CC_PLATFORM_H */
-- 
cgit v1.2.3


From e9d1d2bb75b2d5d4b426769c5aae0ce8cef3558f Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Wed, 8 Sep 2021 17:58:39 -0500
Subject: treewide: Replace the use of mem_encrypt_active() with
 cc_platform_has()

Replace uses of mem_encrypt_active() with calls to cc_platform_has() with
the CC_ATTR_MEM_ENCRYPT attribute.

Remove the implementation of mem_encrypt_active() across all arches.

For s390, since the default implementation of the cc_platform_has()
matches the s390 implementation of mem_encrypt_active(), cc_platform_has()
does not need to be implemented in s390 (the config option
ARCH_HAS_CC_PLATFORM is not set).

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20210928191009.32551-9-bp@alien8.de
---
 arch/powerpc/include/asm/mem_encrypt.h  | 5 -----
 arch/powerpc/platforms/pseries/svm.c    | 5 +++--
 arch/s390/include/asm/mem_encrypt.h     | 2 --
 arch/x86/include/asm/mem_encrypt.h      | 5 -----
 arch/x86/kernel/head64.c                | 9 +++++++--
 arch/x86/mm/ioremap.c                   | 4 ++--
 arch/x86/mm/mem_encrypt.c               | 2 +-
 arch/x86/mm/pat/set_memory.c            | 3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++-
 drivers/gpu/drm/drm_cache.c             | 4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     | 4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c     | 6 +++---
 drivers/iommu/amd/iommu.c               | 3 ++-
 drivers/iommu/amd/iommu_v2.c            | 3 ++-
 drivers/iommu/iommu.c                   | 3 ++-
 fs/proc/vmcore.c                        | 6 +++---
 include/linux/mem_encrypt.h             | 4 ----
 kernel/dma/swiotlb.c                    | 4 ++--
 18 files changed, 36 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/mem_encrypt.h b/arch/powerpc/include/asm/mem_encrypt.h
index ba9dab07c1be..2f26b8fc8d29 100644
--- a/arch/powerpc/include/asm/mem_encrypt.h
+++ b/arch/powerpc/include/asm/mem_encrypt.h
@@ -10,11 +10,6 @@
 
 #include <asm/svm.h>
 
-static inline bool mem_encrypt_active(void)
-{
-	return is_secure_guest();
-}
-
 static inline bool force_dma_unencrypted(struct device *dev)
 {
 	return is_secure_guest();
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 87f001b4c4e4..c083ecbbae4d 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -8,6 +8,7 @@
 
 #include <linux/mm.h>
 #include <linux/memblock.h>
+#include <linux/cc_platform.h>
 #include <asm/machdep.h>
 #include <asm/svm.h>
 #include <asm/swiotlb.h>
@@ -63,7 +64,7 @@ void __init svm_swiotlb_init(void)
 
 int set_memory_encrypted(unsigned long addr, int numpages)
 {
-	if (!mem_encrypt_active())
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return 0;
 
 	if (!PAGE_ALIGNED(addr))
@@ -76,7 +77,7 @@ int set_memory_encrypted(unsigned long addr, int numpages)
 
 int set_memory_decrypted(unsigned long addr, int numpages)
 {
-	if (!mem_encrypt_active())
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return 0;
 
 	if (!PAGE_ALIGNED(addr))
diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index 2542cbf7e2d1..08a8b96606d7 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -4,8 +4,6 @@
 
 #ifndef __ASSEMBLY__
 
-static inline bool mem_encrypt_active(void) { return false; }
-
 int set_memory_encrypted(unsigned long addr, int numpages);
 int set_memory_decrypted(unsigned long addr, int numpages);
 
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index da14ede311aa..2d4f5c17d79c 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -96,11 +96,6 @@ static inline void mem_encrypt_free_decrypted_mem(void) { }
 
 extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
 
-static inline bool mem_encrypt_active(void)
-{
-	return sme_me_mask;
-}
-
 static inline u64 sme_get_me_mask(void)
 {
 	return sme_me_mask;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index de01903c3735..fc5371a7e9d1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -19,7 +19,7 @@
 #include <linux/start_kernel.h>
 #include <linux/io.h>
 #include <linux/memblock.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
 #include <linux/pgtable.h>
 
 #include <asm/processor.h>
@@ -284,8 +284,13 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	 * The bss section will be memset to zero later in the initialization so
 	 * there is no need to zero it after changing the memory encryption
 	 * attribute.
+	 *
+	 * This is early code, use an open coded check for SME instead of
+	 * using cc_platform_has(). This eliminates worries about removing
+	 * instrumentation or checking boot_cpu_data in the cc_platform_has()
+	 * function.
 	 */
-	if (mem_encrypt_active()) {
+	if (sme_get_me_mask()) {
 		vaddr = (unsigned long)__start_bss_decrypted;
 		vaddr_end = (unsigned long)__end_bss_decrypted;
 		for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index b59a5cbc6bc5..026031b3b782 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -694,7 +694,7 @@ static bool __init early_memremap_is_setup_data(resource_size_t phys_addr,
 bool arch_memremap_can_ram_remap(resource_size_t phys_addr, unsigned long size,
 				 unsigned long flags)
 {
-	if (!mem_encrypt_active())
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return true;
 
 	if (flags & MEMREMAP_ENC)
@@ -724,7 +724,7 @@ pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
 {
 	bool encrypted_prot;
 
-	if (!mem_encrypt_active())
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return prot;
 
 	encrypted_prot = true;
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 2d04c39bea1d..23d54b810f08 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -400,7 +400,7 @@ void __init mem_encrypt_free_decrypted_mem(void)
 	 * The unused memory range was mapped decrypted, change the encryption
 	 * attribute from decrypted to encrypted before freeing it.
 	 */
-	if (mem_encrypt_active()) {
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
 		r = set_memory_encrypted(vaddr, npages);
 		if (r) {
 			pr_warn("failed to free unused decrypted pages\n");
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index ad8a5c586a35..527957586f3c 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -18,6 +18,7 @@
 #include <linux/libnvdimm.h>
 #include <linux/vmstat.h>
 #include <linux/kernel.h>
+#include <linux/cc_platform.h>
 
 #include <asm/e820/api.h>
 #include <asm/processor.h>
@@ -1986,7 +1987,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 	int ret;
 
 	/* Nothing to do if memory encryption is not active */
-	if (!mem_encrypt_active())
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return 0;
 
 	/* Should not be working on unaligned addresses */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f18240f87387..7741195eb85e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -38,6 +38,7 @@
 #include <drm/drm_probe_helper.h>
 #include <linux/mmu_notifier.h>
 #include <linux/suspend.h>
+#include <linux/cc_platform.h>
 
 #include "amdgpu.h"
 #include "amdgpu_irq.h"
@@ -1269,7 +1270,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 	 * however, SME requires an indirect IOMMU mapping because the encryption
 	 * bit is beyond the DMA mask of the chip.
 	 */
-	if (mem_encrypt_active() && ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
+	    ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) {
 		dev_info(&pdev->dev,
 			 "SME is not compatible with RAVEN\n");
 		return -ENOTSUPP;
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 30cc59fe6ef7..f19d9acbe959 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -31,7 +31,7 @@
 #include <linux/dma-buf-map.h>
 #include <linux/export.h>
 #include <linux/highmem.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
 #include <xen/xen.h>
 
 #include <drm/drm_cache.h>
@@ -204,7 +204,7 @@ bool drm_need_swiotlb(int dma_bits)
 	 * Enforce dma_alloc_coherent when memory encryption is active as well
 	 * for the same reasons as for Xen paravirtual hosts.
 	 */
-	if (mem_encrypt_active())
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return true;
 
 	for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index ab9a1750e1df..bfd71c86faa5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -29,7 +29,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_drv.h>
@@ -666,7 +666,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
 		[vmw_dma_map_bind] = "Giving up DMA mappings early."};
 
 	/* TTM currently doesn't fully support SEV encryption. */
-	if (mem_encrypt_active())
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return -EINVAL;
 
 	if (vmw_force_coherent)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index e50fb82a3030..2aceac7856e2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -28,7 +28,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
 
 #include <asm/hypervisor.h>
 #include <drm/drm_ioctl.h>
@@ -160,7 +160,7 @@ static unsigned long vmw_port_hb_out(struct rpc_channel *channel,
 	unsigned long msg_len = strlen(msg);
 
 	/* HB port can't access encrypted memory. */
-	if (hb && !mem_encrypt_active()) {
+	if (hb && !cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
 		unsigned long bp = channel->cookie_high;
 		u32 channel_id = (channel->channel_id << 16);
 
@@ -216,7 +216,7 @@ static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply,
 	unsigned long si, di, eax, ebx, ecx, edx;
 
 	/* HB port can't access encrypted memory */
-	if (hb && !mem_encrypt_active()) {
+	if (hb && !cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
 		unsigned long bp = channel->cookie_low;
 		u32 channel_id = (channel->channel_id << 16);
 
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 1722bb161841..9e5da037d949 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -31,6 +31,7 @@
 #include <linux/irqdomain.h>
 #include <linux/percpu.h>
 #include <linux/io-pgtable.h>
+#include <linux/cc_platform.h>
 #include <asm/irq_remapping.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
@@ -2238,7 +2239,7 @@ static int amd_iommu_def_domain_type(struct device *dev)
 	 * active, because some of those devices (AMD GPUs) don't have the
 	 * encryption bit in their DMA-mask and require remapping.
 	 */
-	if (!mem_encrypt_active() && dev_data->iommu_v2)
+	if (!cc_platform_has(CC_ATTR_MEM_ENCRYPT) && dev_data->iommu_v2)
 		return IOMMU_DOMAIN_IDENTITY;
 
 	return 0;
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index a9e568276c99..13cbeb997cc1 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -17,6 +17,7 @@
 #include <linux/wait.h>
 #include <linux/pci.h>
 #include <linux/gfp.h>
+#include <linux/cc_platform.h>
 
 #include "amd_iommu.h"
 
@@ -742,7 +743,7 @@ int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
 	 * When memory encryption is active the device is likely not in a
 	 * direct-mapped domain. Forbid using IOMMUv2 functionality for now.
 	 */
-	if (mem_encrypt_active())
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		return -ENODEV;
 
 	if (!amd_iommu_v2_supported())
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3303d707bab4..e80261d17a49 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -25,6 +25,7 @@
 #include <linux/property.h>
 #include <linux/fsl/mc.h>
 #include <linux/module.h>
+#include <linux/cc_platform.h>
 #include <trace/events/iommu.h>
 
 static struct kset *iommu_group_kset;
@@ -130,7 +131,7 @@ static int __init iommu_subsys_init(void)
 		else
 			iommu_set_default_translated(false);
 
-		if (iommu_default_passthrough() && mem_encrypt_active()) {
+		if (iommu_default_passthrough() && cc_platform_has(CC_ATTR_MEM_ENCRYPT)) {
 			pr_info("Memory encryption detected - Disabling default IOMMU Passthrough\n");
 			iommu_set_default_translated(false);
 		}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 9a15334da208..cdbbf819d2d6 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -26,7 +26,7 @@
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/uaccess.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
 #include <asm/io.h>
 #include "internal.h"
 
@@ -177,7 +177,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
  */
 ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
 {
-	return read_from_oldmem(buf, count, ppos, 0, mem_encrypt_active());
+	return read_from_oldmem(buf, count, ppos, 0, cc_platform_has(CC_ATTR_MEM_ENCRYPT));
 }
 
 /*
@@ -378,7 +378,7 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
 					    buflen);
 			start = m->paddr + *fpos - m->offset;
 			tmp = read_from_oldmem(buffer, tsz, &start,
-					       userbuf, mem_encrypt_active());
+					       userbuf, cc_platform_has(CC_ATTR_MEM_ENCRYPT));
 			if (tmp < 0)
 				return tmp;
 			buflen -= tsz;
diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h
index 5c4a18a91f89..ae4526389261 100644
--- a/include/linux/mem_encrypt.h
+++ b/include/linux/mem_encrypt.h
@@ -16,10 +16,6 @@
 
 #include <asm/mem_encrypt.h>
 
-#else	/* !CONFIG_ARCH_HAS_MEM_ENCRYPT */
-
-static inline bool mem_encrypt_active(void) { return false; }
-
 #endif	/* CONFIG_ARCH_HAS_MEM_ENCRYPT */
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 87c40517e822..c4ca040fdb05 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -34,7 +34,7 @@
 #include <linux/highmem.h>
 #include <linux/gfp.h>
 #include <linux/scatterlist.h>
-#include <linux/mem_encrypt.h>
+#include <linux/cc_platform.h>
 #include <linux/set_memory.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
@@ -552,7 +552,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
 	if (!mem)
 		panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
 
-	if (mem_encrypt_active())
+	if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
 		pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
 
 	if (mapping_size > alloc_size) {
-- 
cgit v1.2.3


From e69709f6861aaba80b4eaab6825e8c522a2afb5c Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Mon, 20 Sep 2021 20:22:46 +0300
Subject: opp: Add more resource-managed variants of dev_pm_opp_of_add_table()

Add resource-managed variants of dev_pm_opp_of_add_table_indexed() and
dev_pm_opp_of_add_table_noclk(), allowing drivers to remove boilerplate
code.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
[ Viresh: Added underscore to devm_of_add_table_indexed() ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c       | 46 +++++++++++++++++++++++++++++++++++++++-------
 include/linux/pm_opp.h | 12 ++++++++++++
 2 files changed, 51 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 5437085fb380..fe218af735b0 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -1081,6 +1081,17 @@ static void devm_pm_opp_of_table_release(void *data)
 	dev_pm_opp_of_remove_table(data);
 }
 
+static int _devm_of_add_table_indexed(struct device *dev, int index, bool getclk)
+{
+	int ret;
+
+	ret = _of_add_table_indexed(dev, index, getclk);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(dev, devm_pm_opp_of_table_release, dev);
+}
+
 /**
  * devm_pm_opp_of_add_table() - Initialize opp table from device tree
  * @dev:	device pointer used to lookup OPP table.
@@ -1102,13 +1113,7 @@ static void devm_pm_opp_of_table_release(void *data)
  */
 int devm_pm_opp_of_add_table(struct device *dev)
 {
-	int ret;
-
-	ret = dev_pm_opp_of_add_table(dev);
-	if (ret)
-		return ret;
-
-	return devm_add_action_or_reset(dev, devm_pm_opp_of_table_release, dev);
+	return _devm_of_add_table_indexed(dev, 0, true);
 }
 EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table);
 
@@ -1151,6 +1156,19 @@ int dev_pm_opp_of_add_table_indexed(struct device *dev, int index)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed);
 
+/**
+ * devm_pm_opp_of_add_table_indexed() - Initialize indexed opp table from device tree
+ * @dev:	device pointer used to lookup OPP table.
+ * @index:	Index number.
+ *
+ * This is a resource-managed variant of dev_pm_opp_of_add_table_indexed().
+ */
+int devm_pm_opp_of_add_table_indexed(struct device *dev, int index)
+{
+	return _devm_of_add_table_indexed(dev, index, true);
+}
+EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table_indexed);
+
 /**
  * dev_pm_opp_of_add_table_noclk() - Initialize indexed opp table from device
  *		tree without getting clk for device.
@@ -1169,6 +1187,20 @@ int dev_pm_opp_of_add_table_noclk(struct device *dev, int index)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_noclk);
 
+/**
+ * devm_pm_opp_of_add_table_noclk() - Initialize indexed opp table from device
+ *		tree without getting clk for device.
+ * @dev:	device pointer used to lookup OPP table.
+ * @index:	Index number.
+ *
+ * This is a resource-managed variant of dev_pm_opp_of_add_table_noclk().
+ */
+int devm_pm_opp_of_add_table_noclk(struct device *dev, int index)
+{
+	return _devm_of_add_table_indexed(dev, index, false);
+}
+EXPORT_SYMBOL_GPL(devm_pm_opp_of_add_table_noclk);
+
 /* CPU device specific helpers */
 
 /**
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 7f1f066fc377..879c138c7b8e 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -439,7 +439,9 @@ static inline int dev_pm_opp_sync_regulators(struct device *dev)
 #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF)
 int dev_pm_opp_of_add_table(struct device *dev);
 int dev_pm_opp_of_add_table_indexed(struct device *dev, int index);
+int devm_pm_opp_of_add_table_indexed(struct device *dev, int index);
 int dev_pm_opp_of_add_table_noclk(struct device *dev, int index);
+int devm_pm_opp_of_add_table_noclk(struct device *dev, int index);
 void dev_pm_opp_of_remove_table(struct device *dev);
 int devm_pm_opp_of_add_table(struct device *dev);
 int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask);
@@ -465,11 +467,21 @@ static inline int dev_pm_opp_of_add_table_indexed(struct device *dev, int index)
 	return -EOPNOTSUPP;
 }
 
+static inline int devm_pm_opp_of_add_table_indexed(struct device *dev, int index)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int dev_pm_opp_of_add_table_noclk(struct device *dev, int index)
 {
 	return -EOPNOTSUPP;
 }
 
+static inline int devm_pm_opp_of_add_table_noclk(struct device *dev, int index)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline void dev_pm_opp_of_remove_table(struct device *dev)
 {
 }
-- 
cgit v1.2.3


From 19198e4ec97dc9d173b458a75ace3c3ca55c2d85 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Mon, 4 Oct 2021 09:58:39 +0300
Subject: qed: Fix kernel-doc warnings

This patch fixes all the qed and qede kernel-doc warnings
according to the guidelines that are described in
Documentation/doc-guide/kernel-doc.rst.

Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h          |   9 +-
 drivers/net/ethernet/qlogic/qed/qed_cxt.h      | 138 ++--
 drivers/net/ethernet/qlogic/qed/qed_dev_api.h  | 343 +++++----
 drivers/net/ethernet/qlogic/qed/qed_hsi.h      | 956 +++++++++++++------------
 drivers/net/ethernet/qlogic/qed/qed_hw.h       | 222 +++---
 drivers/net/ethernet/qlogic/qed/qed_init_ops.h |  58 +-
 drivers/net/ethernet/qlogic/qed/qed_int.h      | 284 ++++----
 drivers/net/ethernet/qlogic/qed/qed_iscsi.h    |   9 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.h       | 134 ++--
 drivers/net/ethernet/qlogic/qed/qed_ll2.h      | 130 ++--
 drivers/net/ethernet/qlogic/qed/qed_mcp.h      | 757 +++++++++++---------
 drivers/net/ethernet/qlogic/qed/qed_selftest.h |  30 +-
 drivers/net/ethernet/qlogic/qed/qed_sp.h       | 215 +++---
 drivers/net/ethernet/qlogic/qed/qed_sriov.h    |  99 +--
 drivers/net/ethernet/qlogic/qed/qed_vf.h       | 301 ++++----
 drivers/net/ethernet/qlogic/qede/qede_main.c   |   5 +-
 include/linux/qed/qed_chain.h                  |  97 +--
 include/linux/qed/qed_if.h                     | 255 ++++---
 include/linux/qed/qed_iscsi_if.h               |   2 +-
 include/linux/qed/qed_ll2_if.h                 |  42 +-
 include/linux/qed/qed_nvmetcp_if.h             |  17 +
 21 files changed, 2186 insertions(+), 1917 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index d58e021614cd..b656408b9d70 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -877,12 +877,13 @@ u32 qed_get_hsi_def_val(struct qed_dev *cdev, enum qed_hsi_def_type type);
 
 
 /**
- * @brief qed_concrete_to_sw_fid - get the sw function id from
- *        the concrete value.
+ * qed_concrete_to_sw_fid(): Get the sw function id from
+ *                           the concrete value.
  *
- * @param concrete_fid
+ * @cdev: Qed dev pointer.
+ * @concrete_fid: Concrete fid.
  *
- * @return inline u8
+ * Return: inline u8.
  */
 static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
 					u32 concrete_fid)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
index 8adb7ed0c12d..d31196db7bdd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
@@ -28,24 +28,23 @@ struct qed_tid_mem {
 };
 
 /**
- * @brief qedo_cid_get_cxt_info - Returns the context info for a specific cid
+ * qed_cxt_get_cid_info(): Returns the context info for a specific cidi.
  *
+ * @p_hwfn: HW device data.
+ * @p_info: In/out.
  *
- * @param p_hwfn
- * @param p_info in/out
- *
- * @return int
+ * Return: Int.
  */
 int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn,
 			 struct qed_cxt_info *p_info);
 
 /**
- * @brief qed_cxt_get_tid_mem_info
+ * qed_cxt_get_tid_mem_info(): Returns the tid mem info.
  *
- * @param p_hwfn
- * @param p_info
+ * @p_hwfn: HW device data.
+ * @p_info: in/out.
  *
- * @return int
+ * Return: int.
  */
 int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn,
 			     struct qed_tid_mem *p_info);
@@ -64,142 +63,155 @@ u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn,
 				enum protocol_type type, u32 *vf_cid);
 
 /**
- * @brief qed_cxt_set_pf_params - Set the PF params for cxt init
+ * qed_cxt_set_pf_params(): Set the PF params for cxt init.
+ *
+ * @p_hwfn: HW device data.
+ * @rdma_tasks: Requested maximum.
  *
- * @param p_hwfn
- * @param rdma_tasks - requested maximum
- * @return int
+ * Return: int.
  */
 int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn, u32 rdma_tasks);
 
 /**
- * @brief qed_cxt_cfg_ilt_compute - compute ILT init parameters
+ * qed_cxt_cfg_ilt_compute(): Compute ILT init parameters.
  *
- * @param p_hwfn
- * @param last_line
+ * @p_hwfn: HW device data.
+ * @last_line: Last_line.
  *
- * @return int
+ * Return: Int
  */
 int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *last_line);
 
 /**
- * @brief qed_cxt_cfg_ilt_compute_excess - how many lines can be decreased
+ * qed_cxt_cfg_ilt_compute_excess(): How many lines can be decreased.
+ *
+ * @p_hwfn: HW device data.
+ * @used_lines: Used lines.
  *
- * @param p_hwfn
- * @param used_lines
+ * Return: Int.
  */
 u32 qed_cxt_cfg_ilt_compute_excess(struct qed_hwfn *p_hwfn, u32 used_lines);
 
 /**
- * @brief qed_cxt_mngr_alloc - Allocate and init the context manager struct
+ * qed_cxt_mngr_alloc(): Allocate and init the context manager struct.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_cxt_mngr_free
+ * qed_cxt_mngr_free() - Context manager free.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  */
 void qed_cxt_mngr_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_cxt_tables_alloc - Allocate ILT shadow, Searcher T2, acquired map
+ * qed_cxt_tables_alloc(): Allocate ILT shadow, Searcher T2, acquired map.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_cxt_mngr_setup - Reset the acquired CIDs
+ * qed_cxt_mngr_setup(): Reset the acquired CIDs.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  */
 void qed_cxt_mngr_setup(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_cxt_hw_init_common - Initailze ILT and DQ, common phase, per path.
- *
+ * qed_cxt_hw_init_common(): Initailze ILT and DQ, common phase, per path.
  *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_cxt_hw_init_common(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_cxt_hw_init_pf - Initailze ILT and DQ, PF phase, per path.
+ * qed_cxt_hw_init_pf(): Initailze ILT and DQ, PF phase, per path.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Void.
  */
 void qed_cxt_hw_init_pf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief qed_qm_init_pf - Initailze the QM PF phase, per path
+ * qed_qm_init_pf(): Initailze the QM PF phase, per path.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @is_pf_loading: Is pf pending.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param is_pf_loading
+ * Return: Void.
  */
 void qed_qm_init_pf(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, bool is_pf_loading);
 
 /**
- * @brief Reconfigures QM pf on the fly
+ * qed_qm_reconf(): Reconfigures QM pf on the fly.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @return int
+ * Return: Int.
  */
 int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 #define QED_CXT_PF_CID (0xff)
 
 /**
- * @brief qed_cxt_release - Release a cid
+ * qed_cxt_release_cid(): Release a cid.
  *
- * @param p_hwfn
- * @param cid
+ * @p_hwfn: HW device data.
+ * @cid: Cid.
+ *
+ * Return: Void.
  */
 void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid);
 
 /**
- * @brief qed_cxt_release - Release a cid belonging to a vf-queue
+ * _qed_cxt_release_cid(): Release a cid belonging to a vf-queue.
+ *
+ * @p_hwfn: HW device data.
+ * @cid: Cid.
+ * @vfid: Engine relative index. QED_CXT_PF_CID if belongs to PF.
  *
- * @param p_hwfn
- * @param cid
- * @param vfid - engine relative index. QED_CXT_PF_CID if belongs to PF
+ * Return: Void.
  */
 void _qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid, u8 vfid);
 
 /**
- * @brief qed_cxt_acquire - Acquire a new cid of a specific protocol type
+ * qed_cxt_acquire_cid(): Acquire a new cid of a specific protocol type.
  *
- * @param p_hwfn
- * @param type
- * @param p_cid
+ * @p_hwfn: HW device data.
+ * @type: Type.
+ * @p_cid: Pointer cid.
  *
- * @return int
+ * Return: Int.
  */
 int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
 			enum protocol_type type, u32 *p_cid);
 
 /**
- * @brief _qed_cxt_acquire - Acquire a new cid of a specific protocol type
- *                           for a vf-queue
+ * _qed_cxt_acquire_cid(): Acquire a new cid of a specific protocol type
+ *                         for a vf-queue.
  *
- * @param p_hwfn
- * @param type
- * @param p_cid
- * @param vfid - engine relative index. QED_CXT_PF_CID if belongs to PF
+ * @p_hwfn: HW device data.
+ * @type: Type.
+ * @p_cid: Pointer cid.
+ * @vfid: Engine relative index. QED_CXT_PF_CID if belongs to PF.
  *
- * @return int
+ * Return: Int.
  */
 int _qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn,
 			 enum protocol_type type, u32 *p_cid, u8 vfid);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index d3c1f3879be8..f0a825b985a4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -15,44 +15,52 @@
 #include "qed_int.h"
 
 /**
- * @brief qed_init_dp - initialize the debug level
+ * qed_init_dp(): Initialize the debug level.
  *
- * @param cdev
- * @param dp_module
- * @param dp_level
+ * @cdev: Qed dev pointer.
+ * @dp_module: Module debug parameter.
+ * @dp_level: Module debug level.
+ *
+ * Return: Void.
  */
 void qed_init_dp(struct qed_dev *cdev,
 		 u32 dp_module,
 		 u8 dp_level);
 
 /**
- * @brief qed_init_struct - initialize the device structure to
- *        its defaults
+ * qed_init_struct(): Initialize the device structure to
+ *                    its defaults.
+ *
+ * @cdev: Qed dev pointer.
  *
- * @param cdev
+ * Return: Void.
  */
 void qed_init_struct(struct qed_dev *cdev);
 
 /**
- * @brief qed_resc_free -
+ * qed_resc_free: Free device resources.
+ *
+ * @cdev: Qed dev pointer.
  *
- * @param cdev
+ * Return: Void.
  */
 void qed_resc_free(struct qed_dev *cdev);
 
 /**
- * @brief qed_resc_alloc -
+ * qed_resc_alloc(): Alloc device resources.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: Int.
  */
 int qed_resc_alloc(struct qed_dev *cdev);
 
 /**
- * @brief qed_resc_setup -
+ * qed_resc_setup(): Setup device resources.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
+ *
+ * Return: Void.
  */
 void qed_resc_setup(struct qed_dev *cdev);
 
@@ -105,94 +113,97 @@ struct qed_hw_init_params {
 };
 
 /**
- * @brief qed_hw_init -
+ * qed_hw_init(): Init Qed hardware.
  *
- * @param cdev
- * @param p_params
+ * @cdev: Qed dev pointer.
+ * @p_params: Pointers to params.
  *
- * @return int
+ * Return: Int.
  */
 int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params);
 
 /**
- * @brief qed_hw_timers_stop_all - stop the timers HW block
+ * qed_hw_timers_stop_all(): Stop the timers HW block.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return void
+ * Return: void.
  */
 void qed_hw_timers_stop_all(struct qed_dev *cdev);
 
 /**
- * @brief qed_hw_stop -
+ * qed_hw_stop(): Stop Qed hardware.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: int.
  */
 int qed_hw_stop(struct qed_dev *cdev);
 
 /**
- * @brief qed_hw_stop_fastpath -should be called incase
- *		slowpath is still required for the device,
- *		but fastpath is not.
+ * qed_hw_stop_fastpath(): Should be called incase
+ *		           slowpath is still required for the device,
+ *		           but fastpath is not.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: Int.
  */
 int qed_hw_stop_fastpath(struct qed_dev *cdev);
 
 /**
- * @brief qed_hw_start_fastpath -restart fastpath traffic,
- *		only if hw_stop_fastpath was called
+ * qed_hw_start_fastpath(): Restart fastpath traffic,
+ *		            only if hw_stop_fastpath was called.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_hw_start_fastpath(struct qed_hwfn *p_hwfn);
 
 
 /**
- * @brief qed_hw_prepare -
+ * qed_hw_prepare(): Prepare Qed hardware.
  *
- * @param cdev
- * @param personality - personality to initialize
+ * @cdev: Qed dev pointer.
+ * @personality: Personality to initialize.
  *
- * @return int
+ * Return: Int.
  */
 int qed_hw_prepare(struct qed_dev *cdev,
 		   int personality);
 
 /**
- * @brief qed_hw_remove -
+ * qed_hw_remove(): Remove Qed hardware.
+ *
+ * @cdev: Qed dev pointer.
  *
- * @param cdev
+ * Return: Void.
  */
 void qed_hw_remove(struct qed_dev *cdev);
 
 /**
- * @brief qed_ptt_acquire - Allocate a PTT window
+ * qed_ptt_acquire(): Allocate a PTT window.
  *
- * Should be called at the entry point to the driver (at the beginning of an
- * exported function)
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: struct qed_ptt.
  *
- * @return struct qed_ptt
+ * Should be called at the entry point to the driver (at the beginning of an
+ * exported function).
  */
 struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_ptt_release - Release PTT Window
+ * qed_ptt_release(): Release PTT Window.
  *
- * Should be called at the end of a flow - at the end of the function that
- * acquired the PTT.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
+ * Return: Void.
  *
- * @param p_hwfn
- * @param p_ptt
+ * Should be called at the end of a flow - at the end of the function that
+ * acquired the PTT.
  */
 void qed_ptt_release(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt);
@@ -205,15 +216,17 @@ enum qed_dmae_address_type_t {
 };
 
 /**
- * @brief qed_dmae_host2grc - copy data from source addr to
- * dmae registers using the given ptt
+ * qed_dmae_host2grc(): Copy data from source addr to
+ *                      dmae registers using the given ptt.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @source_addr: Source address.
+ * @grc_addr: GRC address (dmae_data_offset).
+ * @size_in_dwords: Size.
+ * @p_params: (default parameters will be used in case of NULL).
  *
- * @param p_hwfn
- * @param p_ptt
- * @param source_addr
- * @param grc_addr (dmae_data_offset)
- * @param size_in_dwords
- * @param p_params (default parameters will be used in case of NULL)
+ * Return: Int.
  */
 int
 qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
@@ -224,29 +237,34 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
 		  struct qed_dmae_params *p_params);
 
  /**
- * @brief qed_dmae_grc2host - Read data from dmae data offset
- * to source address using the given ptt
+ * qed_dmae_grc2host(): Read data from dmae data offset
+ *                      to source address using the given ptt.
+ *
+ * @p_ptt: P_ptt.
+ * @grc_addr: GRC address (dmae_data_offset).
+ * @dest_addr: Destination Address.
+ * @size_in_dwords: Size.
+ * @p_params: (default parameters will be used in case of NULL).
  *
- * @param p_ptt
- * @param grc_addr (dmae_data_offset)
- * @param dest_addr
- * @param size_in_dwords
- * @param p_params (default parameters will be used in case of NULL)
+ * Return: Int.
  */
 int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		      u32 grc_addr, dma_addr_t dest_addr, u32 size_in_dwords,
 		      struct qed_dmae_params *p_params);
 
 /**
- * @brief qed_dmae_host2host - copy data from to source address
- * to a destination adress (for SRIOV) using the given ptt
+ * qed_dmae_host2host(): Copy data from to source address
+ *                       to a destination adrress (for SRIOV) using the given
+ *                       ptt.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param source_addr
- * @param dest_addr
- * @param size_in_dwords
- * @param p_params (default parameters will be used in case of NULL)
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @source_addr: Source address.
+ * @dest_addr: Destination address.
+ * @size_in_dwords: size.
+ * @p_params: (default parameters will be used in case of NULL).
+ *
+ * Return: Int.
  */
 int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
@@ -259,51 +277,51 @@ int qed_chain_alloc(struct qed_dev *cdev, struct qed_chain *chain,
 void qed_chain_free(struct qed_dev *cdev, struct qed_chain *chain);
 
 /**
- * @@brief qed_fw_l2_queue - Get absolute L2 queue ID
+ * qed_fw_l2_queue(): Get absolute L2 queue ID.
  *
- *  @param p_hwfn
- *  @param src_id - relative to p_hwfn
- *  @param dst_id - absolute per engine
+ * @p_hwfn: HW device data.
+ * @src_id: Relative to p_hwfn.
+ * @dst_id: Absolute per engine.
  *
- *  @return int
+ * Return: Int.
  */
 int qed_fw_l2_queue(struct qed_hwfn *p_hwfn,
 		    u16 src_id,
 		    u16 *dst_id);
 
 /**
- * @@brief qed_fw_vport - Get absolute vport ID
+ * qed_fw_vport(): Get absolute vport ID.
  *
- *  @param p_hwfn
- *  @param src_id - relative to p_hwfn
- *  @param dst_id - absolute per engine
+ * @p_hwfn: HW device data.
+ * @src_id: Relative to p_hwfn.
+ * @dst_id: Absolute per engine.
  *
- *  @return int
+ * Return: Int.
  */
 int qed_fw_vport(struct qed_hwfn *p_hwfn,
 		 u8 src_id,
 		 u8 *dst_id);
 
 /**
- * @@brief qed_fw_rss_eng - Get absolute RSS engine ID
+ * qed_fw_rss_eng(): Get absolute RSS engine ID.
  *
- *  @param p_hwfn
- *  @param src_id - relative to p_hwfn
- *  @param dst_id - absolute per engine
+ * @p_hwfn: HW device data.
+ * @src_id: Relative to p_hwfn.
+ * @dst_id: Absolute per engine.
  *
- *  @return int
+ * Return: Int.
  */
 int qed_fw_rss_eng(struct qed_hwfn *p_hwfn,
 		   u8 src_id,
 		   u8 *dst_id);
 
 /**
- * @brief qed_llh_get_num_ppfid - Return the allocated number of LLH filter
- *	banks that are allocated to the PF.
+ * qed_llh_get_num_ppfid(): Return the allocated number of LLH filter
+ *	                    banks that are allocated to the PF.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return u8 - Number of LLH filter banks
+ * Return: u8 Number of LLH filter banks.
  */
 u8 qed_llh_get_num_ppfid(struct qed_dev *cdev);
 
@@ -314,45 +332,50 @@ enum qed_eng {
 };
 
 /**
- * @brief qed_llh_set_ppfid_affinity - Set the engine affinity for the given
- *	LLH filter bank.
+ * qed_llh_set_ppfid_affinity(): Set the engine affinity for the given
+ *	                         LLH filter bank.
  *
- * @param cdev
- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
- * @param eng
+ * @cdev: Qed dev pointer.
+ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
+ * @eng: Engine.
  *
- * @return int
+ * Return: Int.
  */
 int qed_llh_set_ppfid_affinity(struct qed_dev *cdev,
 			       u8 ppfid, enum qed_eng eng);
 
 /**
- * @brief qed_llh_set_roce_affinity - Set the RoCE engine affinity
+ * qed_llh_set_roce_affinity(): Set the RoCE engine affinity.
  *
- * @param cdev
- * @param eng
+ * @cdev: Qed dev pointer.
+ * @eng: Engine.
  *
- * @return int
+ * Return: Int.
  */
 int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng);
 
 /**
- * @brief qed_llh_add_mac_filter - Add a LLH MAC filter into the given filter
- *	bank.
+ * qed_llh_add_mac_filter(): Add a LLH MAC filter into the given filter
+ *	                     bank.
+ *
+ * @cdev: Qed dev pointer.
+ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
+ * @mac_addr: MAC to add.
  *
- * @param cdev
- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
- * @param mac_addr - MAC to add
+ * Return: Int.
  */
 int qed_llh_add_mac_filter(struct qed_dev *cdev,
 			   u8 ppfid, u8 mac_addr[ETH_ALEN]);
 
 /**
- * @brief qed_llh_remove_mac_filter - Remove a LLH MAC filter from the given
- *	filter bank.
+ * qed_llh_remove_mac_filter(): Remove a LLH MAC filter from the given
+ *	                        filter bank.
  *
- * @param p_ptt
- * @param p_filter - MAC to remove
+ * @cdev: Qed dev pointer.
+ * @ppfid: Ppfid.
+ * @mac_addr: MAC to remove
+ *
+ * Return: Void.
  */
 void qed_llh_remove_mac_filter(struct qed_dev *cdev,
 			       u8 ppfid, u8 mac_addr[ETH_ALEN]);
@@ -368,15 +391,16 @@ enum qed_llh_prot_filter_type_t {
 };
 
 /**
- * @brief qed_llh_add_protocol_filter - Add a LLH protocol filter into the
- *	given filter bank.
+ * qed_llh_add_protocol_filter(): Add a LLH protocol filter into the
+ *	                          given filter bank.
+ *
+ * @cdev: Qed dev pointer.
+ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
+ * @type: Type of filters and comparing.
+ * @source_port_or_eth_type: Source port or ethertype to add.
+ * @dest_port: Destination port to add.
  *
- * @param cdev
- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
- * @param type - type of filters and comparing
- * @param source_port_or_eth_type - source port or ethertype to add
- * @param dest_port - destination port to add
- * @param type - type of filters and comparing
+ * Return: Int.
  */
 int
 qed_llh_add_protocol_filter(struct qed_dev *cdev,
@@ -385,14 +409,14 @@ qed_llh_add_protocol_filter(struct qed_dev *cdev,
 			    u16 source_port_or_eth_type, u16 dest_port);
 
 /**
- * @brief qed_llh_remove_protocol_filter - Remove a LLH protocol filter from
- *	the given filter bank.
+ * qed_llh_remove_protocol_filter(): Remove a LLH protocol filter from
+ *	                             the given filter bank.
  *
- * @param cdev
- * @param ppfid - relative within the allocated ppfids ('0' is the default one).
- * @param type - type of filters and comparing
- * @param source_port_or_eth_type - source port or ethertype to add
- * @param dest_port - destination port to add
+ * @cdev: Qed dev pointer.
+ * @ppfid: Relative within the allocated ppfids ('0' is the default one).
+ * @type: Type of filters and comparing.
+ * @source_port_or_eth_type: Source port or ethertype to add.
+ * @dest_port: Destination port to add.
  */
 void
 qed_llh_remove_protocol_filter(struct qed_dev *cdev,
@@ -401,31 +425,31 @@ qed_llh_remove_protocol_filter(struct qed_dev *cdev,
 			       u16 source_port_or_eth_type, u16 dest_port);
 
 /**
- * *@brief Cleanup of previous driver remains prior to load
+ * qed_final_cleanup(): Cleanup of previous driver remains prior to load.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param id - For PF, engine-relative. For VF, PF-relative.
- * @param is_vf - true iff cleanup is made for a VF.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @id: For PF, engine-relative. For VF, PF-relative.
+ * @is_vf: True iff cleanup is made for a VF.
  *
- * @return int
+ * Return: Int.
  */
 int qed_final_cleanup(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt, u16 id, bool is_vf);
 
 /**
- * @brief qed_get_queue_coalesce - Retrieve coalesce value for a given queue.
+ * qed_get_queue_coalesce(): Retrieve coalesce value for a given queue.
  *
- * @param p_hwfn
- * @param p_coal - store coalesce value read from the hardware.
- * @param p_handle
+ * @p_hwfn: HW device data.
+ * @coal: Store coalesce value read from the hardware.
+ * @handle: P_handle.
  *
- * @return int
+ * Return: Int.
  **/
 int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
 
 /**
- * @brief qed_set_queue_coalesce - Configure coalesce parameters for Rx and
+ * qed_set_queue_coalesce(): Configure coalesce parameters for Rx and
  *    Tx queue. The fact that we can configure coalescing to up to 511, but on
  *    varying accuracy [the bigger the value the less accurate] up to a mistake
  *    of 3usec for the highest values.
@@ -433,37 +457,38 @@ int qed_get_queue_coalesce(struct qed_hwfn *p_hwfn, u16 *coal, void *handle);
  *    should be in same range [i.e., either 0-0x7f, 0x80-0xff or 0x100-0x1ff]
  *    otherwise configuration would break.
  *
+ * @rx_coal: Rx Coalesce value in micro seconds.
+ * @tx_coal: TX Coalesce value in micro seconds.
+ * @p_handle: P_handle.
  *
- * @param rx_coal - Rx Coalesce value in micro seconds.
- * @param tx_coal - TX Coalesce value in micro seconds.
- * @param p_handle
- *
- * @return int
+ * Return: Int.
  **/
 int
 qed_set_queue_coalesce(u16 rx_coal, u16 tx_coal, void *p_handle);
 
 /**
- * @brief qed_pglueb_set_pfid_enable - Enable or disable PCI BUS MASTER
+ * qed_pglueb_set_pfid_enable(): Enable or disable PCI BUS MASTER.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param b_enable - true/false
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @b_enable: True/False.
  *
- * @return int
+ * Return: Int.
  */
 int qed_pglueb_set_pfid_enable(struct qed_hwfn *p_hwfn,
 			       struct qed_ptt *p_ptt, bool b_enable);
 
 /**
- * @brief db_recovery_add - add doorbell information to the doorbell
- * recovery mechanism.
+ * qed_db_recovery_add(): add doorbell information to the doorbell
+ *                    recovery mechanism.
  *
- * @param cdev
- * @param db_addr - doorbell address
- * @param db_data - address of where db_data is stored
- * @param db_width - doorbell is 32b pr 64b
- * @param db_space - doorbell recovery addresses are user or kernel space
+ * @cdev: Qed dev pointer.
+ * @db_addr: Doorbell address.
+ * @db_data: Address of where db_data is stored.
+ * @db_width: Doorbell is 32b pr 64b.
+ * @db_space: Doorbell recovery addresses are user or kernel space.
+ *
+ * Return: Int.
  */
 int qed_db_recovery_add(struct qed_dev *cdev,
 			void __iomem *db_addr,
@@ -472,13 +497,15 @@ int qed_db_recovery_add(struct qed_dev *cdev,
 			enum qed_db_rec_space db_space);
 
 /**
- * @brief db_recovery_del - remove doorbell information from the doorbell
+ * qed_db_recovery_del() - remove doorbell information from the doorbell
  * recovery mechanism. db_data serves as key (db_addr is not unique).
  *
- * @param cdev
- * @param db_addr - doorbell address
- * @param db_data - address where db_data is stored. Serves as key for the
+ * @cdev: Qed dev pointer.
+ * @db_addr: doorbell address.
+ * @db_data: address where db_data is stored. Serves as key for the
  *                  entry to delete.
+ *
+ * Return: Int.
  */
 int qed_db_recovery_del(struct qed_dev *cdev,
 			void __iomem *db_addr, void *db_data);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index fb1baa2da2d0..744c82a10875 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -3012,96 +3012,102 @@ struct iro {
 /***************************** Public Functions *******************************/
 
 /**
- * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug
- *	arrays.
+ * qed_dbg_set_bin_ptr(): Sets a pointer to the binary data with debug
+ *                        arrays.
  *
- * @param p_hwfn -	    HW device data
- * @param bin_ptr - a pointer to the binary data with debug arrays.
+ * @p_hwfn: HW device data.
+ * @bin_ptr: A pointer to the binary data with debug arrays.
+ *
+ * Return: enum dbg status.
  */
 enum dbg_status qed_dbg_set_bin_ptr(struct qed_hwfn *p_hwfn,
 				    const u8 * const bin_ptr);
 
 /**
- * @brief qed_read_regs - Reads registers into a buffer (using GRC).
+ * qed_read_regs(): Reads registers into a buffer (using GRC).
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf: Destination buffer.
+ * @addr: Source GRC address in dwords.
+ * @len: Number of registers to read.
  *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf - Destination buffer.
- * @param addr - Source GRC address in dwords.
- * @param len - Number of registers to read.
+ * Return: Void.
  */
 void qed_read_regs(struct qed_hwfn *p_hwfn,
 		   struct qed_ptt *p_ptt, u32 *buf, u32 addr, u32 len);
 
 /**
- * @brief qed_read_fw_info - Reads FW info from the chip.
+ * qed_read_fw_info(): Reads FW info from the chip.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @fw_info: (Out) a pointer to write the FW info into.
+ *
+ * Return: True if the FW info was read successfully from one of the Storms,
+ * or false if all Storms are in reset.
  *
  * The FW info contains FW-related information, such as the FW version,
  * FW image (main/L2B/kuku), FW timestamp, etc.
  * The FW info is read from the internal RAM of the first Storm that is not in
  * reset.
- *
- * @param p_hwfn -	    HW device data
- * @param p_ptt -	    Ptt window used for writing the registers.
- * @param fw_info -	Out: a pointer to write the FW info into.
- *
- * @return true if the FW info was read successfully from one of the Storms,
- * or false if all Storms are in reset.
  */
 bool qed_read_fw_info(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt, struct fw_info *fw_info);
 /**
- * @brief qed_dbg_grc_config - Sets the value of a GRC parameter.
+ * qed_dbg_grc_config(): Sets the value of a GRC parameter.
  *
- * @param p_hwfn -	HW device data
- * @param grc_param -	GRC parameter
- * @param val -		Value to set.
+ * @p_hwfn: HW device data.
+ * @grc_param: GRC parameter.
+ * @val: Value to set.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- grc_param is invalid
- *	- val is outside the allowed boundaries
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set.
+ *         - Grc_param is invalid.
+ *         - Val is outside the allowed boundaries.
  */
 enum dbg_status qed_dbg_grc_config(struct qed_hwfn *p_hwfn,
 				   enum dbg_grc_params grc_param, u32 val);
 
 /**
- * @brief qed_dbg_grc_set_params_default - Reverts all GRC parameters to their
- *	default value.
+ * qed_dbg_grc_set_params_default(): Reverts all GRC parameters to their
+ *                                   default value.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn		- HW device data
+ * Return: Void.
  */
 void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn);
 /**
- * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for
- *	GRC Dump.
+ * qed_dbg_grc_get_dump_buf_size(): Returns the required buffer size for
+ *                                  GRC Dump.
  *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf_size - OUT: required buffer size (in dwords) for the GRC Dump
- *	data.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf_size: (OUT) required buffer size (in dwords) for the GRC Dump
+ *             data.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 					      struct qed_ptt *p_ptt,
 					      u32 *buf_size);
 
 /**
- * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param dump_buf - Pointer to write the collected GRC data into.
- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
- * @param num_dumped_dwords - OUT: number of dumped dwords.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the specified dump buffer is too small
- * Otherwise, returns ok.
+ * qed_dbg_grc_dump(): Dumps GRC data into the specified buffer.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dump_buf: Pointer to write the collected GRC data into.
+ * @buf_size_in_dwords:Size of the specified buffer in dwords.
+ * @num_dumped_dwords: (OUT) number of dumped dwords.
+ *
+ * Return: Error if one of the following holds:
+ *        - The version wasn't set.
+ *        - The specified dump buffer is too small.
+ *          Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt,
@@ -3110,36 +3116,36 @@ enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn,
 				 u32 *num_dumped_dwords);
 
 /**
- * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size
- *	for idle check results.
+ * qed_dbg_idle_chk_get_dump_buf_size(): Returns the required buffer size
+ *                                       for idle check results.
  *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf_size - OUT: required buffer size (in dwords) for the idle check
- *	data.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf_size: (OUT) required buffer size (in dwords) for the idle check
+ *             data.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * return: Error if one of the following holds:
+ *        - The version wasn't set.
+ *          Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *buf_size);
 
 /**
- * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results
- *	into the specified buffer.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param dump_buf - Pointer to write the idle check data into.
- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
- * @param num_dumped_dwords - OUT: number of dumped dwords.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the specified buffer is too small
- * Otherwise, returns ok.
+ * qed_dbg_idle_chk_dump: Performs idle check and writes the results
+ *                        into the specified buffer.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dump_buf: Pointer to write the idle check data into.
+ * @buf_size_in_dwords: Size of the specified buffer in dwords.
+ * @num_dumped_dwords: (OUT) number of dumped dwords.
+ *
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set.
+ *         - The specified buffer is too small.
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
 				      struct qed_ptt *p_ptt,
@@ -3148,42 +3154,42 @@ enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn,
 				      u32 *num_dumped_dwords);
 
 /**
- * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size
- *	for mcp trace results.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf_size - OUT: required buffer size (in dwords) for mcp trace data.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the trace data in MCP scratchpad contain an invalid signature
- *	- the bundle ID in NVRAM is invalid
- *	- the trace meta data cannot be found (in NVRAM or image file)
- * Otherwise, returns ok.
+ * qed_dbg_mcp_trace_get_dump_buf_size(): Returns the required buffer size
+ *                                        for mcp trace results.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf_size: (OUT) Required buffer size (in dwords) for mcp trace data.
+ *
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set.
+ *         - The trace data in MCP scratchpad contain an invalid signature.
+ *         - The bundle ID in NVRAM is invalid.
+ *         - The trace meta data cannot be found (in NVRAM or image file).
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						    struct qed_ptt *p_ptt,
 						    u32 *buf_size);
 
 /**
- * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results
- *	into the specified buffer.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param dump_buf - Pointer to write the mcp trace data into.
- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
- * @param num_dumped_dwords - OUT: number of dumped dwords.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the specified buffer is too small
- *	- the trace data in MCP scratchpad contain an invalid signature
- *	- the bundle ID in NVRAM is invalid
- *	- the trace meta data cannot be found (in NVRAM or image file)
- *	- the trace meta data cannot be read (from NVRAM or image file)
- * Otherwise, returns ok.
+ * qed_dbg_mcp_trace_dump(): Performs mcp trace and writes the results
+ *                           into the specified buffer.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dump_buf: Pointer to write the mcp trace data into.
+ * @buf_size_in_dwords: Size of the specified buffer in dwords.
+ * @num_dumped_dwords: (OUT) number of dumped dwords.
+ *
+ * Return: Error if one of the following holds:
+ *        - The version wasn't set.
+ *        - The specified buffer is too small.
+ *        - The trace data in MCP scratchpad contain an invalid signature.
+ *        - The bundle ID in NVRAM is invalid.
+ *        - The trace meta data cannot be found (in NVRAM or image file).
+ *        - The trace meta data cannot be read (from NVRAM or image file).
+ *          Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 				       struct qed_ptt *p_ptt,
@@ -3192,36 +3198,36 @@ enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 				       u32 *num_dumped_dwords);
 
 /**
- * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size
- *	for grc trace fifo results.
+ * qed_dbg_reg_fifo_get_dump_buf_size(): Returns the required buffer size
+ *                                       for grc trace fifo results.
  *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf_size - OUT: required buffer size (in dwords) for reg fifo data.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf_size: (OUT) Required buffer size (in dwords) for reg fifo data.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *buf_size);
 
 /**
- * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into
- *	the specified buffer.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param dump_buf - Pointer to write the reg fifo data into.
- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
- * @param num_dumped_dwords - OUT: number of dumped dwords.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the specified buffer is too small
- *	- DMAE transaction failed
- * Otherwise, returns ok.
+ * qed_dbg_reg_fifo_dump(): Reads the reg fifo and writes the results into
+ *                          the specified buffer.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dump_buf: Pointer to write the reg fifo data into.
+ * @buf_size_in_dwords: Size of the specified buffer in dwords.
+ * @num_dumped_dwords: (OUT) number of dumped dwords.
+ *
+ * Return: Error if one of the following holds:
+ *        - The version wasn't set.
+ *        - The specified buffer is too small.
+ *        - DMAE transaction failed.
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 				      struct qed_ptt *p_ptt,
@@ -3230,37 +3236,37 @@ enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn,
 				      u32 *num_dumped_dwords);
 
 /**
- * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size
- *	for the IGU fifo results.
+ * qed_dbg_igu_fifo_get_dump_buf_size(): Returns the required buffer size
+ *                                       for the IGU fifo results.
  *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf_size - OUT: required buffer size (in dwords) for the IGU fifo
- *	data.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf_size: (OUT) Required buffer size (in dwords) for the IGU fifo
+ *            data.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set.
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						   struct qed_ptt *p_ptt,
 						   u32 *buf_size);
 
 /**
- * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into
- *	the specified buffer.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param dump_buf - Pointer to write the IGU fifo data into.
- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
- * @param num_dumped_dwords - OUT: number of dumped dwords.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the specified buffer is too small
- *	- DMAE transaction failed
- * Otherwise, returns ok.
+ * qed_dbg_igu_fifo_dump(): Reads the IGU fifo and writes the results into
+ *                          the specified buffer.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dump_buf: Pointer to write the IGU fifo data into.
+ * @buf_size_in_dwords: Size of the specified buffer in dwords.
+ * @num_dumped_dwords: (OUT) number of dumped dwords.
+ *
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set
+ *         - The specified buffer is too small
+ *         - DMAE transaction failed
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 				      struct qed_ptt *p_ptt,
@@ -3269,37 +3275,37 @@ enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn,
 				      u32 *num_dumped_dwords);
 
 /**
- * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required
- *	buffer size for protection override window results.
+ * qed_dbg_protection_override_get_dump_buf_size(): Returns the required
+ *        buffer size for protection override window results.
  *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf_size - OUT: required buffer size (in dwords) for protection
- *	override data.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf_size: (OUT) Required buffer size (in dwords) for protection
+ *             override data.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set
+ *           Otherwise, returns ok.
  */
 enum dbg_status
 qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 					      struct qed_ptt *p_ptt,
 					      u32 *buf_size);
 /**
- * @brief qed_dbg_protection_override_dump - Reads protection override window
- *	entries and writes the results into the specified buffer.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param dump_buf - Pointer to write the protection override data into.
- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
- * @param num_dumped_dwords - OUT: number of dumped dwords.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the specified buffer is too small
- *	- DMAE transaction failed
- * Otherwise, returns ok.
+ * qed_dbg_protection_override_dump(): Reads protection override window
+ *       entries and writes the results into the specified buffer.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dump_buf: Pointer to write the protection override data into.
+ * @buf_size_in_dwords: Size of the specified buffer in dwords.
+ * @num_dumped_dwords: (OUT) number of dumped dwords.
+ *
+ * @return: Error if one of the following holds:
+ *          - The version wasn't set.
+ *          - The specified buffer is too small.
+ *          - DMAE transaction failed.
+ *             Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
 						 struct qed_ptt *p_ptt,
@@ -3307,34 +3313,34 @@ enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn,
 						 u32 buf_size_in_dwords,
 						 u32 *num_dumped_dwords);
 /**
- * @brief qed_dbg_fw_asserts_get_dump_buf_size - Returns the required buffer
- *	size for FW Asserts results.
+ * qed_dbg_fw_asserts_get_dump_buf_size(): Returns the required buffer
+ *                                         size for FW Asserts results.
  *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param buf_size - OUT: required buffer size (in dwords) for FW Asserts data.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @buf_size: (OUT) Required buffer size (in dwords) for FW Asserts data.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set.
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn,
 						     struct qed_ptt *p_ptt,
 						     u32 *buf_size);
 /**
- * @brief qed_dbg_fw_asserts_dump - Reads the FW Asserts and writes the results
- *	into the specified buffer.
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - Ptt window used for writing the registers.
- * @param dump_buf - Pointer to write the FW Asserts data into.
- * @param buf_size_in_dwords - Size of the specified buffer in dwords.
- * @param num_dumped_dwords - OUT: number of dumped dwords.
- *
- * @return error if one of the following holds:
- *	- the version wasn't set
- *	- the specified buffer is too small
- * Otherwise, returns ok.
+ * qed_dbg_fw_asserts_dump(): Reads the FW Asserts and writes the results
+ *                            into the specified buffer.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dump_buf: Pointer to write the FW Asserts data into.
+ * @buf_size_in_dwords: Size of the specified buffer in dwords.
+ * @num_dumped_dwords: (OUT) number of dumped dwords.
+ *
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set.
+ *         - The specified buffer is too small.
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 					struct qed_ptt *p_ptt,
@@ -3343,19 +3349,19 @@ enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn,
 					u32 *num_dumped_dwords);
 
 /**
- * @brief qed_dbg_read_attn - Reads the attention registers of the specified
+ * qed_dbg_read_attn(): Reads the attention registers of the specified
  * block and type, and writes the results into the specified buffer.
  *
- * @param p_hwfn -	 HW device data
- * @param p_ptt -	 Ptt window used for writing the registers.
- * @param block -	 Block ID.
- * @param attn_type -	 Attention type.
- * @param clear_status - Indicates if the attention status should be cleared.
- * @param results -	 OUT: Pointer to write the read results into
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @block: Block ID.
+ * @attn_type: Attention type.
+ * @clear_status: Indicates if the attention status should be cleared.
+ * @results:  (OUT) Pointer to write the read results into.
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set
+ *          Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn,
 				  struct qed_ptt *p_ptt,
@@ -3365,15 +3371,15 @@ enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn,
 				  struct dbg_attn_block_result *results);
 
 /**
- * @brief qed_dbg_print_attn - Prints attention registers values in the
- *	specified results struct.
+ * qed_dbg_print_attn(): Prints attention registers values in the
+ *                       specified results struct.
  *
- * @param p_hwfn
- * @param results - Pointer to the attention read results
+ * @p_hwfn: HW device data.
+ * @results: Pointer to the attention read results
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *        - The version wasn't set
+ *          Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
 				   struct dbg_attn_block_result *results);
@@ -3420,60 +3426,64 @@ struct dbg_tools_user_data {
 /***************************** Public Functions *******************************/
 
 /**
- * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with
- *	debug arrays.
+ * qed_dbg_user_set_bin_ptr(): Sets a pointer to the binary data with
+ *                             debug arrays.
  *
- * @param p_hwfn - HW device data
- * @param bin_ptr - a pointer to the binary data with debug arrays.
+ * @p_hwfn: HW device data.
+ * @bin_ptr: a pointer to the binary data with debug arrays.
+ *
+ * Return: dbg_status.
  */
 enum dbg_status qed_dbg_user_set_bin_ptr(struct qed_hwfn *p_hwfn,
 					 const u8 * const bin_ptr);
 
 /**
- * @brief qed_dbg_alloc_user_data - Allocates user debug data.
+ * qed_dbg_alloc_user_data(): Allocates user debug data.
+ *
+ * @p_hwfn: HW device data.
+ * @user_data_ptr: (OUT) a pointer to the allocated memory.
  *
- * @param p_hwfn -		 HW device data
- * @param user_data_ptr - OUT: a pointer to the allocated memory.
+ * Return: dbg_status.
  */
 enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn,
 					void **user_data_ptr);
 
 /**
- * @brief qed_dbg_get_status_str - Returns a string for the specified status.
+ * qed_dbg_get_status_str(): Returns a string for the specified status.
  *
- * @param status - a debug status code.
+ * @status: A debug status code.
  *
- * @return a string for the specified status
+ * Return: A string for the specified status.
  */
 const char *qed_dbg_get_status_str(enum dbg_status status);
 
 /**
- * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size
- *	for idle check results (in bytes).
+ * qed_get_idle_chk_results_buf_size(): Returns the required buffer size
+ *                                      for idle check results (in bytes).
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - idle check dump buffer.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
- *	results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: idle check dump buffer.
+ * @num_dumped_dwords: number of dwords that were dumped.
+ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
+ *                    results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *dump_buf,
 						  u32  num_dumped_dwords,
 						  u32 *results_buf_size);
 /**
- * @brief qed_print_idle_chk_results - Prints idle check results
+ * qed_print_idle_chk_results(): Prints idle check results
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - idle check dump buffer.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf - buffer for printing the idle check results.
- * @param num_errors - OUT: number of errors found in idle check.
- * @param num_warnings - OUT: number of warnings found in idle check.
+ * @p_hwfn: HW device data.
+ * @dump_buf: idle check dump buffer.
+ * @num_dumped_dwords: number of dwords that were dumped.
+ * @results_buf: buffer for printing the idle check results.
+ * @num_errors: (OUT) number of errors found in idle check.
+ * @num_warnings: (OUT) number of warnings found in idle check.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
 					   u32 *dump_buf,
@@ -3483,28 +3493,30 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
 					   u32 *num_warnings);
 
 /**
- * @brief qed_dbg_mcp_trace_set_meta_data - Sets the MCP Trace meta data.
+ * qed_dbg_mcp_trace_set_meta_data(): Sets the MCP Trace meta data.
+ *
+ * @p_hwfn: HW device data.
+ * @meta_buf: Meta buffer.
+ *
+ * Return: Void.
  *
  * Needed in case the MCP Trace dump doesn't contain the meta data (e.g. due to
  * no NVRAM access).
- *
- * @param data - pointer to MCP Trace meta data
- * @param size - size of MCP Trace meta data in dwords
  */
 void qed_dbg_mcp_trace_set_meta_data(struct qed_hwfn *p_hwfn,
 				     const u32 *meta_buf);
 
 /**
- * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size
- *	for MCP Trace results (in bytes).
+ * qed_get_mcp_trace_results_buf_size(): Returns the required buffer size
+ *                                       for MCP Trace results (in bytes).
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - MCP Trace dump buffer.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
- *	results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: MCP Trace dump buffer.
+ * @num_dumped_dwords: number of dwords that were dumped.
+ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
+ *                    results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Rrror if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
 						   u32 *dump_buf,
@@ -3512,14 +3524,14 @@ enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
 						   u32 *results_buf_size);
 
 /**
- * @brief qed_print_mcp_trace_results - Prints MCP Trace results
+ * qed_print_mcp_trace_results(): Prints MCP Trace results
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - mcp trace dump buffer, starting from the header.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf - buffer for printing the mcp trace results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: MCP trace dump buffer, starting from the header.
+ * @num_dumped_dwords: Member of dwords that were dumped.
+ * @results_buf: Buffer for printing the mcp trace results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
 					    u32 *dump_buf,
@@ -3527,30 +3539,30 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
 					    char *results_buf);
 
 /**
- * @brief qed_print_mcp_trace_results_cont - Prints MCP Trace results, and
+ * qed_print_mcp_trace_results_cont(): Prints MCP Trace results, and
  * keeps the MCP trace meta data allocated, to support continuous MCP Trace
  * parsing. After the continuous parsing ends, mcp_trace_free_meta_data should
  * be called to free the meta data.
  *
- * @param p_hwfn -	      HW device data
- * @param dump_buf -	      mcp trace dump buffer, starting from the header.
- * @param results_buf -	      buffer for printing the mcp trace results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: MVP trace dump buffer, starting from the header.
+ * @results_buf: Buffer for printing the mcp trace results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
 						 u32 *dump_buf,
 						 char *results_buf);
 
 /**
- * @brief print_mcp_trace_line - Prints MCP Trace results for a single line
+ * qed_print_mcp_trace_line(): Prints MCP Trace results for a single line
  *
- * @param p_hwfn -	      HW device data
- * @param dump_buf -	      mcp trace dump buffer, starting from the header.
- * @param num_dumped_bytes -  number of bytes that were dumped.
- * @param results_buf -	      buffer for printing the mcp trace results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: MCP trace dump buffer, starting from the header.
+ * @num_dumped_bytes: Number of bytes that were dumped.
+ * @results_buf: Buffer for printing the mcp trace results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
 					 u8 *dump_buf,
@@ -3558,24 +3570,26 @@ enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
 					 char *results_buf);
 
 /**
- * @brief mcp_trace_free_meta_data - Frees the MCP Trace meta data.
+ * qed_mcp_trace_free_meta_data(): Frees the MCP Trace meta data.
  * Should be called after continuous MCP Trace parsing.
  *
- * @param p_hwfn - HW device data
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  */
 void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
- *	for reg_fifo results (in bytes).
+ * qed_get_reg_fifo_results_buf_size(): Returns the required buffer size
+ *                                      for reg_fifo results (in bytes).
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - reg fifo dump buffer.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
- *	results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: Reg fifo dump buffer.
+ * @num_dumped_dwords: Number of dwords that were dumped.
+ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
+ *                     results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *dump_buf,
@@ -3583,14 +3597,14 @@ enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *results_buf_size);
 
 /**
- * @brief qed_print_reg_fifo_results - Prints reg fifo results
+ * qed_print_reg_fifo_results(): Prints reg fifo results.
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - reg fifo dump buffer, starting from the header.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf - buffer for printing the reg fifo results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: Reg fifo dump buffer, starting from the header.
+ * @num_dumped_dwords: Number of dwords that were dumped.
+ * @results_buf: Buffer for printing the reg fifo results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
 					   u32 *dump_buf,
@@ -3598,16 +3612,16 @@ enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn,
 					   char *results_buf);
 
 /**
- * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size
- *	for igu_fifo results (in bytes).
+ * qed_get_igu_fifo_results_buf_size(): Returns the required buffer size
+ *                                      for igu_fifo results (in bytes).
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - IGU fifo dump buffer.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
- *	results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: IGU fifo dump buffer.
+ * @num_dumped_dwords: number of dwords that were dumped.
+ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
+ *                    results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *dump_buf,
@@ -3615,14 +3629,14 @@ enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
 						  u32 *results_buf_size);
 
 /**
- * @brief qed_print_igu_fifo_results - Prints IGU fifo results
+ * qed_print_igu_fifo_results(): Prints IGU fifo results
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - IGU fifo dump buffer, starting from the header.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf - buffer for printing the IGU fifo results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: IGU fifo dump buffer, starting from the header.
+ * @num_dumped_dwords: Number of dwords that were dumped.
+ * @results_buf: Buffer for printing the IGU fifo results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
 					   u32 *dump_buf,
@@ -3630,16 +3644,16 @@ enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn,
 					   char *results_buf);
 
 /**
- * @brief qed_get_protection_override_results_buf_size - Returns the required
- *	buffer size for protection override results (in bytes).
+ * qed_get_protection_override_results_buf_size(): Returns the required
+ *         buffer size for protection override results (in bytes).
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - protection override dump buffer.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
- *	results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: Protection override dump buffer.
+ * @num_dumped_dwords: Number of dwords that were dumped.
+ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
+ *                    results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status
 qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
@@ -3648,15 +3662,15 @@ qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn,
 					     u32 *results_buf_size);
 
 /**
- * @brief qed_print_protection_override_results - Prints protection override
- *	results.
+ * qed_print_protection_override_results(): Prints protection override
+ *                                          results.
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - protection override dump buffer, starting from the header.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf - buffer for printing the reg fifo results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: Protection override dump buffer, starting from the header.
+ * @num_dumped_dwords: Number of dwords that were dumped.
+ * @results_buf: Buffer for printing the reg fifo results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
 						      u32 *dump_buf,
@@ -3664,16 +3678,16 @@ enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn,
 						      char *results_buf);
 
 /**
- * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size
- *	for FW Asserts results (in bytes).
+ * qed_get_fw_asserts_results_buf_size(): Returns the required buffer size
+ *                                        for FW Asserts results (in bytes).
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - FW Asserts dump buffer.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed
- *	results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: FW Asserts dump buffer.
+ * @num_dumped_dwords: number of dwords that were dumped.
+ * @results_buf_size: (OUT) required buffer size (in bytes) for the parsed
+ *                    results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
 						    u32 *dump_buf,
@@ -3681,14 +3695,14 @@ enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn,
 						    u32 *results_buf_size);
 
 /**
- * @brief qed_print_fw_asserts_results - Prints FW Asserts results
+ * qed_print_fw_asserts_results(): Prints FW Asserts results.
  *
- * @param p_hwfn - HW device data
- * @param dump_buf - FW Asserts dump buffer, starting from the header.
- * @param num_dumped_dwords - number of dwords that were dumped.
- * @param results_buf - buffer for printing the FW Asserts results.
+ * @p_hwfn: HW device data.
+ * @dump_buf: FW Asserts dump buffer, starting from the header.
+ * @num_dumped_dwords: number of dwords that were dumped.
+ * @results_buf: buffer for printing the FW Asserts results.
  *
- * @return error if the parsing fails, ok otherwise.
+ * Return: Error if the parsing fails, ok otherwise.
  */
 enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
 					     u32 *dump_buf,
@@ -3696,15 +3710,15 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn,
 					     char *results_buf);
 
 /**
- * @brief qed_dbg_parse_attn - Parses and prints attention registers values in
- * the specified results struct.
+ * qed_dbg_parse_attn(): Parses and prints attention registers values in
+ *                      the specified results struct.
  *
- * @param p_hwfn -  HW device data
- * @param results - Pointer to the attention read results
+ * @p_hwfn: HW device data.
+ * @results: Pointer to the attention read results
  *
- * @return error if one of the following holds:
- *	- the version wasn't set
- * Otherwise, returns ok.
+ * Return: Error if one of the following holds:
+ *         - The version wasn't set.
+ *           Otherwise, returns ok.
  */
 enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn,
 				   struct dbg_attn_block_result *results);
@@ -3746,18 +3760,18 @@ enum dbg_status qed_dbg_parse_attn(struct qed_hwfn *p_hwfn,
 #define GTT_BAR0_MAP_REG_PSDM_RAM	0x01a000UL
 
 /**
- * @brief qed_qm_pf_mem_size - prepare QM ILT sizes
+ * qed_qm_pf_mem_size(): Prepare QM ILT sizes.
  *
- * Returns the required host memory size in 4KB units.
- * Must be called before all QM init HSI functions.
+ * @num_pf_cids: Number of connections used by this PF.
+ * @num_vf_cids: Number of connections used by VFs of this PF.
+ * @num_tids: Number of tasks used by this PF.
+ * @num_pf_pqs: Number of PQs used by this PF.
+ * @num_vf_pqs: Number of PQs used by VFs of this PF.
  *
- * @param num_pf_cids - number of connections used by this PF
- * @param num_vf_cids - number of connections used by VFs of this PF
- * @param num_tids - number of tasks used by this PF
- * @param num_pf_pqs - number of PQs used by this PF
- * @param num_vf_pqs - number of PQs used by VFs of this PF
+ * Return: The required host memory size in 4KB units.
  *
- * @return The required host memory size in 4KB units.
+ * Returns the required host memory size in 4KB units.
+ * Must be called before all QM init HSI functions.
  */
 u32 qed_qm_pf_mem_size(u32 num_pf_cids,
 		       u32 num_vf_cids,
@@ -3800,74 +3814,74 @@ int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn,
 	struct qed_qm_pf_rt_init_params *p_params);
 
 /**
- * @brief qed_init_pf_wfq - Initializes the WFQ weight of the specified PF
+ * qed_init_pf_wfq(): Initializes the WFQ weight of the specified PF.
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers
- * @param pf_id - PF ID
- * @param pf_wfq - WFQ weight. Must be non-zero.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers
+ * @pf_id: PF ID
+ * @pf_wfq: WFQ weight. Must be non-zero.
  *
- * @return 0 on success, -1 on error.
+ * Return: 0 on success, -1 on error.
  */
 int qed_init_pf_wfq(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, u8 pf_id, u16 pf_wfq);
 
 /**
- * @brief qed_init_pf_rl - Initializes the rate limit of the specified PF
+ * qed_init_pf_rl(): Initializes the rate limit of the specified PF
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers
- * @param pf_id - PF ID
- * @param pf_rl - rate limit in Mb/sec units
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @pf_id: PF ID.
+ * @pf_rl: rate limit in Mb/sec units
  *
- * @return 0 on success, -1 on error.
+ * Return: 0 on success, -1 on error.
  */
 int qed_init_pf_rl(struct qed_hwfn *p_hwfn,
 		   struct qed_ptt *p_ptt, u8 pf_id, u32 pf_rl);
 
 /**
- * @brief qed_init_vport_wfq Initializes the WFQ weight of the specified VPORT
+ * qed_init_vport_wfq(): Initializes the WFQ weight of the specified VPORT
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers
- * @param first_tx_pq_id- An array containing the first Tx PQ ID associated
- *	  with the VPORT for each TC. This array is filled by
- *	  qed_qm_pf_rt_init
- * @param vport_wfq - WFQ weight. Must be non-zero.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers
+ * @first_tx_pq_id: An array containing the first Tx PQ ID associated
+ *                  with the VPORT for each TC. This array is filled by
+ *                  qed_qm_pf_rt_init
+ * @wfq: WFQ weight. Must be non-zero.
  *
- * @return 0 on success, -1 on error.
+ * Return: 0 on success, -1 on error.
  */
 int qed_init_vport_wfq(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
 		       u16 first_tx_pq_id[NUM_OF_TCS], u16 wfq);
 
 /**
- * @brief qed_init_global_rl - Initializes the rate limit of the specified
- * rate limiter
+ * qed_init_global_rl():  Initializes the rate limit of the specified
+ * rate limiter.
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers
- * @param rl_id - RL ID
- * @param rate_limit - rate limit in Mb/sec units
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @rl_id: RL ID.
+ * @rate_limit: Rate limit in Mb/sec units
  *
- * @return 0 on success, -1 on error.
+ * Return: 0 on success, -1 on error.
  */
 int qed_init_global_rl(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
 		       u16 rl_id, u32 rate_limit);
 
 /**
- * @brief qed_send_qm_stop_cmd  Sends a stop command to the QM
+ * qed_send_qm_stop_cmd(): Sends a stop command to the QM.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param is_release_cmd - true for release, false for stop.
- * @param is_tx_pq - true for Tx PQs, false for Other PQs.
- * @param start_pq - first PQ ID to stop
- * @param num_pqs - Number of PQs to stop, starting from start_pq.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @is_release_cmd: true for release, false for stop.
+ * @is_tx_pq: true for Tx PQs, false for Other PQs.
+ * @start_pq: first PQ ID to stop
+ * @num_pqs: Number of PQs to stop, starting from start_pq.
  *
- * @return bool, true if successful, false if timeout occurred while waiting for
- *	QM command done.
+ * Return: Bool, true if successful, false if timeout occurred while waiting
+ *         for QM command done.
  */
 bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt,
@@ -3875,53 +3889,64 @@ bool qed_send_qm_stop_cmd(struct qed_hwfn *p_hwfn,
 			  bool is_tx_pq, u16 start_pq, u16 num_pqs);
 
 /**
- * @brief qed_set_vxlan_dest_port - initializes vxlan tunnel destination udp port
+ * qed_set_vxlan_dest_port(): Initializes vxlan tunnel destination udp port.
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers.
- * @param dest_port - vxlan destination udp port.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dest_port: vxlan destination udp port.
+ *
+ * Return: Void.
  */
 void qed_set_vxlan_dest_port(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt, u16 dest_port);
 
 /**
- * @brief qed_set_vxlan_enable - enable or disable VXLAN tunnel in HW
+ * qed_set_vxlan_enable(): Enable or disable VXLAN tunnel in HW.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @vxlan_enable: vxlan enable flag.
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers.
- * @param vxlan_enable - vxlan enable flag.
+ * Return: Void.
  */
 void qed_set_vxlan_enable(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, bool vxlan_enable);
 
 /**
- * @brief qed_set_gre_enable - enable or disable GRE tunnel in HW
+ * qed_set_gre_enable(): Enable or disable GRE tunnel in HW.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @eth_gre_enable: Eth GRE enable flag.
+ * @ip_gre_enable: IP GRE enable flag.
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers.
- * @param eth_gre_enable - eth GRE enable enable flag.
- * @param ip_gre_enable - IP GRE enable enable flag.
+ * Return: Void.
  */
 void qed_set_gre_enable(struct qed_hwfn *p_hwfn,
 			struct qed_ptt *p_ptt,
 			bool eth_gre_enable, bool ip_gre_enable);
 
 /**
- * @brief qed_set_geneve_dest_port - initializes geneve tunnel destination udp port
+ * qed_set_geneve_dest_port(): Initializes geneve tunnel destination udp port
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers.
- * @param dest_port - geneve destination udp port.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @dest_port: Geneve destination udp port.
+ *
+ * Retur: Void.
  */
 void qed_set_geneve_dest_port(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt, u16 dest_port);
 
 /**
- * @brief qed_set_gre_enable - enable or disable GRE tunnel in HW
+ * qed_set_geneve_enable(): Enable or disable GRE tunnel in HW.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @eth_geneve_enable: Eth GENEVE enable flag.
+ * @ip_geneve_enable: IP GENEVE enable flag.
  *
- * @param p_ptt - ptt window used for writing the registers.
- * @param eth_geneve_enable - eth GENEVE enable enable flag.
- * @param ip_geneve_enable - IP GENEVE enable enable flag.
+ * Return: Void.
  */
 void qed_set_geneve_enable(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt,
@@ -3931,25 +3956,29 @@ void qed_set_vxlan_no_l2_enable(struct qed_hwfn *p_hwfn,
 				struct qed_ptt *p_ptt, bool enable);
 
 /**
- * @brief qed_gft_disable - Disable GFT
+ * qed_gft_disable(): Disable GFT.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @pf_id: PF on which to disable GFT.
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers.
- * @param pf_id - pf on which to disable GFT.
+ * Return: Void.
  */
 void qed_gft_disable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 pf_id);
 
 /**
- * @brief qed_gft_config - Enable and configure HW for GFT
- *
- * @param p_hwfn - HW device data
- * @param p_ptt - ptt window used for writing the registers.
- * @param pf_id - pf on which to enable GFT.
- * @param tcp - set profile tcp packets.
- * @param udp - set profile udp  packet.
- * @param ipv4 - set profile ipv4 packet.
- * @param ipv6 - set profile ipv6 packet.
- * @param profile_type - define packet same fields. Use enum gft_profile_type.
+ * qed_gft_config(): Enable and configure HW for GFT.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @pf_id: PF on which to enable GFT.
+ * @tcp: Set profile tcp packets.
+ * @udp: Set profile udp  packet.
+ * @ipv4: Set profile ipv4 packet.
+ * @ipv6: Set profile ipv6 packet.
+ * @profile_type: Define packet same fields. Use enum gft_profile_type.
+ *
+ * Return: Void.
  */
 void qed_gft_config(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt,
@@ -3959,107 +3988,120 @@ void qed_gft_config(struct qed_hwfn *p_hwfn,
 		    bool ipv4, bool ipv6, enum gft_profile_type profile_type);
 
 /**
- * @brief qed_enable_context_validation - Enable and configure context
- *	validation.
+ * qed_enable_context_validation(): Enable and configure context
+ *                                  validation.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
  *
- * @param p_hwfn
- * @param p_ptt - ptt window used for writing the registers.
+ * Return: Void.
  */
 void qed_enable_context_validation(struct qed_hwfn *p_hwfn,
 				   struct qed_ptt *p_ptt);
 
 /**
- * @brief qed_calc_session_ctx_validation - Calcualte validation byte for
- *	session context.
+ * qed_calc_session_ctx_validation(): Calcualte validation byte for
+ *                                    session context.
  *
- * @param p_ctx_mem - pointer to context memory.
- * @param ctx_size - context size.
- * @param ctx_type - context type.
- * @param cid - context cid.
+ * @p_ctx_mem: Pointer to context memory.
+ * @ctx_size: Context size.
+ * @ctx_type: Context type.
+ * @cid: Context cid.
+ *
+ * Return: Void.
  */
 void qed_calc_session_ctx_validation(void *p_ctx_mem,
 				     u16 ctx_size, u8 ctx_type, u32 cid);
 
 /**
- * @brief qed_calc_task_ctx_validation - Calcualte validation byte for task
- *	context.
+ * qed_calc_task_ctx_validation(): Calcualte validation byte for task
+ *                                 context.
+ *
+ * @p_ctx_mem: Pointer to context memory.
+ * @ctx_size: Context size.
+ * @ctx_type: Context type.
+ * @tid: Context tid.
  *
- * @param p_ctx_mem - pointer to context memory.
- * @param ctx_size - context size.
- * @param ctx_type - context type.
- * @param tid - context tid.
+ * Return: Void.
  */
 void qed_calc_task_ctx_validation(void *p_ctx_mem,
 				  u16 ctx_size, u8 ctx_type, u32 tid);
 
 /**
- * @brief qed_memset_session_ctx - Memset session context to 0 while
- *	preserving validation bytes.
+ * qed_memset_session_ctx(): Memset session context to 0 while
+ *                            preserving validation bytes.
+ *
+ * @p_ctx_mem: Pointer to context memory.
+ * @ctx_size: Size to initialzie.
+ * @ctx_type: Context type.
  *
- * @param p_hwfn -
- * @param p_ctx_mem - pointer to context memory.
- * @param ctx_size - size to initialzie.
- * @param ctx_type - context type.
+ * Return: Void.
  */
 void qed_memset_session_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type);
 
 /**
- * @brief qed_memset_task_ctx - Memset task context to 0 while preserving
- *	validation bytes.
+ * qed_memset_task_ctx(): Memset task context to 0 while preserving
+ *                        validation bytes.
  *
- * @param p_ctx_mem - pointer to context memory.
- * @param ctx_size - size to initialzie.
- * @param ctx_type - context type.
+ * @p_ctx_mem: Pointer to context memory.
+ * @ctx_size: size to initialzie.
+ * @ctx_type: context type.
+ *
+ * Return: Void.
  */
 void qed_memset_task_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type);
 
 #define NUM_STORMS 6
 
 /**
- * @brief qed_set_rdma_error_level - Sets the RDMA assert level.
- *                                   If the severity of the error will be
- *                                   above the level, the FW will assert.
- * @param p_hwfn - HW device data
- * @param p_ptt - ptt window used for writing the registers
- * @param assert_level - An array of assert levels for each storm.
+ * qed_set_rdma_error_level(): Sets the RDMA assert level.
+ *                             If the severity of the error will be
+ *                             above the level, the FW will assert.
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @assert_level: An array of assert levels for each storm.
  *
+ * Return: Void.
  */
 void qed_set_rdma_error_level(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt,
 			      u8 assert_level[NUM_STORMS]);
 /**
- * @brief qed_fw_overlay_mem_alloc - Allocates and fills the FW overlay memory.
+ * qed_fw_overlay_mem_alloc(): Allocates and fills the FW overlay memory.
  *
- * @param p_hwfn - HW device data
- * @param fw_overlay_in_buf - the input FW overlay buffer.
- * @param buf_size - the size of the input FW overlay buffer in bytes.
- *		     must be aligned to dwords.
- * @param fw_overlay_out_mem - OUT: a pointer to the allocated overlays memory.
+ * @p_hwfn: HW device data.
+ * @fw_overlay_in_buf: The input FW overlay buffer.
+ * @buf_size_in_bytes: The size of the input FW overlay buffer in bytes.
+ *		        must be aligned to dwords.
  *
- * @return a pointer to the allocated overlays memory,
+ * Return: A pointer to the allocated overlays memory,
  * or NULL in case of failures.
  */
 struct phys_mem_desc *
 qed_fw_overlay_mem_alloc(struct qed_hwfn *p_hwfn,
-			 const u32 * const fw_overlay_in_buf,
+			 const u32 *const fw_overlay_in_buf,
 			 u32 buf_size_in_bytes);
 
 /**
- * @brief qed_fw_overlay_init_ram - Initializes the FW overlay RAM.
+ * qed_fw_overlay_init_ram(): Initializes the FW overlay RAM.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @fw_overlay_mem: the allocated FW overlay memory.
  *
- * @param p_hwfn - HW device data.
- * @param p_ptt - ptt window used for writing the registers.
- * @param fw_overlay_mem - the allocated FW overlay memory.
+ * Return: Void.
  */
 void qed_fw_overlay_init_ram(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt,
 			     struct phys_mem_desc *fw_overlay_mem);
 
 /**
- * @brief qed_fw_overlay_mem_free - Frees the FW overlay memory.
+ * qed_fw_overlay_mem_free(): Frees the FW overlay memory.
+ *
+ * @p_hwfn: HW device data.
+ * @fw_overlay_mem: The allocated FW overlay memory to free.
  *
- * @param p_hwfn - HW device data.
- * @param fw_overlay_mem - the allocated FW overlay memory to free.
+ * Return: Void.
  */
 void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn,
 			     struct phys_mem_desc *fw_overlay_mem);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h
index 2734f49956f7..e535983ce21b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h
@@ -53,85 +53,94 @@ enum _dmae_cmd_crc_mask {
 #define DMAE_MAX_CLIENTS        32
 
 /**
- * @brief qed_gtt_init - Initialize GTT windows
+ * qed_gtt_init(): Initialize GTT windows.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  */
 void qed_gtt_init(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_ptt_invalidate - Forces all ptt entries to be re-configured
+ * qed_ptt_invalidate(): Forces all ptt entries to be re-configured
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_ptt_invalidate(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_ptt_pool_alloc - Allocate and initialize PTT pool
+ * qed_ptt_pool_alloc(): Allocate and initialize PTT pool.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return struct _qed_status - success (0), negative - error.
+ * Return: struct _qed_status - success (0), negative - error.
  */
 int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_ptt_pool_free -
+ * qed_ptt_pool_free(): Free PTT pool.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_ptt_pool_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_ptt_get_hw_addr - Get PTT's GRC/HW address
+ * qed_ptt_get_hw_addr(): Get PTT's GRC/HW address.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt
  *
- * @return u32
+ * Return: u32.
  */
 u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn,
 			struct qed_ptt *p_ptt);
 
 /**
- * @brief qed_ptt_get_bar_addr - Get PPT's external BAR address
+ * qed_ptt_get_bar_addr(): Get PPT's external BAR address.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_ptt: P_ptt
  *
- * @return u32
+ * Return: u32.
  */
 u32 qed_ptt_get_bar_addr(struct qed_ptt *p_ptt);
 
 /**
- * @brief qed_ptt_set_win - Set PTT Window's GRC BAR address
+ * qed_ptt_set_win(): Set PTT Window's GRC BAR address
  *
- * @param p_hwfn
- * @param new_hw_addr
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @new_hw_addr: New HW address.
+ * @p_ptt: P_Ptt
+ *
+ * Return: Void.
  */
 void qed_ptt_set_win(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt,
 		     u32 new_hw_addr);
 
 /**
- * @brief qed_get_reserved_ptt - Get a specific reserved PTT
+ * qed_get_reserved_ptt(): Get a specific reserved PTT.
  *
- * @param p_hwfn
- * @param ptt_idx
+ * @p_hwfn: HW device data.
+ * @ptt_idx: Ptt Index.
  *
- * @return struct qed_ptt *
+ * Return: struct qed_ptt *.
  */
 struct qed_ptt *qed_get_reserved_ptt(struct qed_hwfn *p_hwfn,
 				     enum reserved_ptts ptt_idx);
 
 /**
- * @brief qed_wr - Write value to BAR using the given ptt
+ * qed_wr(): Write value to BAR using the given ptt.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @val: Val.
+ * @hw_addr: HW address
  *
- * @param p_hwfn
- * @param p_ptt
- * @param val
- * @param hw_addr
+ * Return: Void.
  */
 void qed_wr(struct qed_hwfn *p_hwfn,
 	    struct qed_ptt *p_ptt,
@@ -139,26 +148,28 @@ void qed_wr(struct qed_hwfn *p_hwfn,
 	    u32 val);
 
 /**
- * @brief qed_rd - Read value from BAR using the given ptt
+ * qed_rd(): Read value from BAR using the given ptt.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @hw_addr: HW address
  *
- * @param p_hwfn
- * @param p_ptt
- * @param val
- * @param hw_addr
+ * Return: Void.
  */
 u32 qed_rd(struct qed_hwfn *p_hwfn,
 	   struct qed_ptt *p_ptt,
 	   u32 hw_addr);
 
 /**
- * @brief qed_memcpy_from - copy n bytes from BAR using the given
- *        ptt
- *
- * @param p_hwfn
- * @param p_ptt
- * @param dest
- * @param hw_addr
- * @param n
+ * qed_memcpy_from(): Copy n bytes from BAR using the given ptt.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @dest: Destination.
+ * @hw_addr: HW address.
+ * @n: N
+ *
+ * Return: Void.
  */
 void qed_memcpy_from(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt,
@@ -167,14 +178,15 @@ void qed_memcpy_from(struct qed_hwfn *p_hwfn,
 		     size_t n);
 
 /**
- * @brief qed_memcpy_to - copy n bytes to BAR using the given
- *        ptt
- *
- * @param p_hwfn
- * @param p_ptt
- * @param hw_addr
- * @param src
- * @param n
+ * qed_memcpy_to(): Copy n bytes to BAR using the given  ptt
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @hw_addr: HW address.
+ * @src: Source.
+ * @n: N
+ *
+ * Return: Void.
  */
 void qed_memcpy_to(struct qed_hwfn *p_hwfn,
 		   struct qed_ptt *p_ptt,
@@ -182,83 +194,97 @@ void qed_memcpy_to(struct qed_hwfn *p_hwfn,
 		   void *src,
 		   size_t n);
 /**
- * @brief qed_fid_pretend - pretend to another function when
- *        accessing the ptt window. There is no way to unpretend
- *        a function. The only way to cancel a pretend is to
- *        pretend back to the original function.
- *
- * @param p_hwfn
- * @param p_ptt
- * @param fid - fid field of pxp_pretend structure. Can contain
- *            either pf / vf, port/path fields are don't care.
+ * qed_fid_pretend(): pretend to another function when
+ *                    accessing the ptt window. There is no way to unpretend
+ *                    a function. The only way to cancel a pretend is to
+ *                    pretend back to the original function.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @fid: fid field of pxp_pretend structure. Can contain
+ *        either pf / vf, port/path fields are don't care.
+ *
+ * Return: Void.
  */
 void qed_fid_pretend(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt,
 		     u16 fid);
 
 /**
- * @brief qed_port_pretend - pretend to another port when
- *        accessing the ptt window
+ * qed_port_pretend(): Pretend to another port when accessing the ptt window
  *
- * @param p_hwfn
- * @param p_ptt
- * @param port_id - the port to pretend to
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @port_id: The port to pretend to
+ *
+ * Return: Void.
  */
 void qed_port_pretend(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
 		      u8 port_id);
 
 /**
- * @brief qed_port_unpretend - cancel any previously set port
- *        pretend
+ * qed_port_unpretend(): Cancel any previously set port pretend
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param p_hwfn
- * @param p_ptt
+ * Return: Void.
  */
 void qed_port_unpretend(struct qed_hwfn *p_hwfn,
 			struct qed_ptt *p_ptt);
 
 /**
- * @brief qed_port_fid_pretend - pretend to another port and another function
- *        when accessing the ptt window
+ * qed_port_fid_pretend(): Pretend to another port and another function
+ *                         when accessing the ptt window
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @port_id: The port to pretend to
+ * @fid: fid field of pxp_pretend structure. Can contain either pf / vf.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param port_id - the port to pretend to
- * @param fid - fid field of pxp_pretend structure. Can contain either pf / vf.
+ * Return: Void.
  */
 void qed_port_fid_pretend(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u8 port_id, u16 fid);
 
 /**
- * @brief qed_vfid_to_concrete - build a concrete FID for a
- *        given VF ID
+ * qed_vfid_to_concrete(): Build a concrete FID for a given VF ID
  *
- * @param p_hwfn
- * @param p_ptt
- * @param vfid
+ * @p_hwfn: HW device data.
+ * @vfid: VFID.
+ *
+ * Return: Void.
  */
 u32 qed_vfid_to_concrete(struct qed_hwfn *p_hwfn, u8 vfid);
 
 /**
- * @brief qed_dmae_idx_to_go_cmd - map the idx to dmae cmd
- * this is declared here since other files will require it.
- * @param idx
+ * qed_dmae_idx_to_go_cmd(): Map the idx to dmae cmd
+ *    this is declared here since other files will require it.
+ *
+ * @idx: Index
+ *
+ * Return: Void.
  */
 u32 qed_dmae_idx_to_go_cmd(u8 idx);
 
 /**
- * @brief qed_dmae_info_alloc - Init the dmae_info structure
- * which is part of p_hwfn.
- * @param p_hwfn
+ * qed_dmae_info_alloc(): Init the dmae_info structure
+ *                        which is part of p_hwfn.
+ *
+ * @p_hwfn: HW device data.
+ *
+ * Return: Int.
  */
 int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_dmae_info_free - Free the dmae_info structure
- * which is part of p_hwfn
+ * qed_dmae_info_free(): Free the dmae_info structure
+ *                       which is part of p_hwfn.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_dmae_info_free(struct qed_hwfn *p_hwfn);
 
@@ -292,14 +318,16 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 #define QED_HW_ERR_MAX_STR_SIZE 256
 
 /**
- * @brief qed_hw_err_notify - Notify upper layer driver and management FW
- *	about a HW error.
- *
- * @param p_hwfn
- * @param p_ptt
- * @param err_type
- * @param fmt - debug data buffer to send to the MFW
- * @param ... - buffer format args
+ * qed_hw_err_notify(): Notify upper layer driver and management FW
+ *                      about a HW error.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @err_type: Err Type.
+ * @fmt: Debug data buffer to send to the MFW
+ * @...: buffer format args
+ *
+ * Return void.
  */
 void __printf(4, 5) __cold qed_hw_err_notify(struct qed_hwfn *p_hwfn,
 					     struct qed_ptt *p_ptt,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.h b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h
index a573c8921982..1dbc460c9eec 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.h
@@ -12,23 +12,24 @@
 #include "qed.h"
 
 /**
- * @brief qed_init_iro_array - init iro_arr.
+ * qed_init_iro_array(): init iro_arr.
  *
+ * @cdev: Qed dev pointer.
  *
- * @param cdev
+ * Return: Void.
  */
 void qed_init_iro_array(struct qed_dev *cdev);
 
 /**
- * @brief qed_init_run - Run the init-sequence.
+ * qed_init_run(): Run the init-sequence.
  *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @phase: Phase.
+ * @phase_id: Phase ID.
+ * @modes: Mode.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param phase
- * @param phase_id
- * @param modes
- * @return _qed_status_t
+ * Return: _qed_status_t
  */
 int qed_init_run(struct qed_hwfn *p_hwfn,
 		 struct qed_ptt *p_ptt,
@@ -37,30 +38,31 @@ int qed_init_run(struct qed_hwfn *p_hwfn,
 		 int modes);
 
 /**
- * @brief qed_init_hwfn_allocate - Allocate RT array, Store 'values' ptrs.
+ * qed_init_alloc(): Allocate RT array, Store 'values' ptrs.
  *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
- *
- * @return _qed_status_t
+ * Return: _qed_status_t.
  */
 int qed_init_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_init_hwfn_deallocate
+ * qed_init_free(): Init HW function deallocate.
  *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_init_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_init_store_rt_reg - Store a configuration value in the RT array.
+ * qed_init_store_rt_reg(): Store a configuration value in the RT array.
  *
+ * @p_hwfn: HW device data.
+ * @rt_offset: RT offset.
+ * @val: Val.
  *
- * @param p_hwfn
- * @param rt_offset
- * @param val
+ * Return: Void.
  */
 void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
 			   u32 rt_offset,
@@ -72,15 +74,6 @@ void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn,
 #define OVERWRITE_RT_REG(hwfn, offset, val) \
 	qed_init_store_rt_reg(hwfn, offset, val)
 
-/**
- * @brief
- *
- *
- * @param p_hwfn
- * @param rt_offset
- * @param val
- * @param size
- */
 void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
 			   u32 rt_offset,
 			   u32 *val,
@@ -90,11 +83,12 @@ void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn,
 	qed_init_store_rt_agg(hwfn, offset, (u32 *)&val, sizeof(val))
 
 /**
- * @brief
- *      Initialize GTT global windows and set admin window
- *      related params of GTT/PTT to default values.
+ * qed_gtt_init(): Initialize GTT global windows and set admin window
+ *                 related params of GTT/PTT to default values.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return Void.
  */
 void qed_gtt_init(struct qed_hwfn *p_hwfn);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index c5550e96bbe1..eb8e0f4242d7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -53,51 +53,54 @@ enum qed_coalescing_fsm {
 };
 
 /**
- * @brief qed_int_igu_enable_int - enable device interrupts
+ * qed_int_igu_enable_int(): Enable device interrupts.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param int_mode - interrupt mode to use
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @int_mode: Interrupt mode to use.
+ *
+ * Return: Void.
  */
 void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn,
 			    struct qed_ptt *p_ptt,
 			    enum qed_int_mode int_mode);
 
 /**
- * @brief qed_int_igu_disable_int - disable device interrupts
+ * qed_int_igu_disable_int():  Disable device interrupts.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param p_hwfn
- * @param p_ptt
+ * Return: Void.
  */
 void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt);
 
 /**
- * @brief qed_int_igu_read_sisr_reg - Reads the single isr multiple dpc
- *        register from igu.
+ * qed_int_igu_read_sisr_reg(): Reads the single isr multiple dpc
+ *                             register from igu.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return u64
+ * Return: u64.
  */
 u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn);
 
 #define QED_SP_SB_ID 0xffff
 /**
- * @brief qed_int_sb_init - Initializes the sb_info structure.
+ * qed_int_sb_init(): Initializes the sb_info structure.
  *
- * once the structure is initialized it can be passed to sb related functions.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @sb_info: points to an uninitialized (but allocated) sb_info structure
+ * @sb_virt_addr: SB Virtual address.
+ * @sb_phy_addr: SB Physial address.
+ * @sb_id: the sb_id to be used (zero based in driver)
+ *           should use QED_SP_SB_ID for SP Status block
  *
- * @param p_hwfn
- * @param p_ptt
- * @param sb_info	points to an uninitialized (but
- *			allocated) sb_info structure
- * @param sb_virt_addr
- * @param sb_phy_addr
- * @param sb_id	the sb_id to be used (zero based in driver)
- *			should use QED_SP_SB_ID for SP Status block
+ * Return: int.
  *
- * @return int
+ * Once the structure is initialized it can be passed to sb related functions.
  */
 int qed_int_sb_init(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt,
@@ -106,82 +109,91 @@ int qed_int_sb_init(struct qed_hwfn *p_hwfn,
 		    dma_addr_t sb_phy_addr,
 		    u16 sb_id);
 /**
- * @brief qed_int_sb_setup - Setup the sb.
+ * qed_int_sb_setup(): Setup the sb.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @sb_info: Initialized sb_info structure.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param sb_info	initialized sb_info structure
+ * Return: Void.
  */
 void qed_int_sb_setup(struct qed_hwfn *p_hwfn,
 		      struct qed_ptt *p_ptt,
 		      struct qed_sb_info *sb_info);
 
 /**
- * @brief qed_int_sb_release - releases the sb_info structure.
+ * qed_int_sb_release(): Releases the sb_info structure.
  *
- * once the structure is released, it's memory can be freed
+ * @p_hwfn: HW device data.
+ * @sb_info: Points to an allocated sb_info structure.
+ * @sb_id: The sb_id to be used (zero based in driver)
+ *         should never be equal to QED_SP_SB_ID
+ *         (SP Status block).
  *
- * @param p_hwfn
- * @param sb_info	points to an allocated sb_info structure
- * @param sb_id		the sb_id to be used (zero based in driver)
- *			should never be equal to QED_SP_SB_ID
- *			(SP Status block)
+ * Return: int.
  *
- * @return int
+ * Once the structure is released, it's memory can be freed.
  */
 int qed_int_sb_release(struct qed_hwfn *p_hwfn,
 		       struct qed_sb_info *sb_info,
 		       u16 sb_id);
 
 /**
- * @brief qed_int_sp_dpc - To be called when an interrupt is received on the
- *        default status block.
+ * qed_int_sp_dpc(): To be called when an interrupt is received on the
+ *                   default status block.
  *
- * @param p_hwfn - pointer to hwfn
+ * @t: Tasklet.
+ *
+ * Return: Void.
  *
  */
 void qed_int_sp_dpc(struct tasklet_struct *t);
 
 /**
- * @brief qed_int_get_num_sbs - get the number of status
- *        blocks configured for this funciton in the igu.
+ * qed_int_get_num_sbs(): Get the number of status blocks configured
+ *                        for this funciton in the igu.
  *
- * @param p_hwfn
- * @param p_sb_cnt_info
+ * @p_hwfn: HW device data.
+ * @p_sb_cnt_info: Pointer to SB count info.
  *
- * @return int - number of status blocks configured
+ * Return: Void.
  */
 void qed_int_get_num_sbs(struct qed_hwfn	*p_hwfn,
 			 struct qed_sb_cnt_info *p_sb_cnt_info);
 
 /**
- * @brief qed_int_disable_post_isr_release - performs the cleanup post ISR
+ * qed_int_disable_post_isr_release(): Performs the cleanup post ISR
  *        release. The API need to be called after releasing all slowpath IRQs
  *        of the device.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
+ * Return: Void.
  */
 void qed_int_disable_post_isr_release(struct qed_dev *cdev);
 
 /**
- * @brief qed_int_attn_clr_enable - sets whether the general behavior is
+ * qed_int_attn_clr_enable: Sets whether the general behavior is
  *        preventing attentions from being reasserted, or following the
  *        attributes of the specific attention.
  *
- * @param cdev
- * @param clr_enable
+ * @cdev: Qed dev pointer.
+ * @clr_enable: Clear enable
+ *
+ * Return: Void.
  *
  */
 void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable);
 
 /**
- * @brief - Doorbell Recovery handler.
+ * qed_db_rec_handler(): Doorbell Recovery handler.
  *          Run doorbell recovery in case of PF overflow (and flush DORQ if
  *          needed).
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Int.
  */
 int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
@@ -223,30 +235,34 @@ struct qed_igu_info {
 };
 
 /**
- * @brief - Make sure the IGU CAM reflects the resources provided by MFW
+ * qed_int_igu_reset_cam(): Make sure the IGU CAM reflects the resources
+ *                          provided by MFW.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param p_hwfn
- * @param p_ptt
+ * Return: Void.
  */
 int qed_int_igu_reset_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Translate the weakly-defined client sb-id into an IGU sb-id
+ * qed_get_igu_sb_id(): Translate the weakly-defined client sb-id into
+ *                      an IGU sb-id
  *
- * @param p_hwfn
- * @param sb_id - user provided sb_id
+ * @p_hwfn: HW device data.
+ * @sb_id: user provided sb_id.
  *
- * @return an index inside IGU CAM where the SB resides
+ * Return: An index inside IGU CAM where the SB resides.
  */
 u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id);
 
 /**
- * @brief return a pointer to an unused valid SB
+ * qed_get_igu_free_sb(): Return a pointer to an unused valid SB
  *
- * @param p_hwfn
- * @param b_is_pf - true iff we want a SB belonging to a PF
+ * @p_hwfn: HW device data.
+ * @b_is_pf: True iff we want a SB belonging to a PF.
  *
- * @return point to an igu_block, NULL if none is available
+ * Return: Point to an igu_block, NULL if none is available.
  */
 struct qed_igu_block *qed_get_igu_free_sb(struct qed_hwfn *p_hwfn,
 					  bool b_is_pf);
@@ -259,15 +275,15 @@ void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn,
 void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_int_igu_read_cam - Reads the IGU CAM.
+ * qed_int_igu_read_cam():  Reads the IGU CAM.
  *	This function needs to be called during hardware
  *	prepare. It reads the info from igu cam to know which
  *	status block is the default / base status block etc.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @return int
+ * Return: Int.
  */
 int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt);
@@ -275,24 +291,22 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn,
 typedef int (*qed_int_comp_cb_t)(struct qed_hwfn *p_hwfn,
 				 void *cookie);
 /**
- * @brief qed_int_register_cb - Register callback func for
- *      slowhwfn statusblock.
- *
- *	Every protocol that uses the slowhwfn status block
- *	should register a callback function that will be called
- *	once there is an update of the sp status block.
- *
- * @param p_hwfn
- * @param comp_cb - function to be called when there is an
- *                  interrupt on the sp sb
- *
- * @param cookie  - passed to the callback function
- * @param sb_idx  - OUT parameter which gives the chosen index
- *                  for this protocol.
- * @param p_fw_cons  - pointer to the actual address of the
- *                     consumer for this protocol.
- *
- * @return int
+ * qed_int_register_cb(): Register callback func for slowhwfn statusblock.
+ *
+ * @p_hwfn: HW device data.
+ * @comp_cb: Function to be called when there is an
+ *           interrupt on the sp sb
+ * @cookie: Passed to the callback function
+ * @sb_idx: (OUT) parameter which gives the chosen index
+ *           for this protocol.
+ * @p_fw_cons: Pointer to the actual address of the
+ *             consumer for this protocol.
+ *
+ * Return: Int.
+ *
+ * Every protocol that uses the slowhwfn status block
+ * should register a callback function that will be called
+ * once there is an update of the sp status block.
  */
 int qed_int_register_cb(struct qed_hwfn *p_hwfn,
 			qed_int_comp_cb_t comp_cb,
@@ -301,37 +315,40 @@ int qed_int_register_cb(struct qed_hwfn *p_hwfn,
 			__le16 **p_fw_cons);
 
 /**
- * @brief qed_int_unregister_cb - Unregisters callback
- *      function from sp sb.
- *      Partner of qed_int_register_cb -> should be called
- *      when no longer required.
+ * qed_int_unregister_cb(): Unregisters callback function from sp sb.
+ *
+ * @p_hwfn: HW device data.
+ * @pi: Producer Index.
  *
- * @param p_hwfn
- * @param pi
+ * Return: Int.
  *
- * @return int
+ * Partner of qed_int_register_cb -> should be called
+ * when no longer required.
  */
 int qed_int_unregister_cb(struct qed_hwfn *p_hwfn,
 			  u8 pi);
 
 /**
- * @brief qed_int_get_sp_sb_id - Get the slowhwfn sb id.
+ * qed_int_get_sp_sb_id(): Get the slowhwfn sb id.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return u16
+ * Return: u16.
  */
 u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief Status block cleanup. Should be called for each status
- *        block that will be used -> both PF / VF
- *
- * @param p_hwfn
- * @param p_ptt
- * @param igu_sb_id	- igu status block id
- * @param opaque	- opaque fid of the sb owner.
- * @param b_set		- set(1) / clear(0)
+ * qed_int_igu_init_pure_rt_single(): Status block cleanup.
+ *                                    Should be called for each status
+ *                                    block that will be used -> both PF / VF.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @igu_sb_id: IGU status block id.
+ * @opaque: Opaque fid of the sb owner.
+ * @b_set: Set(1) / Clear(0).
+ *
+ * Return: Void.
  */
 void qed_int_igu_init_pure_rt_single(struct qed_hwfn *p_hwfn,
 				     struct qed_ptt *p_ptt,
@@ -340,15 +357,16 @@ void qed_int_igu_init_pure_rt_single(struct qed_hwfn *p_hwfn,
 				     bool b_set);
 
 /**
- * @brief qed_int_cau_conf - configure cau for a given status
- *        block
- *
- * @param p_hwfn
- * @param ptt
- * @param sb_phys
- * @param igu_sb_id
- * @param vf_number
- * @param vf_valid
+ * qed_int_cau_conf_sb(): Configure cau for a given status block.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @sb_phys: SB Physical.
+ * @igu_sb_id: IGU status block id.
+ * @vf_number: VF number
+ * @vf_valid: VF valid or not.
+ *
+ * Return: Void.
  */
 void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 			 struct qed_ptt *p_ptt,
@@ -358,52 +376,58 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
 			 u8 vf_valid);
 
 /**
- * @brief qed_int_alloc
+ * qed_int_alloc(): QED interrupt alloc.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @return int
+ * Return: Int.
  */
 int qed_int_alloc(struct qed_hwfn *p_hwfn,
 		  struct qed_ptt *p_ptt);
 
 /**
- * @brief qed_int_free
+ * qed_int_free(): QED interrupt free.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_int_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_int_setup
+ * qed_int_setup(): QED interrupt setup.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Void.
  */
 void qed_int_setup(struct qed_hwfn *p_hwfn,
 		   struct qed_ptt *p_ptt);
 
 /**
- * @brief - Enable Interrupt & Attention for hw function
+ * qed_int_igu_enable(): Enable Interrupt & Attention for hw function.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param int_mode
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @int_mode: Interrut mode
  *
- * @return int
+ * Return: Int.
  */
 int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 		       enum qed_int_mode int_mode);
 
 /**
- * @brief - Initialize CAU status block entry
+ * qed_init_cau_sb_entry(): Initialize CAU status block entry.
+ *
+ * @p_hwfn: HW device data.
+ * @p_sb_entry: Pointer SB entry.
+ * @pf_id: PF number
+ * @vf_number: VF number
+ * @vf_valid: VF valid or not.
  *
- * @param p_hwfn
- * @param p_sb_entry
- * @param pf_id
- * @param vf_number
- * @param vf_valid
+ * Return: Void.
  */
 void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn,
 			   struct cau_sb_entry *p_sb_entry,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
index dab7a5d09f87..dec2b00259d4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
@@ -34,10 +34,13 @@ void qed_iscsi_setup(struct qed_hwfn *p_hwfn);
 void qed_iscsi_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief - Fills provided statistics struct with statistics.
+ * qed_get_protocol_stats_iscsi(): Fills provided statistics
+ *                                 struct with statistics.
  *
- * @param cdev
- * @param stats - points to struct that will be filled with statistics.
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ *
+ * Return: Void.
  */
 void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
 				  struct qed_mcp_iscsi_stats *stats);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 8eceeebb1a7b..2ab7f3f0cf6c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -92,18 +92,18 @@ struct qed_filter_mcast {
 };
 
 /**
- * @brief qed_eth_rx_queue_stop - This ramrod closes an Rx queue
+ * qed_eth_rx_queue_stop(): This ramrod closes an Rx queue.
  *
- * @param p_hwfn
- * @param p_rxq			Handler of queue to close
- * @param eq_completion_only	If True completion will be on
- *				EQe, if False completion will be
- *				on EQe if p_hwfn opaque
- *				different from the RXQ opaque
- *				otherwise on CQe.
- * @param cqe_completion	If True completion will be
- *				receive on CQe.
- * @return int
+ * @p_hwfn: HW device data.
+ * @p_rxq: Handler of queue to close
+ * @eq_completion_only: If True completion will be on
+ *                      EQe, if False completion will be
+ *                      on EQe if p_hwfn opaque
+ *                      different from the RXQ opaque
+ *                      otherwise on CQe.
+ * @cqe_completion: If True completion will be receive on CQe.
+ *
+ * Return: Int.
  */
 int
 qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
@@ -111,12 +111,12 @@ qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
 		      bool eq_completion_only, bool cqe_completion);
 
 /**
- * @brief qed_eth_tx_queue_stop - closes a Tx queue
+ * qed_eth_tx_queue_stop(): Closes a Tx queue.
  *
- * @param p_hwfn
- * @param p_txq - handle to Tx queue needed to be closed
+ * @p_hwfn: HW device data.
+ * @p_txq: handle to Tx queue needed to be closed.
  *
- * @return int
+ * Return: Int.
  */
 int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_txq);
 
@@ -205,16 +205,15 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn,
 			struct qed_spq_comp_cb *p_comp_data);
 
 /**
- * @brief qed_sp_vport_stop -
- *
- * This ramrod closes a VPort after all its RX and TX queues are terminated.
- * An Assert is generated if any queues are left open.
+ * qed_sp_vport_stop: This ramrod closes a VPort after all its
+ *                    RX and TX queues are terminated.
+ *                    An Assert is generated if any queues are left open.
  *
- * @param p_hwfn
- * @param opaque_fid
- * @param vport_id VPort ID
+ * @p_hwfn: HW device data.
+ * @opaque_fid: Opaque FID
+ * @vport_id: VPort ID.
  *
- * @return int
+ * Return: Int.
  */
 int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, u16 opaque_fid, u8 vport_id);
 
@@ -225,22 +224,21 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
 			    struct qed_spq_comp_cb *p_comp_data);
 
 /**
- * @brief qed_sp_rx_eth_queues_update -
- *
- * This ramrod updates an RX queue. It is used for setting the active state
- * of the queue and updating the TPA and SGE parameters.
+ * qed_sp_eth_rx_queues_update(): This ramrod updates an RX queue.
+ *                                It is used for setting the active state
+ *                                of the queue and updating the TPA and
+ *                                SGE parameters.
+ * @p_hwfn: HW device data.
+ * @pp_rxq_handlers: An array of queue handlers to be updated.
+ * @num_rxqs: number of queues to update.
+ * @complete_cqe_flg: Post completion to the CQE Ring if set.
+ * @complete_event_flg: Post completion to the Event Ring if set.
+ * @comp_mode: Comp mode.
+ * @p_comp_data: Pointer Comp data.
  *
- * @note At the moment - only used by non-linux VFs.
+ * Return: Int.
  *
- * @param p_hwfn
- * @param pp_rxq_handlers	An array of queue handlers to be updated.
- * @param num_rxqs              number of queues to update.
- * @param complete_cqe_flg	Post completion to the CQE Ring if set
- * @param complete_event_flg	Post completion to the Event Ring if set
- * @param comp_mode
- * @param p_comp_data
- *
- * @return int
+ * Note At the moment - only used by non-linux VFs.
  */
 
 int
@@ -257,30 +255,32 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
 void qed_reset_vport_stats(struct qed_dev *cdev);
 
 /**
- * *@brief qed_arfs_mode_configure -
- *
- **Enable or disable rfs mode. It must accept atleast one of tcp or udp true
- **and atleast one of ipv4 or ipv6 true to enable rfs mode.
+ * qed_arfs_mode_configure(): Enable or disable rfs mode.
+ *                            It must accept at least one of tcp or udp true
+ *                            and at least one of ipv4 or ipv6 true to enable
+ *                            rfs mode.
  *
- **@param p_hwfn
- **@param p_ptt
- **@param p_cfg_params - arfs mode configuration parameters.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_cfg_params: arfs mode configuration parameters.
  *
+ * Return. Void.
  */
 void qed_arfs_mode_configure(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt,
 			     struct qed_arfs_config_params *p_cfg_params);
 
 /**
- * @brief - qed_configure_rfs_ntuple_filter
+ * qed_configure_rfs_ntuple_filter(): This ramrod should be used to add
+ *                                     or remove arfs hw filter
  *
- * This ramrod should be used to add or remove arfs hw filter
+ * @p_hwfn: HW device data.
+ * @p_cb: Used for QED_SPQ_MODE_CB,where client would initialize
+ *        it with cookie and callback function address, if not
+ *        using this mode then client must pass NULL.
+ * @p_params: Pointer to params.
  *
- * @params p_hwfn
- * @params p_cb - Used for QED_SPQ_MODE_CB,where client would initialize
- *		  it with cookie and callback function address, if not
- *		  using this mode then client must pass NULL.
- * @params p_params
+ * Return: Void.
  */
 int
 qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
@@ -374,16 +374,17 @@ qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 		       struct qed_sp_vport_start_params *p_params);
 
 /**
- * @brief - Starts an Rx queue, when queue_cid is already prepared
+ * qed_eth_rxq_start_ramrod(): Starts an Rx queue, when queue_cid is
+ *                             already prepared
  *
- * @param p_hwfn
- * @param p_cid
- * @param bd_max_bytes
- * @param bd_chain_phys_addr
- * @param cqe_pbl_addr
- * @param cqe_pbl_size
+ * @p_hwfn: HW device data.
+ * @p_cid: Pointer CID.
+ * @bd_max_bytes: Max bytes.
+ * @bd_chain_phys_addr: Chain physcial address.
+ * @cqe_pbl_addr: PBL address.
+ * @cqe_pbl_size: PBL size.
  *
- * @return int
+ * Return: Int.
  */
 int
 qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
@@ -393,15 +394,16 @@ qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 			 dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size);
 
 /**
- * @brief - Starts a Tx queue, where queue_cid is already prepared
+ * qed_eth_txq_start_ramrod(): Starts a Tx queue, where queue_cid is
+ *                             already prepared
  *
- * @param p_hwfn
- * @param p_cid
- * @param pbl_addr
- * @param pbl_size
- * @param p_pq_params - parameters for choosing the PQ for this Tx queue
+ * @p_hwfn: HW device data.
+ * @p_cid: Pointer CID.
+ * @pbl_addr: PBL address.
+ * @pbl_size: PBL size.
+ * @pq_id: Parameters for choosing the PQ for this Tx queue.
  *
- * @return int
+ * Return: Int.
  */
 int
 qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index df88d00053a2..f80f7739ff8d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -119,41 +119,41 @@ struct qed_ll2_info {
 extern const struct qed_ll2_ops qed_ll2_ops_pass;
 
 /**
- * @brief qed_ll2_acquire_connection - allocate resources,
- *        starts rx & tx (if relevant) queues pair. Provides
- *        connecion handler as output parameter.
+ * qed_ll2_acquire_connection(): Allocate resources,
+ *                               starts rx & tx (if relevant) queues pair.
+ *                               Provides connecion handler as output
+ *                               parameter.
  *
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @data: Describes connection parameters.
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param data - describes connection parameters
- * @return int
+ * Return: Int.
  */
 int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data);
 
 /**
- * @brief qed_ll2_establish_connection - start previously
- *        allocated LL2 queues pair
+ * qed_ll2_establish_connection(): start previously allocated LL2 queues pair
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param p_ptt
- * @param connection_handle	LL2 connection's handle obtained from
- *                              qed_ll2_require_connection
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @connection_handle: LL2 connection's handle obtained from
+ *                     qed_ll2_require_connection.
  *
- * @return 0 on success, failure otherwise
+ * Return: 0 on success, failure otherwise.
  */
 int qed_ll2_establish_connection(void *cxt, u8 connection_handle);
 
 /**
- * @brief qed_ll2_post_rx_buffers - submit buffers to LL2 Rx queue.
+ * qed_ll2_post_rx_buffer(): Submit buffers to LL2 Rx queue.
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param connection_handle	LL2 connection's handle obtained from
- *				qed_ll2_require_connection
- * @param addr			rx (physical address) buffers to submit
- * @param cookie
- * @param notify_fw		produce corresponding Rx BD immediately
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @connection_handle: LL2 connection's handle obtained from
+ *                     qed_ll2_require_connection.
+ * @addr: RX (physical address) buffers to submit.
+ * @buf_len: Buffer Len.
+ * @cookie: Cookie.
+ * @notify_fw: Produce corresponding Rx BD immediately.
  *
- * @return 0 on success, failure otherwise
+ * Return: 0 on success, failure otherwise.
  */
 int qed_ll2_post_rx_buffer(void *cxt,
 			   u8 connection_handle,
@@ -161,15 +161,15 @@ int qed_ll2_post_rx_buffer(void *cxt,
 			   u16 buf_len, void *cookie, u8 notify_fw);
 
 /**
- * @brief qed_ll2_prepare_tx_packet - request for start Tx BD
- *				      to prepare Tx packet submission to FW.
+ * qed_ll2_prepare_tx_packet(): Request for start Tx BD
+ *				to prepare Tx packet submission to FW.
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param connection_handle
- * @param pkt - info regarding the tx packet
- * @param notify_fw - issue doorbell to fw for this packet
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @connection_handle: Connection handle.
+ * @pkt: Info regarding the tx packet.
+ * @notify_fw: Issue doorbell to fw for this packet.
  *
- * @return 0 on success, failure otherwise
+ * Return: 0 on success, failure otherwise.
  */
 int qed_ll2_prepare_tx_packet(void *cxt,
 			      u8 connection_handle,
@@ -177,81 +177,83 @@ int qed_ll2_prepare_tx_packet(void *cxt,
 			      bool notify_fw);
 
 /**
- * @brief qed_ll2_release_connection -	releases resources
- *					allocated for LL2 connection
+ * qed_ll2_release_connection(): Releases resources allocated for LL2
+ *                               connection.
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param connection_handle		LL2 connection's handle obtained from
- *					qed_ll2_require_connection
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @connection_handle: LL2 connection's handle obtained from
+ *                     qed_ll2_require_connection.
+ *
+ * Return: Void.
  */
 void qed_ll2_release_connection(void *cxt, u8 connection_handle);
 
 /**
- * @brief qed_ll2_set_fragment_of_tx_packet -	provides fragments to fill
- *						Tx BD of BDs requested by
- *						qed_ll2_prepare_tx_packet
+ * qed_ll2_set_fragment_of_tx_packet(): Provides fragments to fill
+ *                                      Tx BD of BDs requested by
+ *                                      qed_ll2_prepare_tx_packet
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param connection_handle			LL2 connection's handle
- *						obtained from
- *						qed_ll2_require_connection
- * @param addr
- * @param nbytes
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @connection_handle: LL2 connection's handle obtained from
+ *                     qed_ll2_require_connection.
+ * @addr: Address.
+ * @nbytes: Number of bytes.
  *
- * @return 0 on success, failure otherwise
+ * Return: 0 on success, failure otherwise.
  */
 int qed_ll2_set_fragment_of_tx_packet(void *cxt,
 				      u8 connection_handle,
 				      dma_addr_t addr, u16 nbytes);
 
 /**
- * @brief qed_ll2_terminate_connection -	stops Tx/Rx queues
- *
+ * qed_ll2_terminate_connection(): Stops Tx/Rx queues
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param connection_handle			LL2 connection's handle
- *						obtained from
- *						qed_ll2_require_connection
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @connection_handle: LL2 connection's handle obtained from
+ *                    qed_ll2_require_connection.
  *
- * @return 0 on success, failure otherwise
+ * Return: 0 on success, failure otherwise.
  */
 int qed_ll2_terminate_connection(void *cxt, u8 connection_handle);
 
 /**
- * @brief qed_ll2_get_stats -	get LL2 queue's statistics
- *
+ * qed_ll2_get_stats(): Get LL2 queue's statistics
  *
- * @param cxt - pointer to the hw-function [opaque to some]
- * @param connection_handle	LL2 connection's handle obtained from
- *				qed_ll2_require_connection
- * @param p_stats
+ * @cxt: Pointer to the hw-function [opaque to some].
+ * @connection_handle: LL2 connection's handle obtained from
+ *                    qed_ll2_require_connection.
+ * @p_stats: Pointer Status.
  *
- * @return 0 on success, failure otherwise
+ * Return: 0 on success, failure otherwise.
  */
 int qed_ll2_get_stats(void *cxt,
 		      u8 connection_handle, struct qed_ll2_stats *p_stats);
 
 /**
- * @brief qed_ll2_alloc - Allocates LL2 connections set
+ * qed_ll2_alloc(): Allocates LL2 connections set.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_ll2_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_ll2_setup - Inits LL2 connections set
+ * qed_ll2_setup(): Inits LL2 connections set.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  *
  */
 void qed_ll2_setup(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_ll2_free - Releases LL2 connections set
+ * qed_ll2_free(): Releases LL2 connections set
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  *
  */
 void qed_ll2_free(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 8edb450d0abf..352b757183e8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -266,97 +266,97 @@ union qed_mfw_tlv_data {
 #define QED_NVM_CFG_OPTION_ENTITY_SEL	BIT(4)
 
 /**
- * @brief - returns the link params of the hw function
+ * qed_mcp_get_link_params(): Returns the link params of the hw function.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @returns pointer to link params
+ * Returns: Pointer to link params.
  */
-struct qed_mcp_link_params *qed_mcp_get_link_params(struct qed_hwfn *);
+struct qed_mcp_link_params *qed_mcp_get_link_params(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief - return the link state of the hw function
+ * qed_mcp_get_link_state(): Return the link state of the hw function.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @returns pointer to link state
+ * Returns: Pointer to link state.
  */
-struct qed_mcp_link_state *qed_mcp_get_link_state(struct qed_hwfn *);
+struct qed_mcp_link_state *qed_mcp_get_link_state(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief - return the link capabilities of the hw function
+ * qed_mcp_get_link_capabilities(): Return the link capabilities of the
+ *                                  hw function.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @returns pointer to link capabilities
+ * Returns: Pointer to link capabilities.
  */
 struct qed_mcp_link_capabilities
 	*qed_mcp_get_link_capabilities(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief Request the MFW to set the the link according to 'link_input'.
+ * qed_mcp_set_link(): Request the MFW to set the link according
+ *                     to 'link_input'.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param b_up - raise link if `true'. Reset link if `false'.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @b_up: Raise link if `true'. Reset link if `false'.
  *
- * @return int
+ * Return: Int.
  */
 int qed_mcp_set_link(struct qed_hwfn   *p_hwfn,
 		     struct qed_ptt     *p_ptt,
 		     bool               b_up);
 
 /**
- * @brief Get the management firmware version value
+ * qed_mcp_get_mfw_ver(): Get the management firmware version value.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_mfw_ver    - mfw version value
- * @param p_running_bundle_id	- image id in nvram; Optional.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_mfw_ver: MFW version value.
+ * @p_running_bundle_id: Image id in nvram; Optional.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - operation was successful.
  */
 int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn,
 			struct qed_ptt *p_ptt,
 			u32 *p_mfw_ver, u32 *p_running_bundle_id);
 
 /**
- * @brief Get the MBI version value
+ * qed_mcp_get_mbi_ver(): Get the MBI version value.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_mbi_ver - A pointer to a variable to be filled with the MBI version.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_mbi_ver: A pointer to a variable to be filled with the MBI version.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - operation was successful.
  */
 int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
 			struct qed_ptt *p_ptt, u32 *p_mbi_ver);
 
 /**
- * @brief Get media type value of the port.
+ * qed_mcp_get_media_type(): Get media type value of the port.
  *
- * @param cdev      - qed dev pointer
- * @param p_ptt
- * @param mfw_ver    - media type value
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @media_type: Media type value
  *
- * @return int -
- *      0 - Operation was successul.
- *      -EBUSY - Operation failed
+ * Return: Int - 0 - Operation was successul.
+ *              -EBUSY - Operation failed
  */
 int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt, u32 *media_type);
 
 /**
- * @brief Get transceiver data of the port.
+ * qed_mcp_get_transceiver_data(): Get transceiver data of the port.
  *
- * @param cdev      - qed dev pointer
- * @param p_ptt
- * @param p_transceiver_state - transceiver state.
- * @param p_transceiver_type - media type value
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_transceiver_state: Transceiver state.
+ * @p_tranceiver_type: Media type value.
  *
- * @return int -
- *      0 - Operation was successful.
- *      -EBUSY - Operation failed
+ * Return: Int - 0 - Operation was successul.
+ *              -EBUSY - Operation failed
  */
 int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt,
@@ -364,50 +364,48 @@ int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
 				 u32 *p_tranceiver_type);
 
 /**
- * @brief Get transceiver supported speed mask.
+ * qed_mcp_trans_speed_mask(): Get transceiver supported speed mask.
  *
- * @param cdev      - qed dev pointer
- * @param p_ptt
- * @param p_speed_mask - Bit mask of all supported speeds.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_speed_mask: Bit mask of all supported speeds.
  *
- * @return int -
- *      0 - Operation was successful.
- *      -EBUSY - Operation failed
+ * Return: Int - 0 - Operation was successul.
+ *              -EBUSY - Operation failed
  */
 
 int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt, u32 *p_speed_mask);
 
 /**
- * @brief Get board configuration.
+ * qed_mcp_get_board_config(): Get board configuration.
  *
- * @param cdev      - qed dev pointer
- * @param p_ptt
- * @param p_board_config - Board config.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_board_config: Board config.
  *
- * @return int -
- *      0 - Operation was successful.
- *      -EBUSY - Operation failed
+ * Return: Int - 0 - Operation was successul.
+ *              -EBUSY - Operation failed
  */
 int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
 			     struct qed_ptt *p_ptt, u32 *p_board_config);
 
 /**
- * @brief General function for sending commands to the MCP
- *        mailbox. It acquire mutex lock for the entire
- *        operation, from sending the request until the MCP
- *        response. Waiting for MCP response will be checked up
- *        to 5 seconds every 5ms.
+ * qed_mcp_cmd(): General function for sending commands to the MCP
+ *                mailbox. It acquire mutex lock for the entire
+ *                operation, from sending the request until the MCP
+ *                response. Waiting for MCP response will be checked up
+ *                to 5 seconds every 5ms.
  *
- * @param p_hwfn     - hw function
- * @param p_ptt      - PTT required for register access
- * @param cmd        - command to be sent to the MCP.
- * @param param      - Optional param
- * @param o_mcp_resp - The MCP response code (exclude sequence).
- * @param o_mcp_param- Optional parameter provided by the MCP
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @cmd: command to be sent to the MCP.
+ * @param: Optional param
+ * @o_mcp_resp: The MCP response code (exclude sequence).
+ * @o_mcp_param: Optional parameter provided by the MCP
  *                     response
- * @return int - 0 - operation
- * was successul.
+ *
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
 		struct qed_ptt *p_ptt,
@@ -417,37 +415,39 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
 		u32 *o_mcp_param);
 
 /**
- * @brief - drains the nig, allowing completion to pass in case of pauses.
- *          (Should be called only from sleepable context)
+ * qed_mcp_drain(): drains the nig, allowing completion to pass in
+ *                  case of pauses.
+ *                  (Should be called only from sleepable context)
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ *
+ * Return: Int.
  */
 int qed_mcp_drain(struct qed_hwfn *p_hwfn,
 		  struct qed_ptt *p_ptt);
 
 /**
- * @brief Get the flash size value
+ * qed_mcp_get_flash_size(): Get the flash size value.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_flash_size  - flash size in bytes to be filled.
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @p_flash_size: Flash size in bytes to be filled.
  *
- * @return int - 0 - operation was successul.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_get_flash_size(struct qed_hwfn     *p_hwfn,
 			   struct qed_ptt       *p_ptt,
 			   u32 *p_flash_size);
 
 /**
- * @brief Send driver version to MFW
+ * qed_mcp_send_drv_version(): Send driver version to MFW.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param version - Version value
- * @param name - Protocol driver name
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @p_ver: Version value.
  *
- * @return int - 0 - operation was successul.
+ * Return: Int - 0 - Operation was successul.
  */
 int
 qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
@@ -455,146 +455,148 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 			 struct qed_mcp_drv_version *p_ver);
 
 /**
- * @brief Read the MFW process kill counter
+ * qed_get_process_kill_counter(): Read the MFW process kill counter.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
  *
- * @return u32
+ * Return: u32.
  */
 u32 qed_get_process_kill_counter(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt);
 
 /**
- * @brief Trigger a recovery process
+ * qed_start_recovery_process(): Trigger a recovery process.
  *
- *  @param p_hwfn
- *  @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
  *
- * @return int
+ * Return: Int.
  */
 int qed_start_recovery_process(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief A recovery handler must call this function as its first step.
- *        It is assumed that the handler is not run from an interrupt context.
+ * qed_recovery_prolog(): A recovery handler must call this function
+ *                        as its first step.
+ *                        It is assumed that the handler is not run from
+ *                        an interrupt context.
  *
- *  @param cdev
- *  @param p_ptt
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: int.
  */
 int qed_recovery_prolog(struct qed_dev *cdev);
 
 /**
- * @brief Notify MFW about the change in base device properties
+ * qed_mcp_ov_update_current_config(): Notify MFW about the change in base
+ *                                    device properties
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param client - qed client type
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @client: Qed client type.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
 				     struct qed_ptt *p_ptt,
 				     enum qed_ov_client client);
 
 /**
- * @brief Notify MFW about the driver state
+ * qed_mcp_ov_update_driver_state(): Notify MFW about the driver state.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param drv_state - Driver state
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @drv_state: Driver state.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
 				   struct qed_ptt *p_ptt,
 				   enum qed_ov_driver_state drv_state);
 
 /**
- * @brief Send MTU size to MFW
+ * qed_mcp_ov_update_mtu(): Send MTU size to MFW.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param mtu - MTU size
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @mtu: MTU size.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u16 mtu);
 
 /**
- * @brief Send MAC address to MFW
+ * qed_mcp_ov_update_mac(): Send MAC address to MFW.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param mac - MAC address
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @mac: MAC address.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u8 *mac);
 
 /**
- * @brief Send WOL mode to MFW
+ * qed_mcp_ov_update_wol(): Send WOL mode to MFW.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param wol - WOL mode
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @wol: WOL mode.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt,
 			  enum qed_ov_wol wol);
 
 /**
- * @brief Set LED status
+ * qed_mcp_set_led(): Set LED status.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param mode - LED mode
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @mode: LED mode.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt,
 		    enum qed_led_mode mode);
 
 /**
- * @brief Read from nvm
+ * qed_mcp_nvm_read(): Read from NVM.
  *
- *  @param cdev
- *  @param addr - nvm offset
- *  @param p_buf - nvm read buffer
- *  @param len - buffer len
+ * @cdev: Qed dev pointer.
+ * @addr: NVM offset.
+ * @p_buf: NVM read buffer.
+ * @len: Buffer len.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len);
 
 /**
- * @brief Write to nvm
+ * qed_mcp_nvm_write(): Write to NVM.
  *
- *  @param cdev
- *  @param addr - nvm offset
- *  @param cmd - nvm command
- *  @param p_buf - nvm write buffer
- *  @param len - buffer len
+ * @cdev: Qed dev pointer.
+ * @addr: NVM offset.
+ * @cmd: NVM command.
+ * @p_buf: NVM write buffer.
+ * @len: Buffer len.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_nvm_write(struct qed_dev *cdev,
 		      u32 cmd, u32 addr, u8 *p_buf, u32 len);
 
 /**
- * @brief Check latest response
+ * qed_mcp_nvm_resp(): Check latest response.
  *
- *  @param cdev
- *  @param p_buf - nvm write buffer
+ * @cdev: Qed dev pointer.
+ * @p_buf: NVM write buffer.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_nvm_resp(struct qed_dev *cdev, u8 *p_buf);
 
@@ -604,13 +606,13 @@ struct qed_nvm_image_att {
 };
 
 /**
- * @brief Allows reading a whole nvram image
+ * qed_mcp_get_nvm_image_att(): Allows reading a whole nvram image.
  *
- * @param p_hwfn
- * @param image_id - image to get attributes for
- * @param p_image_att - image attributes structure into which to fill data
+ * @p_hwfn: HW device data.
+ * @image_id: Image to get attributes for.
+ * @p_image_att: Image attributes structure into which to fill data.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int
 qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
@@ -618,64 +620,65 @@ qed_mcp_get_nvm_image_att(struct qed_hwfn *p_hwfn,
 			  struct qed_nvm_image_att *p_image_att);
 
 /**
- * @brief Allows reading a whole nvram image
+ * qed_mcp_get_nvm_image(): Allows reading a whole nvram image.
  *
- * @param p_hwfn
- * @param image_id - image requested for reading
- * @param p_buffer - allocated buffer into which to fill data
- * @param buffer_len - length of the allocated buffer.
+ * @p_hwfn: HW device data.
+ * @image_id: image requested for reading.
+ * @p_buffer: allocated buffer into which to fill data.
+ * @buffer_len: length of the allocated buffer.
  *
- * @return 0 iff p_buffer now contains the nvram image.
+ * Return: 0 if p_buffer now contains the nvram image.
  */
 int qed_mcp_get_nvm_image(struct qed_hwfn *p_hwfn,
 			  enum qed_nvm_images image_id,
 			  u8 *p_buffer, u32 buffer_len);
 
 /**
- * @brief Bist register test
+ * qed_mcp_bist_register_test(): Bist register test.
  *
- *  @param p_hwfn    - hw function
- *  @param p_ptt     - PTT required for register access
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn,
 			       struct qed_ptt *p_ptt);
 
 /**
- * @brief Bist clock test
+ * qed_mcp_bist_clock_test(): Bist clock test.
  *
- *  @param p_hwfn    - hw function
- *  @param p_ptt     - PTT required for register access
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn,
 			    struct qed_ptt *p_ptt);
 
 /**
- * @brief Bist nvm test - get number of images
+ * qed_mcp_bist_nvm_get_num_images(): Bist nvm test - get number of images.
  *
- *  @param p_hwfn       - hw function
- *  @param p_ptt        - PTT required for register access
- *  @param num_images   - number of images if operation was
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @num_images: number of images if operation was
  *			  successful. 0 if not.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_bist_nvm_get_num_images(struct qed_hwfn *p_hwfn,
 				    struct qed_ptt *p_ptt,
 				    u32 *num_images);
 
 /**
- * @brief Bist nvm test - get image attributes by index
+ * qed_mcp_bist_nvm_get_image_att(): Bist nvm test - get image attributes
+ *                                   by index.
  *
- *  @param p_hwfn      - hw function
- *  @param p_ptt       - PTT required for register access
- *  @param p_image_att - Attributes of image
- *  @param image_index - Index of image to get information for
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @p_image_att: Attributes of image.
+ * @image_index: Index of image to get information for.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
 				   struct qed_ptt *p_ptt,
@@ -683,23 +686,26 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
 				   u32 image_index);
 
 /**
- * @brief - Processes the TLV request from MFW i.e., get the required TLV info
- *          from the qed client and send it to the MFW.
+ * qed_mfw_process_tlv_req(): Processes the TLV request from MFW i.e.,
+ *                            get the required TLV info
+ *                            from the qed client and send it to the MFW.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param return 0 upon success.
+ * Return: 0 upon success.
  */
 int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Send raw debug data to the MFW
+ * qed_mcp_send_raw_debug_data(): Send raw debug data to the MFW
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_buf: raw debug data buffer.
+ * @size: Buffer size.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_buf - raw debug data buffer
- * @param size - buffer size
+ * Return : Int.
  */
 int
 qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
@@ -796,47 +802,49 @@ qed_mcp_is_ext_speed_supported(const struct qed_hwfn *p_hwfn)
 }
 
 /**
- * @brief Initialize the interface with the MCP
+ * qed_mcp_cmd_init(): Initialize the interface with the MCP.
  *
- * @param p_hwfn - HW func
- * @param p_ptt - PTT required for register access
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
  *
- * @return int
+ * Return: Int.
  */
 int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt);
 
 /**
- * @brief Initialize the port interface with the MCP
+ * qed_mcp_cmd_port_init(): Initialize the port interface with the MCP
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Void.
  *
- * @param p_hwfn
- * @param p_ptt
  * Can only be called after `num_ports_in_engines' is set
  */
 void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt);
 /**
- * @brief Releases resources allocated during the init process.
+ * qed_mcp_free(): Releases resources allocated during the init process.
  *
- * @param p_hwfn - HW func
- * @param p_ptt - PTT required for register access
+ * @p_hwfn: HW function.
  *
- * @return int
+ * Return: Int.
  */
 
 int qed_mcp_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief This function is called from the DPC context. After
- * pointing PTT to the mfw mb, check for events sent by the MCP
- * to the driver and ack them. In case a critical event
- * detected, it will be handled here, otherwise the work will be
- * queued to a sleepable work-queue.
+ * qed_mcp_handle_events(): This function is called from the DPC context.
+ *           After pointing PTT to the mfw mb, check for events sent by
+ *           the MCP to the driver and ack them. In case a critical event
+ *           detected, it will be handled here, otherwise the work will be
+ *            queued to a sleepable work-queue.
+ *
+ * @p_hwfn: HW function.
+ * @p_ptt: PTT required for register access.
  *
- * @param p_hwfn - HW function
- * @param p_ptt - PTT required for register access
- * @return int - 0 - operation
- * was successul.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt);
@@ -858,106 +866,111 @@ struct qed_load_req_params {
 };
 
 /**
- * @brief Sends a LOAD_REQ to the MFW, and in case the operation succeeds,
- *        returns whether this PF is the first on the engine/port or function.
+ * qed_mcp_load_req(): Sends a LOAD_REQ to the MFW, and in case the
+ *                     operation succeeds, returns whether this PF is
+ *                     the first on the engine/port or function.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_params
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_params: Params.
  *
- * @return int - 0 - Operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt,
 		     struct qed_load_req_params *p_params);
 
 /**
- * @brief Sends a LOAD_DONE message to the MFW
+ * qed_mcp_load_done(): Sends a LOAD_DONE message to the MFW.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @return int - 0 - Operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_load_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Sends a UNLOAD_REQ message to the MFW
+ * qed_mcp_unload_req(): Sends a UNLOAD_REQ message to the MFW.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @return int - 0 - Operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_unload_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Sends a UNLOAD_DONE message to the MFW
+ * qed_mcp_unload_done(): Sends a UNLOAD_DONE message to the MFW
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @return int - 0 - Operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_unload_done(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Read the MFW mailbox into Current buffer.
+ * qed_mcp_read_mb(): Read the MFW mailbox into Current buffer.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Void.
  */
 void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
 		     struct qed_ptt *p_ptt);
 
 /**
- * @brief Ack to mfw that driver finished FLR process for VFs
+ * qed_mcp_ack_vf_flr(): Ack to mfw that driver finished FLR process for VFs
  *
- * @param p_hwfn
- * @param p_ptt
- * @param vfs_to_ack - bit mask of all engine VFs for which the PF acks.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @vfs_to_ack: bit mask of all engine VFs for which the PF acks.
  *
- * @param return int - 0 upon success.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ack_vf_flr(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt, u32 *vfs_to_ack);
 
 /**
- * @brief - calls during init to read shmem of all function-related info.
+ * qed_mcp_fill_shmem_func_info(): Calls during init to read shmem of
+ *                                 all function-related info.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param return 0 upon success.
+ * Return: 0 upon success.
  */
 int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 				 struct qed_ptt *p_ptt);
 
 /**
- * @brief - Reset the MCP using mailbox command.
+ * qed_mcp_reset(): Reset the MCP using mailbox command.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param return 0 upon success.
+ * Return: 0 upon success.
  */
 int qed_mcp_reset(struct qed_hwfn *p_hwfn,
 		  struct qed_ptt *p_ptt);
 
 /**
- * @brief - Sends an NVM read command request to the MFW to get
- *        a buffer.
+ * qed_mcp_nvm_rd_cmd(): Sends an NVM read command request to the MFW to get
+ *                       a buffer.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or
- *            DRV_MSG_CODE_NVM_READ_NVRAM commands
- * @param param - [0:23] - Offset [24:31] - Size
- * @param o_mcp_resp - MCP response
- * @param o_mcp_param - MCP response param
- * @param o_txn_size -  Buffer size output
- * @param o_buf - Pointer to the buffer returned by the MFW.
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @cmd: (Command) DRV_MSG_CODE_NVM_GET_FILE_DATA or
+ *            DRV_MSG_CODE_NVM_READ_NVRAM commands.
+ * @param: [0:23] - Offset [24:31] - Size.
+ * @o_mcp_resp: MCP response.
+ * @o_mcp_param: MCP response param.
+ * @o_txn_size: Buffer size output.
+ * @o_buf: Pointer to the buffer returned by the MFW.
  *
- * @param return 0 upon success.
+ * Return: 0 upon success.
  */
 int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
@@ -967,60 +980,61 @@ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
 		       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf);
 
 /**
- * @brief Read from sfp
+ * qed_mcp_phy_sfp_read(): Read from sfp.
  *
- *  @param p_hwfn - hw function
- *  @param p_ptt  - PTT required for register access
- *  @param port   - transceiver port
- *  @param addr   - I2C address
- *  @param offset - offset in sfp
- *  @param len    - buffer length
- *  @param p_buf  - buffer to read into
+ * @p_hwfn: HW device data.
+ * @p_ptt: PTT required for register access.
+ * @port: transceiver port.
+ * @addr: I2C address.
+ * @offset: offset in sfp.
+ * @len: buffer length.
+ * @p_buf: buffer to read into.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_phy_sfp_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			 u32 port, u32 addr, u32 offset, u32 len, u8 *p_buf);
 
 /**
- * @brief indicates whether the MFW objects [under mcp_info] are accessible
+ * qed_mcp_is_init(): indicates whether the MFW objects [under mcp_info]
+ *                    are accessible
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return true iff MFW is running and mcp_info is initialized
+ * Return: true if MFW is running and mcp_info is initialized.
  */
 bool qed_mcp_is_init(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief request MFW to configure MSI-X for a VF
+ * qed_mcp_config_vf_msix(): Request MFW to configure MSI-X for a VF.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param vf_id - absolute inside engine
- * @param num_sbs - number of entries to request
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @vf_id: absolute inside engine.
+ * @num: number of entries to request.
  *
- * @return int
+ * Return: Int.
  */
 int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt, u8 vf_id, u8 num);
 
 /**
- * @brief - Halt the MCP.
+ * qed_mcp_halt(): Halt the MCP.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param return 0 upon success.
+ * Return: 0 upon success.
  */
 int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief - Wake up the MCP.
+ * qed_mcp_resume: Wake up the MCP.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param return 0 upon success.
+ * Return: 0 upon success.
  */
 int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
@@ -1038,13 +1052,13 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
 int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u32 mask_parities);
 
-/* @brief - Gets the mdump retained data from the MFW.
+/* qed_mcp_mdump_get_retain(): Gets the mdump retained data from the MFW.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_mdump_retain
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_mdump_retain: mdump retain.
  *
- * @param return 0 upon success.
+ * Return: Int - 0 - Operation was successul.
  */
 int
 qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
@@ -1052,15 +1066,15 @@ qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
 			 struct mdump_retain_data_stc *p_mdump_retain);
 
 /**
- * @brief - Sets the MFW's max value for the given resource
+ * qed_mcp_set_resc_max_val(): Sets the MFW's max value for the given resource.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param res_id
- *  @param resc_max_val
- *  @param p_mcp_resp
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @res_id: RES ID.
+ * @resc_max_val: Resec max val.
+ * @p_mcp_resp: MCP Resp
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int
 qed_mcp_set_resc_max_val(struct qed_hwfn *p_hwfn,
@@ -1069,16 +1083,17 @@ qed_mcp_set_resc_max_val(struct qed_hwfn *p_hwfn,
 			 u32 resc_max_val, u32 *p_mcp_resp);
 
 /**
- * @brief - Gets the MFW allocation info for the given resource
+ * qed_mcp_get_resc_info(): Gets the MFW allocation info for the given
+ *                          resource.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param res_id
- *  @param p_mcp_resp
- *  @param p_resc_num
- *  @param p_resc_start
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @res_id: Res ID.
+ * @p_mcp_resp: MCP resp.
+ * @p_resc_num: Resc num.
+ * @p_resc_start: Resc start.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int
 qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
@@ -1087,13 +1102,13 @@ qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
 		      u32 *p_mcp_resp, u32 *p_resc_num, u32 *p_resc_start);
 
 /**
- * @brief Send eswitch mode to MFW
+ * qed_mcp_ov_update_eswitch(): Send eswitch mode to MFW.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param eswitch - eswitch mode
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @eswitch: eswitch mode.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt,
@@ -1113,12 +1128,12 @@ enum qed_resc_lock {
 };
 
 /**
- * @brief - Initiates PF FLR
+ * qed_mcp_initiate_pf_flr(): Initiates PF FLR.
  *
- *  @param p_hwfn
- *  @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_initiate_pf_flr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 struct qed_resc_lock_params {
@@ -1151,13 +1166,13 @@ struct qed_resc_lock_params {
 };
 
 /**
- * @brief Acquires MFW generic resource lock
+ * qed_mcp_resc_lock(): Acquires MFW generic resource lock.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param p_params
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_params: Params.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int
 qed_mcp_resc_lock(struct qed_hwfn *p_hwfn,
@@ -1175,13 +1190,13 @@ struct qed_resc_unlock_params {
 };
 
 /**
- * @brief Releases MFW generic resource lock
+ * qed_mcp_resc_unlock(): Releases MFW generic resource lock.
  *
- *  @param p_hwfn
- *  @param p_ptt
- *  @param p_params
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_params: Params.
  *
- * @return int - 0 - operation was successful.
+ * Return: Int - 0 - Operation was successul.
  */
 int
 qed_mcp_resc_unlock(struct qed_hwfn *p_hwfn,
@@ -1189,12 +1204,15 @@ qed_mcp_resc_unlock(struct qed_hwfn *p_hwfn,
 		    struct qed_resc_unlock_params *p_params);
 
 /**
- * @brief - default initialization for lock/unlock resource structs
+ * qed_mcp_resc_lock_default_init(): Default initialization for
+ *                                   lock/unlock resource structs.
  *
- * @param p_lock - lock params struct to be initialized; Can be NULL
- * @param p_unlock - unlock params struct to be initialized; Can be NULL
- * @param resource - the requested resource
- * @paral b_is_permanent - disable retries & aging when set
+ * @p_lock: lock params struct to be initialized; Can be NULL.
+ * @p_unlock: unlock params struct to be initialized; Can be NULL.
+ * @resource: the requested resource.
+ * @b_is_permanent: disable retries & aging when set.
+ *
+ * Return: Void.
  */
 void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
 				    struct qed_resc_unlock_params *p_unlock,
@@ -1202,94 +1220,117 @@ void qed_mcp_resc_lock_default_init(struct qed_resc_lock_params *p_lock,
 				    resource, bool b_is_permanent);
 
 /**
- * @brief - Return whether management firmware support smart AN
+ * qed_mcp_is_smart_an_supported(): Return whether management firmware
+ *                                  support smart AN
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return bool - true if feature is supported.
+ * Return: bool true if feature is supported.
  */
 bool qed_mcp_is_smart_an_supported(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief Learn of supported MFW features; To be done during early init
+ * qed_mcp_get_capabilities(): Learn of supported MFW features;
+ *                             To be done during early init.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Int.
  */
 int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Inform MFW of set of features supported by driver. Should be done
- * inside the content of the LOAD_REQ.
+ * qed_mcp_set_capabilities(): Inform MFW of set of features supported
+ *                             by driver. Should be done inside the content
+ *                             of the LOAD_REQ.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param p_hwfn
- * @param p_ptt
+ * Return: Int.
  */
 int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Read ufp config from the shared memory.
+ * qed_mcp_read_ufp_config(): Read ufp config from the shared memory.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
  *
- * @param p_hwfn
- * @param p_ptt
+ * Return: Void.
  */
 void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Populate the nvm info shadow in the given hardware function
+ * qed_mcp_nvm_info_populate(): Populate the nvm info shadow in the given
+ *                              hardware function.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Int.
  */
 int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief Delete nvm info shadow in the given hardware function
+ * qed_mcp_nvm_info_free(): Delete nvm info shadow in the given
+ *                          hardware function.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  */
 void qed_mcp_nvm_info_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief Get the engine affinity configuration.
+ * qed_mcp_get_engine_config(): Get the engine affinity configuration.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Int.
  */
 int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Get the PPFID bitmap.
+ * qed_mcp_get_ppfid_bitmap(): Get the PPFID bitmap.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ *
+ * Return: Int.
  */
 int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
 /**
- * @brief Get NVM config attribute value.
+ * qed_mcp_nvm_get_cfg(): Get NVM config attribute value.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @option_id: Option ID.
+ * @entity_id: Entity ID.
+ * @flags: Flags.
+ * @p_buf: Buf.
+ * @p_len: Len.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param option_id
- * @param entity_id
- * @param flags
- * @param p_buf
- * @param p_len
+ * Return: Int.
  */
 int qed_mcp_nvm_get_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
 			u32 *p_len);
 
 /**
- * @brief Set NVM config attribute value.
+ * qed_mcp_nvm_set_cfg(): Set NVM config attribute value.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param option_id
- * @param entity_id
- * @param flags
- * @param p_buf
- * @param len
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @option_id: Option ID.
+ * @entity_id: Entity ID.
+ * @flags: Flags.
+ * @p_buf: Buf.
+ * @len: Len.
+ *
+ * Return: Int.
  */
 int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			u16 option_id, u8 entity_id, u16 flags, u8 *p_buf,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.h b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
index e27dd9a4547e..7a3bd749e1e4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
@@ -6,47 +6,47 @@
 #include <linux/types.h>
 
 /**
- * @brief qed_selftest_memory - Perform memory test
+ * qed_selftest_memory(): Perform memory test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: Int.
  */
 int qed_selftest_memory(struct qed_dev *cdev);
 
 /**
- * @brief qed_selftest_interrupt - Perform interrupt test
+ * qed_selftest_interrupt(): Perform interrupt test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: Int.
  */
 int qed_selftest_interrupt(struct qed_dev *cdev);
 
 /**
- * @brief qed_selftest_register - Perform register test
+ * qed_selftest_register(): Perform register test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: Int.
  */
 int qed_selftest_register(struct qed_dev *cdev);
 
 /**
- * @brief qed_selftest_clock - Perform clock test
+ * qed_selftest_clock(): Perform clock test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: Int.
  */
 int qed_selftest_clock(struct qed_dev *cdev);
 
 /**
- * @brief qed_selftest_nvram - Perform nvram test
+ * qed_selftest_nvram(): Perform nvram test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return int
+ * Return: Int.
  */
 int qed_selftest_nvram(struct qed_dev *cdev);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index 60ff3222bf55..c5a38f3c92b0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -31,23 +31,18 @@ struct qed_spq_comp_cb {
 };
 
 /**
- * @brief qed_eth_cqe_completion - handles the completion of a
- *        ramrod on the cqe ring
+ * qed_eth_cqe_completion(): handles the completion of a
+ *                           ramrod on the cqe ring.
  *
- * @param p_hwfn
- * @param cqe
+ * @p_hwfn: HW device data.
+ * @cqe: CQE.
  *
- * @return int
+ * Return: Int.
  */
 int qed_eth_cqe_completion(struct qed_hwfn *p_hwfn,
 			   struct eth_slow_path_rx_cqe *cqe);
 
-/**
- *  @file
- *
- *  QED Slow-hwfn queue interface
- */
-
+ /*  QED Slow-hwfn queue interface */
 union ramrod_data {
 	struct pf_start_ramrod_data pf_start;
 	struct pf_update_ramrod_data pf_update;
@@ -207,117 +202,128 @@ struct qed_spq {
 };
 
 /**
- * @brief qed_spq_post - Posts a Slow hwfn request to FW, or lacking that
- *        Pends it to the future list.
+ * qed_spq_post(): Posts a Slow hwfn request to FW, or lacking that
+ *                 Pends it to the future list.
  *
- * @param p_hwfn
- * @param p_req
+ * @p_hwfn: HW device data.
+ * @p_ent: Ent.
+ * @fw_return_code: Return code from firmware.
  *
- * @return int
+ * Return: Int.
  */
 int qed_spq_post(struct qed_hwfn *p_hwfn,
 		 struct qed_spq_entry *p_ent,
 		 u8 *fw_return_code);
 
 /**
- * @brief qed_spq_allocate - Alloocates & initializes the SPQ and EQ.
+ * qed_spq_alloc(): Alloocates & initializes the SPQ and EQ.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_spq_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_spq_setup - Reset the SPQ to its start state.
+ * qed_spq_setup(): Reset the SPQ to its start state.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  */
 void qed_spq_setup(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_spq_deallocate - Deallocates the given SPQ struct.
+ * qed_spq_free(): Deallocates the given SPQ struct.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_spq_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_spq_get_entry - Obtain an entrry from the spq
- *        free pool list.
- *
- *
+ * qed_spq_get_entry(): Obtain an entrry from the spq
+ *                      free pool list.
  *
- * @param p_hwfn
- * @param pp_ent
+ * @p_hwfn: HW device data.
+ * @pp_ent: PP ENT.
  *
- * @return int
+ * Return: Int.
  */
 int
 qed_spq_get_entry(struct qed_hwfn *p_hwfn,
 		  struct qed_spq_entry **pp_ent);
 
 /**
- * @brief qed_spq_return_entry - Return an entry to spq free
- *                                 pool list
+ * qed_spq_return_entry(): Return an entry to spq free pool list.
  *
- * @param p_hwfn
- * @param p_ent
+ * @p_hwfn: HW device data.
+ * @p_ent: P ENT.
+ *
+ * Return: Void.
  */
 void qed_spq_return_entry(struct qed_hwfn *p_hwfn,
 			  struct qed_spq_entry *p_ent);
 /**
- * @brief qed_eq_allocate - Allocates & initializes an EQ struct
+ * qed_eq_alloc(): Allocates & initializes an EQ struct.
  *
- * @param p_hwfn
- * @param num_elem number of elements in the eq
+ * @p_hwfn: HW device data.
+ * @num_elem: number of elements in the eq.
  *
- * @return int
+ * Return: Int.
  */
 int qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem);
 
 /**
- * @brief qed_eq_setup - Reset the EQ to its start state.
+ * qed_eq_setup(): Reset the EQ to its start state.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_eq_setup(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_eq_free - deallocates the given EQ struct.
+ * qed_eq_free(): deallocates the given EQ struct.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  */
 void qed_eq_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_eq_prod_update - update the FW with default EQ producer
+ * qed_eq_prod_update(): update the FW with default EQ producer.
+ *
+ * @p_hwfn: HW device data.
+ * @prod: Prod.
  *
- * @param p_hwfn
- * @param prod
+ * Return: Void.
  */
 void qed_eq_prod_update(struct qed_hwfn *p_hwfn,
 			u16 prod);
 
 /**
- * @brief qed_eq_completion - Completes currently pending EQ elements
+ * qed_eq_completion(): Completes currently pending EQ elements.
  *
- * @param p_hwfn
- * @param cookie
+ * @p_hwfn: HW device data.
+ * @cookie: Cookie.
  *
- * @return int
+ * Return: Int.
  */
 int qed_eq_completion(struct qed_hwfn *p_hwfn,
 		      void *cookie);
 
 /**
- * @brief qed_spq_completion - Completes a single event
+ * qed_spq_completion(): Completes a single event.
  *
- * @param p_hwfn
- * @param echo - echo value from cookie (used for determining completion)
- * @param p_data - data from cookie (used in callback function if applicable)
+ * @p_hwfn: HW device data.
+ * @echo: echo value from cookie (used for determining completion).
+ * @fw_return_code: FW return code.
+ * @p_data: data from cookie (used in callback function if applicable).
  *
- * @return int
+ * Return: Int.
  */
 int qed_spq_completion(struct qed_hwfn *p_hwfn,
 		       __le16 echo,
@@ -325,44 +331,43 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn,
 		       union event_ring_data *p_data);
 
 /**
- * @brief qed_spq_get_cid - Given p_hwfn, return cid for the hwfn's SPQ
+ * qed_spq_get_cid(): Given p_hwfn, return cid for the hwfn's SPQ.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return u32 - SPQ CID
+ * Return: u32 - SPQ CID.
  */
 u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_consq_alloc - Allocates & initializes an ConsQ
- *        struct
+ * qed_consq_alloc(): Allocates & initializes an ConsQ struct.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_consq_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_consq_setup - Reset the ConsQ to its start state.
+ * qed_consq_setup(): Reset the ConsQ to its start state.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return Void.
  */
 void qed_consq_setup(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_consq_free - deallocates the given ConsQ struct.
+ * qed_consq_free(): deallocates the given ConsQ struct.
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return Void.
  */
 void qed_consq_free(struct qed_hwfn *p_hwfn);
 int qed_spq_pend_post(struct qed_hwfn *p_hwfn);
 
-/**
- * @file
- *
- * @brief Slow-hwfn low-level commands (Ramrods) function definitions.
- */
+/* Slow-hwfn low-level commands (Ramrods) function definitions. */
 
 #define QED_SP_EQ_COMPLETION  0x01
 #define QED_SP_CQE_COMPLETION 0x02
@@ -377,12 +382,15 @@ struct qed_sp_init_data {
 };
 
 /**
- * @brief Returns a SPQ entry to the pool / frees the entry if allocated.
- *        Should be called on in error flows after initializing the SPQ entry
- *        and before posting it.
+ * qed_sp_destroy_request(): Returns a SPQ entry to the pool / frees the
+ *                           entry if allocated. Should be called on in error
+ *                           flows after initializing the SPQ entry
+ *                           and before posting it.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ent: Ent.
  *
- * @param p_hwfn
- * @param p_ent
+ * Return: Void.
  */
 void qed_sp_destroy_request(struct qed_hwfn *p_hwfn,
 			    struct qed_spq_entry *p_ent);
@@ -394,7 +402,14 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 			struct qed_sp_init_data *p_data);
 
 /**
- * @brief qed_sp_pf_start - PF Function Start Ramrod
+ * qed_sp_pf_start(): PF Function Start Ramrod.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: P_ptt.
+ * @p_tunn: P_tunn.
+ * @allow_npar_tx_switch: Allow NPAR TX Switch.
+ *
+ * Return: Int.
  *
  * This ramrod is sent to initialize a physical function (PF). It will
  * configure the function related parameters and write its completion to the
@@ -404,12 +419,6 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
  * allocated by the driver on host memory and its parameters are written
  * to the internal RAM of the UStorm by the Function Start Ramrod.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_tunn
- * @param allow_npar_tx_switch
- *
- * @return int
  */
 
 int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
@@ -418,47 +427,33 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 		    bool allow_npar_tx_switch);
 
 /**
- * @brief qed_sp_pf_update - PF Function Update Ramrod
+ * qed_sp_pf_update(): PF Function Update Ramrod.
  *
- * This ramrod updates function-related parameters. Every parameter can be
- * updated independently, according to configuration flags.
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Int.
  *
- * @return int
+ * This ramrod updates function-related parameters. Every parameter can be
+ * updated independently, according to configuration flags.
  */
 
 int qed_sp_pf_update(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_sp_pf_update_stag - Update firmware of new outer tag
+ * qed_sp_pf_update_stag(): Update firmware of new outer tag.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_sp_pf_update_stag(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_sp_pf_stop - PF Function Stop Ramrod
- *
- * This ramrod is sent to close a Physical Function (PF). It is the last ramrod
- * sent and the last completion written to the PFs Event Ring. This ramrod also
- * deletes the context for the Slowhwfn connection on this PF.
- *
- * @note Not required for first packet.
- *
- * @param p_hwfn
- *
- * @return int
- */
-
-/**
- * @brief qed_sp_pf_update_ufp - PF ufp update Ramrod
+ * qed_sp_pf_update_ufp(): PF ufp update Ramrod.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn);
 
@@ -470,11 +465,11 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
 			      enum spq_mode comp_mode,
 			      struct qed_spq_comp_cb *p_comp_data);
 /**
- * @brief qed_sp_heartbeat_ramrod - Send empty Ramrod
+ * qed_sp_heartbeat_ramrod(): Send empty Ramrod.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 
 int qed_sp_heartbeat_ramrod(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index eacd6457f195..9f81295c6f45 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -250,29 +250,31 @@ extern const struct qed_iov_hv_ops qed_iov_ops_pass;
 
 #ifdef CONFIG_QED_SRIOV
 /**
- * @brief Check if given VF ID @vfid is valid
- *        w.r.t. @b_enabled_only value
- *        if b_enabled_only = true - only enabled VF id is valid
- *        else any VF id less than max_vfs is valid
+ * qed_iov_is_valid_vfid(): Check if given VF ID @vfid is valid
+ *                          w.r.t. @b_enabled_only value
+ *                          if b_enabled_only = true - only enabled
+ *                          VF id is valid.
+ *                          else any VF id less than max_vfs is valid.
  *
- * @param p_hwfn
- * @param rel_vf_id - Relative VF ID
- * @param b_enabled_only - consider only enabled VF
- * @param b_non_malicious - true iff we want to validate vf isn't malicious.
+ * @p_hwfn: HW device data.
+ * @rel_vf_id: Relative VF ID.
+ * @b_enabled_only: consider only enabled VF.
+ * @b_non_malicious: true iff we want to validate vf isn't malicious.
  *
- * @return bool - true for valid VF ID
+ * Return: bool - true for valid VF ID
  */
 bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
 			   int rel_vf_id,
 			   bool b_enabled_only, bool b_non_malicious);
 
 /**
- * @brief - Given a VF index, return index of next [including that] active VF.
+ * qed_iov_get_next_active_vf(): Given a VF index, return index of
+ *                               next [including that] active VF.
  *
- * @param p_hwfn
- * @param rel_vf_id
+ * @p_hwfn: HW device data.
+ * @rel_vf_id: VF ID.
  *
- * @return MAX_NUM_VFS in case no further active VFs, otherwise index.
+ * Return: MAX_NUM_VFS in case no further active VFs, otherwise index.
  */
 u16 qed_iov_get_next_active_vf(struct qed_hwfn *p_hwfn, u16 rel_vf_id);
 
@@ -280,83 +282,92 @@ void qed_iov_bulletin_set_udp_ports(struct qed_hwfn *p_hwfn,
 				    int vfid, u16 vxlan_port, u16 geneve_port);
 
 /**
- * @brief Read sriov related information and allocated resources
- *  reads from configuration space, shmem, etc.
+ * qed_iov_hw_info(): Read sriov related information and allocated resources
+ *                    reads from configuration space, shmem, etc.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_iov_hw_info(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_add_tlv - place a given tlv on the tlv buffer at next offset
+ * qed_add_tlv(): place a given tlv on the tlv buffer at next offset
  *
- * @param p_hwfn
- * @param p_iov
- * @param type
- * @param length
+ * @p_hwfn: HW device data.
+ * @offset: offset.
+ * @type: Type
+ * @length: Length.
  *
- * @return pointer to the newly placed tlv
+ * Return: pointer to the newly placed tlv
  */
 void *qed_add_tlv(struct qed_hwfn *p_hwfn, u8 **offset, u16 type, u16 length);
 
 /**
- * @brief list the types and lengths of the tlvs on the buffer
+ * qed_dp_tlv_list(): list the types and lengths of the tlvs on the buffer
  *
- * @param p_hwfn
- * @param tlvs_list
+ * @p_hwfn: HW device data.
+ * @tlvs_list: Tlvs_list.
+ *
+ * Return: Void.
  */
 void qed_dp_tlv_list(struct qed_hwfn *p_hwfn, void *tlvs_list);
 
 /**
- * @brief qed_iov_alloc - allocate sriov related resources
+ * qed_iov_alloc(): allocate sriov related resources
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_iov_alloc(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_iov_setup - setup sriov related resources
+ * qed_iov_setup(): setup sriov related resources
+ *
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
+ * Return: Void.
  */
 void qed_iov_setup(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_iov_free - free sriov related resources
+ * qed_iov_free(): free sriov related resources
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
+ *
+ * Return: Void.
  */
 void qed_iov_free(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief free sriov related memory that was allocated during hw_prepare
+ * qed_iov_free_hw_info(): free sriov related memory that was
+ *                          allocated during hw_prepare
+ *
+ * @cdev: Qed dev pointer.
  *
- * @param cdev
+ * Return: Void.
  */
 void qed_iov_free_hw_info(struct qed_dev *cdev);
 
 /**
- * @brief Mark structs of vfs that have been FLR-ed.
+ * qed_iov_mark_vf_flr(): Mark structs of vfs that have been FLR-ed.
  *
- * @param p_hwfn
- * @param disabled_vfs - bitmask of all VFs on path that were FLRed
+ * @p_hwfn: HW device data.
+ * @disabled_vfs: bitmask of all VFs on path that were FLRed
  *
- * @return true iff one of the PF's vfs got FLRed. false otherwise.
+ * Return: true iff one of the PF's vfs got FLRed. false otherwise.
  */
 bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *disabled_vfs);
 
 /**
- * @brief Search extended TLVs in request/reply buffer.
+ * qed_iov_search_list_tlvs(): Search extended TLVs in request/reply buffer.
  *
- * @param p_hwfn
- * @param p_tlvs_list - Pointer to tlvs list
- * @param req_type - Type of TLV
+ * @p_hwfn: HW device data.
+ * @p_tlvs_list: Pointer to tlvs list
+ * @req_type: Type of TLV
  *
- * @return pointer to tlv type if found, otherwise returns NULL.
+ * Return: pointer to tlv type if found, otherwise returns NULL.
  */
 void *qed_iov_search_list_tlvs(struct qed_hwfn *p_hwfn,
 			       void *p_tlvs_list, u16 req_type);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 60d2bb64e65f..976201fc7d4a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -688,13 +688,16 @@ struct qed_vf_iov {
 };
 
 /**
- * @brief VF - Set Rx/Tx coalesce per VF's relative queue.
- *             Coalesce value '0' will omit the configuration.
+ * qed_vf_pf_set_coalesce(): VF - Set Rx/Tx coalesce per VF's relative queue.
+ *                                Coalesce value '0' will omit the
+ *                                configuration.
  *
- * @param p_hwfn
- * @param rx_coal - coalesce value in micro second for rx queue
- * @param tx_coal - coalesce value in micro second for tx queue
- * @param p_cid   - queue cid
+ * @p_hwfn: HW device data.
+ * @rx_coal: coalesce value in micro second for rx queue.
+ * @tx_coal: coalesce value in micro second for tx queue.
+ * @p_cid: queue cid.
+ *
+ * Return: Int.
  *
  **/
 int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
@@ -702,148 +705,172 @@ int qed_vf_pf_set_coalesce(struct qed_hwfn *p_hwfn,
 			   u16 tx_coal, struct qed_queue_cid *p_cid);
 
 /**
- * @brief VF - Get coalesce per VF's relative queue.
+ * qed_vf_pf_get_coalesce(): VF - Get coalesce per VF's relative queue.
  *
- * @param p_hwfn
- * @param p_coal - coalesce value in micro second for VF queues.
- * @param p_cid  - queue cid
+ * @p_hwfn: HW device data.
+ * @p_coal: coalesce value in micro second for VF queues.
+ * @p_cid: queue cid.
  *
+ * Return: Int.
  **/
 int qed_vf_pf_get_coalesce(struct qed_hwfn *p_hwfn,
 			   u16 *p_coal, struct qed_queue_cid *p_cid);
 
 #ifdef CONFIG_QED_SRIOV
 /**
- * @brief Read the VF bulletin and act on it if needed
+ * qed_vf_read_bulletin(): Read the VF bulletin and act on it if needed.
  *
- * @param p_hwfn
- * @param p_change - qed fills 1 iff bulletin board has changed, 0 otherwise.
+ * @p_hwfn: HW device data.
+ * @p_change: qed fills 1 iff bulletin board has changed, 0 otherwise.
  *
- * @return enum _qed_status
+ * Return: enum _qed_status.
  */
 int qed_vf_read_bulletin(struct qed_hwfn *p_hwfn, u8 *p_change);
 
 /**
- * @brief Get link paramters for VF from qed
+ * qed_vf_get_link_params(): Get link parameters for VF from qed
+ *
+ * @p_hwfn: HW device data.
+ * @params: the link params structure to be filled for the VF.
  *
- * @param p_hwfn
- * @param params - the link params structure to be filled for the VF
+ * Return: Void.
  */
 void qed_vf_get_link_params(struct qed_hwfn *p_hwfn,
 			    struct qed_mcp_link_params *params);
 
 /**
- * @brief Get link state for VF from qed
+ * qed_vf_get_link_state(): Get link state for VF from qed.
+ *
+ * @p_hwfn: HW device data.
+ * @link: the link state structure to be filled for the VF
  *
- * @param p_hwfn
- * @param link - the link state structure to be filled for the VF
+ * Return: Void.
  */
 void qed_vf_get_link_state(struct qed_hwfn *p_hwfn,
 			   struct qed_mcp_link_state *link);
 
 /**
- * @brief Get link capabilities for VF from qed
+ * qed_vf_get_link_caps(): Get link capabilities for VF from qed.
  *
- * @param p_hwfn
- * @param p_link_caps - the link capabilities structure to be filled for the VF
+ * @p_hwfn: HW device data.
+ * @p_link_caps: the link capabilities structure to be filled for the VF
+ *
+ * Return: Void.
  */
 void qed_vf_get_link_caps(struct qed_hwfn *p_hwfn,
 			  struct qed_mcp_link_capabilities *p_link_caps);
 
 /**
- * @brief Get number of Rx queues allocated for VF by qed
+ * qed_vf_get_num_rxqs(): Get number of Rx queues allocated for VF by qed
+ *
+ * @p_hwfn: HW device data.
+ * @num_rxqs: allocated RX queues
  *
- *  @param p_hwfn
- *  @param num_rxqs - allocated RX queues
+ * Return: Void.
  */
 void qed_vf_get_num_rxqs(struct qed_hwfn *p_hwfn, u8 *num_rxqs);
 
 /**
- * @brief Get number of Rx queues allocated for VF by qed
+ * qed_vf_get_num_txqs(): Get number of Rx queues allocated for VF by qed
  *
- *  @param p_hwfn
- *  @param num_txqs - allocated RX queues
+ * @p_hwfn: HW device data.
+ * @num_txqs: allocated RX queues
+ *
+ * Return: Void.
  */
 void qed_vf_get_num_txqs(struct qed_hwfn *p_hwfn, u8 *num_txqs);
 
 /**
- * @brief Get number of available connections [both Rx and Tx] for VF
+ * qed_vf_get_num_cids(): Get number of available connections
+ *                        [both Rx and Tx] for VF
+ *
+ * @p_hwfn: HW device data.
+ * @num_cids: allocated number of connections
  *
- * @param p_hwfn
- * @param num_cids - allocated number of connections
+ * Return: Void.
  */
 void qed_vf_get_num_cids(struct qed_hwfn *p_hwfn, u8 *num_cids);
 
 /**
- * @brief Get port mac address for VF
+ * qed_vf_get_port_mac(): Get port mac address for VF.
  *
- * @param p_hwfn
- * @param port_mac - destination location for port mac
+ * @p_hwfn: HW device data.
+ * @port_mac: destination location for port mac
+ *
+ * Return: Void.
  */
 void qed_vf_get_port_mac(struct qed_hwfn *p_hwfn, u8 *port_mac);
 
 /**
- * @brief Get number of VLAN filters allocated for VF by qed
+ * qed_vf_get_num_vlan_filters(): Get number of VLAN filters allocated
+ *                                for VF by qed.
+ *
+ * @p_hwfn: HW device data.
+ * @num_vlan_filters: allocated VLAN filters
  *
- *  @param p_hwfn
- *  @param num_rxqs - allocated VLAN filters
+ * Return: Void.
  */
 void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn,
 				 u8 *num_vlan_filters);
 
 /**
- * @brief Get number of MAC filters allocated for VF by qed
+ * qed_vf_get_num_mac_filters(): Get number of MAC filters allocated
+ *                               for VF by qed
  *
- *  @param p_hwfn
- *  @param num_rxqs - allocated MAC filters
+ * @p_hwfn: HW device data.
+ * @num_mac_filters: allocated MAC filters
+ *
+ * Return: Void.
  */
 void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters);
 
 /**
- * @brief Check if VF can set a MAC address
+ * qed_vf_check_mac(): Check if VF can set a MAC address
  *
- * @param p_hwfn
- * @param mac
+ * @p_hwfn: HW device data.
+ * @mac: Mac.
  *
- * @return bool
+ * Return: bool.
  */
 bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac);
 
 /**
- * @brief Set firmware version information in dev_info from VFs acquire response tlv
+ * qed_vf_get_fw_version(): Set firmware version information
+ *                          in dev_info from VFs acquire response tlv
+ *
+ * @p_hwfn: HW device data.
+ * @fw_major: FW major.
+ * @fw_minor: FW minor.
+ * @fw_rev: FW rev.
+ * @fw_eng: FW eng.
  *
- * @param p_hwfn
- * @param fw_major
- * @param fw_minor
- * @param fw_rev
- * @param fw_eng
+ * Return: Void.
  */
 void qed_vf_get_fw_version(struct qed_hwfn *p_hwfn,
 			   u16 *fw_major, u16 *fw_minor,
 			   u16 *fw_rev, u16 *fw_eng);
 
 /**
- * @brief hw preparation for VF
- *      sends ACQUIRE message
+ * qed_vf_hw_prepare(): hw preparation for VF  sends ACQUIRE message
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return int
+ * Return: Int.
  */
 int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief VF - start the RX Queue by sending a message to the PF
- * @param p_hwfn
- * @param p_cid			- Only relative fields are relevant
- * @param bd_max_bytes          - maximum number of bytes per bd
- * @param bd_chain_phys_addr    - physical address of bd chain
- * @param cqe_pbl_addr          - physical address of pbl
- * @param cqe_pbl_size          - pbl size
- * @param pp_prod               - pointer to the producer to be
- *				  used in fastpath
+ * qed_vf_pf_rxq_start(): start the RX Queue by sending a message to the PF
+ *
+ * @p_hwfn: HW device data.
+ * @p_cid: Only relative fields are relevant
+ * @bd_max_bytes: maximum number of bytes per bd
+ * @bd_chain_phys_addr: physical address of bd chain
+ * @cqe_pbl_addr: physical address of pbl
+ * @cqe_pbl_size: pbl size
+ * @pp_prod: pointer to the producer to be used in fastpath
  *
- * @return int
+ * Return: Int.
  */
 int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 			struct qed_queue_cid *p_cid,
@@ -853,18 +880,16 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 			u16 cqe_pbl_size, void __iomem **pp_prod);
 
 /**
- * @brief VF - start the TX queue by sending a message to the
- *        PF.
+ * qed_vf_pf_txq_start(): VF - start the TX queue by sending a message to the
+ *                        PF.
  *
- * @param p_hwfn
- * @param tx_queue_id           - zero based within the VF
- * @param sb                    - status block for this queue
- * @param sb_index              - index within the status block
- * @param bd_chain_phys_addr    - physical address of tx chain
- * @param pp_doorbell           - pointer to address to which to
- *                      write the doorbell too..
+ * @p_hwfn: HW device data.
+ * @p_cid: CID.
+ * @pbl_addr: PBL address.
+ * @pbl_size: PBL Size.
+ * @pp_doorbell: pointer to address to which to write the doorbell too.
  *
- * @return int
+ * Return: Int.
  */
 int
 qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
@@ -873,90 +898,91 @@ qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 		    u16 pbl_size, void __iomem **pp_doorbell);
 
 /**
- * @brief VF - stop the RX queue by sending a message to the PF
+ * qed_vf_pf_rxq_stop(): VF - stop the RX queue by sending a message to the PF.
  *
- * @param p_hwfn
- * @param p_cid
- * @param cqe_completion
+ * @p_hwfn: HW device data.
+ * @p_cid: CID.
+ * @cqe_completion: CQE Completion.
  *
- * @return int
+ * Return: Int.
  */
 int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
 		       struct qed_queue_cid *p_cid, bool cqe_completion);
 
 /**
- * @brief VF - stop the TX queue by sending a message to the PF
+ * qed_vf_pf_txq_stop(): VF - stop the TX queue by sending a message to the PF.
  *
- * @param p_hwfn
- * @param tx_qid
+ * @p_hwfn: HW device data.
+ * @p_cid: CID.
  *
- * @return int
+ * Return: Int.
  */
 int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid);
 
 /**
- * @brief VF - send a vport update command
+ * qed_vf_pf_vport_update(): VF - send a vport update command.
  *
- * @param p_hwfn
- * @param params
+ * @p_hwfn: HW device data.
+ * @p_params: Params
  *
- * @return int
+ * Return: Int.
  */
 int qed_vf_pf_vport_update(struct qed_hwfn *p_hwfn,
 			   struct qed_sp_vport_update_params *p_params);
 
 /**
+ * qed_vf_pf_reset(): VF - send a close message to PF.
  *
- * @brief VF - send a close message to PF
+ * @p_hwfn: HW device data.
  *
- * @param p_hwfn
- *
- * @return enum _qed_status
+ * Return: enum _qed_status
  */
 int qed_vf_pf_reset(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief VF - free vf`s memories
+ * qed_vf_pf_release(): VF - free vf`s memories.
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return enum _qed_status
+ * Return: enum _qed_status
  */
 int qed_vf_pf_release(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief qed_vf_get_igu_sb_id - Get the IGU SB ID for a given
+ * qed_vf_get_igu_sb_id(): Get the IGU SB ID for a given
  *        sb_id. For VFs igu sbs don't have to be contiguous
  *
- * @param p_hwfn
- * @param sb_id
+ * @p_hwfn: HW device data.
+ * @sb_id: SB ID.
  *
- * @return INLINE u16
+ * Return: INLINE u16
  */
 u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id);
 
 /**
- * @brief Stores [or removes] a configured sb_info.
+ * qed_vf_set_sb_info(): Stores [or removes] a configured sb_info.
+ *
+ * @p_hwfn: HW device data.
+ * @sb_id: zero-based SB index [for fastpath]
+ * @p_sb:  may be NULL [during removal].
  *
- * @param p_hwfn
- * @param sb_id - zero-based SB index [for fastpath]
- * @param sb_info - may be NULL [during removal].
+ * Return: Void.
  */
 void qed_vf_set_sb_info(struct qed_hwfn *p_hwfn,
 			u16 sb_id, struct qed_sb_info *p_sb);
 
 /**
- * @brief qed_vf_pf_vport_start - perform vport start for VF.
+ * qed_vf_pf_vport_start(): perform vport start for VF.
  *
- * @param p_hwfn
- * @param vport_id
- * @param mtu
- * @param inner_vlan_removal
- * @param tpa_mode
- * @param max_buffers_per_cqe,
- * @param only_untagged - default behavior regarding vlan acceptance
+ * @p_hwfn: HW device data.
+ * @vport_id: Vport ID.
+ * @mtu: MTU.
+ * @inner_vlan_removal: Innter VLAN removal.
+ * @tpa_mode: TPA mode
+ * @max_buffers_per_cqe: Max buffer pre CQE.
+ * @only_untagged: default behavior regarding vlan acceptance
  *
- * @return enum _qed_status
+ * Return: enum _qed_status
  */
 int qed_vf_pf_vport_start(struct qed_hwfn *p_hwfn,
 			  u8 vport_id,
@@ -966,11 +992,11 @@ int qed_vf_pf_vport_start(struct qed_hwfn *p_hwfn,
 			  u8 max_buffers_per_cqe, u8 only_untagged);
 
 /**
- * @brief qed_vf_pf_vport_stop - stop the VF's vport
+ * qed_vf_pf_vport_stop(): stop the VF's vport
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return enum _qed_status
+ * Return: enum _qed_status
  */
 int qed_vf_pf_vport_stop(struct qed_hwfn *p_hwfn);
 
@@ -981,42 +1007,49 @@ void qed_vf_pf_filter_mcast(struct qed_hwfn *p_hwfn,
 			    struct qed_filter_mcast *p_filter_cmd);
 
 /**
- * @brief qed_vf_pf_int_cleanup - clean the SB of the VF
+ * qed_vf_pf_int_cleanup(): clean the SB of the VF
  *
- * @param p_hwfn
+ * @p_hwfn: HW device data.
  *
- * @return enum _qed_status
+ * Return: enum _qed_status
  */
 int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn);
 
 /**
- * @brief - return the link params in a given bulletin board
+ * __qed_vf_get_link_params(): return the link params in a given bulletin board
  *
- * @param p_hwfn
- * @param p_params - pointer to a struct to fill with link params
- * @param p_bulletin
+ * @p_hwfn: HW device data.
+ * @p_params: pointer to a struct to fill with link params
+ * @p_bulletin: Bulletin.
+ *
+ * Return: Void.
  */
 void __qed_vf_get_link_params(struct qed_hwfn *p_hwfn,
 			      struct qed_mcp_link_params *p_params,
 			      struct qed_bulletin_content *p_bulletin);
 
 /**
- * @brief - return the link state in a given bulletin board
+ * __qed_vf_get_link_state(): return the link state in a given bulletin board
+ *
+ * @p_hwfn: HW device data.
+ * @p_link: pointer to a struct to fill with link state
+ * @p_bulletin: Bulletin.
  *
- * @param p_hwfn
- * @param p_link - pointer to a struct to fill with link state
- * @param p_bulletin
+ * Return: Void.
  */
 void __qed_vf_get_link_state(struct qed_hwfn *p_hwfn,
 			     struct qed_mcp_link_state *p_link,
 			     struct qed_bulletin_content *p_bulletin);
 
 /**
- * @brief - return the link capabilities in a given bulletin board
+ * __qed_vf_get_link_caps(): return the link capabilities in a given
+ *                           bulletin board
  *
- * @param p_hwfn
- * @param p_link - pointer to a struct to fill with link capabilities
- * @param p_bulletin
+ * @p_hwfn: HW device data.
+ * @p_link_caps: pointer to a struct to fill with link capabilities
+ * @p_bulletin: Bulletin.
+ *
+ * Return: Void.
  */
 void __qed_vf_get_link_caps(struct qed_hwfn *p_hwfn,
 			    struct qed_mcp_link_capabilities *p_link_caps,
@@ -1029,9 +1062,13 @@ int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn,
 
 u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id);
 /**
- * @brief - Ask PF to update the MAC address in it's bulletin board
+ * qed_vf_pf_bulletin_update_mac(): Ask PF to update the MAC address in
+ *                                  it's bulletin board
+ *
+ * @p_hwfn: HW device data.
+ * @p_mac: mac address to be updated in bulletin board
  *
- * @param p_mac - mac address to be updated in bulletin board
+ * Return: Int.
  */
 int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac);
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 75adb71adf18..be33bde0f731 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2800,10 +2800,13 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
 }
 
 /**
- * qede_io_error_detected - called when PCI error is detected
+ * qede_io_error_detected(): Called when PCI error is detected
+ *
  * @pdev: Pointer to PCI device
  * @state: The current pci connection state
  *
+ *Return: pci_ers_result_t.
+ *
  * This function is called after a PCI bus error affecting
  * this device has been detected.
  */
diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h
index f34dbd0db795..a84063492c71 100644
--- a/include/linux/qed/qed_chain.h
+++ b/include/linux/qed/qed_chain.h
@@ -268,14 +268,15 @@ static inline dma_addr_t qed_chain_get_pbl_phys(const struct qed_chain *chain)
 }
 
 /**
- * @brief qed_chain_advance_page -
+ * qed_chain_advance_page(): Advance the next element across pages for a
+ *                           linked chain.
  *
- * Advance the next element across pages for a linked chain
+ * @p_chain: P_chain.
+ * @p_next_elem: P_next_elem.
+ * @idx_to_inc: Idx_to_inc.
+ * @page_to_inc: page_to_inc.
  *
- * @param p_chain
- * @param p_next_elem
- * @param idx_to_inc
- * @param page_to_inc
+ * Return: Void.
  */
 static inline void
 qed_chain_advance_page(struct qed_chain *p_chain,
@@ -336,12 +337,14 @@ qed_chain_advance_page(struct qed_chain *p_chain,
 	} while (0)
 
 /**
- * @brief qed_chain_return_produced -
+ * qed_chain_return_produced(): A chain in which the driver "Produces"
+ *                              elements should use this API
+ *                              to indicate previous produced elements
+ *                              are now consumed.
  *
- * A chain in which the driver "Produces" elements should use this API
- * to indicate previous produced elements are now consumed.
+ * @p_chain: Chain.
  *
- * @param p_chain
+ * Return: Void.
  */
 static inline void qed_chain_return_produced(struct qed_chain *p_chain)
 {
@@ -353,15 +356,15 @@ static inline void qed_chain_return_produced(struct qed_chain *p_chain)
 }
 
 /**
- * @brief qed_chain_produce -
+ * qed_chain_produce(): A chain in which the driver "Produces"
+ *                      elements should use this to get a pointer to
+ *                      the next element which can be "Produced". It's driver
+ *                      responsibility to validate that the chain has room for
+ *                      new element.
  *
- * A chain in which the driver "Produces" elements should use this to get
- * a pointer to the next element which can be "Produced". It's driver
- * responsibility to validate that the chain has room for new element.
+ * @p_chain: Chain.
  *
- * @param p_chain
- *
- * @return void*, a pointer to next element
+ * Return: void*, a pointer to next element.
  */
 static inline void *qed_chain_produce(struct qed_chain *p_chain)
 {
@@ -395,14 +398,11 @@ static inline void *qed_chain_produce(struct qed_chain *p_chain)
 }
 
 /**
- * @brief qed_chain_get_capacity -
- *
- * Get the maximum number of BDs in chain
+ * qed_chain_get_capacity(): Get the maximum number of BDs in chain
  *
- * @param p_chain
- * @param num
+ * @p_chain: Chain.
  *
- * @return number of unusable BDs
+ * Return: number of unusable BDs.
  */
 static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain)
 {
@@ -410,12 +410,14 @@ static inline u32 qed_chain_get_capacity(struct qed_chain *p_chain)
 }
 
 /**
- * @brief qed_chain_recycle_consumed -
+ * qed_chain_recycle_consumed(): Returns an element which was
+ *                               previously consumed;
+ *                               Increments producers so they could
+ *                               be written to FW.
  *
- * Returns an element which was previously consumed;
- * Increments producers so they could be written to FW.
+ * @p_chain: Chain.
  *
- * @param p_chain
+ * Return: Void.
  */
 static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain)
 {
@@ -427,14 +429,13 @@ static inline void qed_chain_recycle_consumed(struct qed_chain *p_chain)
 }
 
 /**
- * @brief qed_chain_consume -
+ * qed_chain_consume(): A Chain in which the driver utilizes data written
+ *                      by a different source (i.e., FW) should use this to
+ *                      access passed buffers.
  *
- * A Chain in which the driver utilizes data written by a different source
- * (i.e., FW) should use this to access passed buffers.
+ * @p_chain: Chain.
  *
- * @param p_chain
- *
- * @return void*, a pointer to the next buffer written
+ * Return: void*, a pointer to the next buffer written.
  */
 static inline void *qed_chain_consume(struct qed_chain *p_chain)
 {
@@ -468,9 +469,11 @@ static inline void *qed_chain_consume(struct qed_chain *p_chain)
 }
 
 /**
- * @brief qed_chain_reset - Resets the chain to its start state
+ * qed_chain_reset(): Resets the chain to its start state.
+ *
+ * @p_chain: pointer to a previously allocated chain.
  *
- * @param p_chain pointer to a previously allocated chain
+ * Return Void.
  */
 static inline void qed_chain_reset(struct qed_chain *p_chain)
 {
@@ -519,13 +522,12 @@ static inline void qed_chain_reset(struct qed_chain *p_chain)
 }
 
 /**
- * @brief qed_chain_get_last_elem -
+ * qed_chain_get_last_elem(): Returns a pointer to the last element of the
+ *                            chain.
  *
- * Returns a pointer to the last element of the chain
+ * @p_chain: Chain.
  *
- * @param p_chain
- *
- * @return void*
+ * Return: void*.
  */
 static inline void *qed_chain_get_last_elem(struct qed_chain *p_chain)
 {
@@ -563,10 +565,13 @@ out:
 }
 
 /**
- * @brief qed_chain_set_prod - sets the prod to the given value
+ * qed_chain_set_prod(): sets the prod to the given value.
+ *
+ * @p_chain: Chain.
+ * @prod_idx: Prod Idx.
+ * @p_prod_elem: Prod elem.
  *
- * @param prod_idx
- * @param p_prod_elem
+ * Return Void.
  */
 static inline void qed_chain_set_prod(struct qed_chain *p_chain,
 				      u32 prod_idx, void *p_prod_elem)
@@ -610,9 +615,11 @@ static inline void qed_chain_set_prod(struct qed_chain *p_chain,
 }
 
 /**
- * @brief qed_chain_pbl_zero_mem - set chain memory to 0
+ * qed_chain_pbl_zero_mem(): set chain memory to 0.
+ *
+ * @p_chain: Chain.
  *
- * @param p_chain
+ * Return: Void.
  */
 static inline void qed_chain_pbl_zero_mem(struct qed_chain *p_chain)
 {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 850b98991670..f39451aaaeec 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -819,47 +819,47 @@ struct qed_common_cb_ops {
 
 struct qed_selftest_ops {
 /**
- * @brief selftest_interrupt - Perform interrupt test
+ * selftest_interrupt(): Perform interrupt test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*selftest_interrupt)(struct qed_dev *cdev);
 
 /**
- * @brief selftest_memory - Perform memory test
+ * selftest_memory(): Perform memory test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*selftest_memory)(struct qed_dev *cdev);
 
 /**
- * @brief selftest_register - Perform register test
+ * selftest_register(): Perform register test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*selftest_register)(struct qed_dev *cdev);
 
 /**
- * @brief selftest_clock - Perform clock test
+ * selftest_clock(): Perform clock test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*selftest_clock)(struct qed_dev *cdev);
 
 /**
- * @brief selftest_nvram - Perform nvram test
+ * selftest_nvram(): Perform nvram test.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*selftest_nvram) (struct qed_dev *cdev);
 };
@@ -927,47 +927,53 @@ struct qed_common_ops {
 				  enum qed_hw_err_type err_type);
 
 /**
- * @brief can_link_change - can the instance change the link or not
+ * can_link_change(): can the instance change the link or not.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return true if link-change is allowed, false otherwise.
+ * Return: true if link-change is allowed, false otherwise.
  */
 	bool (*can_link_change)(struct qed_dev *cdev);
 
 /**
- * @brief set_link - set links according to params
+ * set_link(): set links according to params.
  *
- * @param cdev
- * @param params - values used to override the default link configuration
+ * @cdev: Qed dev pointer.
+ * @params: values used to override the default link configuration.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int		(*set_link)(struct qed_dev *cdev,
 				    struct qed_link_params *params);
 
 /**
- * @brief get_link - returns the current link state.
+ * get_link(): returns the current link state.
  *
- * @param cdev
- * @param if_link - structure to be filled with current link configuration.
+ * @cdev: Qed dev pointer.
+ * @if_link: structure to be filled with current link configuration.
+ *
+ * Return: Void.
  */
 	void		(*get_link)(struct qed_dev *cdev,
 				    struct qed_link_output *if_link);
 
 /**
- * @brief - drains chip in case Tx completions fail to arrive due to pause.
+ * drain(): drains chip in case Tx completions fail to arrive due to pause.
+ *
+ * @cdev: Qed dev pointer.
  *
- * @param cdev
+ * Return: Int.
  */
 	int		(*drain)(struct qed_dev *cdev);
 
 /**
- * @brief update_msglvl - update module debug level
+ * update_msglvl(): update module debug level.
  *
- * @param cdev
- * @param dp_module
- * @param dp_level
+ * @cdev: Qed dev pointer.
+ * @dp_module: Debug module.
+ * @dp_level: Debug level.
+ *
+ * Return: Void.
  */
 	void		(*update_msglvl)(struct qed_dev *cdev,
 					 u32 dp_module,
@@ -981,70 +987,73 @@ struct qed_common_ops {
 				      struct qed_chain *p_chain);
 
 /**
- * @brief nvm_flash - Flash nvm data.
+ * nvm_flash(): Flash nvm data.
  *
- * @param cdev
- * @param name - file containing the data
+ * @cdev: Qed dev pointer.
+ * @name: file containing the data.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*nvm_flash)(struct qed_dev *cdev, const char *name);
 
 /**
- * @brief nvm_get_image - reads an entire image from nvram
+ * nvm_get_image(): reads an entire image from nvram.
  *
- * @param cdev
- * @param type - type of the request nvram image
- * @param buf - preallocated buffer to fill with the image
- * @param len - length of the allocated buffer
+ * @cdev: Qed dev pointer.
+ * @type: type of the request nvram image.
+ * @buf: preallocated buffer to fill with the image.
+ * @len: length of the allocated buffer.
  *
- * @return 0 on success, error otherwise
+ * Return: 0 on success, error otherwise.
  */
 	int (*nvm_get_image)(struct qed_dev *cdev,
 			     enum qed_nvm_images type, u8 *buf, u16 len);
 
 /**
- * @brief set_coalesce - Configure Rx coalesce value in usec
+ * set_coalesce(): Configure Rx coalesce value in usec.
  *
- * @param cdev
- * @param rx_coal - Rx coalesce value in usec
- * @param tx_coal - Tx coalesce value in usec
- * @param qid - Queue index
- * @param sb_id - Status Block Id
+ * @cdev: Qed dev pointer.
+ * @rx_coal: Rx coalesce value in usec.
+ * @tx_coal: Tx coalesce value in usec.
+ * @handle: Handle.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*set_coalesce)(struct qed_dev *cdev,
 			    u16 rx_coal, u16 tx_coal, void *handle);
 
 /**
- * @brief set_led - Configure LED mode
+ * set_led() - Configure LED mode.
  *
- * @param cdev
- * @param mode - LED mode
+ * @cdev: Qed dev pointer.
+ * @mode: LED mode.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*set_led)(struct qed_dev *cdev,
 		       enum qed_led_mode mode);
 
 /**
- * @brief attn_clr_enable - Prevent attentions from being reasserted
+ * attn_clr_enable(): Prevent attentions from being reasserted.
+ *
+ * @cdev: Qed dev pointer.
+ * @clr_enable: Clear enable.
  *
- * @param cdev
- * @param clr_enable
+ * Return: Void.
  */
 	void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable);
 
 /**
- * @brief db_recovery_add - add doorbell information to the doorbell
- * recovery mechanism.
+ * db_recovery_add(): add doorbell information to the doorbell
+ *                    recovery mechanism.
  *
- * @param cdev
- * @param db_addr - doorbell address
- * @param db_data - address of where db_data is stored
- * @param db_is_32b - doorbell is 32b pr 64b
- * @param db_is_user - doorbell recovery addresses are user or kernel space
+ * @cdev: Qed dev pointer.
+ * @db_addr: Doorbell address.
+ * @db_data: Dddress of where db_data is stored.
+ * @db_width: Doorbell is 32b or 64b.
+ * @db_space: Doorbell recovery addresses are user or kernel space.
+ *
+ * Return: Int.
  */
 	int (*db_recovery_add)(struct qed_dev *cdev,
 			       void __iomem *db_addr,
@@ -1053,114 +1062,130 @@ struct qed_common_ops {
 			       enum qed_db_rec_space db_space);
 
 /**
- * @brief db_recovery_del - remove doorbell information from the doorbell
+ * db_recovery_del(): remove doorbell information from the doorbell
  * recovery mechanism. db_data serves as key (db_addr is not unique).
  *
- * @param cdev
- * @param db_addr - doorbell address
- * @param db_data - address where db_data is stored. Serves as key for the
- *		    entry to delete.
+ * @cdev: Qed dev pointer.
+ * @db_addr: Doorbell address.
+ * @db_data: Address where db_data is stored. Serves as key for the
+ *           entry to delete.
+ *
+ * Return: Int.
  */
 	int (*db_recovery_del)(struct qed_dev *cdev,
 			       void __iomem *db_addr, void *db_data);
 
 /**
- * @brief recovery_process - Trigger a recovery process
+ * recovery_process(): Trigger a recovery process.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*recovery_process)(struct qed_dev *cdev);
 
 /**
- * @brief recovery_prolog - Execute the prolog operations of a recovery process
+ * recovery_prolog(): Execute the prolog operations of a recovery process.
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*recovery_prolog)(struct qed_dev *cdev);
 
 /**
- * @brief update_drv_state - API to inform the change in the driver state.
+ * update_drv_state(): API to inform the change in the driver state.
  *
- * @param cdev
- * @param active
+ * @cdev: Qed dev pointer.
+ * @active: Active
  *
+ * Return: Int.
  */
 	int (*update_drv_state)(struct qed_dev *cdev, bool active);
 
 /**
- * @brief update_mac - API to inform the change in the mac address
+ * update_mac(): API to inform the change in the mac address.
  *
- * @param cdev
- * @param mac
+ * @cdev: Qed dev pointer.
+ * @mac: MAC.
  *
+ * Return: Int.
  */
 	int (*update_mac)(struct qed_dev *cdev, u8 *mac);
 
 /**
- * @brief update_mtu - API to inform the change in the mtu
+ * update_mtu(): API to inform the change in the mtu.
  *
- * @param cdev
- * @param mtu
+ * @cdev: Qed dev pointer.
+ * @mtu: MTU.
  *
+ * Return: Int.
  */
 	int (*update_mtu)(struct qed_dev *cdev, u16 mtu);
 
 /**
- * @brief update_wol - update of changes in the WoL configuration
+ * update_wol(): Update of changes in the WoL configuration.
+ *
+ * @cdev: Qed dev pointer.
+ * @enabled: true iff WoL should be enabled.
  *
- * @param cdev
- * @param enabled - true iff WoL should be enabled.
+ * Return: Int.
  */
 	int (*update_wol) (struct qed_dev *cdev, bool enabled);
 
 /**
- * @brief read_module_eeprom
+ * read_module_eeprom(): Read EEPROM.
  *
- * @param cdev
- * @param buf - buffer
- * @param dev_addr - PHY device memory region
- * @param offset - offset into eeprom contents to be read
- * @param len - buffer length, i.e., max bytes to be read
+ * @cdev: Qed dev pointer.
+ * @buf: buffer.
+ * @dev_addr: PHY device memory region.
+ * @offset: offset into eeprom contents to be read.
+ * @len: buffer length, i.e., max bytes to be read.
+ *
+ * Return: Int.
  */
 	int (*read_module_eeprom)(struct qed_dev *cdev,
 				  char *buf, u8 dev_addr, u32 offset, u32 len);
 
 /**
- * @brief get_affin_hwfn_idx
+ * get_affin_hwfn_idx(): Get affine HW function.
+ *
+ * @cdev: Qed dev pointer.
  *
- * @param cdev
+ * Return: u8.
  */
 	u8 (*get_affin_hwfn_idx)(struct qed_dev *cdev);
 
 /**
- * @brief read_nvm_cfg - Read NVM config attribute value.
- * @param cdev
- * @param buf - buffer
- * @param cmd - NVM CFG command id
- * @param entity_id - Entity id
+ * read_nvm_cfg(): Read NVM config attribute value.
+ *
+ * @cdev: Qed dev pointer.
+ * @buf: Buffer.
+ * @cmd: NVM CFG command id.
+ * @entity_id: Entity id.
  *
+ * Return: Int.
  */
 	int (*read_nvm_cfg)(struct qed_dev *cdev, u8 **buf, u32 cmd,
 			    u32 entity_id);
 /**
- * @brief read_nvm_cfg - Read NVM config attribute value.
- * @param cdev
- * @param cmd - NVM CFG command id
+ * read_nvm_cfg_len(): Read NVM config attribute value.
  *
- * @return config id length, 0 on error.
+ * @cdev: Qed dev pointer.
+ * @cmd: NVM CFG command id.
+ *
+ * Return: config id length, 0 on error.
  */
 	int (*read_nvm_cfg_len)(struct qed_dev *cdev, u32 cmd);
 
 /**
- * @brief set_grc_config - Configure value for grc config id.
- * @param cdev
- * @param cfg_id - grc config id
- * @param val - grc config value
+ * set_grc_config(): Configure value for grc config id.
+ *
+ * @cdev: Qed dev pointer.
+ * @cfg_id: grc config id
+ * @val: grc config value
  *
+ * Return: Int.
  */
 	int (*set_grc_config)(struct qed_dev *cdev, u32 cfg_id, u32 val);
 
@@ -1397,18 +1422,16 @@ static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info)
 }
 
 /**
+ * qed_sb_ack(): This function creates an update command for interrupts
+ *               that is  written to the IGU.
  *
- * @brief This function creates an update command for interrupts that is
- *        written to the IGU.
- *
- * @param sb_info       - This is the structure allocated and
- *                 initialized per status block. Assumption is
- *                 that it was initialized using qed_sb_init
- * @param int_cmd       - Enable/Disable/Nop
- * @param upd_flg       - whether igu consumer should be
- *                 updated.
+ * @sb_info: This is the structure allocated and
+ *           initialized per status block. Assumption is
+ *           that it was initialized using qed_sb_init
+ * @int_cmd: Enable/Disable/Nop
+ * @upd_flg: Whether igu consumer should be updated.
  *
- * @return inline void
+ * Return: inline void.
  */
 static inline void qed_sb_ack(struct qed_sb_info *sb_info,
 			      enum igu_int_cmd int_cmd,
diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h
index 04180d9af560..494cdc3cd840 100644
--- a/include/linux/qed/qed_iscsi_if.h
+++ b/include/linux/qed/qed_iscsi_if.h
@@ -182,7 +182,7 @@ struct qed_iscsi_cb_ops {
  *			@param stats - pointer to struck that would be filled
  *				we stats
  *			@return 0 on success, error otherwise.
- * @change_mac		Change MAC of interface
+ * @change_mac:		Change MAC of interface
  *			@param cdev
  *			@param handle - the connection handle.
  *			@param mac - new MAC to configure.
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index ff808d248883..5b67cd03276e 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -208,57 +208,57 @@ enum qed_ll2_xmit_flags {
 
 struct qed_ll2_ops {
 /**
- * @brief start - initializes ll2
+ * start(): Initializes ll2.
  *
- * @param cdev
- * @param params - protocol driver configuration for the ll2.
+ * @cdev: Qed dev pointer.
+ * @params: Protocol driver configuration for the ll2.
  *
- * @return 0 on success, otherwise error value.
+ * Return: 0 on success, otherwise error value.
  */
 	int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params);
 
 /**
- * @brief stop - stops the ll2
+ * stop(): Stops the ll2
  *
- * @param cdev
+ * @cdev: Qed dev pointer.
  *
- * @return 0 on success, otherwise error value.
+ * Return: 0 on success, otherwise error value.
  */
 	int (*stop)(struct qed_dev *cdev);
 
 /**
- * @brief start_xmit - transmits an skb over the ll2 interface
+ * start_xmit(): Transmits an skb over the ll2 interface
  *
- * @param cdev
- * @param skb
- * @param xmit_flags - Transmit options defined by the enum qed_ll2_xmit_flags.
+ * @cdev: Qed dev pointer.
+ * @skb: SKB.
+ * @xmit_flags: Transmit options defined by the enum qed_ll2_xmit_flags.
  *
- * @return 0 on success, otherwise error value.
+ * Return: 0 on success, otherwise error value.
  */
 	int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb,
 			  unsigned long xmit_flags);
 
 /**
- * @brief register_cb_ops - protocol driver register the callback for Rx/Tx
+ * register_cb_ops(): Protocol driver register the callback for Rx/Tx
  * packets. Should be called before `start'.
  *
- * @param cdev
- * @param cookie - to be passed to the callback functions.
- * @param ops - the callback functions to register for Rx / Tx.
+ * @cdev: Qed dev pointer.
+ * @cookie: to be passed to the callback functions.
+ * @ops: the callback functions to register for Rx / Tx.
  *
- * @return 0 on success, otherwise error value.
+ * Return: 0 on success, otherwise error value.
  */
 	void (*register_cb_ops)(struct qed_dev *cdev,
 				const struct qed_ll2_cb_ops *ops,
 				void *cookie);
 
 /**
- * @brief get LL2 related statistics
+ * get_stats(): Get LL2 related statistics.
  *
- * @param cdev
- * @param stats - pointer to struct that would be filled with stats
+ * @cdev: Qed dev pointer.
+ * @stats: Pointer to struct that would be filled with stats.
  *
- * @return 0 on success, error otherwise.
+ * Return: 0 on success, error otherwise.
  */
 	int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats);
 };
diff --git a/include/linux/qed/qed_nvmetcp_if.h b/include/linux/qed/qed_nvmetcp_if.h
index 14671bc19ed1..1d51df347560 100644
--- a/include/linux/qed/qed_nvmetcp_if.h
+++ b/include/linux/qed/qed_nvmetcp_if.h
@@ -171,6 +171,23 @@ struct nvmetcp_task_params {
  *			@param dest_port
  * @clear_all_filters: Clear all filters.
  *			@param cdev
+ * @init_read_io: Init read IO.
+ *			@task_params
+ *			@cmd_pdu_header
+ *			@nvme_cmd
+ *			@sgl_task_params
+ * @init_write_io: Init write IO.
+ *			@task_params
+ *			@cmd_pdu_header
+ *			@nvme_cmd
+ *			@sgl_task_params
+ * @init_icreq_exchange: Exchange ICReq.
+ *			@task_params
+ *			@init_conn_req_pdu_hdr
+ *			@tx_sgl_task_params
+ *			@rx_sgl_task_params
+ * @init_task_cleanup: Init task cleanup.
+ *			@task_params
  */
 struct qed_nvmetcp_ops {
 	const struct qed_common_ops *common;
-- 
cgit v1.2.3


From fb09a1ed5c6e507499a9da54bfd34f71a2673961 Mon Sep 17 00:00:00 2001
From: Shai Malin <smalin@marvell.com>
Date: Mon, 4 Oct 2021 09:58:40 +0300
Subject: qed: Remove e4_ and _e4 from FW HSI

The existing qed/qede/qedr/qedi/qedf code uses chip-specific naming in
structures,  functions, variables and defines in FW HSI (Hardware
Software Interface).

The new FW version introduced a generic naming convention in HSI
in-which the same code will be used across different versions
for simpler maintainability. It also eases in providing support for
new features.

With this patch every "_e4" or "e4_" prefix or suffix is not needed
anymore and it will be removed.

Reviewed-by: Manish Rangankar <mrangankar@marvell.com>
Reviewed-by: Javed Hasan <jhasan@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/qedr/main.c                  |    2 +-
 drivers/net/ethernet/qlogic/qed/qed.h              |    4 +-
 drivers/net/ethernet/qlogic/qed/qed_cxt.c          |   16 +-
 drivers/net/ethernet/qlogic/qed/qed_debug.c        |    8 +-
 drivers/net/ethernet/qlogic/qed/qed_fcoe.c         |   10 +-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h          | 6794 ++++++++++----------
 .../net/ethernet/qlogic/qed/qed_init_fw_funcs.c    |   29 +-
 drivers/net/ethernet/qlogic/qed/qed_int.c          |    4 +-
 drivers/net/ethernet/qlogic/qed/qed_int.h          |    2 +-
 drivers/net/ethernet/qlogic/qed/qed_ll2.c          |    2 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c          |    4 -
 drivers/net/ethernet/qlogic/qed/qed_reg_addr.h     |    2 +-
 drivers/net/ethernet/qlogic/qed/qed_spq.c          |    8 +-
 drivers/net/ethernet/qlogic/qed/qed_sriov.c        |   10 +-
 drivers/net/ethernet/qlogic/qede/qede_main.c       |    2 +-
 drivers/scsi/qedf/drv_fcoe_fw_funcs.c              |    8 +-
 drivers/scsi/qedf/drv_fcoe_fw_funcs.h              |    2 +-
 drivers/scsi/qedf/qedf.h                           |    4 +-
 drivers/scsi/qedf/qedf_els.c                       |    2 +-
 drivers/scsi/qedf/qedf_io.c                        |   12 +-
 drivers/scsi/qedf/qedf_main.c                      |    8 +-
 drivers/scsi/qedi/qedi_debugfs.c                   |    4 +-
 drivers/scsi/qedi/qedi_fw.c                        |   40 +-
 drivers/scsi/qedi/qedi_fw_api.c                    |   22 +-
 drivers/scsi/qedi/qedi_fw_iscsi.h                  |    2 +-
 drivers/scsi/qedi/qedi_iscsi.h                     |    2 +-
 drivers/scsi/qedi/qedi_main.c                      |    8 +-
 include/linux/qed/common_hsi.h                     |   28 +-
 include/linux/qed/fcoe_common.h                    |  362 +-
 include/linux/qed/iscsi_common.h                   |  360 +-
 include/linux/qed/nvmetcp_common.h                 |   18 +-
 include/linux/qed/qed_if.h                         |    5 +-
 32 files changed, 3888 insertions(+), 3896 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 755930be01b8..dc203f3d0f25 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -272,7 +272,7 @@ static int qedr_register_device(struct qedr_dev *dev)
 static int qedr_alloc_mem_sb(struct qedr_dev *dev,
 			     struct qed_sb_info *sb_info, u16 sb_id)
 {
-	struct status_block_e4 *sb_virt;
+	struct status_block *sb_virt;
 	dma_addr_t sb_phys;
 	int rc;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index b656408b9d70..cd1537bf5392 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -703,8 +703,6 @@ struct qed_dev {
 #define QED_IS_BB_B0(dev)		(QED_IS_BB(dev) && CHIP_REV_IS_B0(dev))
 #define QED_IS_AH(dev)			((dev)->type == QED_DEV_TYPE_AH)
 #define QED_IS_K2(dev)			QED_IS_AH(dev)
-#define QED_IS_E4(dev)			(QED_IS_BB(dev) || QED_IS_AH(dev))
-#define QED_IS_E5(dev)			((dev)->type == QED_DEV_TYPE_E5)
 
 	u16				vendor_id;
 
@@ -903,7 +901,7 @@ static inline u8 qed_concrete_to_sw_fid(struct qed_dev *cdev,
 }
 
 #define PKT_LB_TC	9
-#define MAX_NUM_VOQS_E4	20
+#define MAX_NUM_VOQS	20
 
 int qed_configure_vport_wfq(struct qed_dev *cdev, u16 vp_id, u32 rate);
 void qed_configure_vp_wfq_on_link_change(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index cb0f2a3a1ac9..452494f8c298 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -54,22 +54,22 @@
 
 /* connection context union */
 union conn_context {
-	struct e4_core_conn_context core_ctx;
-	struct e4_eth_conn_context eth_ctx;
-	struct e4_iscsi_conn_context iscsi_ctx;
-	struct e4_fcoe_conn_context fcoe_ctx;
-	struct e4_roce_conn_context roce_ctx;
+	struct core_conn_context core_ctx;
+	struct eth_conn_context eth_ctx;
+	struct iscsi_conn_context iscsi_ctx;
+	struct fcoe_conn_context fcoe_ctx;
+	struct roce_conn_context roce_ctx;
 };
 
 /* TYPE-0 task context - iSCSI, FCOE */
 union type0_task_context {
-	struct e4_iscsi_task_context iscsi_ctx;
-	struct e4_fcoe_task_context fcoe_ctx;
+	struct iscsi_task_context iscsi_ctx;
+	struct fcoe_task_context fcoe_ctx;
 };
 
 /* TYPE-1 task context - ROCE */
 union type1_task_context {
-	struct e4_rdma_task_context roce_ctx;
+	struct rdma_task_context roce_ctx;
 };
 
 struct src_ent {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 6ab3e60d4928..380cf4963cbb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -4744,9 +4744,9 @@ static u32 qed_ilt_dump(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "num_pf_cids_per_conn_type", 1);
 	offset += qed_dump_num_param(dump_buf + offset,
-				     dump, "size", NUM_OF_CONNECTION_TYPES_E4);
+				     dump, "size", NUM_OF_CONNECTION_TYPES);
 	for (conn_type = 0, valid_conn_pf_cids = 0;
-	     conn_type < NUM_OF_CONNECTION_TYPES_E4; conn_type++, offset++) {
+	     conn_type < NUM_OF_CONNECTION_TYPES; conn_type++, offset++) {
 		u32 num_pf_cids =
 		    p_hwfn->p_cxt_mngr->conn_cfg[conn_type].cid_count;
 
@@ -4759,9 +4759,9 @@ static u32 qed_ilt_dump(struct qed_hwfn *p_hwfn,
 	offset += qed_dump_section_hdr(dump_buf + offset,
 				       dump, "num_vf_cids_per_conn_type", 1);
 	offset += qed_dump_num_param(dump_buf + offset,
-				     dump, "size", NUM_OF_CONNECTION_TYPES_E4);
+				     dump, "size", NUM_OF_CONNECTION_TYPES);
 	for (conn_type = 0, valid_conn_vf_cids = 0;
-	     conn_type < NUM_OF_CONNECTION_TYPES_E4; conn_type++, offset++) {
+	     conn_type < NUM_OF_CONNECTION_TYPES; conn_type++, offset++) {
 		u32 num_vf_cids =
 		    p_hwfn->p_cxt_mngr->conn_cfg[conn_type].cids_per_vf;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index b768f0698170..ba246d90344a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -89,7 +89,7 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn,
 	struct qed_fcoe_pf_params *fcoe_pf_params = NULL;
 	struct fcoe_init_ramrod_params *p_ramrod = NULL;
 	struct fcoe_init_func_ramrod_data *p_data;
-	struct e4_fcoe_conn_context *p_cxt = NULL;
+	struct fcoe_conn_context *p_cxt = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
 	struct qed_cxt_info cxt_info;
@@ -144,7 +144,7 @@ qed_sp_fcoe_func_start(struct qed_hwfn *p_hwfn,
 	memset(p_cxt, 0, sizeof(*p_cxt));
 
 	SET_FIELD(p_cxt->tstorm_ag_context.flags3,
-		  E4_TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_EN, 1);
+		  TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_EN, 1);
 
 	fcoe_pf_params->dummy_icid = (u16)dummy_cid;
 
@@ -549,7 +549,7 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
 
 void qed_fcoe_setup(struct qed_hwfn *p_hwfn)
 {
-	struct e4_fcoe_task_context *p_task_ctx = NULL;
+	struct fcoe_task_context *p_task_ctx = NULL;
 	u32 i, lc;
 	int rc;
 
@@ -561,7 +561,7 @@ void qed_fcoe_setup(struct qed_hwfn *p_hwfn)
 		if (rc)
 			continue;
 
-		memset(p_task_ctx, 0, sizeof(struct e4_fcoe_task_context));
+		memset(p_task_ctx, 0, sizeof(struct fcoe_task_context));
 
 		lc = 0;
 		SET_FIELD(lc, TIMERS_CONTEXT_VALIDLC0, 1);
@@ -572,7 +572,7 @@ void qed_fcoe_setup(struct qed_hwfn *p_hwfn)
 		p_task_ctx->timer_context.logical_client_1 = cpu_to_le32(lc);
 
 		SET_FIELD(p_task_ctx->tstorm_ag_context.flags0,
-			  E4_TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE, 1);
+			  TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE, 1);
 	}
 }
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 744c82a10875..a17baa98baa4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -394,216 +394,216 @@ struct xstorm_core_conn_st_ctx {
 	__le32 reserved0[55];
 };
 
-struct e4_xstorm_core_conn_ag_ctx {
+struct xstorm_core_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED1_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED1_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED2_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED2_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED3_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED3_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED4_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED4_SHIFT	5
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED5_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED5_SHIFT	6
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED6_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED6_SHIFT	7
+#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED1_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED1_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED2_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED2_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED3_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED3_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED4_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED4_SHIFT	5
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED5_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED5_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED6_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED6_SHIFT	7
 	u8 flags1;
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED7_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED7_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED8_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED8_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED9_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED9_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT11_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT12_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT12_SHIFT		4
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT13_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT13_SHIFT		5
-#define E4_XSTORM_CORE_CONN_AG_CTX_TX_RULE_ACTIVE_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT	6
-#define E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT	7
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED7_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED7_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED8_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED8_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED9_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED9_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_BIT11_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT11_SHIFT		3
+#define XSTORM_CORE_CONN_AG_CTX_BIT12_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT12_SHIFT		4
+#define XSTORM_CORE_CONN_AG_CTX_BIT13_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT13_SHIFT		5
+#define XSTORM_CORE_CONN_AG_CTX_TX_RULE_ACTIVE_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT	7
 	u8 flags2;
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF3_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_CF3_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF3_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF4_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF5_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF6_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF7_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_CF4_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF4_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_CF5_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF5_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_CF6_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF6_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_CF7_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF7_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF12_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF12_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF13_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF13_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF14_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF14_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF15_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF15_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_CF12_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF12_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_CF13_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF13_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_CF14_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF14_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_CF15_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF15_SHIFT	6
 	u8 flags6;
-#define E4_XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF17_MASK			0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF17_SHIFT			2
-#define E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_MASK			0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_SHIFT			4
-#define E4_XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_MASK		0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_SHIFT		6
+#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_CF17_MASK			0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF17_SHIFT			2
+#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_MASK			0x3
+#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_SHIFT			4
+#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_MASK		0x3
+#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_SHIFT		6
 	u8 flags7;
-#define E4_XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED10_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED10_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		6
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		7
+#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED10_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED10_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		6
+#define XSTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		7
 	u8 flags8;
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF3EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF4EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF5EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT	3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF6EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF7EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT	5
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF8EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT	6
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF9EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT	7
+#define XSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_CF3EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_CF4EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_CF5EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT	3
+#define XSTORM_CORE_CONN_AG_CTX_CF6EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_CF7EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT	5
+#define XSTORM_CORE_CONN_AG_CTX_CF8EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_CF9EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT	7
 	u8 flags9;
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF10EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT			0
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF11EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF11EN_SHIFT			1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF12EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF12EN_SHIFT			2
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF13EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF13EN_SHIFT			3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF14EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF14EN_SHIFT			4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF15EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF15EN_SHIFT			5
-#define E4_XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN_SHIFT	6
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF17EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF17EN_SHIFT			7
+#define XSTORM_CORE_CONN_AG_CTX_CF10EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT			0
+#define XSTORM_CORE_CONN_AG_CTX_CF11EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF11EN_SHIFT			1
+#define XSTORM_CORE_CONN_AG_CTX_CF12EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF12EN_SHIFT			2
+#define XSTORM_CORE_CONN_AG_CTX_CF13EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF13EN_SHIFT			3
+#define XSTORM_CORE_CONN_AG_CTX_CF14EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF14EN_SHIFT			4
+#define XSTORM_CORE_CONN_AG_CTX_CF15EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF15EN_SHIFT			5
+#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_CF17EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF17EN_SHIFT			7
 	u8 flags10;
-#define E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN_SHIFT		0
-#define E4_XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		2
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED11_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED11_SHIFT		3
-#define E4_XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF23EN_MASK			0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF23EN_SHIFT			5
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED12_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED12_SHIFT		6
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED13_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED13_SHIFT		7
+#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN_SHIFT		0
+#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		2
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED11_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED11_SHIFT		3
+#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
+#define XSTORM_CORE_CONN_AG_CTX_CF23EN_MASK			0x1
+#define XSTORM_CORE_CONN_AG_CTX_CF23EN_SHIFT			5
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED12_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED12_SHIFT		6
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED13_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED13_SHIFT		7
 	u8 flags11;
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED14_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED14_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED15_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RESERVED15_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_TX_DEC_RULE_EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT	3
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT	5
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE9EN_MASK		0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE9EN_SHIFT	7
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED14_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED14_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED15_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RESERVED15_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_TX_DEC_RULE_EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT	3
+#define XSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT	5
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_RULE9EN_MASK		0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE9EN_SHIFT	7
 	u8 flags12;
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE10EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE10EN_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE11EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE11EN_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE14EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE14EN_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE15EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE15EN_SHIFT	5
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE16EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE16EN_SHIFT	6
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE17EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE17EN_SHIFT	7
+#define XSTORM_CORE_CONN_AG_CTX_RULE10EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE10EN_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_RULE11EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE11EN_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
+#define XSTORM_CORE_CONN_AG_CTX_RULE14EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE14EN_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_RULE15EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE15EN_SHIFT	5
+#define XSTORM_CORE_CONN_AG_CTX_RULE16EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE16EN_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_RULE17EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE17EN_SHIFT	7
 	u8 flags13;
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE18EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE18EN_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE19EN_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_RULE19EN_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
+#define XSTORM_CORE_CONN_AG_CTX_RULE18EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE18EN_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_RULE19EN_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_RULE19EN_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
 	u8 flags14;
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT16_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT16_SHIFT	0
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT17_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT17_SHIFT	1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT18_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT18_SHIFT	2
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT19_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT19_SHIFT	3
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT20_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT20_SHIFT	4
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT21_MASK	0x1
-#define E4_XSTORM_CORE_CONN_AG_CTX_BIT21_SHIFT	5
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF23_MASK	0x3
-#define E4_XSTORM_CORE_CONN_AG_CTX_CF23_SHIFT	6
+#define XSTORM_CORE_CONN_AG_CTX_BIT16_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT16_SHIFT	0
+#define XSTORM_CORE_CONN_AG_CTX_BIT17_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT17_SHIFT	1
+#define XSTORM_CORE_CONN_AG_CTX_BIT18_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT18_SHIFT	2
+#define XSTORM_CORE_CONN_AG_CTX_BIT19_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT19_SHIFT	3
+#define XSTORM_CORE_CONN_AG_CTX_BIT20_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT20_SHIFT	4
+#define XSTORM_CORE_CONN_AG_CTX_BIT21_MASK	0x1
+#define XSTORM_CORE_CONN_AG_CTX_BIT21_SHIFT	5
+#define XSTORM_CORE_CONN_AG_CTX_CF23_MASK	0x3
+#define XSTORM_CORE_CONN_AG_CTX_CF23_SHIFT	6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 consolid_prod;
@@ -657,89 +657,89 @@ struct e4_xstorm_core_conn_ag_ctx {
 	__le16 word15;
 };
 
-struct e4_tstorm_core_conn_ag_ctx {
+struct tstorm_core_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT2_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT	2
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT3_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT	3
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT4_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT	4
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT5_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT	5
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
+#define TSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
+#define TSTORM_CORE_CONN_AG_CTX_BIT2_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_BIT2_SHIFT	2
+#define TSTORM_CORE_CONN_AG_CTX_BIT3_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_BIT3_SHIFT	3
+#define TSTORM_CORE_CONN_AG_CTX_BIT4_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_BIT4_SHIFT	4
+#define TSTORM_CORE_CONN_AG_CTX_BIT5_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_BIT5_SHIFT	5
+#define TSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	6
 	u8 flags1;
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	0
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	2
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT	4
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	0
+#define TSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	2
+#define TSTORM_CORE_CONN_AG_CTX_CF3_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF3_SHIFT	4
+#define TSTORM_CORE_CONN_AG_CTX_CF4_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF4_SHIFT	6
 	u8 flags2;
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT	0
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT	2
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT	4
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_CF5_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF5_SHIFT	0
+#define TSTORM_CORE_CONN_AG_CTX_CF6_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF6_SHIFT	2
+#define TSTORM_CORE_CONN_AG_CTX_CF7_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF7_SHIFT	4
+#define TSTORM_CORE_CONN_AG_CTX_CF8_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF8_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT	0
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT	2
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT	4
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT	5
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	6
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK	0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT	7
+#define TSTORM_CORE_CONN_AG_CTX_CF9_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF9_SHIFT	0
+#define TSTORM_CORE_CONN_AG_CTX_CF10_MASK	0x3
+#define TSTORM_CORE_CONN_AG_CTX_CF10_SHIFT	2
+#define TSTORM_CORE_CONN_AG_CTX_CF0EN_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT	4
+#define TSTORM_CORE_CONN_AG_CTX_CF1EN_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT	5
+#define TSTORM_CORE_CONN_AG_CTX_CF2EN_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_CF3EN_MASK	0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT	7
 	u8 flags4;
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT		0
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT		1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT		2
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT		3
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT		4
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT		5
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT		6
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define TSTORM_CORE_CONN_AG_CTX_CF4EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT		0
+#define TSTORM_CORE_CONN_AG_CTX_CF5EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT		1
+#define TSTORM_CORE_CONN_AG_CTX_CF6EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT		2
+#define TSTORM_CORE_CONN_AG_CTX_CF7EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF7EN_SHIFT		3
+#define TSTORM_CORE_CONN_AG_CTX_CF8EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF8EN_SHIFT		4
+#define TSTORM_CORE_CONN_AG_CTX_CF9EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF9EN_SHIFT		5
+#define TSTORM_CORE_CONN_AG_CTX_CF10EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_CF10EN_SHIFT		6
+#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags5;
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define TSTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT	7
 	__le32 reg0;
 	__le32 reg1;
 	__le32 reg2;
@@ -761,63 +761,63 @@ struct e4_tstorm_core_conn_ag_ctx {
 	__le32 reg10;
 };
 
-struct e4_ustorm_core_conn_ag_ctx {
+struct ustorm_core_conn_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_USTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_USTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_USTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
+#define USTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
+#define USTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
+#define USTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
+#define USTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
+#define USTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
+#define USTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
+#define USTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
+#define USTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
+#define USTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
+#define USTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_USTORM_CORE_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF3_SHIFT	0
-#define E4_USTORM_CORE_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF4_SHIFT	2
-#define E4_USTORM_CORE_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF5_SHIFT	4
-#define E4_USTORM_CORE_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF6_SHIFT	6
+#define USTORM_CORE_CONN_AG_CTX_CF3_MASK	0x3
+#define USTORM_CORE_CONN_AG_CTX_CF3_SHIFT	0
+#define USTORM_CORE_CONN_AG_CTX_CF4_MASK	0x3
+#define USTORM_CORE_CONN_AG_CTX_CF4_SHIFT	2
+#define USTORM_CORE_CONN_AG_CTX_CF5_MASK	0x3
+#define USTORM_CORE_CONN_AG_CTX_CF5_SHIFT	4
+#define USTORM_CORE_CONN_AG_CTX_CF6_MASK	0x3
+#define USTORM_CORE_CONN_AG_CTX_CF6_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_USTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_USTORM_CORE_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT		3
-#define E4_USTORM_CORE_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT		4
-#define E4_USTORM_CORE_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT		5
-#define E4_USTORM_CORE_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT		6
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define USTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		0
+#define USTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		1
+#define USTORM_CORE_CONN_AG_CTX_CF2EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT		2
+#define USTORM_CORE_CONN_AG_CTX_CF3EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_CF3EN_SHIFT		3
+#define USTORM_CORE_CONN_AG_CTX_CF4EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_CF4EN_SHIFT		4
+#define USTORM_CORE_CONN_AG_CTX_CF5EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_CF5EN_SHIFT		5
+#define USTORM_CORE_CONN_AG_CTX_CF6EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_CF6EN_SHIFT		6
+#define USTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags3;
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_USTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define USTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_CORE_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_CORE_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_CORE_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_CORE_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define USTORM_CORE_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -846,15 +846,15 @@ struct tstorm_core_conn_st_ctx {
 };
 
 /* core connection context */
-struct e4_core_conn_context {
+struct core_conn_context {
 	struct ystorm_core_conn_st_ctx ystorm_st_context;
 	struct regpair ystorm_st_padding[2];
 	struct pstorm_core_conn_st_ctx pstorm_st_context;
 	struct regpair pstorm_st_padding[2];
 	struct xstorm_core_conn_st_ctx xstorm_st_context;
-	struct e4_xstorm_core_conn_ag_ctx xstorm_ag_context;
-	struct e4_tstorm_core_conn_ag_ctx tstorm_ag_context;
-	struct e4_ustorm_core_conn_ag_ctx ustorm_ag_context;
+	struct xstorm_core_conn_ag_ctx xstorm_ag_context;
+	struct tstorm_core_conn_ag_ctx tstorm_ag_context;
+	struct ustorm_core_conn_ag_ctx ustorm_ag_context;
 	struct mstorm_core_conn_st_ctx mstorm_st_context;
 	struct ustorm_core_conn_st_ctx ustorm_st_context;
 	struct regpair ustorm_st_padding[2];
@@ -1525,74 +1525,74 @@ enum dmae_cmd_src_enum {
 	MAX_DMAE_CMD_SRC_ENUM
 };
 
-struct e4_mstorm_core_conn_ag_ctx {
+struct mstorm_core_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_MSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_MSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
+#define MSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
+#define MSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
+#define MSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
+#define MSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
+#define MSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
+#define MSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_MSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define MSTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		0
+#define MSTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		1
+#define MSTORM_CORE_CONN_AG_CTX_CF2EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT		2
+#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define MSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
 	__le16 word0;
 	__le16 word1;
 	__le32 reg0;
 	__le32 reg1;
 };
 
-struct e4_ystorm_core_conn_ag_ctx {
+struct ystorm_core_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_YSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
+#define YSTORM_CORE_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_CORE_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_CORE_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_CORE_CONN_AG_CTX_CF0_MASK	0x3
+#define YSTORM_CORE_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_CORE_CONN_AG_CTX_CF1_MASK	0x3
+#define YSTORM_CORE_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_CORE_CONN_AG_CTX_CF2_MASK	0x3
+#define YSTORM_CORE_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_YSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define YSTORM_CORE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_CF0EN_SHIFT		0
+#define YSTORM_CORE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_CF1EN_SHIFT		1
+#define YSTORM_CORE_CONN_AG_CTX_CF2EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_CF2EN_SHIFT		2
+#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define YSTORM_CORE_CONN_AG_CTX_RULE4EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -1778,22 +1778,22 @@ struct qm_rf_opportunistic_mask {
 };
 
 /* QM hardware structure of QM map memory */
-struct qm_rf_pq_map_e4 {
+struct qm_rf_pq_map {
 	__le32 reg;
-#define QM_RF_PQ_MAP_E4_PQ_VALID_MASK		0x1
-#define QM_RF_PQ_MAP_E4_PQ_VALID_SHIFT		0
-#define QM_RF_PQ_MAP_E4_RL_ID_MASK		0xFF
-#define QM_RF_PQ_MAP_E4_RL_ID_SHIFT		1
-#define QM_RF_PQ_MAP_E4_VP_PQ_ID_MASK		0x1FF
-#define QM_RF_PQ_MAP_E4_VP_PQ_ID_SHIFT		9
-#define QM_RF_PQ_MAP_E4_VOQ_MASK		0x1F
-#define QM_RF_PQ_MAP_E4_VOQ_SHIFT		18
-#define QM_RF_PQ_MAP_E4_WRR_WEIGHT_GROUP_MASK	0x3
-#define QM_RF_PQ_MAP_E4_WRR_WEIGHT_GROUP_SHIFT	23
-#define QM_RF_PQ_MAP_E4_RL_VALID_MASK		0x1
-#define QM_RF_PQ_MAP_E4_RL_VALID_SHIFT		25
-#define QM_RF_PQ_MAP_E4_RESERVED_MASK		0x3F
-#define QM_RF_PQ_MAP_E4_RESERVED_SHIFT		26
+#define QM_RF_PQ_MAP_PQ_VALID_MASK		0x1
+#define QM_RF_PQ_MAP_PQ_VALID_SHIFT		0
+#define QM_RF_PQ_MAP_RL_ID_MASK		0xFF
+#define QM_RF_PQ_MAP_RL_ID_SHIFT		1
+#define QM_RF_PQ_MAP_VP_PQ_ID_MASK		0x1FF
+#define QM_RF_PQ_MAP_VP_PQ_ID_SHIFT		9
+#define QM_RF_PQ_MAP_VOQ_MASK		0x1F
+#define QM_RF_PQ_MAP_VOQ_SHIFT		18
+#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_MASK	0x3
+#define QM_RF_PQ_MAP_WRR_WEIGHT_GROUP_SHIFT	23
+#define QM_RF_PQ_MAP_RL_VALID_MASK		0x1
+#define QM_RF_PQ_MAP_RL_VALID_SHIFT		25
+#define QM_RF_PQ_MAP_RESERVED_MASK		0x3F
+#define QM_RF_PQ_MAP_RESERVED_SHIFT		26
 };
 
 /* Completion params for aggregated interrupt completion */
@@ -4892,216 +4892,216 @@ struct xstorm_eth_conn_st_ctx {
 	__le32 reserved[60];
 };
 
-struct e4_xstorm_eth_conn_ag_ctx {
+struct xstorm_eth_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED1_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED1_SHIFT	1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED2_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED2_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED3_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED3_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED4_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED4_SHIFT	5
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED5_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED5_SHIFT	6
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED6_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED6_SHIFT	7
+#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED1_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED1_SHIFT	1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED2_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED2_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED3_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED3_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED4_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED4_SHIFT	5
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED5_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED5_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED6_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED6_SHIFT	7
 		u8 flags1;
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED7_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED7_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED8_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED8_SHIFT	1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED9_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED9_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_BIT11_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_ETH_CONN_AG_CTX_E5_RESERVED2_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_E5_RESERVED2_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_E5_RESERVED3_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_E5_RESERVED3_SHIFT	5
-#define E4_XSTORM_ETH_CONN_AG_CTX_TX_RULE_ACTIVE_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT	6
-#define E4_XSTORM_ETH_CONN_AG_CTX_DQ_CF_ACTIVE_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT	7
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED7_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED7_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED8_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED8_SHIFT	1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED9_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED9_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_BIT11_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_BIT11_SHIFT		3
+#define XSTORM_ETH_CONN_AG_CTX_E5_RESERVED2_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_E5_RESERVED2_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_E5_RESERVED3_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_E5_RESERVED3_SHIFT	5
+#define XSTORM_ETH_CONN_AG_CTX_TX_RULE_ACTIVE_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_ACTIVE_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT	7
 	u8 flags2;
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF0_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF1_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF2_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF3_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_CF0_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF0_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_CF1_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF1_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_CF2_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF2_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_CF3_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF3_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF4_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF5_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF6_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF7_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_CF4_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF4_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_CF5_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF5_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_CF6_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF6_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_CF7_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF7_SHIFT	6
 		u8 flags4;
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF12_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF12_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF13_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF13_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF14_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF14_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF15_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF15_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_CF12_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF12_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_CF13_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF13_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_CF14_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF14_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_CF15_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_CF15_SHIFT	6
 	u8 flags6;
-#define E4_XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK		0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_MASK		0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_DQ_CF_MASK			0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_DQ_CF_SHIFT			4
-#define E4_XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_MASK		0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_SHIFT		6
+#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK		0x3
+#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_MASK		0x3
+#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_MASK			0x3
+#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_SHIFT			4
+#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_MASK		0x3
+#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_SHIFT		6
 	u8 flags7;
-#define E4_XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_MASK		0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED10_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED10_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT		6
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT		7
+#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_MASK		0x3
+#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED10_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED10_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_CF0EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT		6
+#define XSTORM_ETH_CONN_AG_CTX_CF1EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT		7
 	u8 flags8;
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF3EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT	1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF4EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF5EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT	3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF6EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF7EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT	5
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF8EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT	6
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF9EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT	7
+#define XSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_CF3EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT	1
+#define XSTORM_ETH_CONN_AG_CTX_CF4EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_CF5EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT	3
+#define XSTORM_ETH_CONN_AG_CTX_CF6EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_CF7EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT	5
+#define XSTORM_ETH_CONN_AG_CTX_CF8EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_CF9EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT	7
 	u8 flags9;
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF10EN_MASK			0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT			0
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF11EN_MASK			0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF11EN_SHIFT			1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF12EN_MASK			0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF12EN_SHIFT			2
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF13EN_MASK			0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF13EN_SHIFT			3
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF14EN_MASK			0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF14EN_SHIFT			4
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF15EN_MASK			0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_CF15EN_SHIFT			5
-#define E4_XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT	6
-#define E4_XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT	7
+#define XSTORM_ETH_CONN_AG_CTX_CF10EN_MASK			0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT			0
+#define XSTORM_ETH_CONN_AG_CTX_CF11EN_MASK			0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF11EN_SHIFT			1
+#define XSTORM_ETH_CONN_AG_CTX_CF12EN_MASK			0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF12EN_SHIFT			2
+#define XSTORM_ETH_CONN_AG_CTX_CF13EN_MASK			0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF13EN_SHIFT			3
+#define XSTORM_ETH_CONN_AG_CTX_CF14EN_MASK			0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF14EN_SHIFT			4
+#define XSTORM_ETH_CONN_AG_CTX_CF15EN_MASK			0x1
+#define XSTORM_ETH_CONN_AG_CTX_CF15EN_SHIFT			5
+#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT	7
 	u8 flags10;
-#define E4_XSTORM_ETH_CONN_AG_CTX_DQ_CF_EN_MASK			0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_DQ_CF_EN_SHIFT		0
-#define E4_XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT		1
-#define E4_XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		2
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED11_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED11_SHIFT		3
-#define E4_XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
-#define E4_XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT	5
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED12_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED12_SHIFT		6
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED13_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED13_SHIFT		7
+#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_EN_MASK			0x1
+#define XSTORM_ETH_CONN_AG_CTX_DQ_CF_EN_SHIFT		0
+#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT		1
+#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		2
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED11_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED11_SHIFT		3
+#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
+#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT	5
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED12_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED12_SHIFT		6
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED13_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED13_SHIFT		7
 	u8 flags11;
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED14_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED14_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED15_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RESERVED15_SHIFT	1
-#define E4_XSTORM_ETH_CONN_AG_CTX_TX_DEC_RULE_EN_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT		3
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT		4
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT		5
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE9EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE9EN_SHIFT		7
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED14_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED14_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED15_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_RESERVED15_SHIFT	1
+#define XSTORM_ETH_CONN_AG_CTX_TX_DEC_RULE_EN_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT		3
+#define XSTORM_ETH_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT		4
+#define XSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT		5
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_RULE9EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE9EN_SHIFT		7
 	u8 flags12;
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE10EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE10EN_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE11EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE11EN_SHIFT	1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE14EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE14EN_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE15EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE15EN_SHIFT	5
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE16EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE16EN_SHIFT	6
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE17EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE17EN_SHIFT	7
+#define XSTORM_ETH_CONN_AG_CTX_RULE10EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE10EN_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_RULE11EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE11EN_SHIFT	1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
+#define XSTORM_ETH_CONN_AG_CTX_RULE14EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE14EN_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_RULE15EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE15EN_SHIFT	5
+#define XSTORM_ETH_CONN_AG_CTX_RULE16EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE16EN_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_RULE17EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE17EN_SHIFT	7
 	u8 flags13;
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE18EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE18EN_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE19EN_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_RULE19EN_SHIFT	1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
+#define XSTORM_ETH_CONN_AG_CTX_RULE18EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE18EN_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_RULE19EN_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_RULE19EN_SHIFT	1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
 	u8 flags14;
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT	0
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT	1
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT	2
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK	0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT	3
-#define E4_XSTORM_ETH_CONN_AG_CTX_L2_EDPM_ENABLE_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT		4
-#define E4_XSTORM_ETH_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK		0x1
-#define E4_XSTORM_ETH_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT	5
-#define E4_XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_MASK		0x3
-#define E4_XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_SHIFT		6
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT	0
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT	1
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT	2
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK	0x1
+#define XSTORM_ETH_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT	3
+#define XSTORM_ETH_CONN_AG_CTX_L2_EDPM_ENABLE_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT		4
+#define XSTORM_ETH_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK		0x1
+#define XSTORM_ETH_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT	5
+#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_MASK		0x3
+#define XSTORM_ETH_CONN_AG_CTX_TPH_ENABLE_SHIFT		6
 	u8 edpm_event_id;
 	__le16 physical_q0;
 	__le16 e5_reserved1;
@@ -5160,37 +5160,37 @@ struct ystorm_eth_conn_st_ctx {
 	__le32 reserved[8];
 };
 
-struct e4_ystorm_eth_conn_ag_ctx {
+struct ystorm_eth_conn_ag_ctx {
 	u8 byte0;
 	u8 state;
 	u8 flags0;
-#define E4_YSTORM_ETH_CONN_AG_CTX_BIT0_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT			0
-#define E4_YSTORM_ETH_CONN_AG_CTX_BIT1_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT			1
-#define E4_YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK	0x3
-#define E4_YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT	2
-#define E4_YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_MASK		0x3
-#define E4_YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_SHIFT	4
-#define E4_YSTORM_ETH_CONN_AG_CTX_CF2_MASK			0x3
-#define E4_YSTORM_ETH_CONN_AG_CTX_CF2_SHIFT			6
+#define YSTORM_ETH_CONN_AG_CTX_BIT0_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT			0
+#define YSTORM_ETH_CONN_AG_CTX_BIT1_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT			1
+#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK	0x3
+#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT	2
+#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_MASK		0x3
+#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_SHIFT	4
+#define YSTORM_ETH_CONN_AG_CTX_CF2_MASK			0x3
+#define YSTORM_ETH_CONN_AG_CTX_CF2_SHIFT			6
 	u8 flags1;
-#define E4_YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK	0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT	0
-#define E4_YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_MASK	0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_SHIFT	1
-#define E4_YSTORM_ETH_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT			2
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT			3
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT			4
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT			5
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT			6
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK			0x1
-#define E4_YSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT			7
+#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK	0x1
+#define YSTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT	0
+#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_MASK	0x1
+#define YSTORM_ETH_CONN_AG_CTX_PMD_TERMINATE_CF_EN_SHIFT	1
+#define YSTORM_ETH_CONN_AG_CTX_CF2EN_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT			2
+#define YSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT			3
+#define YSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT			4
+#define YSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT			5
+#define YSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT			6
+#define YSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK			0x1
+#define YSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT			7
 	u8 tx_q0_int_coallecing_timeset;
 	u8 byte3;
 	__le16 word0;
@@ -5204,89 +5204,89 @@ struct e4_ystorm_eth_conn_ag_ctx {
 	__le32 reg3;
 };
 
-struct e4_tstorm_eth_conn_ag_ctx {
+struct tstorm_eth_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT2_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT2_SHIFT	2
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT3_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT3_SHIFT	3
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT4_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT4_SHIFT	4
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT5_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_BIT5_SHIFT	5
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF0_SHIFT	6
+#define TSTORM_ETH_CONN_AG_CTX_BIT0_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_BIT0_SHIFT	0
+#define TSTORM_ETH_CONN_AG_CTX_BIT1_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT	1
+#define TSTORM_ETH_CONN_AG_CTX_BIT2_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_BIT2_SHIFT	2
+#define TSTORM_ETH_CONN_AG_CTX_BIT3_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_BIT3_SHIFT	3
+#define TSTORM_ETH_CONN_AG_CTX_BIT4_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_BIT4_SHIFT	4
+#define TSTORM_ETH_CONN_AG_CTX_BIT5_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_BIT5_SHIFT	5
+#define TSTORM_ETH_CONN_AG_CTX_CF0_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF0_SHIFT	6
 	u8 flags1;
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF1_SHIFT	0
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF2_SHIFT	2
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF3_SHIFT	4
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF4_SHIFT	6
+#define TSTORM_ETH_CONN_AG_CTX_CF1_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF1_SHIFT	0
+#define TSTORM_ETH_CONN_AG_CTX_CF2_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF2_SHIFT	2
+#define TSTORM_ETH_CONN_AG_CTX_CF3_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF3_SHIFT	4
+#define TSTORM_ETH_CONN_AG_CTX_CF4_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF4_SHIFT	6
 	u8 flags2;
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF5_SHIFT	0
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF6_SHIFT	2
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF7_SHIFT	4
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF8_SHIFT	6
+#define TSTORM_ETH_CONN_AG_CTX_CF5_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF5_SHIFT	0
+#define TSTORM_ETH_CONN_AG_CTX_CF6_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF6_SHIFT	2
+#define TSTORM_ETH_CONN_AG_CTX_CF7_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF7_SHIFT	4
+#define TSTORM_ETH_CONN_AG_CTX_CF8_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF8_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF9_SHIFT	0
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF10_SHIFT	2
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT	4
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT	5
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT	6
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF3EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT	7
+#define TSTORM_ETH_CONN_AG_CTX_CF9_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF9_SHIFT	0
+#define TSTORM_ETH_CONN_AG_CTX_CF10_MASK	0x3
+#define TSTORM_ETH_CONN_AG_CTX_CF10_SHIFT	2
+#define TSTORM_ETH_CONN_AG_CTX_CF0EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT	4
+#define TSTORM_ETH_CONN_AG_CTX_CF1EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT	5
+#define TSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT	6
+#define TSTORM_ETH_CONN_AG_CTX_CF3EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT	7
 	u8 flags4;
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF4EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT	0
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF5EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT	1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF6EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT	2
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF7EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT	3
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF8EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT	4
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF9EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT	5
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF10EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT	6
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define TSTORM_ETH_CONN_AG_CTX_CF4EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF4EN_SHIFT	0
+#define TSTORM_ETH_CONN_AG_CTX_CF5EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF5EN_SHIFT	1
+#define TSTORM_ETH_CONN_AG_CTX_CF6EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF6EN_SHIFT	2
+#define TSTORM_ETH_CONN_AG_CTX_CF7EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF7EN_SHIFT	3
+#define TSTORM_ETH_CONN_AG_CTX_CF8EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF8EN_SHIFT	4
+#define TSTORM_ETH_CONN_AG_CTX_CF9EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF9EN_SHIFT	5
+#define TSTORM_ETH_CONN_AG_CTX_CF10EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_CF10EN_SHIFT	6
+#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags5;
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT		0
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT		1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT		2
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT		3
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT		4
-#define E4_TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_SHIFT	5
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT		6
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_TSTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT		7
+#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT		0
+#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT		1
+#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT		2
+#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT		3
+#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT		4
+#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RX_BD_EN_SHIFT	5
+#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT		6
+#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define TSTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT		7
 	__le32 reg0;
 	__le32 reg1;
 	__le32 reg2;
@@ -5308,63 +5308,63 @@ struct e4_tstorm_eth_conn_ag_ctx {
 	__le32 reg10;
 };
 
-struct e4_ustorm_eth_conn_ag_ctx {
+struct ustorm_eth_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_ETH_CONN_AG_CTX_BIT0_MASK			0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_BIT0_SHIFT			0
-#define E4_USTORM_ETH_CONN_AG_CTX_BIT1_MASK			0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_BIT1_SHIFT			1
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_MASK	0x3
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_SHIFT	2
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_MASK	0x3
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_SHIFT	4
-#define E4_USTORM_ETH_CONN_AG_CTX_CF2_MASK			0x3
-#define E4_USTORM_ETH_CONN_AG_CTX_CF2_SHIFT			6
+#define USTORM_ETH_CONN_AG_CTX_BIT0_MASK			0x1
+#define USTORM_ETH_CONN_AG_CTX_BIT0_SHIFT			0
+#define USTORM_ETH_CONN_AG_CTX_BIT1_MASK			0x1
+#define USTORM_ETH_CONN_AG_CTX_BIT1_SHIFT			1
+#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_MASK	0x3
+#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_SHIFT	2
+#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_MASK	0x3
+#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_SHIFT	4
+#define USTORM_ETH_CONN_AG_CTX_CF2_MASK			0x3
+#define USTORM_ETH_CONN_AG_CTX_CF2_SHIFT			6
 	u8 flags1;
-#define E4_USTORM_ETH_CONN_AG_CTX_CF3_MASK			0x3
-#define E4_USTORM_ETH_CONN_AG_CTX_CF3_SHIFT			0
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_MASK		0x3
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_SHIFT		2
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_MASK		0x3
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_SHIFT		4
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK	0x3
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT	6
+#define USTORM_ETH_CONN_AG_CTX_CF3_MASK			0x3
+#define USTORM_ETH_CONN_AG_CTX_CF3_SHIFT			0
+#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_MASK		0x3
+#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_SHIFT		2
+#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_MASK		0x3
+#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_SHIFT		4
+#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_MASK	0x3
+#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_SHIFT	0
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_SHIFT	1
-#define E4_USTORM_ETH_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT			2
-#define E4_USTORM_ETH_CONN_AG_CTX_CF3EN_MASK			0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT			3
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_MASK		0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_SHIFT		4
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_MASK		0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_SHIFT		5
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT	6
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE0EN_MASK			0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT			7
+#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_TX_PMD_TERMINATE_CF_EN_SHIFT	0
+#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RX_PMD_TERMINATE_CF_EN_SHIFT	1
+#define USTORM_ETH_CONN_AG_CTX_CF2EN_MASK			0x1
+#define USTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT			2
+#define USTORM_ETH_CONN_AG_CTX_CF3EN_MASK			0x1
+#define USTORM_ETH_CONN_AG_CTX_CF3EN_SHIFT			3
+#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_MASK		0x1
+#define USTORM_ETH_CONN_AG_CTX_TX_ARM_CF_EN_SHIFT		4
+#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_MASK		0x1
+#define USTORM_ETH_CONN_AG_CTX_RX_ARM_CF_EN_SHIFT		5
+#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_TX_BD_CONS_UPD_CF_EN_SHIFT	6
+#define USTORM_ETH_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT			7
 	u8 flags3;
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE8EN_MASK	0x1
-#define E4_USTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_ETH_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define USTORM_ETH_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_ETH_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_ETH_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_ETH_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_ETH_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_ETH_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_ETH_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define USTORM_ETH_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -5388,16 +5388,16 @@ struct mstorm_eth_conn_st_ctx {
 };
 
 /* eth connection context */
-struct e4_eth_conn_context {
+struct eth_conn_context {
 	struct tstorm_eth_conn_st_ctx tstorm_st_context;
 	struct regpair tstorm_st_padding[2];
 	struct pstorm_eth_conn_st_ctx pstorm_st_context;
 	struct xstorm_eth_conn_st_ctx xstorm_st_context;
-	struct e4_xstorm_eth_conn_ag_ctx xstorm_ag_context;
-	struct e4_tstorm_eth_conn_ag_ctx tstorm_ag_context;
+	struct xstorm_eth_conn_ag_ctx xstorm_ag_context;
+	struct tstorm_eth_conn_ag_ctx tstorm_ag_context;
 	struct ystorm_eth_conn_st_ctx ystorm_st_context;
-	struct e4_ystorm_eth_conn_ag_ctx ystorm_ag_context;
-	struct e4_ustorm_eth_conn_ag_ctx ustorm_ag_context;
+	struct ystorm_eth_conn_ag_ctx ystorm_ag_context;
+	struct ustorm_eth_conn_ag_ctx ustorm_ag_context;
 	struct ustorm_eth_conn_st_ctx ustorm_st_context;
 	struct mstorm_eth_conn_st_ctx mstorm_st_context;
 };
@@ -6006,7 +6006,7 @@ struct vport_update_ramrod_data {
 	struct eth_vport_rss_config rss_config;
 };
 
-struct e4_xstorm_eth_conn_ag_ctx_dq_ext_ldpart {
+struct xstorm_eth_conn_ag_ctx_dq_ext_ldpart {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
@@ -6235,253 +6235,253 @@ struct e4_xstorm_eth_conn_ag_ctx_dq_ext_ldpart {
 	__le32 reg4;
 };
 
-struct e4_mstorm_eth_conn_ag_ctx {
+struct mstorm_eth_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	 0
-#define E4_MSTORM_ETH_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT		1
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT		2
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF1_MASK		0x3
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT		4
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF2_MASK		0x3
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT		6
+#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	 0
+#define MSTORM_ETH_CONN_AG_CTX_BIT1_MASK		0x1
+#define MSTORM_ETH_CONN_AG_CTX_BIT1_SHIFT		1
+#define MSTORM_ETH_CONN_AG_CTX_CF0_MASK		0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF0_SHIFT		2
+#define MSTORM_ETH_CONN_AG_CTX_CF1_MASK		0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF1_SHIFT		4
+#define MSTORM_ETH_CONN_AG_CTX_CF2_MASK		0x3
+#define MSTORM_ETH_CONN_AG_CTX_CF2_SHIFT		6
 	u8 flags1;
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT	0
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT	1
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT	2
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define MSTORM_ETH_CONN_AG_CTX_CF0EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF0EN_SHIFT	0
+#define MSTORM_ETH_CONN_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF1EN_SHIFT	1
+#define MSTORM_ETH_CONN_AG_CTX_CF2EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_CF2EN_SHIFT	2
+#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_ETH_CONN_AG_CTX_RULE4EN_SHIFT	7
 	__le16 word0;
 	__le16 word1;
 	__le32 reg0;
 	__le32 reg1;
 };
 
-struct e4_xstorm_eth_hw_conn_ag_ctx {
+struct xstorm_eth_hw_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_SHIFT	1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_SHIFT	5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_SHIFT	6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_SHIFT	7
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED1_SHIFT	1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED2_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED3_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED4_SHIFT	5
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED5_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED6_SHIFT	7
 	u8 flags1;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_SHIFT		0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_SHIFT		1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_SHIFT		2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_BIT11_MASK			0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED2_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED2_SHIFT		4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED3_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED3_SHIFT		5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT	6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT		7
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED7_SHIFT		0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED8_SHIFT		1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED9_SHIFT		2
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_MASK			0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_BIT11_SHIFT		3
+#define XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED2_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED2_SHIFT		4
+#define XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED3_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_E5_RESERVED3_SHIFT		5
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_RULE_ACTIVE_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_ACTIVE_SHIFT		7
 	u8 flags2;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF0_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF1_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF2_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF3_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF4_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF5_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF6_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF7_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF12_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF12_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF13_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF13_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF14_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF14_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF15_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF15_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15_SHIFT	6
 	u8 flags6;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_MASK			0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_SHIFT		4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_MASK		0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_SHIFT		6
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_MASK			0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_SHIFT		4
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_MASK		0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_SHIFT		6
 	u8 flags7;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_SHIFT	6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_SHIFT	7
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED10_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF0EN_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF1EN_SHIFT	7
 	u8 flags8;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_SHIFT	1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_SHIFT	3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_SHIFT	5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_SHIFT	6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_SHIFT	7
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF2EN_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF3EN_SHIFT	1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF4EN_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF5EN_SHIFT	3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF6EN_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF7EN_SHIFT	5
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF8EN_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF9EN_SHIFT	7
 	u8 flags9;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_SHIFT		0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_SHIFT		1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_SHIFT		2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_SHIFT		3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_SHIFT		4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_SHIFT		5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT	6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT	7
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF10EN_SHIFT		0
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF11EN_SHIFT		1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF12EN_SHIFT		2
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF13EN_SHIFT		3
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF14EN_SHIFT		4
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_CF15EN_SHIFT		5
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_GO_TO_BD_CONS_CF_EN_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_MULTI_UNICAST_CF_EN_SHIFT	7
 	u8 flags10;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_MASK			0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_SHIFT			0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT		1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_MASK			0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT			2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_MASK			0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_SHIFT			3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_MASK			0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_SHIFT			4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT	5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_MASK			0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_SHIFT			6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_MASK			0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_SHIFT			7
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_MASK			0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_DQ_CF_EN_SHIFT			0
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TERMINATE_CF_EN_SHIFT		1
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_MASK			0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT			2
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_MASK			0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED11_SHIFT			3
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_MASK			0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_SLOW_PATH_EN_SHIFT			4
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_EN_RESERVED_SHIFT	5
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_MASK			0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED12_SHIFT			6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_MASK			0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED13_SHIFT			7
 	u8 flags11;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_SHIFT		0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_SHIFT		1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_SHIFT		3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_SHIFT		4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_SHIFT		5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_SHIFT		6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_MASK		0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_SHIFT		7
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED14_SHIFT		0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RESERVED15_SHIFT		1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_TX_DEC_RULE_EN_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE5EN_SHIFT		3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE6EN_SHIFT		4
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE7EN_SHIFT		5
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED1_SHIFT		6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_MASK		0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE9EN_SHIFT		7
 	u8 flags12;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_SHIFT	1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_SHIFT	5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_SHIFT	6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_SHIFT	7
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE10EN_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE11EN_SHIFT	1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE14EN_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE15EN_SHIFT	5
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE16EN_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE17EN_SHIFT	7
 	u8 flags13;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_SHIFT	1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE18EN_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_RULE19EN_SHIFT	1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
 	u8 flags14;
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT	0
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT	1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT	2
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT	3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT	4
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK	0x1
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT	5
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_MASK		0x3
-#define E4_XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_SHIFT		6
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_USE_EXT_HDR_SHIFT	0
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_RAW_L3L4_SHIFT	1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_INBAND_PROP_HDR_SHIFT	2
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_EDPM_SEND_EXT_TUNNEL_SHIFT	3
+#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_L2_EDPM_ENABLE_SHIFT	4
+#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK	0x1
+#define XSTORM_ETH_HW_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT	5
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_MASK		0x3
+#define XSTORM_ETH_HW_CONN_AG_CTX_TPH_ENABLE_SHIFT		6
 	u8 edpm_event_id;
 	__le16 physical_q0;
 	__le16 e5_reserved1;
@@ -6682,49 +6682,49 @@ struct ystorm_rdma_task_st_ctx {
 	struct regpair temp[4];
 };
 
-struct e4_ystorm_rdma_task_ag_ctx {
+struct ystorm_rdma_task_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	__le16 msem_ctx_upd_seq;
 	u8 flags0;
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_YSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_YSTORM_RDMA_TASK_AG_CTX_BIT1_MASK			0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
-#define E4_YSTORM_RDMA_TASK_AG_CTX_VALID_MASK			0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_VALID_SHIFT			6
-#define E4_YSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_SHIFT		7
+#define YSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
+#define YSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define YSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define YSTORM_RDMA_TASK_AG_CTX_BIT1_MASK			0x1
+#define YSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
+#define YSTORM_RDMA_TASK_AG_CTX_VALID_MASK			0x1
+#define YSTORM_RDMA_TASK_AG_CTX_VALID_SHIFT			6
+#define YSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_SHIFT		7
 	u8 flags1;
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF0_MASK		0x3
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT		0
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF1_MASK		0x3
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT		2
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF2SPECIAL_MASK	0x3
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF2SPECIAL_SHIFT	4
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT		6
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT		7
+#define YSTORM_RDMA_TASK_AG_CTX_CF0_MASK		0x3
+#define YSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT		0
+#define YSTORM_RDMA_TASK_AG_CTX_CF1_MASK		0x3
+#define YSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT		2
+#define YSTORM_RDMA_TASK_AG_CTX_CF2SPECIAL_MASK	0x3
+#define YSTORM_RDMA_TASK_AG_CTX_CF2SPECIAL_SHIFT	4
+#define YSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT		6
+#define YSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT		7
 	u8 flags2;
-#define E4_YSTORM_RDMA_TASK_AG_CTX_BIT4_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_BIT4_SHIFT		0
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT	1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT	2
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT	3
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT	4
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT	5
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT	6
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK		0x1
-#define E4_YSTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT	7
+#define YSTORM_RDMA_TASK_AG_CTX_BIT4_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_BIT4_SHIFT		0
+#define YSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT	1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT	2
+#define YSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT	3
+#define YSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT	4
+#define YSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT	5
+#define YSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT	6
+#define YSTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK		0x1
+#define YSTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT	7
 	u8 key;
 	__le32 mw_cnt_or_qp_id;
 	u8 ref_cnt_seq;
@@ -6738,49 +6738,49 @@ struct e4_ystorm_rdma_task_ag_ctx {
 	__le32 fbo_hi;
 };
 
-struct e4_mstorm_rdma_task_ag_ctx {
+struct mstorm_rdma_task_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	__le16 icid;
 	u8 flags0;
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_MSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_MSTORM_RDMA_TASK_AG_CTX_BIT1_MASK			0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
-#define E4_MSTORM_RDMA_TASK_AG_CTX_BIT2_MASK			0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_BIT2_SHIFT			6
-#define E4_MSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_SHIFT		7
+#define MSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
+#define MSTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define MSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define MSTORM_RDMA_TASK_AG_CTX_BIT1_MASK			0x1
+#define MSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
+#define MSTORM_RDMA_TASK_AG_CTX_BIT2_MASK			0x1
+#define MSTORM_RDMA_TASK_AG_CTX_BIT2_SHIFT			6
+#define MSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_DIF_FIRST_IO_SHIFT		7
 	u8 flags1;
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF0_MASK	0x3
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT	0
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF1_MASK	0x3
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT	2
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF2_MASK	0x3
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF2_SHIFT	4
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK	0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT	6
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK	0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT	7
+#define MSTORM_RDMA_TASK_AG_CTX_CF0_MASK	0x3
+#define MSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT	0
+#define MSTORM_RDMA_TASK_AG_CTX_CF1_MASK	0x3
+#define MSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT	2
+#define MSTORM_RDMA_TASK_AG_CTX_CF2_MASK	0x3
+#define MSTORM_RDMA_TASK_AG_CTX_CF2_SHIFT	4
+#define MSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK	0x1
+#define MSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT	6
+#define MSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT	7
 	u8 flags2;
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF2EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_CF2EN_SHIFT		0
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT	1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT	2
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT	3
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT	4
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT	5
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT	6
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK		0x1
-#define E4_MSTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT	7
+#define MSTORM_RDMA_TASK_AG_CTX_CF2EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_CF2EN_SHIFT		0
+#define MSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT	1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT	2
+#define MSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT	3
+#define MSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT	4
+#define MSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT	5
+#define MSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT	6
+#define MSTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK		0x1
+#define MSTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT	7
 	u8 key;
 	__le32 mw_cnt_or_qp_id;
 	u8 ref_cnt_seq;
@@ -6804,56 +6804,56 @@ struct ustorm_rdma_task_st_ctx {
 	struct regpair temp[6];
 };
 
-struct e4_ustorm_rdma_task_ag_ctx {
+struct ustorm_rdma_task_ag_ctx {
 	u8 reserved;
 	u8 state;
 	__le16 icid;
 	u8 flags0;
-#define E4_USTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
-#define E4_USTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_USTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_USTORM_RDMA_TASK_AG_CTX_BIT1_MASK			0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_MASK	0x3
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_SHIFT	6
+#define USTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
+#define USTORM_RDMA_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define USTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define USTORM_RDMA_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define USTORM_RDMA_TASK_AG_CTX_BIT1_MASK			0x1
+#define USTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT			5
+#define USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_MASK	0x3
+#define USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_SHIFT	6
 	u8 flags1;
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RESULT_TOGGLE_BIT_MASK	0x3
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RESULT_TOGGLE_BIT_SHIFT	0
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_TX_IO_FLG_MASK		0x3
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_TX_IO_FLG_SHIFT		2
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_BLOCK_SIZE_MASK          0x3
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_BLOCK_SIZE_SHIFT         4
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_MASK		0x3
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_SHIFT		6
+#define USTORM_RDMA_TASK_AG_CTX_DIF_RESULT_TOGGLE_BIT_MASK	0x3
+#define USTORM_RDMA_TASK_AG_CTX_DIF_RESULT_TOGGLE_BIT_SHIFT	0
+#define USTORM_RDMA_TASK_AG_CTX_DIF_TX_IO_FLG_MASK		0x3
+#define USTORM_RDMA_TASK_AG_CTX_DIF_TX_IO_FLG_SHIFT		2
+#define USTORM_RDMA_TASK_AG_CTX_DIF_BLOCK_SIZE_MASK          0x3
+#define USTORM_RDMA_TASK_AG_CTX_DIF_BLOCK_SIZE_SHIFT         4
+#define USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_MASK		0x3
+#define USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_SHIFT		6
 	u8 flags2;
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_EN_MASK	0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_EN_SHIFT	0
-#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED2_MASK		0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED2_SHIFT		1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED3_MASK		0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED3_SHIFT		2
-#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED4_MASK               0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RESERVED4_SHIFT              3
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK		0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT	4
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK			0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT		5
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK			0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT		6
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK			0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT		7
+#define USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_EN_MASK	0x1
+#define USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_RESULT_CF_EN_SHIFT	0
+#define USTORM_RDMA_TASK_AG_CTX_RESERVED2_MASK		0x1
+#define USTORM_RDMA_TASK_AG_CTX_RESERVED2_SHIFT		1
+#define USTORM_RDMA_TASK_AG_CTX_RESERVED3_MASK		0x1
+#define USTORM_RDMA_TASK_AG_CTX_RESERVED3_SHIFT		2
+#define USTORM_RDMA_TASK_AG_CTX_RESERVED4_MASK               0x1
+#define USTORM_RDMA_TASK_AG_CTX_RESERVED4_SHIFT              3
+#define USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK		0x1
+#define USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT	4
+#define USTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK			0x1
+#define USTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT		5
+#define USTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK			0x1
+#define USTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT		6
+#define USTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK			0x1
+#define USTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT		7
 	u8 flags3;
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RXMIT_PROD_CONS_EN_MASK	0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_RXMIT_PROD_CONS_EN_SHIFT	0
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK			0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT		1
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_PROD_CONS_EN_MASK	0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_PROD_CONS_EN_SHIFT	2
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK			0x1
-#define E4_USTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT		3
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_MASK		0xF
-#define E4_USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT		4
+#define USTORM_RDMA_TASK_AG_CTX_DIF_RXMIT_PROD_CONS_EN_MASK	0x1
+#define USTORM_RDMA_TASK_AG_CTX_DIF_RXMIT_PROD_CONS_EN_SHIFT	0
+#define USTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK			0x1
+#define USTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT		1
+#define USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_PROD_CONS_EN_MASK	0x1
+#define USTORM_RDMA_TASK_AG_CTX_DIF_WRITE_PROD_CONS_EN_SHIFT	2
+#define USTORM_RDMA_TASK_AG_CTX_RULE6EN_MASK			0x1
+#define USTORM_RDMA_TASK_AG_CTX_RULE6EN_SHIFT		3
+#define USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_MASK		0xF
+#define USTORM_RDMA_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT		4
 	__le32 dif_err_intervals;
 	__le32 dif_error_1st_interval;
 	__le32 dif_rxmit_cons;
@@ -6870,16 +6870,16 @@ struct e4_ustorm_rdma_task_ag_ctx {
 };
 
 /* RDMA task context */
-struct e4_rdma_task_context {
+struct rdma_task_context {
 	struct ystorm_rdma_task_st_ctx ystorm_st_context;
-	struct e4_ystorm_rdma_task_ag_ctx ystorm_ag_context;
+	struct ystorm_rdma_task_ag_ctx ystorm_ag_context;
 	struct tdif_task_context tdif_context;
-	struct e4_mstorm_rdma_task_ag_ctx mstorm_ag_context;
+	struct mstorm_rdma_task_ag_ctx mstorm_ag_context;
 	struct mstorm_rdma_task_st_ctx mstorm_st_context;
 	struct rdif_task_context rdif_context;
 	struct ustorm_rdma_task_st_ctx ustorm_st_context;
 	struct regpair ustorm_st_padding[2];
-	struct e4_ustorm_rdma_task_ag_ctx ustorm_ag_context;
+	struct ustorm_rdma_task_ag_ctx ustorm_ag_context;
 };
 
 /* rdma function init ramrod data */
@@ -7135,73 +7135,73 @@ struct rdma_xrc_srq_context {
 	struct regpair temp[9];
 };
 
-struct e4_tstorm_rdma_task_ag_ctx {
+struct tstorm_rdma_task_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	__le16 word0;
 	u8 flags0;
-#define E4_TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_MASK		0xF
-#define E4_TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_SHIFT	0
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT0_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT0_SHIFT		4
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT1_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT		5
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT2_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT2_SHIFT		6
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT3_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT3_SHIFT		7
+#define TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_MASK		0xF
+#define TSTORM_RDMA_TASK_AG_CTX_NIBBLE0_SHIFT	0
+#define TSTORM_RDMA_TASK_AG_CTX_BIT0_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT0_SHIFT		4
+#define TSTORM_RDMA_TASK_AG_CTX_BIT1_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT1_SHIFT		5
+#define TSTORM_RDMA_TASK_AG_CTX_BIT2_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT2_SHIFT		6
+#define TSTORM_RDMA_TASK_AG_CTX_BIT3_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT3_SHIFT		7
 	u8 flags1;
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT4_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT4_SHIFT	0
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT5_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_BIT5_SHIFT	1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF0_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT	2
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF1_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT	4
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF2_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF2_SHIFT	6
+#define TSTORM_RDMA_TASK_AG_CTX_BIT4_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT4_SHIFT	0
+#define TSTORM_RDMA_TASK_AG_CTX_BIT5_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_BIT5_SHIFT	1
+#define TSTORM_RDMA_TASK_AG_CTX_CF0_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF0_SHIFT	2
+#define TSTORM_RDMA_TASK_AG_CTX_CF1_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF1_SHIFT	4
+#define TSTORM_RDMA_TASK_AG_CTX_CF2_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF2_SHIFT	6
 	u8 flags2;
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF3_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF3_SHIFT	0
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF4_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF4_SHIFT	2
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF5_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF5_SHIFT	4
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF6_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF6_SHIFT	6
+#define TSTORM_RDMA_TASK_AG_CTX_CF3_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF3_SHIFT	0
+#define TSTORM_RDMA_TASK_AG_CTX_CF4_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF4_SHIFT	2
+#define TSTORM_RDMA_TASK_AG_CTX_CF5_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF5_SHIFT	4
+#define TSTORM_RDMA_TASK_AG_CTX_CF6_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF6_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF7_MASK	0x3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF7_SHIFT	0
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT	2
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT	3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF2EN_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF2EN_SHIFT	4
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF3EN_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF3EN_SHIFT	5
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF4EN_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF4EN_SHIFT	6
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF5EN_MASK	0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF5EN_SHIFT	7
+#define TSTORM_RDMA_TASK_AG_CTX_CF7_MASK	0x3
+#define TSTORM_RDMA_TASK_AG_CTX_CF7_SHIFT	0
+#define TSTORM_RDMA_TASK_AG_CTX_CF0EN_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF0EN_SHIFT	2
+#define TSTORM_RDMA_TASK_AG_CTX_CF1EN_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF1EN_SHIFT	3
+#define TSTORM_RDMA_TASK_AG_CTX_CF2EN_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF2EN_SHIFT	4
+#define TSTORM_RDMA_TASK_AG_CTX_CF3EN_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF3EN_SHIFT	5
+#define TSTORM_RDMA_TASK_AG_CTX_CF4EN_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF4EN_SHIFT	6
+#define TSTORM_RDMA_TASK_AG_CTX_CF5EN_MASK	0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF5EN_SHIFT	7
 	u8 flags4;
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF6EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF6EN_SHIFT		0
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF7EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_CF7EN_SHIFT		1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT	2
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT	3
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT	4
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT	5
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT	6
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT	7
+#define TSTORM_RDMA_TASK_AG_CTX_CF6EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF6EN_SHIFT		0
+#define TSTORM_RDMA_TASK_AG_CTX_CF7EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_CF7EN_SHIFT		1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE0EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE0EN_SHIFT	2
+#define TSTORM_RDMA_TASK_AG_CTX_RULE1EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE1EN_SHIFT	3
+#define TSTORM_RDMA_TASK_AG_CTX_RULE2EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE2EN_SHIFT	4
+#define TSTORM_RDMA_TASK_AG_CTX_RULE3EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE3EN_SHIFT	5
+#define TSTORM_RDMA_TASK_AG_CTX_RULE4EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE4EN_SHIFT	6
+#define TSTORM_RDMA_TASK_AG_CTX_RULE5EN_MASK		0x1
+#define TSTORM_RDMA_TASK_AG_CTX_RULE5EN_SHIFT	7
 	u8 byte2;
 	__le16 word1;
 	__le32 reg0;
@@ -7214,63 +7214,63 @@ struct e4_tstorm_rdma_task_ag_ctx {
 	__le32 reg2;
 };
 
-struct e4_ustorm_rdma_conn_ag_ctx {
+struct ustorm_rdma_conn_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_USTORM_RDMA_CONN_AG_CTX_DIF_ERROR_REPORTED_MASK  0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_DIF_ERROR_REPORTED_SHIFT 1
-#define E4_USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
-#define E4_USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	2
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF1_MASK		0x3
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF1_SHIFT		4
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF2_MASK		0x3
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF2_SHIFT		6
+#define USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define USTORM_RDMA_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define USTORM_RDMA_CONN_AG_CTX_DIF_ERROR_REPORTED_MASK  0x1
+#define USTORM_RDMA_CONN_AG_CTX_DIF_ERROR_REPORTED_SHIFT 1
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	2
+#define USTORM_RDMA_CONN_AG_CTX_CF1_MASK		0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF1_SHIFT		4
+#define USTORM_RDMA_CONN_AG_CTX_CF2_MASK		0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF2_SHIFT		6
 	u8 flags1;
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF3_MASK		0x3
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF3_SHIFT		0
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_MASK	0x3
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_SHIFT	2
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_MASK	0x3
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_SHIFT	4
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF6_MASK		0x3
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF6_SHIFT		6
+#define USTORM_RDMA_CONN_AG_CTX_CF3_MASK		0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF3_SHIFT		0
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_MASK	0x3
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_SHIFT	2
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_MASK	0x3
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_SHIFT	4
+#define USTORM_RDMA_CONN_AG_CTX_CF6_MASK		0x3
+#define USTORM_RDMA_CONN_AG_CTX_CF6_SHIFT		6
 	u8 flags2;
-#define E4_USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF1EN_MASK			0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT			1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT			2
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF3EN_MASK			0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF3EN_SHIFT			3
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_SHIFT	4
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_SHIFT		5
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF6EN_MASK			0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT			6
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_SHIFT		7
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
+#define USTORM_RDMA_CONN_AG_CTX_CF1EN_MASK			0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF1EN_SHIFT			1
+#define USTORM_RDMA_CONN_AG_CTX_CF2EN_MASK			0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF2EN_SHIFT			2
+#define USTORM_RDMA_CONN_AG_CTX_CF3EN_MASK			0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF3EN_SHIFT			3
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_SE_CF_EN_SHIFT	4
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_ARM_CF_EN_SHIFT		5
+#define USTORM_RDMA_CONN_AG_CTX_CF6EN_MASK			0x1
+#define USTORM_RDMA_CONN_AG_CTX_CF6EN_SHIFT			6
+#define USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_SE_EN_SHIFT		7
 	u8 flags3;
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_CQ_EN_SHIFT		0
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_USTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_RDMA_CONN_AG_CTX_CQ_EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_CQ_EN_SHIFT		0
+#define USTORM_RDMA_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_RDMA_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_RDMA_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_RDMA_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_RDMA_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_RDMA_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define USTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 nvmf_only;
 	__le16 conn_dpi;
@@ -7283,214 +7283,214 @@ struct e4_ustorm_rdma_conn_ag_ctx {
 	__le16 word3;
 };
 
-struct e4_xstorm_roce_conn_ag_ctx {
+struct xstorm_roce_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT     0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT1_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT             1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT2_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT2_SHIFT             2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM3_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT     3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT4_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT4_SHIFT             4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT5_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT5_SHIFT             5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT6_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT6_SHIFT             6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT7_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT7_SHIFT             7
+#define XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT     0
+#define XSTORM_ROCE_CONN_AG_CTX_BIT1_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT             1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT2_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT2_SHIFT             2
+#define XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM3_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT     3
+#define XSTORM_ROCE_CONN_AG_CTX_BIT4_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT4_SHIFT             4
+#define XSTORM_ROCE_CONN_AG_CTX_BIT5_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT5_SHIFT             5
+#define XSTORM_ROCE_CONN_AG_CTX_BIT6_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT6_SHIFT             6
+#define XSTORM_ROCE_CONN_AG_CTX_BIT7_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT7_SHIFT             7
 	u8 flags1;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT8_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT8_SHIFT             0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT9_MASK              0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT9_SHIFT             1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT10_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT10_SHIFT            2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT11_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT11_SHIFT            3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_MASK        0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_SHIFT       4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_MASK        0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_SHIFT       5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT14_MASK	       0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT14_SHIFT	       6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_SHIFT     7
+#define XSTORM_ROCE_CONN_AG_CTX_BIT8_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT8_SHIFT             0
+#define XSTORM_ROCE_CONN_AG_CTX_BIT9_MASK              0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT9_SHIFT             1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT10_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT10_SHIFT            2
+#define XSTORM_ROCE_CONN_AG_CTX_BIT11_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT11_SHIFT            3
+#define XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_MASK        0x1
+#define XSTORM_ROCE_CONN_AG_CTX_MSDM_FLUSH_SHIFT       4
+#define XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_MASK        0x1
+#define XSTORM_ROCE_CONN_AG_CTX_MSEM_FLUSH_SHIFT       5
+#define XSTORM_ROCE_CONN_AG_CTX_BIT14_MASK	       0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT14_SHIFT	       6
+#define XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_YSTORM_FLUSH_SHIFT     7
 	u8 flags2;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT              0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT              2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT              4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3_SHIFT              6
+#define XSTORM_ROCE_CONN_AG_CTX_CF0_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT              0
+#define XSTORM_ROCE_CONN_AG_CTX_CF1_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT              2
+#define XSTORM_ROCE_CONN_AG_CTX_CF2_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT              4
+#define XSTORM_ROCE_CONN_AG_CTX_CF3_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF3_SHIFT              6
 	u8 flags3;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4_SHIFT              0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5_SHIFT              2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6_SHIFT              4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_MASK       0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT      6
+#define XSTORM_ROCE_CONN_AG_CTX_CF4_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF4_SHIFT              0
+#define XSTORM_ROCE_CONN_AG_CTX_CF5_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF5_SHIFT              2
+#define XSTORM_ROCE_CONN_AG_CTX_CF6_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF6_SHIFT              4
+#define XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_MASK       0x3
+#define XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT      6
 	u8 flags4;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8_SHIFT              0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9_MASK               0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9_SHIFT              2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10_SHIFT             4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11_SHIFT             6
+#define XSTORM_ROCE_CONN_AG_CTX_CF8_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF8_SHIFT              0
+#define XSTORM_ROCE_CONN_AG_CTX_CF9_MASK               0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF9_SHIFT              2
+#define XSTORM_ROCE_CONN_AG_CTX_CF10_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF10_SHIFT             4
+#define XSTORM_ROCE_CONN_AG_CTX_CF11_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF11_SHIFT             6
 	u8 flags5;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12_SHIFT             0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13_SHIFT             2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14_SHIFT             4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15_SHIFT             6
+#define XSTORM_ROCE_CONN_AG_CTX_CF12_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF12_SHIFT             0
+#define XSTORM_ROCE_CONN_AG_CTX_CF13_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF13_SHIFT             2
+#define XSTORM_ROCE_CONN_AG_CTX_CF14_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF14_SHIFT             4
+#define XSTORM_ROCE_CONN_AG_CTX_CF15_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF15_SHIFT             6
 	u8 flags6;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16_SHIFT             0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17_SHIFT             2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18_SHIFT             4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19_SHIFT             6
+#define XSTORM_ROCE_CONN_AG_CTX_CF16_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF16_SHIFT             0
+#define XSTORM_ROCE_CONN_AG_CTX_CF17_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF17_SHIFT             2
+#define XSTORM_ROCE_CONN_AG_CTX_CF18_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF18_SHIFT             4
+#define XSTORM_ROCE_CONN_AG_CTX_CF19_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF19_SHIFT             6
 	u8 flags7;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20_SHIFT             0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21_SHIFT             2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_MASK         0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_SHIFT        4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT            6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT            7
+#define XSTORM_ROCE_CONN_AG_CTX_CF20_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF20_SHIFT             0
+#define XSTORM_ROCE_CONN_AG_CTX_CF21_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF21_SHIFT             2
+#define XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_MASK         0x3
+#define XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_SHIFT        4
+#define XSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT            6
+#define XSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT            7
 	u8 flags8;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT            0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF3EN_SHIFT            1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF4EN_SHIFT            2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF5EN_SHIFT            3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF6EN_SHIFT            4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK    0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT   5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF8EN_SHIFT            6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9EN_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF9EN_SHIFT            7
+#define XSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT            0
+#define XSTORM_ROCE_CONN_AG_CTX_CF3EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF3EN_SHIFT            1
+#define XSTORM_ROCE_CONN_AG_CTX_CF4EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF4EN_SHIFT            2
+#define XSTORM_ROCE_CONN_AG_CTX_CF5EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF5EN_SHIFT            3
+#define XSTORM_ROCE_CONN_AG_CTX_CF6EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF6EN_SHIFT            4
+#define XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK    0x1
+#define XSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT   5
+#define XSTORM_ROCE_CONN_AG_CTX_CF8EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF8EN_SHIFT            6
+#define XSTORM_ROCE_CONN_AG_CTX_CF9EN_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF9EN_SHIFT            7
 	u8 flags9;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF10EN_SHIFT           0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF11EN_SHIFT           1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF12EN_SHIFT           2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF13EN_SHIFT           3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF14EN_SHIFT           4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF15EN_SHIFT           5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF16EN_SHIFT           6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF17EN_SHIFT           7
+#define XSTORM_ROCE_CONN_AG_CTX_CF10EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF10EN_SHIFT           0
+#define XSTORM_ROCE_CONN_AG_CTX_CF11EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF11EN_SHIFT           1
+#define XSTORM_ROCE_CONN_AG_CTX_CF12EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF12EN_SHIFT           2
+#define XSTORM_ROCE_CONN_AG_CTX_CF13EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF13EN_SHIFT           3
+#define XSTORM_ROCE_CONN_AG_CTX_CF14EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF14EN_SHIFT           4
+#define XSTORM_ROCE_CONN_AG_CTX_CF15EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF15EN_SHIFT           5
+#define XSTORM_ROCE_CONN_AG_CTX_CF16EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF16EN_SHIFT           6
+#define XSTORM_ROCE_CONN_AG_CTX_CF17EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF17EN_SHIFT           7
 	u8 flags10;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF18EN_SHIFT           0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF19EN_SHIFT           1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF20EN_SHIFT           2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF21EN_SHIFT           3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_EN_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT     4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23EN_MASK            0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23EN_SHIFT           5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT          6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT          7
+#define XSTORM_ROCE_CONN_AG_CTX_CF18EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF18EN_SHIFT           0
+#define XSTORM_ROCE_CONN_AG_CTX_CF19EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF19EN_SHIFT           1
+#define XSTORM_ROCE_CONN_AG_CTX_CF20EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF20EN_SHIFT           2
+#define XSTORM_ROCE_CONN_AG_CTX_CF21EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF21EN_SHIFT           3
+#define XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_EN_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT     4
+#define XSTORM_ROCE_CONN_AG_CTX_CF23EN_MASK            0x1
+#define XSTORM_ROCE_CONN_AG_CTX_CF23EN_SHIFT           5
+#define XSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT          6
+#define XSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT          7
 	u8 flags11;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT          0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT          1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT          2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE5EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE5EN_SHIFT          3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE6EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE6EN_SHIFT          4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE7EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE7EN_SHIFT          5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED1_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED1_SHIFT     6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE9EN_MASK           0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE9EN_SHIFT          7
+#define XSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT          0
+#define XSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT          1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT          2
+#define XSTORM_ROCE_CONN_AG_CTX_RULE5EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE5EN_SHIFT          3
+#define XSTORM_ROCE_CONN_AG_CTX_RULE6EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE6EN_SHIFT          4
+#define XSTORM_ROCE_CONN_AG_CTX_RULE7EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE7EN_SHIFT          5
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED1_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED1_SHIFT     6
+#define XSTORM_ROCE_CONN_AG_CTX_RULE9EN_MASK           0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE9EN_SHIFT          7
 	u8 flags12;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE10EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE10EN_SHIFT         0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE11EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE11EN_SHIFT         1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED2_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED2_SHIFT     2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED3_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED3_SHIFT     3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE14EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE14EN_SHIFT         4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE15EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE15EN_SHIFT         5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE16EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE16EN_SHIFT         6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE17EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE17EN_SHIFT         7
+#define XSTORM_ROCE_CONN_AG_CTX_RULE10EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE10EN_SHIFT         0
+#define XSTORM_ROCE_CONN_AG_CTX_RULE11EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE11EN_SHIFT         1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED2_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED2_SHIFT     2
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED3_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED3_SHIFT     3
+#define XSTORM_ROCE_CONN_AG_CTX_RULE14EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE14EN_SHIFT         4
+#define XSTORM_ROCE_CONN_AG_CTX_RULE15EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE15EN_SHIFT         5
+#define XSTORM_ROCE_CONN_AG_CTX_RULE16EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE16EN_SHIFT         6
+#define XSTORM_ROCE_CONN_AG_CTX_RULE17EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE17EN_SHIFT         7
 	u8 flags13;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE18EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE18EN_SHIFT         0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE19EN_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RULE19EN_SHIFT         1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED4_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED4_SHIFT     2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED5_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED5_SHIFT     3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED6_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED6_SHIFT     4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED7_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED7_SHIFT     5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED8_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED8_SHIFT     6
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED9_MASK      0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED9_SHIFT     7
+#define XSTORM_ROCE_CONN_AG_CTX_RULE18EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE18EN_SHIFT         0
+#define XSTORM_ROCE_CONN_AG_CTX_RULE19EN_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RULE19EN_SHIFT         1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED4_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED4_SHIFT     2
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED5_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED5_SHIFT     3
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED6_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED6_SHIFT     4
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED7_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED7_SHIFT     5
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED8_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED8_SHIFT     6
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED9_MASK      0x1
+#define XSTORM_ROCE_CONN_AG_CTX_A0_RESERVED9_SHIFT     7
 	u8 flags14;
-#define E4_XSTORM_ROCE_CONN_AG_CTX_MIGRATION_MASK         0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_MIGRATION_SHIFT        0
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT17_MASK             0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_BIT17_SHIFT            1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_DPM_PORT_NUM_MASK      0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_DPM_PORT_NUM_SHIFT     2
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RESERVED_MASK          0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_RESERVED_SHIFT         4
-#define E4_XSTORM_ROCE_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK  0x1
-#define E4_XSTORM_ROCE_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT 5
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23_MASK              0x3
-#define E4_XSTORM_ROCE_CONN_AG_CTX_CF23_SHIFT             6
+#define XSTORM_ROCE_CONN_AG_CTX_MIGRATION_MASK         0x1
+#define XSTORM_ROCE_CONN_AG_CTX_MIGRATION_SHIFT        0
+#define XSTORM_ROCE_CONN_AG_CTX_BIT17_MASK             0x1
+#define XSTORM_ROCE_CONN_AG_CTX_BIT17_SHIFT            1
+#define XSTORM_ROCE_CONN_AG_CTX_DPM_PORT_NUM_MASK      0x3
+#define XSTORM_ROCE_CONN_AG_CTX_DPM_PORT_NUM_SHIFT     2
+#define XSTORM_ROCE_CONN_AG_CTX_RESERVED_MASK          0x1
+#define XSTORM_ROCE_CONN_AG_CTX_RESERVED_SHIFT         4
+#define XSTORM_ROCE_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK  0x1
+#define XSTORM_ROCE_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT 5
+#define XSTORM_ROCE_CONN_AG_CTX_CF23_MASK              0x3
+#define XSTORM_ROCE_CONN_AG_CTX_CF23_SHIFT             6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 word1;
@@ -7512,89 +7512,89 @@ struct e4_xstorm_roce_conn_ag_ctx {
 	__le32 reg6;
 };
 
-struct e4_tstorm_roce_conn_ag_ctx {
+struct tstorm_roce_conn_ag_ctx {
 	u8 reserved0;
 	u8 byte1;
 	u8 flags0;
-#define E4_TSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_MASK          0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT         0
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT1_MASK                  0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT                 1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT2_MASK                  0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT2_SHIFT                 2
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT3_MASK                  0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT3_SHIFT                 3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT4_MASK                  0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT4_SHIFT                 4
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT5_MASK                  0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_BIT5_SHIFT                 5
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0_MASK                   0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT                  6
+#define TSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_MASK          0x1
+#define TSTORM_ROCE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT         0
+#define TSTORM_ROCE_CONN_AG_CTX_BIT1_MASK                  0x1
+#define TSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT                 1
+#define TSTORM_ROCE_CONN_AG_CTX_BIT2_MASK                  0x1
+#define TSTORM_ROCE_CONN_AG_CTX_BIT2_SHIFT                 2
+#define TSTORM_ROCE_CONN_AG_CTX_BIT3_MASK                  0x1
+#define TSTORM_ROCE_CONN_AG_CTX_BIT3_SHIFT                 3
+#define TSTORM_ROCE_CONN_AG_CTX_BIT4_MASK                  0x1
+#define TSTORM_ROCE_CONN_AG_CTX_BIT4_SHIFT                 4
+#define TSTORM_ROCE_CONN_AG_CTX_BIT5_MASK                  0x1
+#define TSTORM_ROCE_CONN_AG_CTX_BIT5_SHIFT                 5
+#define TSTORM_ROCE_CONN_AG_CTX_CF0_MASK                   0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT                  6
 	u8 flags1;
-#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK       0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT      0
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2_MASK                   0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT                  2
-#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK     0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT    4
-#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_MASK           0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT          6
+#define TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK       0x3
+#define TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT      0
+#define TSTORM_ROCE_CONN_AG_CTX_CF2_MASK                   0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT                  2
+#define TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK     0x3
+#define TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT    4
+#define TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_MASK           0x3
+#define TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT          6
 	u8 flags2;
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5_MASK                   0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5_SHIFT                  0
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6_MASK                   0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6_SHIFT                  2
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7_MASK                   0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7_SHIFT                  4
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8_MASK                   0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8_SHIFT                  6
+#define TSTORM_ROCE_CONN_AG_CTX_CF5_MASK                   0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF5_SHIFT                  0
+#define TSTORM_ROCE_CONN_AG_CTX_CF6_MASK                   0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF6_SHIFT                  2
+#define TSTORM_ROCE_CONN_AG_CTX_CF7_MASK                   0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF7_SHIFT                  4
+#define TSTORM_ROCE_CONN_AG_CTX_CF8_MASK                   0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF8_SHIFT                  6
 	u8 flags3;
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9_MASK                   0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9_SHIFT                  0
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10_MASK                  0x3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10_SHIFT                 2
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK                 0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT                4
-#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK    0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT   5
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK                 0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT                6
-#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK  0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT 7
+#define TSTORM_ROCE_CONN_AG_CTX_CF9_MASK                   0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF9_SHIFT                  0
+#define TSTORM_ROCE_CONN_AG_CTX_CF10_MASK                  0x3
+#define TSTORM_ROCE_CONN_AG_CTX_CF10_SHIFT                 2
+#define TSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK                 0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT                4
+#define TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK    0x1
+#define TSTORM_ROCE_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT   5
+#define TSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK                 0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT                6
+#define TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK  0x1
+#define TSTORM_ROCE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT 7
 	u8 flags4;
-#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK        0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT       0
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5EN_MASK                 0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF5EN_SHIFT                1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6EN_MASK                 0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF6EN_SHIFT                2
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7EN_MASK                 0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF7EN_SHIFT                3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8EN_MASK                 0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF8EN_SHIFT                4
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9EN_MASK                 0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF9EN_SHIFT                5
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10EN_MASK                0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_CF10EN_SHIFT               6
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT              7
+#define TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK        0x1
+#define TSTORM_ROCE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT       0
+#define TSTORM_ROCE_CONN_AG_CTX_CF5EN_MASK                 0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF5EN_SHIFT                1
+#define TSTORM_ROCE_CONN_AG_CTX_CF6EN_MASK                 0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF6EN_SHIFT                2
+#define TSTORM_ROCE_CONN_AG_CTX_CF7EN_MASK                 0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF7EN_SHIFT                3
+#define TSTORM_ROCE_CONN_AG_CTX_CF8EN_MASK                 0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF8EN_SHIFT                4
+#define TSTORM_ROCE_CONN_AG_CTX_CF9EN_MASK                 0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF9EN_SHIFT                5
+#define TSTORM_ROCE_CONN_AG_CTX_CF10EN_MASK                0x1
+#define TSTORM_ROCE_CONN_AG_CTX_CF10EN_SHIFT               6
+#define TSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT              7
 	u8 flags5;
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT              0
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT              1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT              2
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT              3
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE5EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE5EN_SHIFT              4
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE6EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE6EN_SHIFT              5
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE7EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE7EN_SHIFT              6
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE8EN_MASK               0x1
-#define E4_TSTORM_ROCE_CONN_AG_CTX_RULE8EN_SHIFT              7
+#define TSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT              0
+#define TSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT              1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT              2
+#define TSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT              3
+#define TSTORM_ROCE_CONN_AG_CTX_RULE5EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE5EN_SHIFT              4
+#define TSTORM_ROCE_CONN_AG_CTX_RULE6EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE6EN_SHIFT              5
+#define TSTORM_ROCE_CONN_AG_CTX_RULE7EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE7EN_SHIFT              6
+#define TSTORM_ROCE_CONN_AG_CTX_RULE8EN_MASK               0x1
+#define TSTORM_ROCE_CONN_AG_CTX_RULE8EN_SHIFT              7
 	__le32 reg0;
 	__le32 reg1;
 	__le32 reg2;
@@ -7647,15 +7647,15 @@ struct ustorm_roce_conn_st_ctx {
 };
 
 /* roce connection context */
-struct e4_roce_conn_context {
+struct roce_conn_context {
 	struct ystorm_roce_conn_st_ctx ystorm_st_context;
 	struct regpair ystorm_st_padding[2];
 	struct pstorm_roce_conn_st_ctx pstorm_st_context;
 	struct xstorm_roce_conn_st_ctx xstorm_st_context;
-	struct e4_xstorm_roce_conn_ag_ctx xstorm_ag_context;
-	struct e4_tstorm_roce_conn_ag_ctx tstorm_ag_context;
+	struct xstorm_roce_conn_ag_ctx xstorm_ag_context;
+	struct tstorm_roce_conn_ag_ctx tstorm_ag_context;
 	struct timers_context timer_context;
-	struct e4_ustorm_rdma_conn_ag_ctx ustorm_ag_context;
+	struct ustorm_rdma_conn_ag_ctx ustorm_ag_context;
 	struct tstorm_roce_conn_st_ctx tstorm_st_context;
 	struct regpair tstorm_st_padding[2];
 	struct mstorm_roce_conn_st_ctx mstorm_st_context;
@@ -8037,7 +8037,7 @@ struct roce_update_func_params {
 	__le32 cnp_send_timeout;
 };
 
-struct e4_xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
+struct xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
@@ -8264,200 +8264,200 @@ struct e4_xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
 	__le32 reg4;
 };
 
-struct e4_mstorm_roce_conn_ag_ctx {
+struct mstorm_roce_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT0_MASK     0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT0_SHIFT    0
-#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT1_MASK     0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT    1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0_MASK      0x3
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT     2
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1_MASK      0x3
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT     4
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2_MASK      0x3
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT     6
+#define MSTORM_ROCE_CONN_AG_CTX_BIT0_MASK     0x1
+#define MSTORM_ROCE_CONN_AG_CTX_BIT0_SHIFT    0
+#define MSTORM_ROCE_CONN_AG_CTX_BIT1_MASK     0x1
+#define MSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT    1
+#define MSTORM_ROCE_CONN_AG_CTX_CF0_MASK      0x3
+#define MSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT     2
+#define MSTORM_ROCE_CONN_AG_CTX_CF1_MASK      0x3
+#define MSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT     4
+#define MSTORM_ROCE_CONN_AG_CTX_CF2_MASK      0x3
+#define MSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT     6
 	u8 flags1;
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK    0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT   0
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK    0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT   1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK    0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT   2
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK  0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT 3
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK  0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT 4
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK  0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT 5
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK  0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT 6
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK  0x1
-#define E4_MSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT 7
+#define MSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK    0x1
+#define MSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT   0
+#define MSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK    0x1
+#define MSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT   1
+#define MSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK    0x1
+#define MSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT   2
+#define MSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK  0x1
+#define MSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT 3
+#define MSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK  0x1
+#define MSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT 4
+#define MSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK  0x1
+#define MSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT 5
+#define MSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK  0x1
+#define MSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT 6
+#define MSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK  0x1
+#define MSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT 7
 	__le16 word0;
 	__le16 word1;
 	__le32 reg0;
 	__le32 reg1;
 };
 
-struct e4_mstorm_roce_req_conn_ag_ctx {
+struct mstorm_roce_req_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	6
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_SHIFT	0
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	2
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	0
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT	2
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_MSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	0
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT	2
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT	7
 	__le16 word0;
 	__le16 word1;
 	__le32 reg0;
 	__le32 reg1;
 };
 
-struct e4_mstorm_roce_resp_conn_ag_ctx {
+struct mstorm_roce_resp_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	6
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_SHIFT	0
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	2
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	0
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT	2
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_MSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	0
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT	2
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT	7
 	__le16 word0;
 	__le16 word1;
 	__le32 reg0;
 	__le32 reg1;
 };
 
-struct e4_tstorm_roce_req_conn_ag_ctx {
+struct tstorm_roce_req_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_OCCURRED_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_OCCURRED_SHIFT		1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_CQE_ERROR_OCCURRED_MASK	0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_CQE_ERROR_OCCURRED_SHIFT	2
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_BIT3_MASK			0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_BIT3_SHIFT			3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_SHIFT		4
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_CACHED_ORQ_MASK			0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_CACHED_ORQ_SHIFT			5
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_MASK			0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_SHIFT			6
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_OCCURRED_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_OCCURRED_SHIFT		1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TX_CQE_ERROR_OCCURRED_MASK	0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TX_CQE_ERROR_OCCURRED_SHIFT	2
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_BIT3_MASK			0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_BIT3_SHIFT			3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_SHIFT		4
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_CACHED_ORQ_MASK			0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_CACHED_ORQ_SHIFT			5
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_MASK			0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_SHIFT			6
 	u8 flags1;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK             0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT            0
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_MASK			0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_SHIFT		2
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK		0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT		4
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_MASK			0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT		6
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK             0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT            0
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_MASK			0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_SHIFT		2
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK		0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT		4
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_MASK			0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT		6
 	u8 flags2;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_MASK               0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_SHIFT              0
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_MASK	0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_SHIFT	2
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_MASK	0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_SHIFT	4
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_MASK	0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_SHIFT	6
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_MASK               0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_SHIFT              0
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_MASK	0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_SHIFT	2
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_MASK	0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_SHIFT	4
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_MASK	0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_MASK	0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_SHIFT	0
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_MASK	0x3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_SHIFT	2
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_EN_MASK			0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_EN_SHIFT		4
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK          0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT         5
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_EN_SHIFT		6
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK	0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT	7
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_MASK	0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_SHIFT	0
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_MASK	0x3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_SHIFT	2
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_EN_MASK			0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_CF_EN_SHIFT		4
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK          0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT         5
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_SQ_CF_EN_SHIFT		6
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK	0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT	7
 	u8 flags4;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_EN_MASK            0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_EN_SHIFT           1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_EN_SHIFT		2
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_EN_MASK	0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_EN_SHIFT	3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_EN_SHIFT		4
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_EN_MASK	0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_EN_SHIFT	5
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_EN_MASK	0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_EN_SHIFT	6
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK			0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT			7
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_EN_MASK            0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_FORCE_COMP_CF_EN_SHIFT           1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SET_TIMER_CF_EN_SHIFT		2
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_EN_MASK	0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_TX_ASYNC_ERROR_CF_EN_SHIFT	3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RXMIT_DONE_CF_EN_SHIFT		4
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_EN_MASK	0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_SCAN_COMPLETED_CF_EN_SHIFT	5
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_EN_MASK	0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SQ_DRAIN_COMPLETED_CF_EN_SHIFT	6
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT			7
 	u8 flags5;
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT		0
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_DIF_CNT_EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_DIF_CNT_EN_SHIFT		1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT		2
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT		3
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_SHIFT		4
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SND_SQ_CONS_EN_MASK	0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_SND_SQ_CONS_EN_SHIFT	5
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_SHIFT		6
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_TSTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_SHIFT		7
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT		0
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_DIF_CNT_EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_DIF_CNT_EN_SHIFT		1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT		2
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT		3
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_SHIFT		4
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SND_SQ_CONS_EN_MASK	0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_SND_SQ_CONS_EN_SHIFT	5
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_SHIFT		6
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define TSTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_SHIFT		7
 	__le32 dif_rxmit_cnt;
 	__le32 snd_nxt_psn;
 	__le32 snd_max_psn;
@@ -8479,89 +8479,89 @@ struct e4_tstorm_roce_req_conn_ag_ctx {
 	__le32 reg10;
 };
 
-struct e4_tstorm_roce_resp_conn_ag_ctx {
+struct tstorm_roce_resp_conn_ag_ctx {
 	u8 byte0;
 	u8 state;
 	u8 flags0;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_NOTIFY_REQUESTER_MASK	0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_NOTIFY_REQUESTER_SHIFT	1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_BIT2_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_BIT2_SHIFT			2
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_BIT3_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_BIT3_SHIFT			3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_SHIFT		4
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_BIT5_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_BIT5_SHIFT			5
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK			0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT			6
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_NOTIFY_REQUESTER_MASK	0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_NOTIFY_REQUESTER_SHIFT	1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT2_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT2_SHIFT			2
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT3_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT3_SHIFT			3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_SHIFT		4
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT5_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_BIT5_SHIFT			5
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK			0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT			6
 	u8 flags1;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK            0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT           0
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_MASK	0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_SHIFT	2
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF3_MASK		0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF3_SHIFT		4
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_MASK            0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_SHIFT           0
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_MASK	0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_SHIFT	2
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF3_MASK		0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF3_SHIFT		4
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
 	u8 flags2;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_MASK                0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_SHIFT               0
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6_MASK		0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6_SHIFT		2
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF7_MASK		0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF7_SHIFT		4
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF8_MASK		0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF8_SHIFT		6
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_MASK                0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_SHIFT               0
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF6_MASK		0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF6_SHIFT		2
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF7_MASK		0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF7_SHIFT		4
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF8_MASK		0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF8_SHIFT		6
 	u8 flags3;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF9_MASK		0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF9_SHIFT		0
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF10_MASK		0x3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF10_SHIFT		2
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT		4
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK         0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT        5
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_EN_MASK	0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_EN_SHIFT	6
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_SHIFT		7
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF9_MASK		0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF9_SHIFT		0
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF10_MASK		0x3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF10_SHIFT		2
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT		4
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_MASK         0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_MSTORM_FLUSH_CF_EN_SHIFT        5
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_EN_MASK	0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_TX_ERROR_CF_EN_SHIFT	6
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_SHIFT		7
 	u8 flags4;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_MASK             0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT            1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_SHIFT			2
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF7EN_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF7EN_SHIFT			3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_SHIFT			4
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_SHIFT			5
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_SHIFT			6
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK			0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT			7
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		0
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_MASK             0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT            1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_SHIFT			2
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF7EN_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF7EN_SHIFT			3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_SHIFT			4
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_SHIFT			5
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_SHIFT			6
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT			7
 	u8 flags5;
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT		0
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT		1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT		2
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT		3
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_SHIFT		4
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RQ_RULE_EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RQ_RULE_EN_SHIFT	5
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_SHIFT		6
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_TSTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_SHIFT		7
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT		0
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT		1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT		2
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT		3
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_SHIFT		4
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RQ_RULE_EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RQ_RULE_EN_SHIFT	5
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_SHIFT		6
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define TSTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_SHIFT		7
 	__le32 psn_and_rxmit_id_echo;
 	__le32 reg1;
 	__le32 reg2;
@@ -8583,63 +8583,63 @@ struct e4_tstorm_roce_resp_conn_ag_ctx {
 	__le32 reg10;
 };
 
-struct e4_ustorm_roce_req_conn_ag_ctx {
+struct ustorm_roce_req_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	6
+#define USTORM_ROCE_REQ_CONN_AG_CTX_BIT0_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_BIT0_SHIFT	0
+#define USTORM_ROCE_REQ_CONN_AG_CTX_BIT1_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_BIT1_SHIFT	1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	2
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	4
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF3_MASK		0x3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF3_SHIFT	0
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF4_MASK		0x3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF4_SHIFT	2
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF5_MASK		0x3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF5_SHIFT	4
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF6_MASK		0x3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF6_SHIFT	6
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF3_MASK		0x3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF3_SHIFT	0
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF4_MASK		0x3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF4_SHIFT	2
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF5_MASK		0x3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF5_SHIFT	4
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF6_MASK		0x3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF6_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	0
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT	2
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_SHIFT	3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF4EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF4EN_SHIFT	4
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF5EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF5EN_SHIFT	5
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF6EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_CF6EN_SHIFT	6
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	0
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT	2
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_SHIFT	3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF4EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF4EN_SHIFT	4
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF5EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF5EN_SHIFT	5
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF6EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_CF6EN_SHIFT	6
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags3;
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_MASK	0x1
-#define E4_USTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define USTORM_ROCE_REQ_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -8652,63 +8652,63 @@ struct e4_ustorm_roce_req_conn_ag_ctx {
 	__le16 word3;
 };
 
-struct e4_ustorm_roce_resp_conn_ag_ctx {
+struct ustorm_roce_resp_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	6
+#define USTORM_ROCE_RESP_CONN_AG_CTX_BIT0_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_BIT0_SHIFT	0
+#define USTORM_ROCE_RESP_CONN_AG_CTX_BIT1_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_BIT1_SHIFT	1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	2
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	4
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF3_SHIFT	0
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF4_SHIFT	2
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF5_SHIFT	4
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF6_SHIFT	6
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF3_MASK	0x3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF3_SHIFT	0
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF4_MASK	0x3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF4_SHIFT	2
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF5_MASK	0x3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF5_SHIFT	4
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF6_MASK	0x3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF6_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	0
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT	2
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_SHIFT	3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF4EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF4EN_SHIFT	4
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF5EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF5EN_SHIFT	5
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_SHIFT	6
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	0
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT	2
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_SHIFT	3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF4EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF4EN_SHIFT	4
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF5EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF5EN_SHIFT	5
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_CF6EN_SHIFT	6
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags3;
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_MASK	0x1
-#define E4_USTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define USTORM_ROCE_RESP_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -8721,214 +8721,214 @@ struct e4_ustorm_roce_resp_conn_ag_ctx {
 	__le16 word3;
 };
 
-struct e4_xstorm_roce_req_conn_ag_ctx {
+struct xstorm_roce_req_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED1_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED1_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED2_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED2_SHIFT		2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED3_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED3_SHIFT		4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED4_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED4_SHIFT		5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED5_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED5_SHIFT		6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED6_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED6_SHIFT		7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED1_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED1_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED2_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED2_SHIFT		2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED3_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED3_SHIFT		4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED4_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED4_SHIFT		5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED5_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED5_SHIFT		6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED6_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED6_SHIFT		7
 	u8 flags1;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED7_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED7_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED8_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED8_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT10_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT10_SHIFT		2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT11_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSDM_FLUSH_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSDM_FLUSH_SHIFT		4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSEM_FLUSH_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MSEM_FLUSH_SHIFT		5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_STATE_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_STATE_SHIFT	6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_YSTORM_FLUSH_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_YSTORM_FLUSH_SHIFT	7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED7_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED7_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED8_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED8_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_BIT10_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_BIT10_SHIFT		2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_BIT11_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_BIT11_SHIFT		3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MSDM_FLUSH_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MSDM_FLUSH_SHIFT		4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MSEM_FLUSH_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MSEM_FLUSH_SHIFT		5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_STATE_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_ERROR_STATE_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_YSTORM_FLUSH_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_YSTORM_FLUSH_SHIFT	7
 	u8 flags2;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF3_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF3_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF3_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF3_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_SHIFT	0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_SHIFT	0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_MASK        0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_SHIFT       0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_MASK     0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_SHIFT    2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_MASK        0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_SHIFT       0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_MASK     0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_SHIFT    2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF12_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF12_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF13_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF13_SHIFT		2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FMR_ENDED_CF_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FMR_ENDED_CF_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF15_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF15_SHIFT		6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF12_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF12_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF13_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF13_SHIFT		2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FMR_ENDED_CF_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FMR_ENDED_CF_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF15_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF15_SHIFT		6
 	u8 flags6;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF16_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF16_SHIFT	0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF17_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF17_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF18_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF18_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF19_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF19_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF16_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF16_SHIFT	0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF17_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF17_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF18_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF18_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF19_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF19_SHIFT	6
 	u8 flags7;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF20_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF20_SHIFT	0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF21_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF21_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF20_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF20_SHIFT	0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF21_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF21_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	7
 	u8 flags8;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_EN_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT	3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_EN_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT	5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_EN_MASK     0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_EN_SHIFT    6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_EN_MASK  0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_EN_SHIFT 7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF3EN_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_FLUSH_CF_EN_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT	3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SND_RXMIT_CF_EN_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT	5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_EN_MASK     0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_DIF_ERROR_CF_EN_SHIFT    6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_EN_MASK  0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SCAN_SQ_FOR_COMP_CF_EN_SHIFT 7
 	u8 flags9;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF10EN_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF11EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF11EN_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF12EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF12EN_SHIFT		2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF13EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF13EN_SHIFT		3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FME_ENDED_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_FME_ENDED_CF_EN_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF15EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF15EN_SHIFT		5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF16EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF16EN_SHIFT		6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF17EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF17EN_SHIFT		7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF10EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF10EN_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF11EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF11EN_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF12EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF12EN_SHIFT		2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF13EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF13EN_SHIFT		3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FME_ENDED_CF_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_FME_ENDED_CF_EN_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF15EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF15EN_SHIFT		5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF16EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF16EN_SHIFT		6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF17EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF17EN_SHIFT		7
 	u8 flags10;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF18EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF18EN_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF19EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF19EN_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF20EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF20EN_SHIFT		2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF21EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF21EN_SHIFT		3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF23EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF23EN_SHIFT		5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT		6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT		7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF18EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF18EN_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF19EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF19EN_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF20EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF20EN_SHIFT		2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF21EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF21EN_SHIFT		3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF23EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF23EN_SHIFT		5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT		6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT		7
 	u8 flags11;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT		2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_SHIFT		3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_SHIFT		4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_E2E_CREDIT_RULE_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_E2E_CREDIT_RULE_EN_SHIFT	5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE9EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE9EN_SHIFT		7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT		2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE5EN_SHIFT		3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE6EN_SHIFT		4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_E2E_CREDIT_RULE_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_E2E_CREDIT_RULE_EN_SHIFT	5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE9EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE9EN_SHIFT		7
 	u8 flags12;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_PROD_EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_PROD_EN_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE11EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE11EN_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_INV_FENCE_RULE_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_INV_FENCE_RULE_EN_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE15EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE15EN_SHIFT		5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ORQ_FENCE_RULE_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ORQ_FENCE_RULE_EN_SHIFT	6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MAX_ORD_RULE_EN_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MAX_ORD_RULE_EN_SHIFT	7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_PROD_EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_SQ_PROD_EN_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE11EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE11EN_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_INV_FENCE_RULE_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_INV_FENCE_RULE_EN_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE15EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE15EN_SHIFT		5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_ORQ_FENCE_RULE_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_ORQ_FENCE_RULE_EN_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MAX_ORD_RULE_EN_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MAX_ORD_RULE_EN_SHIFT	7
 	u8 flags13;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE18EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE18EN_SHIFT		0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE19EN_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RULE19EN_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE18EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE18EN_SHIFT		0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE19EN_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RULE19EN_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
 	u8 flags14;
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MIGRATION_FLAG_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_MIGRATION_FLAG_SHIFT	0
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT17_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_BIT17_SHIFT		1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DPM_PORT_NUM_MASK	0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_DPM_PORT_NUM_SHIFT	2
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED_MASK		0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED_SHIFT		4
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK	0x1
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT	5
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF23_MASK		0x3
-#define E4_XSTORM_ROCE_REQ_CONN_AG_CTX_CF23_SHIFT		6
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MIGRATION_FLAG_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_MIGRATION_FLAG_SHIFT	0
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_BIT17_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_BIT17_SHIFT		1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_DPM_PORT_NUM_MASK	0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_DPM_PORT_NUM_SHIFT	2
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED_MASK		0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_RESERVED_SHIFT		4
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_ROCE_EDPM_ENABLE_MASK	0x1
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_ROCE_EDPM_ENABLE_SHIFT	5
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF23_MASK		0x3
+#define XSTORM_ROCE_REQ_CONN_AG_CTX_CF23_SHIFT		6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 word1;
@@ -8950,216 +8950,216 @@ struct e4_xstorm_roce_req_conn_ag_ctx {
 	__le32 orq_cons;
 };
 
-struct e4_xstorm_roce_resp_conn_ag_ctx {
+struct xstorm_roce_resp_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED1_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED1_SHIFT		1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED2_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED2_SHIFT		2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED3_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED3_SHIFT		4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED4_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED4_SHIFT		5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED5_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED5_SHIFT		6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED6_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED6_SHIFT		7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED1_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED1_SHIFT		1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED2_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED2_SHIFT		2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED3_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED3_SHIFT		4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED4_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED4_SHIFT		5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED5_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED5_SHIFT		6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED6_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED6_SHIFT		7
 	u8 flags1;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED7_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED7_SHIFT		0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED8_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED8_SHIFT		1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT10_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT10_SHIFT		2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT11_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSDM_FLUSH_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSDM_FLUSH_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSEM_FLUSH_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_MSEM_FLUSH_SHIFT	5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_ERROR_STATE_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_ERROR_STATE_SHIFT	6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_YSTORM_FLUSH_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_YSTORM_FLUSH_SHIFT	7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED7_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED7_SHIFT		0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED8_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RESERVED8_SHIFT		1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT10_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT10_SHIFT		2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT11_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT11_SHIFT		3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_MSDM_FLUSH_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_MSDM_FLUSH_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_MSEM_FLUSH_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_MSEM_FLUSH_SHIFT	5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_ERROR_STATE_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_ERROR_STATE_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_YSTORM_FLUSH_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_YSTORM_FLUSH_SHIFT	7
 	u8 flags2;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF3_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF3_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF3_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_MASK		0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_SHIFT		0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_MASK		0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_SHIFT		0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF12_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF12_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF13_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF13_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF14_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF14_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF15_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF15_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF12_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF12_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF13_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF13_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF14_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF14_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF15_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF15_SHIFT	6
 	u8 flags6;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF16_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF16_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF17_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF17_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF18_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF18_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF19_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF19_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF16_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF16_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF17_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF17_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF18_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF18_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF19_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF19_SHIFT	6
 	u8 flags7;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF20_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF20_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF21_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF21_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF20_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF20_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF21_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF21_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	7
 	u8 flags8;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT		0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_SHIFT		1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_EN_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT	3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_EN_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT	5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_SHIFT		6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_SHIFT		7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT		0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF3EN_SHIFT		1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RXMIT_CF_EN_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RX_ERROR_CF_EN_SHIFT	3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FORCE_ACK_CF_EN_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT	5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF8EN_SHIFT		6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF9EN_SHIFT		7
 	u8 flags9;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF11EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF11EN_SHIFT	1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF12EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF12EN_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF13EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF13EN_SHIFT	3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF14EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF14EN_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF15EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF15EN_SHIFT	5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF16EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF16EN_SHIFT	6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF17EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF17EN_SHIFT	7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF10EN_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF11EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF11EN_SHIFT	1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF12EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF12EN_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF13EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF13EN_SHIFT	3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF14EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF14EN_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF15EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF15EN_SHIFT	5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF16EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF16EN_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF17EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF17EN_SHIFT	7
 	u8 flags10;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF18EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF18EN_SHIFT		0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF19EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF19EN_SHIFT		1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF20EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF20EN_SHIFT		2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF21EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF21EN_SHIFT		3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF23EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF23EN_SHIFT		5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT		6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT		7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF18EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF18EN_SHIFT		0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF19EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF19EN_SHIFT		1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF20EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF20EN_SHIFT		2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF21EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF21EN_SHIFT		3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF23EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF23EN_SHIFT		5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT		6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT		7
 	u8 flags11;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT		0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT		1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT		2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_SHIFT		3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_SHIFT		4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_SHIFT		5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE9EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE9EN_SHIFT		7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT		0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT		1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT		2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE5EN_SHIFT		3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE6EN_SHIFT		4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE7EN_SHIFT		5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE9EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE9EN_SHIFT		7
 	u8 flags12;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_IRQ_PROD_RULE_EN_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_IRQ_PROD_RULE_EN_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE11EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE11EN_SHIFT		1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE14EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE14EN_SHIFT		4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE15EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE15EN_SHIFT		5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE16EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE16EN_SHIFT		6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE17EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE17EN_SHIFT		7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_IRQ_PROD_RULE_EN_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_IRQ_PROD_RULE_EN_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE11EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE11EN_SHIFT		1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE14EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE14EN_SHIFT		4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE15EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE15EN_SHIFT		5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE16EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE16EN_SHIFT		6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE17EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE17EN_SHIFT		7
 	u8 flags13;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE18EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE18EN_SHIFT		0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE19EN_MASK		0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_RULE19EN_SHIFT		1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE18EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE18EN_SHIFT		0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE19EN_MASK		0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_RULE19EN_SHIFT		1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED4_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED4_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED5_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED5_SHIFT	3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED6_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED6_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED7_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED7_SHIFT	5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED8_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED8_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED9_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_A0_RESERVED9_SHIFT	7
 	u8 flags14;
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT16_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT16_SHIFT	0
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT17_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT17_SHIFT	1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT18_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT18_SHIFT	2
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT19_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT19_SHIFT	3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT20_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT20_SHIFT	4
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT21_MASK	0x1
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_BIT21_SHIFT	5
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF23_MASK	0x3
-#define E4_XSTORM_ROCE_RESP_CONN_AG_CTX_CF23_SHIFT	6
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT16_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT16_SHIFT	0
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT17_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT17_SHIFT	1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT18_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT18_SHIFT	2
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT19_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT19_SHIFT	3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT20_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT20_SHIFT	4
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT21_MASK	0x1
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_BIT21_SHIFT	5
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF23_MASK	0x3
+#define XSTORM_ROCE_RESP_CONN_AG_CTX_CF23_SHIFT	6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 irq_prod_shadow;
@@ -9181,37 +9181,37 @@ struct e4_xstorm_roce_resp_conn_ag_ctx {
 	__le32 msn_and_syndrome;
 };
 
-struct e4_ystorm_roce_conn_ag_ctx {
+struct ystorm_roce_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT0_MASK     0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT0_SHIFT    0
-#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT1_MASK     0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT    1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0_MASK      0x3
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT     2
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1_MASK      0x3
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT     4
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2_MASK      0x3
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT     6
+#define YSTORM_ROCE_CONN_AG_CTX_BIT0_MASK     0x1
+#define YSTORM_ROCE_CONN_AG_CTX_BIT0_SHIFT    0
+#define YSTORM_ROCE_CONN_AG_CTX_BIT1_MASK     0x1
+#define YSTORM_ROCE_CONN_AG_CTX_BIT1_SHIFT    1
+#define YSTORM_ROCE_CONN_AG_CTX_CF0_MASK      0x3
+#define YSTORM_ROCE_CONN_AG_CTX_CF0_SHIFT     2
+#define YSTORM_ROCE_CONN_AG_CTX_CF1_MASK      0x3
+#define YSTORM_ROCE_CONN_AG_CTX_CF1_SHIFT     4
+#define YSTORM_ROCE_CONN_AG_CTX_CF2_MASK      0x3
+#define YSTORM_ROCE_CONN_AG_CTX_CF2_SHIFT     6
 	u8 flags1;
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK    0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT   0
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK    0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT   1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK    0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT   2
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK  0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT 3
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK  0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT 4
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK  0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT 5
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK  0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT 6
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK  0x1
-#define E4_YSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT 7
+#define YSTORM_ROCE_CONN_AG_CTX_CF0EN_MASK    0x1
+#define YSTORM_ROCE_CONN_AG_CTX_CF0EN_SHIFT   0
+#define YSTORM_ROCE_CONN_AG_CTX_CF1EN_MASK    0x1
+#define YSTORM_ROCE_CONN_AG_CTX_CF1EN_SHIFT   1
+#define YSTORM_ROCE_CONN_AG_CTX_CF2EN_MASK    0x1
+#define YSTORM_ROCE_CONN_AG_CTX_CF2EN_SHIFT   2
+#define YSTORM_ROCE_CONN_AG_CTX_RULE0EN_MASK  0x1
+#define YSTORM_ROCE_CONN_AG_CTX_RULE0EN_SHIFT 3
+#define YSTORM_ROCE_CONN_AG_CTX_RULE1EN_MASK  0x1
+#define YSTORM_ROCE_CONN_AG_CTX_RULE1EN_SHIFT 4
+#define YSTORM_ROCE_CONN_AG_CTX_RULE2EN_MASK  0x1
+#define YSTORM_ROCE_CONN_AG_CTX_RULE2EN_SHIFT 5
+#define YSTORM_ROCE_CONN_AG_CTX_RULE3EN_MASK  0x1
+#define YSTORM_ROCE_CONN_AG_CTX_RULE3EN_SHIFT 6
+#define YSTORM_ROCE_CONN_AG_CTX_RULE4EN_MASK  0x1
+#define YSTORM_ROCE_CONN_AG_CTX_RULE4EN_SHIFT 7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -9225,37 +9225,37 @@ struct e4_ystorm_roce_conn_ag_ctx {
 	__le32 reg3;
 };
 
-struct e4_ystorm_roce_req_conn_ag_ctx {
+struct ystorm_roce_req_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	6
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF0_MASK		0x3
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF1_MASK		0x3
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF2_MASK		0x3
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	0
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT	2
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_YSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF0EN_SHIFT	0
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF1EN_SHIFT	1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_CF2EN_SHIFT	2
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define YSTORM_ROCE_REQ_CONN_AG_CTX_RULE4EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -9269,37 +9269,37 @@ struct e4_ystorm_roce_req_conn_ag_ctx {
 	__le32 reg3;
 };
 
-struct e4_ystorm_roce_resp_conn_ag_ctx {
+struct ystorm_roce_resp_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	6
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF0_MASK	0x3
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF1_MASK	0x3
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF2_MASK	0x3
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	0
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT	2
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_YSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF0EN_SHIFT	0
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF1EN_SHIFT	1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_CF2EN_SHIFT	2
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define YSTORM_ROCE_RESP_CONN_AG_CTX_RULE4EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -9336,216 +9336,216 @@ struct xstorm_iwarp_conn_st_ctx {
 	__le32 reserved[48];
 };
 
-struct e4_xstorm_iwarp_conn_ag_ctx {
+struct xstorm_iwarp_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM1_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM1_SHIFT	1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM2_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM2_SHIFT	2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT4_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT4_SHIFT		4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RESERVED2_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RESERVED2_SHIFT	5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT6_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT6_SHIFT		6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT7_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT7_SHIFT		7
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM1_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM1_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM2_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM2_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_BIT4_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT4_SHIFT		4
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED2_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED2_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_BIT6_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT6_SHIFT		6
+#define XSTORM_IWARP_CONN_AG_CTX_BIT7_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT7_SHIFT		7
 	u8 flags1;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT8_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT8_SHIFT				0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT9_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT9_SHIFT				1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT10_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT10_SHIFT				2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT11_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT11_SHIFT				3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT12_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT12_SHIFT				4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT13_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT13_SHIFT				5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT14_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT14_SHIFT				6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_YSTORM_FLUSH_OR_REWIND_SND_MAX_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_YSTORM_FLUSH_OR_REWIND_SND_MAX_SHIFT 7
+#define XSTORM_IWARP_CONN_AG_CTX_BIT8_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT8_SHIFT				0
+#define XSTORM_IWARP_CONN_AG_CTX_BIT9_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT9_SHIFT				1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT10_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT10_SHIFT				2
+#define XSTORM_IWARP_CONN_AG_CTX_BIT11_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT11_SHIFT				3
+#define XSTORM_IWARP_CONN_AG_CTX_BIT12_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT12_SHIFT				4
+#define XSTORM_IWARP_CONN_AG_CTX_BIT13_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT13_SHIFT				5
+#define XSTORM_IWARP_CONN_AG_CTX_BIT14_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT14_SHIFT				6
+#define XSTORM_IWARP_CONN_AG_CTX_YSTORM_FLUSH_OR_REWIND_SND_MAX_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_YSTORM_FLUSH_OR_REWIND_SND_MAX_SHIFT 7
 	u8 flags2;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF0_MASK			0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT			0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF1_MASK			0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT			2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF2_MASK			0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT			4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_CF0_MASK			0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT			0
+#define XSTORM_IWARP_CONN_AG_CTX_CF1_MASK			0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT			2
+#define XSTORM_IWARP_CONN_AG_CTX_CF2_MASK			0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT			4
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF4_SHIFT	0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF5_SHIFT	2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF6_SHIFT	4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF7_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_CF4_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF4_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF5_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF5_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF6_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF6_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF7_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF7_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF12_MASK		0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF12_SHIFT		0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF13_MASK		0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF13_SHIFT		2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_SHIFT	4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF15_MASK		0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF15_SHIFT		6
+#define XSTORM_IWARP_CONN_AG_CTX_CF12_MASK		0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF12_SHIFT		0
+#define XSTORM_IWARP_CONN_AG_CTX_CF13_MASK		0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF13_SHIFT		2
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF15_MASK		0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF15_SHIFT		6
 	u8 flags6;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_SHIFT 0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF17_MASK				0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF17_SHIFT				2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF18_MASK				0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF18_SHIFT				4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_MASK			0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_SHIFT			6
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_SHIFT 0
+#define XSTORM_IWARP_CONN_AG_CTX_CF17_MASK				0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF17_SHIFT				2
+#define XSTORM_IWARP_CONN_AG_CTX_CF18_MASK				0x3
+#define XSTORM_IWARP_CONN_AG_CTX_CF18_SHIFT				4
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_MASK			0x3
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_SHIFT			6
 	u8 flags7;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_SHIFT	2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT		6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT		7
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT		6
+#define XSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT		7
 	u8 flags8;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT			0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF4EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF4EN_SHIFT			2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF5EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF5EN_SHIFT			3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF6EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT			4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF7EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF7EN_SHIFT			5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF8EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF8EN_SHIFT			6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF9EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF9EN_SHIFT			7
+#define XSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT			0
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_CF4EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF4EN_SHIFT			2
+#define XSTORM_IWARP_CONN_AG_CTX_CF5EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF5EN_SHIFT			3
+#define XSTORM_IWARP_CONN_AG_CTX_CF6EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT			4
+#define XSTORM_IWARP_CONN_AG_CTX_CF7EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF7EN_SHIFT			5
+#define XSTORM_IWARP_CONN_AG_CTX_CF8EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF8EN_SHIFT			6
+#define XSTORM_IWARP_CONN_AG_CTX_CF9EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF9EN_SHIFT			7
 	u8 flags9;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF10EN_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF10EN_SHIFT			0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF11EN_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF11EN_SHIFT			1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF12EN_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF12EN_SHIFT			2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF13EN_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF13EN_SHIFT			3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_EN_SHIFT		4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF15EN_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF15EN_SHIFT			5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_EN_MASK 0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_EN_SHIFT 6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF17EN_MASK				0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF17EN_SHIFT			7
+#define XSTORM_IWARP_CONN_AG_CTX_CF10EN_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF10EN_SHIFT			0
+#define XSTORM_IWARP_CONN_AG_CTX_CF11EN_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF11EN_SHIFT			1
+#define XSTORM_IWARP_CONN_AG_CTX_CF12EN_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF12EN_SHIFT			2
+#define XSTORM_IWARP_CONN_AG_CTX_CF13EN_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF13EN_SHIFT			3
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FLUSH_CF_EN_SHIFT		4
+#define XSTORM_IWARP_CONN_AG_CTX_CF15EN_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF15EN_SHIFT			5
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_EN_MASK 0x1
+#define XSTORM_IWARP_CONN_AG_CTX_MPA_OR_ERROR_WAKEUP_TRIGGER_CF_EN_SHIFT 6
+#define XSTORM_IWARP_CONN_AG_CTX_CF17EN_MASK				0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF17EN_SHIFT			7
 	u8 flags10;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF18EN_MASK			0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_CF18EN_SHIFT		0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_EN_SHIFT		1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_EN_SHIFT		3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_EN_MASK               0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_EN_SHIFT              5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT		6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_SHIFT	7
+#define XSTORM_IWARP_CONN_AG_CTX_CF18EN_MASK			0x1
+#define XSTORM_IWARP_CONN_AG_CTX_CF18EN_SHIFT		0
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_DQ_FLUSH_EN_SHIFT		1
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		2
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_FLUSH_Q1_EN_SHIFT		3
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
+#define XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_EN_MASK               0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_EN_SHIFT              5
+#define XSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT		6
+#define XSTORM_IWARP_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_SHIFT	7
 	u8 flags11;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_TX_BLOCKED_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_TX_BLOCKED_EN_SHIFT	0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RESERVED3_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RESERVED3_SHIFT	2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT	3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE6EN_SHIFT	4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT	5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE9EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE9EN_SHIFT	7
+#define XSTORM_IWARP_CONN_AG_CTX_TX_BLOCKED_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_TX_BLOCKED_EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED3_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RESERVED3_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE6EN_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_RULE9EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE9EN_SHIFT	7
 	u8 flags12;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_NOT_EMPTY_RULE_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_NOT_EMPTY_RULE_EN_SHIFT	0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE11EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE11EN_SHIFT		1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED2_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED2_SHIFT		2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED3_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED3_SHIFT		3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_FENCE_RULE_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SQ_FENCE_RULE_EN_SHIFT	4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE15EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE15EN_SHIFT		5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE16EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE16EN_SHIFT		6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE17EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE17EN_SHIFT		7
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_NOT_EMPTY_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_NOT_EMPTY_RULE_EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_RULE11EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE11EN_SHIFT		1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED2_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED2_SHIFT		2
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED3_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED3_SHIFT		3
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FENCE_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_SQ_FENCE_RULE_EN_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_RULE15EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE15EN_SHIFT		5
+#define XSTORM_IWARP_CONN_AG_CTX_RULE16EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE16EN_SHIFT		6
+#define XSTORM_IWARP_CONN_AG_CTX_RULE17EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE17EN_SHIFT		7
 	u8 flags13;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_IRQ_NOT_EMPTY_RULE_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_IRQ_NOT_EMPTY_RULE_EN_SHIFT	0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_HQ_NOT_FULL_RULE_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_HQ_NOT_FULL_RULE_EN_SHIFT	1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_ORQ_RD_FENCE_RULE_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_ORQ_RD_FENCE_RULE_EN_SHIFT	2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE21EN_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_RULE21EN_SHIFT		3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED6_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED6_SHIFT		4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_ORQ_NOT_FULL_RULE_EN_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_ORQ_NOT_FULL_RULE_EN_SHIFT	5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED8_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED8_SHIFT		6
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED9_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED9_SHIFT		7
+#define XSTORM_IWARP_CONN_AG_CTX_IRQ_NOT_EMPTY_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_IRQ_NOT_EMPTY_RULE_EN_SHIFT	0
+#define XSTORM_IWARP_CONN_AG_CTX_HQ_NOT_FULL_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_HQ_NOT_FULL_RULE_EN_SHIFT	1
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_RD_FENCE_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_RD_FENCE_RULE_EN_SHIFT	2
+#define XSTORM_IWARP_CONN_AG_CTX_RULE21EN_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_RULE21EN_SHIFT		3
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED6_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED6_SHIFT		4
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_NOT_FULL_RULE_EN_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_ORQ_NOT_FULL_RULE_EN_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED8_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED8_SHIFT		6
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED9_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_A0_RESERVED9_SHIFT		7
 	u8 flags14;
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT16_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT16_SHIFT		0
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT17_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT17_SHIFT		1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT18_MASK		0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_BIT18_SHIFT		2
-#define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED1_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED1_SHIFT	3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED2_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED2_SHIFT	4
-#define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_MASK	0x1
-#define E4_XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_SHIFT	5
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_MASK	0x3
-#define E4_XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_SHIFT	6
+#define XSTORM_IWARP_CONN_AG_CTX_BIT16_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT16_SHIFT		0
+#define XSTORM_IWARP_CONN_AG_CTX_BIT17_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT17_SHIFT		1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT18_MASK		0x1
+#define XSTORM_IWARP_CONN_AG_CTX_BIT18_SHIFT		2
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED1_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED1_SHIFT	3
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED2_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED2_SHIFT	4
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_MASK	0x1
+#define XSTORM_IWARP_CONN_AG_CTX_E5_RESERVED3_SHIFT	5
+#define XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_MASK	0x3
+#define XSTORM_IWARP_CONN_AG_CTX_SEND_TERMINATE_CF_SHIFT	6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 physical_q1;
@@ -9593,89 +9593,89 @@ struct e4_xstorm_iwarp_conn_ag_ctx {
 	__le32 reg17;
 };
 
-struct e4_tstorm_iwarp_conn_ag_ctx {
+struct tstorm_iwarp_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_TSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_TSTORM_IWARP_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT		1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_BIT2_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_BIT2_SHIFT		2
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_OR_TERMINATE_SENT_MASK  0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_OR_TERMINATE_SENT_SHIFT 3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_BIT4_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_BIT4_SHIFT		4
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CACHED_ORQ_MASK	0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CACHED_ORQ_SHIFT	5
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT		6
+#define TSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_BIT1_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT		1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT2_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT2_SHIFT		2
+#define TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_OR_TERMINATE_SENT_MASK  0x1
+#define TSTORM_IWARP_CONN_AG_CTX_MSTORM_FLUSH_OR_TERMINATE_SENT_SHIFT 3
+#define TSTORM_IWARP_CONN_AG_CTX_BIT4_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_BIT4_SHIFT		4
+#define TSTORM_IWARP_CONN_AG_CTX_CACHED_ORQ_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CACHED_ORQ_SHIFT	5
+#define TSTORM_IWARP_CONN_AG_CTX_CF0_MASK		0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT		6
 	u8 flags1;
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_MASK		0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_SHIFT		0
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_MASK		0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_SHIFT	2
-#define E4_TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	4
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF4_MASK			0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF4_SHIFT			6
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_MASK		0x3
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_SHIFT		0
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_MASK		0x3
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_CF4_MASK			0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF4_SHIFT			6
 	u8 flags2;
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF5_SHIFT	0
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF6_SHIFT	2
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF7_SHIFT	4
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF8_SHIFT	6
+#define TSTORM_IWARP_CONN_AG_CTX_CF5_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF5_SHIFT	0
+#define TSTORM_IWARP_CONN_AG_CTX_CF6_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF6_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_CF7_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF7_SHIFT	4
+#define TSTORM_IWARP_CONN_AG_CTX_CF8_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_CF8_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPLETE_MASK 0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPLETE_SHIFT 0
-#define E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_MASK	0x3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_SHIFT	2
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK				0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT				4
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_EN_MASK			0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_EN_SHIFT			5
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_EN_SHIFT		6
-#define E4_TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT		7
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPLETE_MASK 0x3
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPLETE_SHIFT 0
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_MASK	0x3
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_SHIFT	2
+#define TSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK				0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT				4
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_EN_MASK			0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RQ_POST_CF_EN_SHIFT			5
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_MPA_TIMEOUT_CF_EN_SHIFT		6
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT		7
 	u8 flags4;
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF4EN_MASK				0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF4EN_SHIFT				0
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF5EN_MASK				0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF5EN_SHIFT				1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF6EN_MASK				0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT				2
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF7EN_MASK				0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF7EN_SHIFT				3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF8EN_MASK				0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_CF8EN_SHIFT				4
-#define E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPL_EN_MASK 0x1
-#define	E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPL_EN_SHIFT 5
-#define E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_EN_MASK	0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_EN_SHIFT	6
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK			0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT			7
+#define TSTORM_IWARP_CONN_AG_CTX_CF4EN_MASK				0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF4EN_SHIFT				0
+#define TSTORM_IWARP_CONN_AG_CTX_CF5EN_MASK				0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF5EN_SHIFT				1
+#define TSTORM_IWARP_CONN_AG_CTX_CF6EN_MASK				0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT				2
+#define TSTORM_IWARP_CONN_AG_CTX_CF7EN_MASK				0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF7EN_SHIFT				3
+#define TSTORM_IWARP_CONN_AG_CTX_CF8EN_MASK				0x1
+#define TSTORM_IWARP_CONN_AG_CTX_CF8EN_SHIFT				4
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPL_EN_MASK 0x1
+#define	TSTORM_IWARP_CONN_AG_CTX_FLUSH_Q0_AND_TCP_HANDSHAKE_COMPL_EN_SHIFT 5
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_EN_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_FLUSH_OR_ERROR_DETECTED_EN_SHIFT	6
+#define TSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT			7
 	u8 flags5;
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT		0
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT		1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT		2
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT		3
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT		4
-#define E4_TSTORM_IWARP_CONN_AG_CTX_SND_SQ_CONS_RULE_MASK	0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_SND_SQ_CONS_RULE_SHIFT	5
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT		6
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_TSTORM_IWARP_CONN_AG_CTX_RULE8EN_SHIFT		7
+#define TSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT		0
+#define TSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT		1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT		2
+#define TSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT		3
+#define TSTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT		4
+#define TSTORM_IWARP_CONN_AG_CTX_SND_SQ_CONS_RULE_MASK	0x1
+#define TSTORM_IWARP_CONN_AG_CTX_SND_SQ_CONS_RULE_SHIFT	5
+#define TSTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT		6
+#define TSTORM_IWARP_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define TSTORM_IWARP_CONN_AG_CTX_RULE8EN_SHIFT		7
 	__le32 reg0;
 	__le32 reg1;
 	__le32 unaligned_nxt_seq;
@@ -9713,16 +9713,16 @@ struct ustorm_iwarp_conn_st_ctx {
 };
 
 /* iwarp connection context */
-struct e4_iwarp_conn_context {
+struct iwarp_conn_context {
 	struct ystorm_iwarp_conn_st_ctx ystorm_st_context;
 	struct regpair ystorm_st_padding[2];
 	struct pstorm_iwarp_conn_st_ctx pstorm_st_context;
 	struct regpair pstorm_st_padding[2];
 	struct xstorm_iwarp_conn_st_ctx xstorm_st_context;
-	struct e4_xstorm_iwarp_conn_ag_ctx xstorm_ag_context;
-	struct e4_tstorm_iwarp_conn_ag_ctx tstorm_ag_context;
+	struct xstorm_iwarp_conn_ag_ctx xstorm_ag_context;
+	struct tstorm_iwarp_conn_ag_ctx tstorm_ag_context;
 	struct timers_context timer_context;
-	struct e4_ustorm_rdma_conn_ag_ctx ustorm_ag_context;
+	struct ustorm_rdma_conn_ag_ctx ustorm_ag_context;
 	struct tstorm_iwarp_conn_st_ctx tstorm_st_context;
 	struct regpair tstorm_st_padding[2];
 	struct mstorm_iwarp_conn_st_ctx mstorm_st_context;
@@ -10013,100 +10013,100 @@ struct unaligned_opaque_data {
 	__le32 cid;
 };
 
-struct e4_mstorm_iwarp_conn_ag_ctx {
+struct mstorm_iwarp_conn_ag_ctx {
 	u8 reserved;
 	u8 state;
 	u8 flags0;
-#define E4_MSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
-#define E4_MSTORM_IWARP_CONN_AG_CTX_BIT1_MASK			0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT			1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_MASK	0x3
-#define E4_MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_SHIFT	2
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF1_MASK			0x3
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT			4
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF2_MASK			0x3
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT			6
+#define MSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define MSTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
+#define MSTORM_IWARP_CONN_AG_CTX_BIT1_MASK			0x1
+#define MSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT			1
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_MASK	0x3
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_SHIFT	2
+#define MSTORM_IWARP_CONN_AG_CTX_CF1_MASK			0x3
+#define MSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT			4
+#define MSTORM_IWARP_CONN_AG_CTX_CF2_MASK			0x3
+#define MSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT			6
 	u8 flags1;
-#define E4_MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_EN_MASK	0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_EN_SHIFT	0
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK			0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT			1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT			2
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT		3
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT		4
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT		5
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RCQ_CONS_EN_MASK		0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RCQ_CONS_EN_SHIFT		6
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_MSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT		7
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_EN_MASK	0x1
+#define MSTORM_IWARP_CONN_AG_CTX_INV_STAG_DONE_CF_EN_SHIFT	0
+#define MSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK			0x1
+#define MSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT			1
+#define MSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK			0x1
+#define MSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT			2
+#define MSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT		3
+#define MSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT		4
+#define MSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT		5
+#define MSTORM_IWARP_CONN_AG_CTX_RCQ_CONS_EN_MASK		0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RCQ_CONS_EN_SHIFT		6
+#define MSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define MSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT		7
 	__le16 rcq_cons;
 	__le16 rcq_cons_th;
 	__le32 reg0;
 	__le32 reg1;
 };
 
-struct e4_ustorm_iwarp_conn_ag_ctx {
+struct ustorm_iwarp_conn_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_USTORM_IWARP_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT		1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF0_SHIFT		2
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF1_MASK		0x3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF1_SHIFT		4
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF2_MASK		0x3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF2_SHIFT		6
+#define USTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define USTORM_IWARP_CONN_AG_CTX_BIT1_MASK		0x1
+#define USTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT		1
+#define USTORM_IWARP_CONN_AG_CTX_CF0_MASK		0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF0_SHIFT		2
+#define USTORM_IWARP_CONN_AG_CTX_CF1_MASK		0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF1_SHIFT		4
+#define USTORM_IWARP_CONN_AG_CTX_CF2_MASK		0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF2_SHIFT		6
 	u8 flags1;
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF3_MASK		0x3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF3_SHIFT		0
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_MASK	0x3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_SHIFT	2
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_MASK	0x3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_SHIFT	4
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF6_MASK		0x3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF6_SHIFT		6
+#define USTORM_IWARP_CONN_AG_CTX_CF3_MASK		0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF3_SHIFT		0
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_SHIFT	2
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_MASK	0x3
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_SHIFT	4
+#define USTORM_IWARP_CONN_AG_CTX_CF6_MASK		0x3
+#define USTORM_IWARP_CONN_AG_CTX_CF6_SHIFT		6
 	u8 flags2;
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF0EN_MASK			0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT			0
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF1EN_MASK			0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT			1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT			2
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF3EN_MASK			0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF3EN_SHIFT			3
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_EN_SHIFT	4
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_EN_MASK		0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_EN_SHIFT		5
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF6EN_MASK			0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT			6
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_SE_EN_MASK		0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_SE_EN_SHIFT		7
+#define USTORM_IWARP_CONN_AG_CTX_CF0EN_MASK			0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT			0
+#define USTORM_IWARP_CONN_AG_CTX_CF1EN_MASK			0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT			1
+#define USTORM_IWARP_CONN_AG_CTX_CF2EN_MASK			0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT			2
+#define USTORM_IWARP_CONN_AG_CTX_CF3EN_MASK			0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF3EN_SHIFT			3
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_SE_CF_EN_SHIFT	4
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_EN_MASK		0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_ARM_CF_EN_SHIFT		5
+#define USTORM_IWARP_CONN_AG_CTX_CF6EN_MASK			0x1
+#define USTORM_IWARP_CONN_AG_CTX_CF6EN_SHIFT			6
+#define USTORM_IWARP_CONN_AG_CTX_CQ_SE_EN_MASK		0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_SE_EN_SHIFT		7
 	u8 flags3;
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_EN_MASK		0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_CQ_EN_SHIFT		0
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE8EN_MASK	0x1
-#define E4_USTORM_IWARP_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_IWARP_CONN_AG_CTX_CQ_EN_MASK		0x1
+#define USTORM_IWARP_CONN_AG_CTX_CQ_EN_SHIFT		0
+#define USTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_IWARP_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_IWARP_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_IWARP_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_IWARP_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define USTORM_IWARP_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -10119,37 +10119,37 @@ struct e4_ustorm_iwarp_conn_ag_ctx {
 	__le16 word3;
 };
 
-struct e4_ystorm_iwarp_conn_ag_ctx {
+struct ystorm_iwarp_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_IWARP_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_YSTORM_IWARP_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT	6
+#define YSTORM_IWARP_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_IWARP_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_IWARP_CONN_AG_CTX_CF0_MASK	0x3
+#define YSTORM_IWARP_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_IWARP_CONN_AG_CTX_CF1_MASK	0x3
+#define YSTORM_IWARP_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_IWARP_CONN_AG_CTX_CF2_MASK	0x3
+#define YSTORM_IWARP_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_YSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define YSTORM_IWARP_CONN_AG_CTX_CF0EN_MASK		0x1
+#define YSTORM_IWARP_CONN_AG_CTX_CF0EN_SHIFT		0
+#define YSTORM_IWARP_CONN_AG_CTX_CF1EN_MASK		0x1
+#define YSTORM_IWARP_CONN_AG_CTX_CF1EN_SHIFT		1
+#define YSTORM_IWARP_CONN_AG_CTX_CF2EN_MASK		0x1
+#define YSTORM_IWARP_CONN_AG_CTX_CF2EN_SHIFT		2
+#define YSTORM_IWARP_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define YSTORM_IWARP_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define YSTORM_IWARP_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define YSTORM_IWARP_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define YSTORM_IWARP_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define YSTORM_IWARP_CONN_AG_CTX_RULE4EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -10339,216 +10339,216 @@ struct xstorm_fcoe_conn_st_ctx {
 	struct fcoe_wqe cached_wqes[16];
 };
 
-struct e4_xstorm_fcoe_conn_ag_ctx {
+struct xstorm_fcoe_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED1_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED1_SHIFT	1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED2_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED2_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED3_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED3_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED4_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED4_SHIFT	5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED5_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED5_SHIFT	6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED6_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED6_SHIFT	7
+#define XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED1_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED1_SHIFT	1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED2_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED2_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED3_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED3_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED4_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED4_SHIFT	5
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED5_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED5_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED6_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED6_SHIFT	7
 	u8 flags1;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED7_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED7_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED8_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED8_SHIFT	1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED9_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED9_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT11_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT12_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT12_SHIFT		4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT13_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT13_SHIFT		5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT14_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT14_SHIFT		6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT15_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT15_SHIFT		7
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED7_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED7_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED8_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED8_SHIFT	1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED9_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED9_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_BIT11_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT11_SHIFT		3
+#define XSTORM_FCOE_CONN_AG_CTX_BIT12_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT12_SHIFT		4
+#define XSTORM_FCOE_CONN_AG_CTX_BIT13_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT13_SHIFT		5
+#define XSTORM_FCOE_CONN_AG_CTX_BIT14_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT14_SHIFT		6
+#define XSTORM_FCOE_CONN_AG_CTX_BIT15_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT15_SHIFT		7
 	u8 flags2;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF3_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF3_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF3_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF4_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF5_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF6_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF7_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_CF4_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF4_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_CF5_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF5_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_CF6_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF6_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF7_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF7_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF12_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF12_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF13_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF13_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF14_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF14_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF15_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF15_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_CF12_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF12_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_CF13_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF13_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_CF14_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF14_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF15_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF15_SHIFT	6
 	u8 flags6;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF16_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF16_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF17_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF17_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF18_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF18_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_DQ_CF_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_DQ_CF_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_CF16_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF16_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_CF17_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF17_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_CF18_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF18_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_DQ_CF_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_DQ_CF_SHIFT	6
 	u8 flags7;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED10_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED10_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		7
+#define XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED10_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED10_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		6
+#define XSTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		7
 	u8 flags8;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF3EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF3EN_SHIFT	1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF4EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF4EN_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF5EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF5EN_SHIFT	3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF6EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF6EN_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF7EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF7EN_SHIFT	5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF8EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF8EN_SHIFT	6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF9EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF9EN_SHIFT	7
+#define XSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_CF3EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF3EN_SHIFT	1
+#define XSTORM_FCOE_CONN_AG_CTX_CF4EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF4EN_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_CF5EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF5EN_SHIFT	3
+#define XSTORM_FCOE_CONN_AG_CTX_CF6EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF6EN_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF7EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF7EN_SHIFT	5
+#define XSTORM_FCOE_CONN_AG_CTX_CF8EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF8EN_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_CF9EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF9EN_SHIFT	7
 	u8 flags9;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF10EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF10EN_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF11EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF11EN_SHIFT	1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF12EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF12EN_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF13EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF13EN_SHIFT	3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF14EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF14EN_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF15EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF15EN_SHIFT	5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF16EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF16EN_SHIFT	6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF17EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF17EN_SHIFT	7
+#define XSTORM_FCOE_CONN_AG_CTX_CF10EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF10EN_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_CF11EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF11EN_SHIFT	1
+#define XSTORM_FCOE_CONN_AG_CTX_CF12EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF12EN_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_CF13EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF13EN_SHIFT	3
+#define XSTORM_FCOE_CONN_AG_CTX_CF14EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF14EN_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF15EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF15EN_SHIFT	5
+#define XSTORM_FCOE_CONN_AG_CTX_CF16EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF16EN_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_CF17EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF17EN_SHIFT	7
 	u8 flags10;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF18EN_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF18EN_SHIFT		0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_DQ_CF_EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_DQ_CF_EN_SHIFT	1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED11_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED11_SHIFT	3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF23EN_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF23EN_SHIFT		5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED12_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED12_SHIFT	6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED13_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED13_SHIFT	7
+#define XSTORM_FCOE_CONN_AG_CTX_CF18EN_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF18EN_SHIFT		0
+#define XSTORM_FCOE_CONN_AG_CTX_DQ_CF_EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_DQ_CF_EN_SHIFT	1
+#define XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED11_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED11_SHIFT	3
+#define XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_CF23EN_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_CF23EN_SHIFT		5
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED12_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED12_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED13_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED13_SHIFT	7
 	u8 flags11;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED14_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED14_SHIFT		0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED15_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED15_SHIFT		1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED16_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESERVED16_SHIFT		2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE5EN_MASK			0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE5EN_SHIFT		3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE6EN_MASK			0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE6EN_SHIFT		4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE7EN_MASK			0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE7EN_SHIFT		5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED1_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED1_SHIFT		6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_XFERQ_DECISION_EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_XFERQ_DECISION_EN_SHIFT	7
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED14_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED14_SHIFT		0
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED15_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED15_SHIFT		1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED16_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESERVED16_SHIFT		2
+#define XSTORM_FCOE_CONN_AG_CTX_RULE5EN_MASK			0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE5EN_SHIFT		3
+#define XSTORM_FCOE_CONN_AG_CTX_RULE6EN_MASK			0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE6EN_SHIFT		4
+#define XSTORM_FCOE_CONN_AG_CTX_RULE7EN_MASK			0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE7EN_SHIFT		5
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED1_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED1_SHIFT		6
+#define XSTORM_FCOE_CONN_AG_CTX_XFERQ_DECISION_EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_XFERQ_DECISION_EN_SHIFT	7
 	u8 flags12;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_SQ_DECISION_EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_SQ_DECISION_EN_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE11EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE11EN_SHIFT	1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE14EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE14EN_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE15EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE15EN_SHIFT	5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE16EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE16EN_SHIFT	6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE17EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE17EN_SHIFT	7
+#define XSTORM_FCOE_CONN_AG_CTX_SQ_DECISION_EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_SQ_DECISION_EN_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_RULE11EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE11EN_SHIFT	1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED2_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED2_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED3_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED3_SHIFT	3
+#define XSTORM_FCOE_CONN_AG_CTX_RULE14EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE14EN_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_RULE15EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE15EN_SHIFT	5
+#define XSTORM_FCOE_CONN_AG_CTX_RULE16EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE16EN_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_RULE17EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE17EN_SHIFT	7
 	u8 flags13;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESPQ_DECISION_EN_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RESPQ_DECISION_EN_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE19EN_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_RULE19EN_SHIFT		1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED4_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED4_SHIFT		2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED5_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED5_SHIFT		3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED6_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED6_SHIFT		4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED7_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED7_SHIFT		5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED8_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED8_SHIFT		6
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED9_MASK		0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED9_SHIFT		7
+#define XSTORM_FCOE_CONN_AG_CTX_RESPQ_DECISION_EN_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RESPQ_DECISION_EN_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_RULE19EN_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_RULE19EN_SHIFT		1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED4_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED4_SHIFT		2
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED5_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED5_SHIFT		3
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED6_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED6_SHIFT		4
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED7_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED7_SHIFT		5
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED8_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED8_SHIFT		6
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED9_MASK		0x1
+#define XSTORM_FCOE_CONN_AG_CTX_A0_RESERVED9_SHIFT		7
 	u8 flags14;
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT16_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT16_SHIFT	0
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT17_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT17_SHIFT	1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT18_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT18_SHIFT	2
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT19_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT19_SHIFT	3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT20_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT20_SHIFT	4
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT21_MASK	0x1
-#define E4_XSTORM_FCOE_CONN_AG_CTX_BIT21_SHIFT	5
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF23_MASK	0x3
-#define E4_XSTORM_FCOE_CONN_AG_CTX_CF23_SHIFT	6
+#define XSTORM_FCOE_CONN_AG_CTX_BIT16_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT16_SHIFT	0
+#define XSTORM_FCOE_CONN_AG_CTX_BIT17_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT17_SHIFT	1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT18_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT18_SHIFT	2
+#define XSTORM_FCOE_CONN_AG_CTX_BIT19_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT19_SHIFT	3
+#define XSTORM_FCOE_CONN_AG_CTX_BIT20_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT20_SHIFT	4
+#define XSTORM_FCOE_CONN_AG_CTX_BIT21_MASK	0x1
+#define XSTORM_FCOE_CONN_AG_CTX_BIT21_SHIFT	5
+#define XSTORM_FCOE_CONN_AG_CTX_CF23_MASK	0x3
+#define XSTORM_FCOE_CONN_AG_CTX_CF23_SHIFT	6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 word1;
@@ -10586,150 +10586,150 @@ struct ustorm_fcoe_conn_st_ctx {
 	u8 reserved[2];
 };
 
-struct e4_tstorm_fcoe_conn_ag_ctx {
+struct tstorm_fcoe_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_TSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT		1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT2_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT2_SHIFT		2
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT3_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT3_SHIFT		3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT4_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT4_SHIFT		4
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT5_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_BIT5_SHIFT		5
-#define E4_TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_MASK	0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_SHIFT	6
+#define TSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define TSTORM_FCOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define TSTORM_FCOE_CONN_AG_CTX_BIT1_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT		1
+#define TSTORM_FCOE_CONN_AG_CTX_BIT2_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_BIT2_SHIFT		2
+#define TSTORM_FCOE_CONN_AG_CTX_BIT3_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_BIT3_SHIFT		3
+#define TSTORM_FCOE_CONN_AG_CTX_BIT4_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_BIT4_SHIFT		4
+#define TSTORM_FCOE_CONN_AG_CTX_BIT5_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_BIT5_SHIFT		5
+#define TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_MASK	0x3
+#define TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_SHIFT	6
 	u8 flags1;
-#define E4_TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_MASK		0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT		0
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF2_MASK			0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT			2
-#define E4_TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK	0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT	4
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF4_MASK			0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF4_SHIFT			6
+#define TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_MASK		0x3
+#define TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_SHIFT		0
+#define TSTORM_FCOE_CONN_AG_CTX_CF2_MASK			0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT			2
+#define TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_MASK	0x3
+#define TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_SHIFT	4
+#define TSTORM_FCOE_CONN_AG_CTX_CF4_MASK			0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF4_SHIFT			6
 	u8 flags2;
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF5_SHIFT	0
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF6_SHIFT	2
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF7_SHIFT	4
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF8_SHIFT	6
+#define TSTORM_FCOE_CONN_AG_CTX_CF5_MASK	0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF5_SHIFT	0
+#define TSTORM_FCOE_CONN_AG_CTX_CF6_MASK	0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF6_SHIFT	2
+#define TSTORM_FCOE_CONN_AG_CTX_CF7_MASK	0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF7_SHIFT	4
+#define TSTORM_FCOE_CONN_AG_CTX_CF8_MASK	0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF8_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF9_MASK			0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF9_SHIFT			0
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF10_MASK			0x3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF10_SHIFT			2
-#define E4_TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_EN_MASK	0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_EN_SHIFT	4
-#define E4_TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		5
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT			6
-#define E4_TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK	0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT	7
+#define TSTORM_FCOE_CONN_AG_CTX_CF9_MASK			0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF9_SHIFT			0
+#define TSTORM_FCOE_CONN_AG_CTX_CF10_MASK			0x3
+#define TSTORM_FCOE_CONN_AG_CTX_CF10_SHIFT			2
+#define TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_EN_MASK	0x1
+#define TSTORM_FCOE_CONN_AG_CTX_DUMMY_TIMER_CF_EN_SHIFT	4
+#define TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_FLUSH_Q0_CF_EN_SHIFT		5
+#define TSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK			0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT			6
+#define TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_MASK	0x1
+#define TSTORM_FCOE_CONN_AG_CTX_TIMER_STOP_ALL_CF_EN_SHIFT	7
 	u8 flags4;
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF4EN_SHIFT		0
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF5EN_SHIFT		1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF6EN_SHIFT		2
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF7EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF7EN_SHIFT		3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF8EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF8EN_SHIFT		4
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF9EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF9EN_SHIFT		5
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF10EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_CF10EN_SHIFT		6
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define TSTORM_FCOE_CONN_AG_CTX_CF4EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF4EN_SHIFT		0
+#define TSTORM_FCOE_CONN_AG_CTX_CF5EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF5EN_SHIFT		1
+#define TSTORM_FCOE_CONN_AG_CTX_CF6EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF6EN_SHIFT		2
+#define TSTORM_FCOE_CONN_AG_CTX_CF7EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF7EN_SHIFT		3
+#define TSTORM_FCOE_CONN_AG_CTX_CF8EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF8EN_SHIFT		4
+#define TSTORM_FCOE_CONN_AG_CTX_CF9EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF9EN_SHIFT		5
+#define TSTORM_FCOE_CONN_AG_CTX_CF10EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_CF10EN_SHIFT		6
+#define TSTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags5;
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_TSTORM_FCOE_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define TSTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define TSTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define TSTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define TSTORM_FCOE_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define TSTORM_FCOE_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define TSTORM_FCOE_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define TSTORM_FCOE_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define TSTORM_FCOE_CONN_AG_CTX_RULE8EN_SHIFT	7
 	__le32 reg0;
 	__le32 reg1;
 };
 
-struct e4_ustorm_fcoe_conn_ag_ctx {
+struct ustorm_fcoe_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_FCOE_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_USTORM_FCOE_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	6
+#define USTORM_FCOE_CONN_AG_CTX_BIT0_MASK	0x1
+#define USTORM_FCOE_CONN_AG_CTX_BIT0_SHIFT	0
+#define USTORM_FCOE_CONN_AG_CTX_BIT1_MASK	0x1
+#define USTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT	1
+#define USTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
+#define USTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	2
+#define USTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
+#define USTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	4
+#define USTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
+#define USTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF3_SHIFT	0
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF4_SHIFT	2
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF5_SHIFT	4
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF6_SHIFT	6
+#define USTORM_FCOE_CONN_AG_CTX_CF3_MASK	0x3
+#define USTORM_FCOE_CONN_AG_CTX_CF3_SHIFT	0
+#define USTORM_FCOE_CONN_AG_CTX_CF4_MASK	0x3
+#define USTORM_FCOE_CONN_AG_CTX_CF4_SHIFT	2
+#define USTORM_FCOE_CONN_AG_CTX_CF5_MASK	0x3
+#define USTORM_FCOE_CONN_AG_CTX_CF5_SHIFT	4
+#define USTORM_FCOE_CONN_AG_CTX_CF6_MASK	0x3
+#define USTORM_FCOE_CONN_AG_CTX_CF6_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF3EN_SHIFT		3
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF4EN_SHIFT		4
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF5EN_SHIFT		5
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_CF6EN_SHIFT		6
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define USTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		0
+#define USTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		1
+#define USTORM_FCOE_CONN_AG_CTX_CF2EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT		2
+#define USTORM_FCOE_CONN_AG_CTX_CF3EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_CF3EN_SHIFT		3
+#define USTORM_FCOE_CONN_AG_CTX_CF4EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_CF4EN_SHIFT		4
+#define USTORM_FCOE_CONN_AG_CTX_CF5EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_CF5EN_SHIFT		5
+#define USTORM_FCOE_CONN_AG_CTX_CF6EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_CF6EN_SHIFT		6
+#define USTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags3;
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE5EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE6EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE7EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE8EN_MASK		0x1
-#define E4_USTORM_FCOE_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define USTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_FCOE_CONN_AG_CTX_RULE5EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_FCOE_CONN_AG_CTX_RULE6EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_FCOE_CONN_AG_CTX_RULE7EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_FCOE_CONN_AG_CTX_RULE8EN_MASK		0x1
+#define USTORM_FCOE_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -10770,37 +10770,37 @@ struct tstorm_fcoe_conn_st_ctx {
 	u8 reserved0[4];
 };
 
-struct e4_mstorm_fcoe_conn_ag_ctx {
+struct mstorm_fcoe_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_MSTORM_FCOE_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_MSTORM_FCOE_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	6
+#define MSTORM_FCOE_CONN_AG_CTX_BIT0_MASK	0x1
+#define MSTORM_FCOE_CONN_AG_CTX_BIT0_SHIFT	0
+#define MSTORM_FCOE_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
+#define MSTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	2
+#define MSTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
+#define MSTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
+#define MSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_MSTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define MSTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		0
+#define MSTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		1
+#define MSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT		2
+#define MSTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define MSTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	7
 	__le16 word0;
 	__le16 word1;
 	__le32 reg0;
@@ -10846,21 +10846,21 @@ struct mstorm_fcoe_conn_st_ctx {
 };
 
 /* fcoe connection context */
-struct e4_fcoe_conn_context {
+struct fcoe_conn_context {
 	struct ystorm_fcoe_conn_st_ctx ystorm_st_context;
 	struct pstorm_fcoe_conn_st_ctx pstorm_st_context;
 	struct regpair pstorm_st_padding[2];
 	struct xstorm_fcoe_conn_st_ctx xstorm_st_context;
-	struct e4_xstorm_fcoe_conn_ag_ctx xstorm_ag_context;
+	struct xstorm_fcoe_conn_ag_ctx xstorm_ag_context;
 	struct regpair xstorm_ag_padding[6];
 	struct ustorm_fcoe_conn_st_ctx ustorm_st_context;
 	struct regpair ustorm_st_padding[2];
-	struct e4_tstorm_fcoe_conn_ag_ctx tstorm_ag_context;
+	struct tstorm_fcoe_conn_ag_ctx tstorm_ag_context;
 	struct regpair tstorm_ag_padding[2];
 	struct timers_context timer_context;
-	struct e4_ustorm_fcoe_conn_ag_ctx ustorm_ag_context;
+	struct ustorm_fcoe_conn_ag_ctx ustorm_ag_context;
 	struct tstorm_fcoe_conn_st_ctx tstorm_st_context;
-	struct e4_mstorm_fcoe_conn_ag_ctx mstorm_ag_context;
+	struct mstorm_fcoe_conn_ag_ctx mstorm_ag_context;
 	struct mstorm_fcoe_conn_st_ctx mstorm_st_context;
 };
 
@@ -10911,37 +10911,37 @@ struct fcoe_stat_ramrod_params {
 	struct fcoe_stat_ramrod_data stat_ramrod_data;
 };
 
-struct e4_ystorm_fcoe_conn_ag_ctx {
+struct ystorm_fcoe_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_FCOE_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_YSTORM_FCOE_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	6
+#define YSTORM_FCOE_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_FCOE_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_FCOE_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_FCOE_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_FCOE_CONN_AG_CTX_CF0_MASK	0x3
+#define YSTORM_FCOE_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_FCOE_CONN_AG_CTX_CF1_MASK	0x3
+#define YSTORM_FCOE_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_FCOE_CONN_AG_CTX_CF2_MASK	0x3
+#define YSTORM_FCOE_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
-#define E4_YSTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define YSTORM_FCOE_CONN_AG_CTX_CF0EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_CF0EN_SHIFT		0
+#define YSTORM_FCOE_CONN_AG_CTX_CF1EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_CF1EN_SHIFT		1
+#define YSTORM_FCOE_CONN_AG_CTX_CF2EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_CF2EN_SHIFT		2
+#define YSTORM_FCOE_CONN_AG_CTX_RULE0EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define YSTORM_FCOE_CONN_AG_CTX_RULE1EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define YSTORM_FCOE_CONN_AG_CTX_RULE2EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define YSTORM_FCOE_CONN_AG_CTX_RULE3EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define YSTORM_FCOE_CONN_AG_CTX_RULE4EN_MASK		0x1
+#define YSTORM_FCOE_CONN_AG_CTX_RULE4EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -10972,216 +10972,216 @@ struct xstorm_iscsi_tcp_conn_st_ctx {
 	__le32 reserved_iscsi[44];
 };
 
-struct e4_xstorm_iscsi_conn_ag_ctx {
+struct xstorm_iscsi_conn_ag_ctx {
 	u8 cdu_validation;
 	u8 state;
 	u8 flags0;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM1_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM1_SHIFT	1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RESERVED1_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RESERVED1_SHIFT	2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT4_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT4_SHIFT		4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RESERVED2_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RESERVED2_SHIFT	5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT6_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT6_SHIFT		6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT7_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT7_SHIFT		7
+#define XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM1_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM1_SHIFT	1
+#define XSTORM_ISCSI_CONN_AG_CTX_RESERVED1_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RESERVED1_SHIFT	2
+#define XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM3_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM3_SHIFT	3
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT4_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT4_SHIFT		4
+#define XSTORM_ISCSI_CONN_AG_CTX_RESERVED2_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RESERVED2_SHIFT	5
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT6_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT6_SHIFT		6
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT7_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT7_SHIFT		7
 	u8 flags1;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT8_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT8_SHIFT		0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT9_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT9_SHIFT		1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT10_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT10_SHIFT		2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT11_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT11_SHIFT		3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT12_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT12_SHIFT		4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT13_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT13_SHIFT		5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT14_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT14_SHIFT		6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TX_TRUNCATE_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TX_TRUNCATE_SHIFT	7
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT8_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT8_SHIFT		0
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT9_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT9_SHIFT		1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT10_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT10_SHIFT		2
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT11_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT11_SHIFT		3
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT12_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT12_SHIFT		4
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT13_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT13_SHIFT		5
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT14_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT14_SHIFT		6
+#define XSTORM_ISCSI_CONN_AG_CTX_TX_TRUNCATE_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_TX_TRUNCATE_SHIFT	7
 	u8 flags2;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF0_MASK			0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT			0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF1_MASK			0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT			2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF2_MASK			0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT			4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF0_MASK			0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT			0
+#define XSTORM_ISCSI_CONN_AG_CTX_CF1_MASK			0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT			2
+#define XSTORM_ISCSI_CONN_AG_CTX_CF2_MASK			0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT			4
+#define XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	6
 	u8 flags3;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF4_SHIFT	0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF5_SHIFT	2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF6_SHIFT	4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF7_SHIFT	6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF4_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF4_SHIFT	0
+#define XSTORM_ISCSI_CONN_AG_CTX_CF5_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF5_SHIFT	2
+#define XSTORM_ISCSI_CONN_AG_CTX_CF6_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF6_SHIFT	4
+#define XSTORM_ISCSI_CONN_AG_CTX_CF7_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF7_SHIFT	6
 	u8 flags4;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF8_SHIFT	0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF9_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF9_SHIFT	2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF10_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF10_SHIFT	4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF11_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF11_SHIFT	6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF8_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF8_SHIFT	0
+#define XSTORM_ISCSI_CONN_AG_CTX_CF9_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF9_SHIFT	2
+#define XSTORM_ISCSI_CONN_AG_CTX_CF10_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF10_SHIFT	4
+#define XSTORM_ISCSI_CONN_AG_CTX_CF11_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF11_SHIFT	6
 	u8 flags5;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF12_MASK				0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF12_SHIFT				0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF13_MASK				0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF13_SHIFT				2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF14_MASK				0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF14_SHIFT				4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_SHIFT	6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF12_MASK				0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF12_SHIFT				0
+#define XSTORM_ISCSI_CONN_AG_CTX_CF13_MASK				0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF13_SHIFT				2
+#define XSTORM_ISCSI_CONN_AG_CTX_CF14_MASK				0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF14_SHIFT				4
+#define XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_SHIFT	6
 	u8 flags6;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF16_MASK		0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF16_SHIFT		0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF17_MASK		0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF17_SHIFT		2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF18_MASK		0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF18_SHIFT		4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_SHIFT	6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF16_MASK		0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF16_SHIFT		0
+#define XSTORM_ISCSI_CONN_AG_CTX_CF17_MASK		0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF17_SHIFT		2
+#define XSTORM_ISCSI_CONN_AG_CTX_CF18_MASK		0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF18_SHIFT		4
+#define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_SHIFT	6
 	u8 flags7;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_SHIFT	0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_SHIFT	2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_MASK		0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_SHIFT		4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT			6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT			7
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_SHIFT	0
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_SHIFT	2
+#define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_MASK		0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_SHIFT		4
+#define XSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT			6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT			7
 	u8 flags8;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT			0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF4EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF4EN_SHIFT			2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF5EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF5EN_SHIFT			3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF6EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF6EN_SHIFT			4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF7EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF7EN_SHIFT			5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF8EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF8EN_SHIFT			6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF9EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF9EN_SHIFT			7
+#define XSTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT			0
+#define XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF4EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF4EN_SHIFT			2
+#define XSTORM_ISCSI_CONN_AG_CTX_CF5EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF5EN_SHIFT			3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF6EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF6EN_SHIFT			4
+#define XSTORM_ISCSI_CONN_AG_CTX_CF7EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF7EN_SHIFT			5
+#define XSTORM_ISCSI_CONN_AG_CTX_CF8EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF8EN_SHIFT			6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF9EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF9EN_SHIFT			7
 	u8 flags9;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF10EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF10EN_SHIFT			0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF11EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF11EN_SHIFT			1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF12EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF12EN_SHIFT			2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF13EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF13EN_SHIFT			3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF14EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF14EN_SHIFT			4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_EN_SHIFT	5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF16EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF16EN_SHIFT			6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF17EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF17EN_SHIFT			7
+#define XSTORM_ISCSI_CONN_AG_CTX_CF10EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF10EN_SHIFT			0
+#define XSTORM_ISCSI_CONN_AG_CTX_CF11EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF11EN_SHIFT			1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF12EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF12EN_SHIFT			2
+#define XSTORM_ISCSI_CONN_AG_CTX_CF13EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF13EN_SHIFT			3
+#define XSTORM_ISCSI_CONN_AG_CTX_CF14EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF14EN_SHIFT			4
+#define XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_UPDATE_STATE_TO_BASE_CF_EN_SHIFT	5
+#define XSTORM_ISCSI_CONN_AG_CTX_CF16EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF16EN_SHIFT			6
+#define XSTORM_ISCSI_CONN_AG_CTX_CF17EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF17EN_SHIFT			7
 	u8 flags10;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF18EN_MASK				0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_CF18EN_SHIFT			0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_EN_SHIFT			1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_EN_SHIFT	2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_EN_SHIFT	3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_EN_SHIFT			4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_EN_SHIFT		5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT			6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_MORE_TO_SEND_DEC_RULE_EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_MORE_TO_SEND_DEC_RULE_EN_SHIFT	7
+#define XSTORM_ISCSI_CONN_AG_CTX_CF18EN_MASK				0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_CF18EN_SHIFT			0
+#define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_DQ_FLUSH_EN_SHIFT			1
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_MST_XCM_Q0_FLUSH_CF_EN_SHIFT	2
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_UST_XCM_Q1_FLUSH_CF_EN_SHIFT	3
+#define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_SLOW_PATH_EN_SHIFT			4
+#define XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_EN_SHIFT		5
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT			6
+#define XSTORM_ISCSI_CONN_AG_CTX_MORE_TO_SEND_DEC_RULE_EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_MORE_TO_SEND_DEC_RULE_EN_SHIFT	7
 	u8 flags11;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TX_BLOCKED_EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_TX_BLOCKED_EN_SHIFT	0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RESERVED3_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RESERVED3_SHIFT	2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE5EN_SHIFT	3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE6EN_SHIFT	4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE7EN_SHIFT	5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE9EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE9EN_SHIFT	7
+#define XSTORM_ISCSI_CONN_AG_CTX_TX_BLOCKED_EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_TX_BLOCKED_EN_SHIFT	0
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	1
+#define XSTORM_ISCSI_CONN_AG_CTX_RESERVED3_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RESERVED3_SHIFT	2
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE5EN_SHIFT	3
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE6EN_SHIFT	4
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE7EN_SHIFT	5
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED1_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED1_SHIFT	6
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE9EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE9EN_SHIFT	7
 	u8 flags12;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_SQ_DEC_RULE_EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_SQ_DEC_RULE_EN_SHIFT	0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE11EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE11EN_SHIFT		1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED2_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED2_SHIFT		2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED3_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED3_SHIFT		3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE14EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE14EN_SHIFT		4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE15EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE15EN_SHIFT		5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE16EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE16EN_SHIFT		6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE17EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_RULE17EN_SHIFT		7
+#define XSTORM_ISCSI_CONN_AG_CTX_SQ_DEC_RULE_EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_SQ_DEC_RULE_EN_SHIFT	0
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE11EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE11EN_SHIFT		1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED2_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED2_SHIFT		2
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED3_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED3_SHIFT		3
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE14EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE14EN_SHIFT		4
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE15EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE15EN_SHIFT		5
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE16EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE16EN_SHIFT		6
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE17EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_RULE17EN_SHIFT		7
 	u8 flags13;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_R2TQ_DEC_RULE_EN_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_R2TQ_DEC_RULE_EN_SHIFT	0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_HQ_DEC_RULE_EN_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_HQ_DEC_RULE_EN_SHIFT	1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED4_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED4_SHIFT		2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED5_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED5_SHIFT		3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED6_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED6_SHIFT		4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED7_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED7_SHIFT		5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED8_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED8_SHIFT		6
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED9_MASK		0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED9_SHIFT		7
+#define XSTORM_ISCSI_CONN_AG_CTX_R2TQ_DEC_RULE_EN_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_R2TQ_DEC_RULE_EN_SHIFT	0
+#define XSTORM_ISCSI_CONN_AG_CTX_HQ_DEC_RULE_EN_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_HQ_DEC_RULE_EN_SHIFT	1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED4_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED4_SHIFT		2
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED5_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED5_SHIFT		3
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED6_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED6_SHIFT		4
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED7_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED7_SHIFT		5
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED8_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED8_SHIFT		6
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED9_MASK		0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_A0_RESERVED9_SHIFT		7
 	u8 flags14;
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT16_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT16_SHIFT			0
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT17_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT17_SHIFT			1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT18_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT18_SHIFT			2
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT19_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT19_SHIFT			3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT20_MASK			0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_BIT20_SHIFT			4
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_DUMMY_READ_DONE_MASK	0x1
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_DUMMY_READ_DONE_SHIFT	5
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_MASK	0x3
-#define E4_XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_SHIFT	6
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT16_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT16_SHIFT			0
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT17_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT17_SHIFT			1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT18_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT18_SHIFT			2
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT19_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT19_SHIFT			3
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT20_MASK			0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_BIT20_SHIFT			4
+#define XSTORM_ISCSI_CONN_AG_CTX_DUMMY_READ_DONE_MASK	0x1
+#define XSTORM_ISCSI_CONN_AG_CTX_DUMMY_READ_DONE_SHIFT	5
+#define XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_MASK	0x3
+#define XSTORM_ISCSI_CONN_AG_CTX_PROC_ONLY_CLEANUP_SHIFT	6
 	u8 byte2;
 	__le16 physical_q0;
 	__le16 physical_q1;
@@ -11229,89 +11229,89 @@ struct e4_xstorm_iscsi_conn_ag_ctx {
 	__le32 reg17;
 };
 
-struct e4_tstorm_iscsi_conn_ag_ctx {
+struct tstorm_iscsi_conn_ag_ctx {
 	u8 reserved0;
 	u8 state;
 	u8 flags0;
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT1_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT		1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT2_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT2_SHIFT		2
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT3_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT3_SHIFT		3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT4_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT4_SHIFT		4
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT5_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_BIT5_SHIFT		5
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF0_MASK		0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT		6
+#define TSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_EXIST_IN_QM0_SHIFT	0
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT1_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT		1
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT2_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT2_SHIFT		2
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT3_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT3_SHIFT		3
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT4_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT4_SHIFT		4
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT5_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_BIT5_SHIFT		5
+#define TSTORM_ISCSI_CONN_AG_CTX_CF0_MASK		0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT		6
 	u8 flags1;
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_MASK		0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_SHIFT		0
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_MASK		0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_SHIFT		2
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	4
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF4_MASK			0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF4_SHIFT			6
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_MASK		0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_SHIFT		0
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_MASK		0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_SHIFT		2
+#define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT	4
+#define TSTORM_ISCSI_CONN_AG_CTX_CF4_MASK			0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_CF4_SHIFT			6
 	u8 flags2;
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF5_SHIFT	0
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF6_SHIFT	2
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF7_MASK	0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF7_SHIFT	4
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF8_MASK	0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF8_SHIFT	6
+#define TSTORM_ISCSI_CONN_AG_CTX_CF5_MASK	0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_CF5_SHIFT	0
+#define TSTORM_ISCSI_CONN_AG_CTX_CF6_MASK	0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_CF6_SHIFT	2
+#define TSTORM_ISCSI_CONN_AG_CTX_CF7_MASK	0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_CF7_SHIFT	4
+#define TSTORM_ISCSI_CONN_AG_CTX_CF8_MASK	0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_CF8_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_MASK		0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_SHIFT		0
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_MASK	0x3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_SHIFT	2
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK			0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT			4
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_EN_SHIFT	5
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_EN_SHIFT	6
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	7
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_MASK		0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_SHIFT		0
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_MASK	0x3
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_SHIFT	2
+#define TSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK			0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT			4
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_P2T_FLUSH_CF_EN_SHIFT	5
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_M2T_FLUSH_CF_EN_SHIFT	6
+#define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	7
 	u8 flags4;
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF4EN_SHIFT		0
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF5EN_SHIFT		1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF6EN_SHIFT		2
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF7EN_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF7EN_SHIFT		3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF8EN_MASK		0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_CF8EN_SHIFT		4
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT	5
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_EN_SHIFT	6
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define TSTORM_ISCSI_CONN_AG_CTX_CF4EN_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_CF4EN_SHIFT		0
+#define TSTORM_ISCSI_CONN_AG_CTX_CF5EN_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_CF5EN_SHIFT		1
+#define TSTORM_ISCSI_CONN_AG_CTX_CF6EN_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_CF6EN_SHIFT		2
+#define TSTORM_ISCSI_CONN_AG_CTX_CF7EN_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_CF7EN_SHIFT		3
+#define TSTORM_ISCSI_CONN_AG_CTX_CF8EN_MASK		0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_CF8EN_SHIFT		4
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT	5
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_FLUSH_OOO_ISLES_CF_EN_SHIFT	6
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags5;
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE8EN_MASK	0x1
-#define E4_TSTORM_ISCSI_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define TSTORM_ISCSI_CONN_AG_CTX_RULE8EN_SHIFT	7
 	__le32 reg0;
 	__le32 reg1;
 	__le32 rx_tcp_checksum_err_cnt;
@@ -11326,63 +11326,63 @@ struct e4_tstorm_iscsi_conn_ag_ctx {
 	__le16 word0;
 };
 
-struct e4_ustorm_iscsi_conn_ag_ctx {
+struct ustorm_iscsi_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_USTORM_ISCSI_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_USTORM_ISCSI_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT	6
+#define USTORM_ISCSI_CONN_AG_CTX_BIT0_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_BIT0_SHIFT	0
+#define USTORM_ISCSI_CONN_AG_CTX_BIT1_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT	1
+#define USTORM_ISCSI_CONN_AG_CTX_CF0_MASK	0x3
+#define USTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT	2
+#define USTORM_ISCSI_CONN_AG_CTX_CF1_MASK	0x3
+#define USTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT	4
+#define USTORM_ISCSI_CONN_AG_CTX_CF2_MASK	0x3
+#define USTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF3_MASK	0x3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF3_SHIFT	0
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF4_MASK	0x3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF4_SHIFT	2
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF5_MASK	0x3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF5_SHIFT	4
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF6_MASK	0x3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF6_SHIFT	6
+#define USTORM_ISCSI_CONN_AG_CTX_CF3_MASK	0x3
+#define USTORM_ISCSI_CONN_AG_CTX_CF3_SHIFT	0
+#define USTORM_ISCSI_CONN_AG_CTX_CF4_MASK	0x3
+#define USTORM_ISCSI_CONN_AG_CTX_CF4_SHIFT	2
+#define USTORM_ISCSI_CONN_AG_CTX_CF5_MASK	0x3
+#define USTORM_ISCSI_CONN_AG_CTX_CF5_SHIFT	4
+#define USTORM_ISCSI_CONN_AG_CTX_CF6_MASK	0x3
+#define USTORM_ISCSI_CONN_AG_CTX_CF6_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF3EN_MASK		0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF3EN_SHIFT		3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF4EN_MASK		0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF4EN_SHIFT		4
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF5EN_MASK		0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF5EN_SHIFT		5
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF6EN_MASK		0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_CF6EN_SHIFT		6
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	7
+#define USTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK		0x1
+#define USTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT		0
+#define USTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK		0x1
+#define USTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT		1
+#define USTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK		0x1
+#define USTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT		2
+#define USTORM_ISCSI_CONN_AG_CTX_CF3EN_MASK		0x1
+#define USTORM_ISCSI_CONN_AG_CTX_CF3EN_SHIFT		3
+#define USTORM_ISCSI_CONN_AG_CTX_CF4EN_MASK		0x1
+#define USTORM_ISCSI_CONN_AG_CTX_CF4EN_SHIFT		4
+#define USTORM_ISCSI_CONN_AG_CTX_CF5EN_MASK		0x1
+#define USTORM_ISCSI_CONN_AG_CTX_CF5EN_SHIFT		5
+#define USTORM_ISCSI_CONN_AG_CTX_CF6EN_MASK		0x1
+#define USTORM_ISCSI_CONN_AG_CTX_CF6EN_SHIFT		6
+#define USTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	7
 	u8 flags3;
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	0
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	2
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	3
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE5EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE5EN_SHIFT	4
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE6EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE6EN_SHIFT	5
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE7EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE7EN_SHIFT	6
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE8EN_MASK	0x1
-#define E4_USTORM_ISCSI_CONN_AG_CTX_RULE8EN_SHIFT	7
+#define USTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	0
+#define USTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	2
+#define USTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	3
+#define USTORM_ISCSI_CONN_AG_CTX_RULE5EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE5EN_SHIFT	4
+#define USTORM_ISCSI_CONN_AG_CTX_RULE6EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE6EN_SHIFT	5
+#define USTORM_ISCSI_CONN_AG_CTX_RULE7EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE7EN_SHIFT	6
+#define USTORM_ISCSI_CONN_AG_CTX_RULE8EN_MASK	0x1
+#define USTORM_ISCSI_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
@@ -11400,37 +11400,37 @@ struct tstorm_iscsi_conn_st_ctx {
 	__le32 reserved[44];
 };
 
-struct e4_mstorm_iscsi_conn_ag_ctx {
+struct mstorm_iscsi_conn_ag_ctx {
 	u8 reserved;
 	u8 state;
 	u8 flags0;
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT	6
+#define MSTORM_ISCSI_CONN_AG_CTX_BIT0_MASK	0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_BIT0_SHIFT	0
+#define MSTORM_ISCSI_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_ISCSI_CONN_AG_CTX_CF0_MASK	0x3
+#define MSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT	2
+#define MSTORM_ISCSI_CONN_AG_CTX_CF1_MASK	0x3
+#define MSTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_ISCSI_CONN_AG_CTX_CF2_MASK	0x3
+#define MSTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_MSTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define MSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK		0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT		0
+#define MSTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK		0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT		1
+#define MSTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK		0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT		2
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	7
 	__le16 word0;
 	__le16 word1;
 	__le32 reg0;
@@ -11449,22 +11449,22 @@ struct ustorm_iscsi_conn_st_ctx {
 };
 
 /* iscsi connection context */
-struct e4_iscsi_conn_context {
+struct iscsi_conn_context {
 	struct ystorm_iscsi_conn_st_ctx ystorm_st_context;
 	struct pstorm_iscsi_tcp_conn_st_ctx pstorm_st_context;
 	struct regpair pstorm_st_padding[2];
 	struct pb_context xpb2_context;
 	struct xstorm_iscsi_tcp_conn_st_ctx xstorm_st_context;
 	struct regpair xstorm_st_padding[2];
-	struct e4_xstorm_iscsi_conn_ag_ctx xstorm_ag_context;
-	struct e4_tstorm_iscsi_conn_ag_ctx tstorm_ag_context;
+	struct xstorm_iscsi_conn_ag_ctx xstorm_ag_context;
+	struct tstorm_iscsi_conn_ag_ctx tstorm_ag_context;
 	struct regpair tstorm_ag_padding[2];
 	struct timers_context timer_context;
-	struct e4_ustorm_iscsi_conn_ag_ctx ustorm_ag_context;
+	struct ustorm_iscsi_conn_ag_ctx ustorm_ag_context;
 	struct pb_context upb_context;
 	struct tstorm_iscsi_conn_st_ctx tstorm_st_context;
 	struct regpair tstorm_st_padding[2];
-	struct e4_mstorm_iscsi_conn_ag_ctx mstorm_ag_context;
+	struct mstorm_iscsi_conn_ag_ctx mstorm_ag_context;
 	struct mstorm_iscsi_tcp_conn_st_ctx mstorm_st_context;
 	struct ustorm_iscsi_conn_st_ctx ustorm_st_context;
 };
@@ -11475,37 +11475,37 @@ struct iscsi_init_ramrod_params {
 	struct tcp_init_params tcp_init;
 };
 
-struct e4_ystorm_iscsi_conn_ag_ctx {
+struct ystorm_iscsi_conn_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	u8 flags0;
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_BIT0_MASK	0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_BIT0_SHIFT	0
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_BIT1_MASK	0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT	1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF0_MASK	0x3
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT	2
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF1_MASK	0x3
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT	4
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF2_MASK	0x3
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT	6
+#define YSTORM_ISCSI_CONN_AG_CTX_BIT0_MASK	0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_BIT0_SHIFT	0
+#define YSTORM_ISCSI_CONN_AG_CTX_BIT1_MASK	0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_BIT1_SHIFT	1
+#define YSTORM_ISCSI_CONN_AG_CTX_CF0_MASK	0x3
+#define YSTORM_ISCSI_CONN_AG_CTX_CF0_SHIFT	2
+#define YSTORM_ISCSI_CONN_AG_CTX_CF1_MASK	0x3
+#define YSTORM_ISCSI_CONN_AG_CTX_CF1_SHIFT	4
+#define YSTORM_ISCSI_CONN_AG_CTX_CF2_MASK	0x3
+#define YSTORM_ISCSI_CONN_AG_CTX_CF2_SHIFT	6
 	u8 flags1;
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT		0
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT		1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK		0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT		2
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	3
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	4
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	5
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	6
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
-#define E4_YSTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	7
+#define YSTORM_ISCSI_CONN_AG_CTX_CF0EN_MASK		0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_CF0EN_SHIFT		0
+#define YSTORM_ISCSI_CONN_AG_CTX_CF1EN_MASK		0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_CF1EN_SHIFT		1
+#define YSTORM_ISCSI_CONN_AG_CTX_CF2EN_MASK		0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_CF2EN_SHIFT		2
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define YSTORM_ISCSI_CONN_AG_CTX_RULE4EN_SHIFT	7
 	u8 byte2;
 	u8 byte3;
 	__le16 word0;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
index ea888a2c6ddb..30c0b5502670 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -17,13 +17,13 @@
 
 #define CDU_VALIDATION_DEFAULT_CFG	61
 
-static u16 con_region_offsets[3][NUM_OF_CONNECTION_TYPES_E4] = {
+static u16 con_region_offsets[3][NUM_OF_CONNECTION_TYPES] = {
 	{400, 336, 352, 368, 304, 384, 416, 352},	/* region 3 offsets */
 	{528, 496, 416, 512, 448, 512, 544, 480},	/* region 4 offsets */
 	{608, 544, 496, 576, 576, 592, 624, 560}	/* region 5 offsets */
 };
 
-static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = {
+static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES] = {
 	{240, 240, 112, 0, 0, 0, 0, 96}	/* region 1 offsets */
 };
 
@@ -54,7 +54,7 @@ static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = {
 #define QM_WFQ_VP_PQ_VOQ_SHIFT	0
 
 /* Bit  of PF in WFQ VP PQ map */
-#define QM_WFQ_VP_PQ_PF_E4_SHIFT	5
+#define QM_WFQ_VP_PQ_PF_SHIFT	5
 
 /* 0x9000 = 4*9*1024 */
 #define QM_WFQ_INC_VAL(weight)	((weight) * 0x9000)
@@ -156,20 +156,20 @@ static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES_E4] = {
 		  cmd ## _ ## field, \
 		  value)
 
-#define QM_INIT_TX_PQ_MAP(p_hwfn, map, chip, pq_id, vp_pq_id, rl_valid,	      \
+#define QM_INIT_TX_PQ_MAP(p_hwfn, map, pq_id, vp_pq_id, rl_valid,	      \
 			  rl_id, ext_voq, wrr)				      \
 	do {								      \
 		u32 __reg = 0;						      \
 									      \
 		BUILD_BUG_ON(sizeof((map).reg) != sizeof(__reg));	      \
 									      \
-		SET_FIELD(__reg, QM_RF_PQ_MAP_##chip##_PQ_VALID, 1);	      \
-		SET_FIELD(__reg, QM_RF_PQ_MAP_##chip##_RL_VALID,	      \
+		SET_FIELD(__reg, QM_RF_PQ_MAP_PQ_VALID, 1);	      \
+		SET_FIELD(__reg, QM_RF_PQ_MAP_RL_VALID,	      \
 			  !!(rl_valid));				      \
-		SET_FIELD(__reg, QM_RF_PQ_MAP_##chip##_VP_PQ_ID, (vp_pq_id)); \
-		SET_FIELD(__reg, QM_RF_PQ_MAP_##chip##_RL_ID, (rl_id));	      \
-		SET_FIELD(__reg, QM_RF_PQ_MAP_##chip##_VOQ, (ext_voq));	      \
-		SET_FIELD(__reg, QM_RF_PQ_MAP_##chip##_WRR_WEIGHT_GROUP,      \
+		SET_FIELD(__reg, QM_RF_PQ_MAP_VP_PQ_ID, (vp_pq_id)); \
+		SET_FIELD(__reg, QM_RF_PQ_MAP_RL_ID, (rl_id));	      \
+		SET_FIELD(__reg, QM_RF_PQ_MAP_VOQ, (ext_voq));	      \
+		SET_FIELD(__reg, QM_RF_PQ_MAP_WRR_WEIGHT_GROUP,      \
 			  (wrr));					      \
 									      \
 		STORE_RT_REG((p_hwfn), QM_REG_TXPQMAP_RT_OFFSET + (pq_id),    \
@@ -204,7 +204,7 @@ static void qed_enable_pf_rl(struct qed_hwfn *p_hwfn, bool pf_rl_en)
 {
 	STORE_RT_REG(p_hwfn, QM_REG_RLPFENABLE_RT_OFFSET, pf_rl_en ? 1 : 0);
 	if (pf_rl_en) {
-		u8 num_ext_voqs = MAX_NUM_VOQS_E4;
+		u8 num_ext_voqs = MAX_NUM_VOQS;
 		u64 voq_bit_mask = ((u64)1 << num_ext_voqs) - 1;
 
 		/* Enable RLs for all VOQs */
@@ -298,7 +298,7 @@ static void qed_cmdq_lines_rt_init(
 	struct init_qm_port_params port_params[MAX_NUM_PORTS])
 {
 	u8 tc, ext_voq, port_id, num_tcs_in_port;
-	u8 num_ext_voqs = MAX_NUM_VOQS_E4;
+	u8 num_ext_voqs = MAX_NUM_VOQS;
 
 	/* Clear PBF lines of all VOQs */
 	for (ext_voq = 0; ext_voq < num_ext_voqs; ext_voq++)
@@ -487,7 +487,7 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn,
 	/* Go over all Tx PQs */
 	for (i = 0, pq_id = p_params->start_pq; i < num_pqs; i++, pq_id++) {
 		u16 *p_first_tx_pq_id, vport_id_in_pf;
-		struct qm_rf_pq_map_e4 tx_pq_map;
+		struct qm_rf_pq_map tx_pq_map;
 		u8 tc_id = pq_params[i].tc_id;
 		bool is_vf_pq;
 		u8 ext_voq;
@@ -505,7 +505,7 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn,
 		if (*p_first_tx_pq_id == QM_INVALID_PQ_ID) {
 			u32 map_val =
 				(ext_voq << QM_WFQ_VP_PQ_VOQ_SHIFT) |
-				(p_params->pf_id << QM_WFQ_VP_PQ_PF_E4_SHIFT);
+				(p_params->pf_id << QM_WFQ_VP_PQ_PF_SHIFT);
 
 			/* Create new VP PQ */
 			*p_first_tx_pq_id = pq_id;
@@ -520,7 +520,6 @@ static void qed_tx_pq_map_rt_init(struct qed_hwfn *p_hwfn,
 		/* Prepare PQ map entry */
 		QM_INIT_TX_PQ_MAP(p_hwfn,
 				  tx_pq_map,
-				  E4,
 				  pq_id,
 				  *p_first_tx_pq_id,
 				  pq_params[i].rl_valid,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index f78e6055f654..a97f691839e0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -36,7 +36,7 @@ struct qed_sb_sp_info {
 	struct qed_sb_info sb_info;
 
 	/* per protocol index data */
-	struct qed_pi_info pi_info_arr[PIS_PER_SB_E4];
+	struct qed_pi_info pi_info_arr[PIS_PER_SB];
 };
 
 enum qed_attention_type {
@@ -1507,7 +1507,7 @@ static void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn,
 	else
 		SET_FIELD(prod, CAU_PI_ENTRY_FSM_SEL, 1);
 
-	sb_offset = igu_sb_id * PIS_PER_SB_E4;
+	sb_offset = igu_sb_id * PIS_PER_SB;
 	pi_offset = sb_offset + pi_index;
 
 	if (p_hwfn->hw_init_done)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index eb8e0f4242d7..84c17e97f569 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -204,7 +204,7 @@ int qed_db_rec_handler(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 #define QED_SB_EVENT_MASK       0x0003
 
 #define SB_ALIGNED_SIZE(p_hwfn)	\
-	ALIGNED_TYPE_SIZE(struct status_block_e4, p_hwfn)
+	ALIGNED_TYPE_SIZE(struct status_block, p_hwfn)
 
 #define QED_SB_INVALID_IDX      0xffff
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index c46a7f756ed5..bf48a66704bd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1533,7 +1533,7 @@ static inline u8 qed_ll2_handle_to_queue_id(struct qed_hwfn *p_hwfn,
 
 int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
 {
-	struct e4_core_conn_context *p_cxt;
+	struct core_conn_context *p_cxt;
 	struct qed_ll2_tx_packet *p_pkt;
 	struct qed_ll2_info *p_ll2_conn;
 	struct qed_hwfn *p_hwfn = cxt;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 24cd41567775..2b39fa294d32 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -3905,10 +3905,6 @@ int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 		   DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK |
 		   DRV_MB_PARAM_FEATURE_SUPPORT_PORT_FEC_CONTROL;
 
-	if (QED_IS_E5(p_hwfn->cdev))
-		features |=
-		    DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EXT_SPEED_FEC_CONTROL;
-
 	return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
 			   features, &mcp_resp, &mcp_param);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index da1b7fdcbda7..fd338a8dbedc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -1531,7 +1531,7 @@
 	0x1940000UL
 #define SEM_FAST_REG_DBG_MODE23_SRC_DISABLE \
 	0x000748UL
-#define SEM_FAST_REG_DBG_MODE4_SRC_DISABLE \
+#define SEM_FAST_REG_DBG_MODSRC_DISABLE \
 	0x00074cUL
 #define SEM_FAST_REG_DBG_MODE6_SRC_DISABLE \
 	0x000750UL
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 0bc1a0aeb56e..fa8385178538 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -189,7 +189,7 @@ static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
 static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
 				  struct qed_spq *p_spq)
 {
-	struct e4_core_conn_context *p_cxt;
+	struct core_conn_context *p_cxt;
 	struct qed_cxt_info cxt_info;
 	u16 physical_q;
 	int rc;
@@ -207,11 +207,11 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
 	p_cxt = cxt_info.p_cxt;
 
 	SET_FIELD(p_cxt->xstorm_ag_context.flags10,
-		  E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN, 1);
+		  XSTORM_CORE_CONN_AG_CTX_DQ_CF_EN, 1);
 	SET_FIELD(p_cxt->xstorm_ag_context.flags1,
-		  E4_XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE, 1);
+		  XSTORM_CORE_CONN_AG_CTX_DQ_CF_ACTIVE, 1);
 	SET_FIELD(p_cxt->xstorm_ag_context.flags9,
-		  E4_XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN, 1);
+		  XSTORM_CORE_CONN_AG_CTX_CONSOLID_PROD_CF_EN, 1);
 
 	/* QM physical queue */
 	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index ed2b6fe5a78d..08d92711c7a2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -1603,7 +1603,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn,
 	/* fill in pfdev info */
 	pfdev_info->chip_num = p_hwfn->cdev->chip_num;
 	pfdev_info->db_size = 0;
-	pfdev_info->indices_per_sb = PIS_PER_SB_E4;
+	pfdev_info->indices_per_sb = PIS_PER_SB;
 
 	pfdev_info->capabilities = PFVF_ACQUIRE_CAP_DEFAULT_UNTAGGED |
 				   PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE;
@@ -3581,11 +3581,11 @@ static int
 qed_iov_vf_flr_poll_pbf(struct qed_hwfn *p_hwfn,
 			struct qed_vf_info *p_vf, struct qed_ptt *p_ptt)
 {
-	u32 cons[MAX_NUM_VOQS_E4], distance[MAX_NUM_VOQS_E4];
+	u32 cons[MAX_NUM_VOQS], distance[MAX_NUM_VOQS];
 	int i, cnt;
 
 	/* Read initial consumers & producers */
-	for (i = 0; i < MAX_NUM_VOQS_E4; i++) {
+	for (i = 0; i < MAX_NUM_VOQS; i++) {
 		u32 prod;
 
 		cons[i] = qed_rd(p_hwfn, p_ptt,
@@ -3600,7 +3600,7 @@ qed_iov_vf_flr_poll_pbf(struct qed_hwfn *p_hwfn,
 	/* Wait for consumers to pass the producers */
 	i = 0;
 	for (cnt = 0; cnt < 50; cnt++) {
-		for (; i < MAX_NUM_VOQS_E4; i++) {
+		for (; i < MAX_NUM_VOQS; i++) {
 			u32 tmp;
 
 			tmp = qed_rd(p_hwfn, p_ptt,
@@ -3610,7 +3610,7 @@ qed_iov_vf_flr_poll_pbf(struct qed_hwfn *p_hwfn,
 				break;
 		}
 
-		if (i == MAX_NUM_VOQS_E4)
+		if (i == MAX_NUM_VOQS)
 			break;
 
 		msleep(20);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index be33bde0f731..06c6a5813606 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1395,7 +1395,7 @@ static void qede_free_mem_sb(struct qede_dev *edev, struct qed_sb_info *sb_info,
 static int qede_alloc_mem_sb(struct qede_dev *edev,
 			     struct qed_sb_info *sb_info, u16 sb_id)
 {
-	struct status_block_e4 *sb_virt;
+	struct status_block *sb_virt;
 	dma_addr_t sb_phys;
 	int rc;
 
diff --git a/drivers/scsi/qedf/drv_fcoe_fw_funcs.c b/drivers/scsi/qedf/drv_fcoe_fw_funcs.c
index 747af96dd15c..e8bc8d9e4583 100644
--- a/drivers/scsi/qedf/drv_fcoe_fw_funcs.c
+++ b/drivers/scsi/qedf/drv_fcoe_fw_funcs.c
@@ -22,9 +22,9 @@ int init_initiator_rw_fcoe_task(struct fcoe_task_params *task_params,
 				u32 task_retry_id,
 				u8 fcp_cmd_payload[32])
 {
-	struct e4_fcoe_task_context *ctx = task_params->context;
+	struct fcoe_task_context *ctx = task_params->context;
 	const u8 val_byte = ctx->ystorm_ag_context.byte0;
-	struct e4_ustorm_fcoe_task_ag_ctx *u_ag_ctx;
+	struct ustorm_fcoe_task_ag_ctx *u_ag_ctx;
 	struct ystorm_fcoe_task_st_ctx *y_st_ctx;
 	struct tstorm_fcoe_task_st_ctx *t_st_ctx;
 	struct mstorm_fcoe_task_st_ctx *m_st_ctx;
@@ -115,9 +115,9 @@ int init_initiator_midpath_unsolicited_fcoe_task(
 	struct scsi_sgl_task_params *rx_sgl_task_params,
 	u8 fw_to_place_fc_header)
 {
-	struct e4_fcoe_task_context *ctx = task_params->context;
+	struct fcoe_task_context *ctx = task_params->context;
 	const u8 val_byte = ctx->ystorm_ag_context.byte0;
-	struct e4_ustorm_fcoe_task_ag_ctx *u_ag_ctx;
+	struct ustorm_fcoe_task_ag_ctx *u_ag_ctx;
 	struct ystorm_fcoe_task_st_ctx *y_st_ctx;
 	struct tstorm_fcoe_task_st_ctx *t_st_ctx;
 	struct mstorm_fcoe_task_st_ctx *m_st_ctx;
diff --git a/drivers/scsi/qedf/drv_fcoe_fw_funcs.h b/drivers/scsi/qedf/drv_fcoe_fw_funcs.h
index 1ee31a5f063b..7125e484bf93 100644
--- a/drivers/scsi/qedf/drv_fcoe_fw_funcs.h
+++ b/drivers/scsi/qedf/drv_fcoe_fw_funcs.h
@@ -10,7 +10,7 @@
 
 struct fcoe_task_params {
 	/* Output parameter [set/filled by the HSI function] */
-	struct e4_fcoe_task_context *context;
+	struct fcoe_task_context *context;
 
 	/* Output parameter [set/filled by the HSI function] */
 	struct fcoe_wqe *sqe;
diff --git a/drivers/scsi/qedf/qedf.h b/drivers/scsi/qedf/qedf.h
index ba94413fe2ea..631a15969d21 100644
--- a/drivers/scsi/qedf/qedf.h
+++ b/drivers/scsi/qedf/qedf.h
@@ -141,7 +141,7 @@ struct qedf_ioreq {
 	struct completion tm_done;
 	struct completion abts_done;
 	struct completion cleanup_done;
-	struct e4_fcoe_task_context *task;
+	struct fcoe_task_context *task;
 	struct fcoe_task_params *task_params;
 	struct scsi_sgl_task_params *sgl_task_params;
 	int idx;
@@ -503,7 +503,7 @@ extern void qedf_cmd_timer_set(struct qedf_ctx *qedf, struct qedf_ioreq *io_req,
 	unsigned int timer_msec);
 extern int qedf_init_mp_req(struct qedf_ioreq *io_req);
 extern void qedf_init_mp_task(struct qedf_ioreq *io_req,
-	struct e4_fcoe_task_context *task_ctx, struct fcoe_wqe *sqe);
+	struct fcoe_task_context *task_ctx, struct fcoe_wqe *sqe);
 extern u16 qedf_get_sqe_idx(struct qedf_rport *fcport);
 extern void qedf_ring_doorbell(struct qedf_rport *fcport);
 extern void qedf_process_els_compl(struct qedf_ctx *qedf, struct fcoe_cqe *cqe,
diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c
index 625e58ccb8c8..1ff5bc314fc0 100644
--- a/drivers/scsi/qedf/qedf_els.c
+++ b/drivers/scsi/qedf/qedf_els.c
@@ -16,7 +16,7 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op,
 	struct qedf_ioreq *els_req;
 	struct qedf_mp_req *mp_req;
 	struct fc_frame_header *fc_hdr;
-	struct e4_fcoe_task_context *task;
+	struct fcoe_task_context *task;
 	int rc = 0;
 	uint32_t did, sid;
 	uint16_t xid;
diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c
index 3404782988d5..b649f835d436 100644
--- a/drivers/scsi/qedf/qedf_io.c
+++ b/drivers/scsi/qedf/qedf_io.c
@@ -584,7 +584,7 @@ static void qedf_build_fcp_cmnd(struct qedf_ioreq *io_req,
 }
 
 static void  qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport,
-	struct qedf_ioreq *io_req, struct e4_fcoe_task_context *task_ctx,
+	struct qedf_ioreq *io_req, struct fcoe_task_context *task_ctx,
 	struct fcoe_wqe *sqe)
 {
 	enum fcoe_task_type task_type;
@@ -602,7 +602,7 @@ static void  qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport,
 
 	/* Note init_initiator_rw_fcoe_task memsets the task context */
 	io_req->task = task_ctx;
-	memset(task_ctx, 0, sizeof(struct e4_fcoe_task_context));
+	memset(task_ctx, 0, sizeof(struct fcoe_task_context));
 	memset(io_req->task_params, 0, sizeof(struct fcoe_task_params));
 	memset(io_req->sgl_task_params, 0, sizeof(struct scsi_sgl_task_params));
 
@@ -674,7 +674,7 @@ static void  qedf_init_task(struct qedf_rport *fcport, struct fc_lport *lport,
 }
 
 void qedf_init_mp_task(struct qedf_ioreq *io_req,
-	struct e4_fcoe_task_context *task_ctx, struct fcoe_wqe *sqe)
+	struct fcoe_task_context *task_ctx, struct fcoe_wqe *sqe)
 {
 	struct qedf_mp_req *mp_req = &(io_req->mp_req);
 	struct qedf_rport *fcport = io_req->fcport;
@@ -692,7 +692,7 @@ void qedf_init_mp_task(struct qedf_ioreq *io_req,
 
 	memset(&tx_sgl_task_params, 0, sizeof(struct scsi_sgl_task_params));
 	memset(&rx_sgl_task_params, 0, sizeof(struct scsi_sgl_task_params));
-	memset(task_ctx, 0, sizeof(struct e4_fcoe_task_context));
+	memset(task_ctx, 0, sizeof(struct fcoe_task_context));
 	memset(&task_fc_hdr, 0, sizeof(struct fcoe_tx_mid_path_params));
 
 	/* Setup the task from io_req for easy reference */
@@ -850,7 +850,7 @@ int qedf_post_io_req(struct qedf_rport *fcport, struct qedf_ioreq *io_req)
 	struct Scsi_Host *host = sc_cmd->device->host;
 	struct fc_lport *lport = shost_priv(host);
 	struct qedf_ctx *qedf = lport_priv(lport);
-	struct e4_fcoe_task_context *task_ctx;
+	struct fcoe_task_context *task_ctx;
 	u16 xid;
 	struct fcoe_wqe *sqe;
 	u16 sqe_idx;
@@ -2293,7 +2293,7 @@ static int qedf_execute_tmf(struct qedf_rport *fcport, struct scsi_cmnd *sc_cmd,
 	uint8_t tm_flags)
 {
 	struct qedf_ioreq *io_req;
-	struct e4_fcoe_task_context *task;
+	struct fcoe_task_context *task;
 	struct qedf_ctx *qedf = fcport->qedf;
 	struct fc_lport *lport = qedf->lport;
 	int rc = 0;
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 94ee08fab46a..0da32fd3302e 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2170,7 +2170,7 @@ static bool qedf_fp_has_work(struct qedf_fastpath *fp)
 	struct qedf_ctx *qedf = fp->qedf;
 	struct global_queue *que;
 	struct qed_sb_info *sb_info = fp->sb_info;
-	struct status_block_e4 *sb = sb_info->sb_virt;
+	struct status_block *sb = sb_info->sb_virt;
 	u16 prod_idx;
 
 	/* Get the pointer to the global CQ this completion is on */
@@ -2197,7 +2197,7 @@ static bool qedf_process_completions(struct qedf_fastpath *fp)
 {
 	struct qedf_ctx *qedf = fp->qedf;
 	struct qed_sb_info *sb_info = fp->sb_info;
-	struct status_block_e4 *sb = sb_info->sb_virt;
+	struct status_block *sb = sb_info->sb_virt;
 	struct global_queue *que;
 	u16 prod_idx;
 	struct fcoe_cqe *cqe;
@@ -2688,12 +2688,12 @@ void qedf_fp_io_handler(struct work_struct *work)
 static int qedf_alloc_and_init_sb(struct qedf_ctx *qedf,
 	struct qed_sb_info *sb_info, u16 sb_id)
 {
-	struct status_block_e4 *sb_virt;
+	struct status_block *sb_virt;
 	dma_addr_t sb_phys;
 	int ret;
 
 	sb_virt = dma_alloc_coherent(&qedf->pdev->dev,
-	    sizeof(struct status_block_e4), &sb_phys, GFP_KERNEL);
+	    sizeof(struct status_block), &sb_phys, GFP_KERNEL);
 
 	if (!sb_virt) {
 		QEDF_ERR(&qedf->dbg_ctx,
diff --git a/drivers/scsi/qedi/qedi_debugfs.c b/drivers/scsi/qedi/qedi_debugfs.c
index 42f5afb60055..8deb2001dc2f 100644
--- a/drivers/scsi/qedi/qedi_debugfs.c
+++ b/drivers/scsi/qedi/qedi_debugfs.c
@@ -136,7 +136,7 @@ qedi_gbl_ctx_show(struct seq_file *s, void *unused)
 {
 	struct qedi_fastpath *fp = NULL;
 	struct qed_sb_info *sb_info = NULL;
-	struct status_block_e4 *sb = NULL;
+	struct status_block *sb = NULL;
 	struct global_queue *que = NULL;
 	int id;
 	u16 prod_idx;
@@ -152,7 +152,7 @@ qedi_gbl_ctx_show(struct seq_file *s, void *unused)
 		sb_info = fp->sb_info;
 		sb = sb_info->sb_virt;
 		prod_idx = (sb->pi_array[QEDI_PROTO_CQ_PROD_IDX] &
-			    STATUS_BLOCK_E4_PROD_INDEX_MASK);
+			    STATUS_BLOCK_PROD_INDEX_MASK);
 		seq_printf(s, "SB PROD IDX: %d\n", prod_idx);
 		que = qedi->global_queues[fp->sb_id];
 		seq_printf(s, "DRV CONS IDX: %d\n", que->cq_cons_idx);
diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c
index d01cd829ef97..84a4204a2cb4 100644
--- a/drivers/scsi/qedi/qedi_fw.c
+++ b/drivers/scsi/qedi/qedi_fw.c
@@ -85,7 +85,7 @@ static void qedi_process_text_resp(struct qedi_ctx *qedi,
 {
 	struct iscsi_conn *conn = qedi_conn->cls_conn->dd_data;
 	struct iscsi_session *session = conn->session;
-	struct e4_iscsi_task_context *task_ctx;
+	struct iscsi_task_context *task_ctx;
 	struct iscsi_text_rsp *resp_hdr_ptr;
 	struct iscsi_text_response_hdr *cqe_text_response;
 	struct qedi_cmd *cmd;
@@ -261,7 +261,7 @@ static void qedi_process_login_resp(struct qedi_ctx *qedi,
 {
 	struct iscsi_conn *conn = qedi_conn->cls_conn->dd_data;
 	struct iscsi_session *session = conn->session;
-	struct e4_iscsi_task_context *task_ctx;
+	struct iscsi_task_context *task_ctx;
 	struct iscsi_login_rsp *resp_hdr_ptr;
 	struct iscsi_login_response_hdr *cqe_login_response;
 	struct qedi_cmd *cmd;
@@ -970,7 +970,7 @@ int qedi_send_iscsi_login(struct qedi_conn *qedi_conn,
 	struct scsi_sgl_task_params tx_sgl_task_params;
 	struct scsi_sgl_task_params rx_sgl_task_params;
 	struct iscsi_task_params task_params;
-	struct e4_iscsi_task_context *fw_task_ctx;
+	struct iscsi_task_context *fw_task_ctx;
 	struct qedi_ctx *qedi = qedi_conn->qedi;
 	struct iscsi_login_req *login_hdr;
 	struct scsi_sge *resp_sge = NULL;
@@ -990,9 +990,9 @@ int qedi_send_iscsi_login(struct qedi_conn *qedi_conn,
 		return -ENOMEM;
 
 	fw_task_ctx =
-	     (struct e4_iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
+	     (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
 							       tid);
-	memset(fw_task_ctx, 0, sizeof(struct e4_iscsi_task_context));
+	memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
 	qedi_cmd->task_id = tid;
 
@@ -1073,7 +1073,7 @@ int qedi_send_iscsi_logout(struct qedi_conn *qedi_conn,
 	struct scsi_sgl_task_params tx_sgl_task_params;
 	struct scsi_sgl_task_params rx_sgl_task_params;
 	struct iscsi_task_params task_params;
-	struct e4_iscsi_task_context *fw_task_ctx;
+	struct iscsi_task_context *fw_task_ctx;
 	struct iscsi_logout *logout_hdr = NULL;
 	struct qedi_ctx *qedi = qedi_conn->qedi;
 	struct qedi_cmd *qedi_cmd;
@@ -1091,9 +1091,9 @@ int qedi_send_iscsi_logout(struct qedi_conn *qedi_conn,
 		return -ENOMEM;
 
 	fw_task_ctx =
-	     (struct e4_iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
+	     (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
 							       tid);
-	memset(fw_task_ctx, 0, sizeof(struct e4_iscsi_task_context));
+	memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
 	qedi_cmd->task_id = tid;
 
@@ -1434,7 +1434,7 @@ static int send_iscsi_tmf(struct qedi_conn *qedi_conn, struct iscsi_task *mtask,
 	struct iscsi_tmf_request_hdr tmf_pdu_header;
 	struct iscsi_task_params task_params;
 	struct qedi_ctx *qedi = qedi_conn->qedi;
-	struct e4_iscsi_task_context *fw_task_ctx;
+	struct iscsi_task_context *fw_task_ctx;
 	struct iscsi_tm *tmf_hdr;
 	struct qedi_cmd *qedi_cmd;
 	struct qedi_cmd *cmd;
@@ -1454,9 +1454,9 @@ static int send_iscsi_tmf(struct qedi_conn *qedi_conn, struct iscsi_task *mtask,
 		return -ENOMEM;
 
 	fw_task_ctx =
-	     (struct e4_iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
+	     (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
 							       tid);
-	memset(fw_task_ctx, 0, sizeof(struct e4_iscsi_task_context));
+	memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
 	qedi_cmd->task_id = tid;
 
@@ -1548,7 +1548,7 @@ int qedi_send_iscsi_text(struct qedi_conn *qedi_conn,
 	struct scsi_sgl_task_params tx_sgl_task_params;
 	struct scsi_sgl_task_params rx_sgl_task_params;
 	struct iscsi_task_params task_params;
-	struct e4_iscsi_task_context *fw_task_ctx;
+	struct iscsi_task_context *fw_task_ctx;
 	struct qedi_ctx *qedi = qedi_conn->qedi;
 	struct iscsi_text *text_hdr;
 	struct scsi_sge *req_sge = NULL;
@@ -1570,9 +1570,9 @@ int qedi_send_iscsi_text(struct qedi_conn *qedi_conn,
 		return -ENOMEM;
 
 	fw_task_ctx =
-	     (struct e4_iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
+	     (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
 							       tid);
-	memset(fw_task_ctx, 0, sizeof(struct e4_iscsi_task_context));
+	memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
 	qedi_cmd->task_id = tid;
 
@@ -1649,7 +1649,7 @@ int qedi_send_iscsi_nopout(struct qedi_conn *qedi_conn,
 	struct scsi_sgl_task_params rx_sgl_task_params;
 	struct iscsi_task_params task_params;
 	struct qedi_ctx *qedi = qedi_conn->qedi;
-	struct e4_iscsi_task_context *fw_task_ctx;
+	struct iscsi_task_context *fw_task_ctx;
 	struct iscsi_nopout *nopout_hdr;
 	struct scsi_sge *resp_sge = NULL;
 	struct qedi_cmd *qedi_cmd;
@@ -1669,9 +1669,9 @@ int qedi_send_iscsi_nopout(struct qedi_conn *qedi_conn,
 		return -ENOMEM;
 
 	fw_task_ctx =
-	     (struct e4_iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
+	     (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
 							       tid);
-	memset(fw_task_ctx, 0, sizeof(struct e4_iscsi_task_context));
+	memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
 	qedi_cmd->task_id = tid;
 
@@ -1991,7 +1991,7 @@ int qedi_iscsi_send_ioreq(struct iscsi_task *task)
 	struct iscsi_task_params task_params;
 	struct iscsi_conn_params conn_params;
 	struct scsi_initiator_cmd_params cmd_params;
-	struct e4_iscsi_task_context *fw_task_ctx;
+	struct iscsi_task_context *fw_task_ctx;
 	struct iscsi_cls_conn *cls_conn;
 	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
 	enum iscsi_task_type task_type = MAX_ISCSI_TASK_TYPE;
@@ -2014,9 +2014,9 @@ int qedi_iscsi_send_ioreq(struct iscsi_task *task)
 		return -ENOMEM;
 
 	fw_task_ctx =
-	     (struct e4_iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
+	     (struct iscsi_task_context *)qedi_get_task_mem(&qedi->tasks,
 							       tid);
-	memset(fw_task_ctx, 0, sizeof(struct e4_iscsi_task_context));
+	memset(fw_task_ctx, 0, sizeof(struct iscsi_task_context));
 
 	cmd->task_id = tid;
 
diff --git a/drivers/scsi/qedi/qedi_fw_api.c b/drivers/scsi/qedi/qedi_fw_api.c
index 52772904ef5d..642556a1ce1c 100644
--- a/drivers/scsi/qedi/qedi_fw_api.c
+++ b/drivers/scsi/qedi/qedi_fw_api.c
@@ -202,7 +202,7 @@ static void init_default_iscsi_task(struct iscsi_task_params *task_params,
 				    struct data_hdr *pdu_header,
 				    enum iscsi_task_type task_type)
 {
-	struct e4_iscsi_task_context *context;
+	struct iscsi_task_context *context;
 	u32 val;
 	u16 index;
 	u8 val_byte;
@@ -224,7 +224,7 @@ static void init_default_iscsi_task(struct iscsi_task_params *task_params,
 					    cpu_to_le16(task_params->conn_icid);
 
 	SET_FIELD(context->ustorm_ag_context.flags1,
-		  E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
+		  USTORM_ISCSI_TASK_AG_CTX_R2T2RECV, 1);
 
 	context->ustorm_st_context.task_type = task_type;
 	context->ustorm_st_context.cq_rss_number = task_params->cq_rss_number;
@@ -254,7 +254,7 @@ void init_initiator_rw_cdb_ystorm_context(struct ystorm_iscsi_task_st_ctx *ystc,
 
 static
 void init_ustorm_task_contexts(struct ustorm_iscsi_task_st_ctx *ustorm_st_cxt,
-			struct e4_ustorm_iscsi_task_ag_ctx *ustorm_ag_cxt,
+			struct ustorm_iscsi_task_ag_ctx *ustorm_ag_cxt,
 			u32 remaining_recv_len, u32 expected_data_transfer_len,
 			u8 num_sges, bool tx_dif_conn_err_en)
 {
@@ -266,12 +266,12 @@ void init_ustorm_task_contexts(struct ustorm_iscsi_task_st_ctx *ustorm_st_cxt,
 	ustorm_st_cxt->exp_data_transfer_len = val;
 	SET_FIELD(ustorm_st_cxt->reg1.reg1_map, ISCSI_REG1_NUM_SGES, num_sges);
 	SET_FIELD(ustorm_ag_cxt->flags2,
-		  E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN,
+		  USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN,
 		  tx_dif_conn_err_en ? 1 : 0);
 }
 
 static
-void set_rw_exp_data_acked_and_cont_len(struct e4_iscsi_task_context *context,
+void set_rw_exp_data_acked_and_cont_len(struct iscsi_task_context *context,
 					struct iscsi_conn_params  *conn_params,
 					enum iscsi_task_type task_type,
 					u32 task_size,
@@ -470,7 +470,7 @@ void init_rtdif_task_context(struct rdif_task_context *rdif_context,
 	}
 }
 
-static void set_local_completion_context(struct e4_iscsi_task_context *context)
+static void set_local_completion_context(struct iscsi_task_context *context)
 {
 	SET_FIELD(context->ystorm_st_context.state.flags,
 		  YSTORM_ISCSI_TASK_STATE_LOCAL_COMP, 1);
@@ -487,7 +487,7 @@ static int init_rw_iscsi_task(struct iscsi_task_params *task_params,
 			      struct scsi_dif_task_params *dif_task_params)
 {
 	u32 exp_data_transfer_len = conn_params->max_burst_length;
-	struct e4_iscsi_task_context *cxt;
+	struct iscsi_task_context *cxt;
 	bool slow_io = false;
 	u32 task_size, val;
 	u8 num_sges = 0;
@@ -615,7 +615,7 @@ int init_initiator_login_request_task(struct iscsi_task_params *task_params,
 				      struct scsi_sgl_task_params *tx_params,
 				      struct scsi_sgl_task_params *rx_params)
 {
-	struct e4_iscsi_task_context *cxt;
+	struct iscsi_task_context *cxt;
 
 	cxt = task_params->context;
 
@@ -657,7 +657,7 @@ int init_initiator_nop_out_task(struct iscsi_task_params *task_params,
 				struct scsi_sgl_task_params *tx_sgl_task_params,
 				struct scsi_sgl_task_params *rx_sgl_task_params)
 {
-	struct e4_iscsi_task_context *cxt;
+	struct iscsi_task_context *cxt;
 
 	cxt = task_params->context;
 
@@ -703,7 +703,7 @@ int init_initiator_logout_request_task(struct iscsi_task_params *task_params,
 				       struct scsi_sgl_task_params *tx_params,
 				       struct scsi_sgl_task_params *rx_params)
 {
-	struct e4_iscsi_task_context *cxt;
+	struct iscsi_task_context *cxt;
 
 	cxt = task_params->context;
 
@@ -758,7 +758,7 @@ int init_initiator_text_request_task(struct iscsi_task_params *task_params,
 				     struct scsi_sgl_task_params *tx_params,
 				     struct scsi_sgl_task_params *rx_params)
 {
-	struct e4_iscsi_task_context *cxt;
+	struct iscsi_task_context *cxt;
 
 	cxt = task_params->context;
 
diff --git a/drivers/scsi/qedi/qedi_fw_iscsi.h b/drivers/scsi/qedi/qedi_fw_iscsi.h
index 10f19f0af0a3..df2d471a7b51 100644
--- a/drivers/scsi/qedi/qedi_fw_iscsi.h
+++ b/drivers/scsi/qedi/qedi_fw_iscsi.h
@@ -10,7 +10,7 @@
 #include "qedi_fw_scsi.h"
 
 struct iscsi_task_params {
-	struct e4_iscsi_task_context *context;
+	struct iscsi_task_context *context;
 	struct iscsi_wqe	  *sqe;
 	u32			  tx_io_size;
 	u32			  rx_io_size;
diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h
index a31c5de74754..a282860da0aa 100644
--- a/drivers/scsi/qedi/qedi_iscsi.h
+++ b/drivers/scsi/qedi/qedi_iscsi.h
@@ -182,7 +182,7 @@ struct qedi_cmd {
 	struct scsi_cmnd *scsi_cmd;
 	struct scatterlist *sg;
 	struct qedi_io_bdt io_tbl;
-	struct e4_iscsi_task_context request;
+	struct iscsi_task_context request;
 	unsigned char *sense_buffer;
 	dma_addr_t sense_buffer_dma;
 	u16 task_id;
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index e6dc0b495a82..fe36ddb82aef 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -351,12 +351,12 @@ static int qedi_init_uio(struct qedi_ctx *qedi)
 static int qedi_alloc_and_init_sb(struct qedi_ctx *qedi,
 				  struct qed_sb_info *sb_info, u16 sb_id)
 {
-	struct status_block_e4 *sb_virt;
+	struct status_block *sb_virt;
 	dma_addr_t sb_phys;
 	int ret;
 
 	sb_virt = dma_alloc_coherent(&qedi->pdev->dev,
-				     sizeof(struct status_block_e4), &sb_phys,
+				     sizeof(struct status_block), &sb_phys,
 				     GFP_KERNEL);
 	if (!sb_virt) {
 		QEDI_ERR(&qedi->dbg_ctx,
@@ -1259,7 +1259,7 @@ static bool qedi_process_completions(struct qedi_fastpath *fp)
 {
 	struct qedi_ctx *qedi = fp->qedi;
 	struct qed_sb_info *sb_info = fp->sb_info;
-	struct status_block_e4 *sb = sb_info->sb_virt;
+	struct status_block *sb = sb_info->sb_virt;
 	struct qedi_percpu_s *p = NULL;
 	struct global_queue *que;
 	u16 prod_idx;
@@ -1315,7 +1315,7 @@ static bool qedi_fp_has_work(struct qedi_fastpath *fp)
 	struct qedi_ctx *qedi = fp->qedi;
 	struct global_queue *que;
 	struct qed_sb_info *sb_info = fp->sb_info;
-	struct status_block_e4 *sb = sb_info->sb_virt;
+	struct status_block *sb = sb_info->sb_virt;
 	u16 prod_idx;
 
 	barrier();
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 0a3807e927c5..3742d1f7d1f7 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -133,7 +133,7 @@
 #define NUM_OF_TCS		(NUM_OF_PHYS_TCS + 1)
 
 /* CIDs */
-#define NUM_OF_CONNECTION_TYPES_E4	(8)
+#define NUM_OF_CONNECTION_TYPES	(8)
 #define NUM_OF_LCIDS			(320)
 #define NUM_OF_LTIDS			(320)
 
@@ -379,7 +379,7 @@
 #define CAU_FSM_ETH_TX  1
 
 /* Number of Protocol Indices per Status Block */
-#define PIS_PER_SB_E4	12
+#define PIS_PER_SB	12
 #define MAX_PIS_PER_SB	PIS_PER_SB
 
 #define CAU_HC_STOPPED_STATE	3
@@ -1221,20 +1221,20 @@ struct rdif_task_context {
 };
 
 /* Status block structure */
-struct status_block_e4 {
-	__le16	pi_array[PIS_PER_SB_E4];
+struct status_block {
+	__le16	pi_array[PIS_PER_SB];
 	__le32	sb_num;
-#define STATUS_BLOCK_E4_SB_NUM_MASK	0x1FF
-#define STATUS_BLOCK_E4_SB_NUM_SHIFT	0
-#define STATUS_BLOCK_E4_ZERO_PAD_MASK	0x7F
-#define STATUS_BLOCK_E4_ZERO_PAD_SHIFT	9
-#define STATUS_BLOCK_E4_ZERO_PAD2_MASK	0xFFFF
-#define STATUS_BLOCK_E4_ZERO_PAD2_SHIFT	16
+#define STATUS_BLOCK_SB_NUM_MASK	0x1FF
+#define STATUS_BLOCK_SB_NUM_SHIFT	0
+#define STATUS_BLOCK_ZERO_PAD_MASK	0x7F
+#define STATUS_BLOCK_ZERO_PAD_SHIFT	9
+#define STATUS_BLOCK_ZERO_PAD2_MASK	0xFFFF
+#define STATUS_BLOCK_ZERO_PAD2_SHIFT	16
 	__le32 prod_index;
-#define STATUS_BLOCK_E4_PROD_INDEX_MASK		0xFFFFFF
-#define STATUS_BLOCK_E4_PROD_INDEX_SHIFT	0
-#define STATUS_BLOCK_E4_ZERO_PAD3_MASK		0xFF
-#define STATUS_BLOCK_E4_ZERO_PAD3_SHIFT		24
+#define STATUS_BLOCK_PROD_INDEX_MASK		0xFFFFFF
+#define STATUS_BLOCK_PROD_INDEX_SHIFT	0
+#define STATUS_BLOCK_ZERO_PAD3_MASK		0xFF
+#define STATUS_BLOCK_ZERO_PAD3_SHIFT		24
 };
 
 /* Tdif context */
diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h
index 68eda1c21cde..7ba0abc867f1 100644
--- a/include/linux/qed/fcoe_common.h
+++ b/include/linux/qed/fcoe_common.h
@@ -150,49 +150,49 @@ struct ystorm_fcoe_task_st_ctx {
 	u8 reserved2[8];
 };
 
-struct e4_ystorm_fcoe_task_ag_ctx {
+struct ystorm_fcoe_task_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	__le16 word0;
 	u8 flags0;
-#define E4_YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_MASK		0xF
-#define E4_YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_SHIFT	0
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT0_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT0_SHIFT		4
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT1_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT		5
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT2_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT		6
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT3_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT		7
+#define YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_MASK		0xF
+#define YSTORM_FCOE_TASK_AG_CTX_NIBBLE0_SHIFT	0
+#define YSTORM_FCOE_TASK_AG_CTX_BIT0_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT0_SHIFT		4
+#define YSTORM_FCOE_TASK_AG_CTX_BIT1_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT		5
+#define YSTORM_FCOE_TASK_AG_CTX_BIT2_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT		6
+#define YSTORM_FCOE_TASK_AG_CTX_BIT3_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT		7
 	u8 flags1;
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0_MASK		0x3
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0_SHIFT		0
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1_MASK		0x3
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT		2
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_MASK	0x3
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_SHIFT	4
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT		6
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT		7
+#define YSTORM_FCOE_TASK_AG_CTX_CF0_MASK		0x3
+#define YSTORM_FCOE_TASK_AG_CTX_CF0_SHIFT		0
+#define YSTORM_FCOE_TASK_AG_CTX_CF1_MASK		0x3
+#define YSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT		2
+#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_MASK	0x3
+#define YSTORM_FCOE_TASK_AG_CTX_CF2SPECIAL_SHIFT	4
+#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT		6
+#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT		7
 	u8 flags2;
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT4_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_BIT4_SHIFT		0
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT	1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT	2
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT	3
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT	4
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT	5
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT	6
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK		0x1
-#define E4_YSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT	7
+#define YSTORM_FCOE_TASK_AG_CTX_BIT4_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_BIT4_SHIFT		0
+#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT	1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT	2
+#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT	3
+#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT	4
+#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT	5
+#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT	6
+#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK		0x1
+#define YSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT	7
 	u8 byte2;
 	__le32 reg0;
 	u8 byte3;
@@ -206,73 +206,73 @@ struct e4_ystorm_fcoe_task_ag_ctx {
 	__le32 reg2;
 };
 
-struct e4_tstorm_fcoe_task_ag_ctx {
+struct tstorm_fcoe_task_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	__le16 icid;
 	u8 flags0;
-#define E4_TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
-#define E4_TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT1_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT			5
-#define E4_TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_MASK		0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_SHIFT	6
-#define E4_TSTORM_FCOE_TASK_AG_CTX_VALID_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_VALID_SHIFT			7
+#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
+#define TSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define TSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define TSTORM_FCOE_TASK_AG_CTX_BIT1_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT			5
+#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_MASK		0x1
+#define TSTORM_FCOE_TASK_AG_CTX_WAIT_ABTS_RSP_F_SHIFT	6
+#define TSTORM_FCOE_TASK_AG_CTX_VALID_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_VALID_SHIFT			7
 	u8 flags1;
-#define E4_TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_MASK	0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_SHIFT	0
-#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT5_MASK		0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_BIT5_SHIFT		1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_MASK	0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_SHIFT	2
-#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_MASK	0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_SHIFT	4
-#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2_MASK		0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT		6
+#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_MASK	0x1
+#define TSTORM_FCOE_TASK_AG_CTX_FALSE_RR_TOV_SHIFT	0
+#define TSTORM_FCOE_TASK_AG_CTX_BIT5_MASK		0x1
+#define TSTORM_FCOE_TASK_AG_CTX_BIT5_SHIFT		1
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_MASK	0x3
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_SHIFT	2
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_MASK	0x3
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_SHIFT	4
+#define TSTORM_FCOE_TASK_AG_CTX_CF2_MASK		0x3
+#define TSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT		6
 	u8 flags2;
-#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_MASK		0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_SHIFT		0
-#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK		0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT		2
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_MASK		0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_SHIFT		4
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_MASK		0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_SHIFT	6
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_SHIFT		0
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK		0x3
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT		2
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_MASK		0x3
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_SHIFT		4
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_MASK		0x3
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_MASK		0x3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_SHIFT		0
-#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_MASK	0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_SHIFT	2
-#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_MASK		0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_SHIFT		3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT			4
-#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	5
-#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK	0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT	6
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_MASK		0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_SHIFT		7
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_MASK		0x3
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_SHIFT		0
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_MASK	0x1
+#define TSTORM_FCOE_TASK_AG_CTX_REC_RR_TOV_CF_EN_SHIFT	2
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_MASK		0x1
+#define TSTORM_FCOE_TASK_AG_CTX_ED_TOV_CF_EN_SHIFT		3
+#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT			4
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_MASK	0x1
+#define TSTORM_FCOE_TASK_AG_CTX_TIMER_STOP_ALL_EN_SHIFT	5
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK	0x1
+#define TSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT	6
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_MASK		0x1
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_INIT_CF_EN_SHIFT		7
 	u8 flags4;
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_MASK	0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_SHIFT	0
-#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_MASK	0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_SHIFT	1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT		2
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT		3
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT		4
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT		5
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT		6
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK			0x1
-#define E4_TSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT		7
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_MASK	0x1
+#define TSTORM_FCOE_TASK_AG_CTX_SEQ_RECOVERY_CF_EN_SHIFT	0
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_MASK	0x1
+#define TSTORM_FCOE_TASK_AG_CTX_UNSOL_COMP_CF_EN_SHIFT	1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT		2
+#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT		3
+#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT		4
+#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT		5
+#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT		6
+#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK			0x1
+#define TSTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT		7
 	u8 cleanup_state;
 	__le16 last_sent_tid;
 	__le32 rec_rr_tov_exp_timeout;
@@ -352,49 +352,49 @@ struct tstorm_fcoe_task_st_ctx {
 	struct fcoe_tstorm_fcoe_task_st_ctx_read_only read_only;
 };
 
-struct e4_mstorm_fcoe_task_ag_ctx {
+struct mstorm_fcoe_task_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	__le16 icid;
 	u8 flags0;
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_MASK		0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_SHIFT		5
-#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT2_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT			6
-#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT3_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT			7
+#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
+#define MSTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define MSTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_MASK		0x1
+#define MSTORM_FCOE_TASK_AG_CTX_CQE_PLACED_SHIFT		5
+#define MSTORM_FCOE_TASK_AG_CTX_BIT2_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_BIT2_SHIFT			6
+#define MSTORM_FCOE_TASK_AG_CTX_BIT3_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_BIT3_SHIFT			7
 	u8 flags1;
-#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK		0x3
-#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT		0
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1_MASK			0x3
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT			2
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2_MASK			0x3
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT			4
-#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK	0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT	6
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT			7
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_MASK		0x3
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_SHIFT		0
+#define MSTORM_FCOE_TASK_AG_CTX_CF1_MASK			0x3
+#define MSTORM_FCOE_TASK_AG_CTX_CF1_SHIFT			2
+#define MSTORM_FCOE_TASK_AG_CTX_CF2_MASK			0x3
+#define MSTORM_FCOE_TASK_AG_CTX_CF2_SHIFT			4
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_MASK	0x1
+#define MSTORM_FCOE_TASK_AG_CTX_EX_CLEANUP_CF_EN_SHIFT	6
+#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT			7
 	u8 flags2;
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT			0
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT		1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT		2
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT		3
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT		4
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT		5
-#define E4_MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_MASK	0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_SHIFT	6
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK			0x1
-#define E4_MSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT		7
+#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT			0
+#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT		1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT		2
+#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT		3
+#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT		4
+#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT		5
+#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_MASK	0x1
+#define MSTORM_FCOE_TASK_AG_CTX_XFER_PLACEMENT_EN_SHIFT	6
+#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK			0x1
+#define MSTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT		7
 	u8 cleanup_state;
 	__le32 received_bytes;
 	u8 byte3;
@@ -440,56 +440,56 @@ struct mstorm_fcoe_task_st_ctx {
 	struct scsi_cached_sges data_desc;
 };
 
-struct e4_ustorm_fcoe_task_ag_ctx {
+struct ustorm_fcoe_task_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	__le16 icid;
 	u8 flags0;
-#define E4_USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
-#define E4_USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_USTORM_FCOE_TASK_AG_CTX_BIT1_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT			5
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF0_MASK			0x3
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF0_SHIFT			6
+#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_MASK		0xF
+#define USTORM_FCOE_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define USTORM_FCOE_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define USTORM_FCOE_TASK_AG_CTX_BIT1_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_BIT1_SHIFT			5
+#define USTORM_FCOE_TASK_AG_CTX_CF0_MASK			0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF0_SHIFT			6
 	u8 flags1;
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF1_MASK		0x3
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF1_SHIFT		0
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF2_MASK		0x3
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF2_SHIFT		2
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF3_MASK		0x3
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF3_SHIFT		4
-#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_MASK	0x3
-#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_SHIFT	6
+#define USTORM_FCOE_TASK_AG_CTX_CF1_MASK		0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF1_SHIFT		0
+#define USTORM_FCOE_TASK_AG_CTX_CF2_MASK		0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF2_SHIFT		2
+#define USTORM_FCOE_TASK_AG_CTX_CF3_MASK		0x3
+#define USTORM_FCOE_TASK_AG_CTX_CF3_SHIFT		4
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_MASK	0x3
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF0EN_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT			0
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF1EN_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT			1
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF2EN_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT			2
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF3EN_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_CF3EN_SHIFT			3
-#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK		0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT	4
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT		5
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT		6
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK			0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT		7
+#define USTORM_FCOE_TASK_AG_CTX_CF0EN_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF0EN_SHIFT			0
+#define USTORM_FCOE_TASK_AG_CTX_CF1EN_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF1EN_SHIFT			1
+#define USTORM_FCOE_TASK_AG_CTX_CF2EN_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF2EN_SHIFT			2
+#define USTORM_FCOE_TASK_AG_CTX_CF3EN_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_CF3EN_SHIFT			3
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK		0x1
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT	4
+#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE0EN_SHIFT		5
+#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE1EN_SHIFT		6
+#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_MASK			0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE2EN_SHIFT		7
 	u8 flags3;
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK		0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT	0
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK		0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT	1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK		0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT	2
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK		0x1
-#define E4_USTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT	3
-#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_MASK	0xF
-#define E4_USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT	4
+#define USTORM_FCOE_TASK_AG_CTX_RULE3EN_MASK		0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE3EN_SHIFT	0
+#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_MASK		0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE4EN_SHIFT	1
+#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_MASK		0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE5EN_SHIFT	2
+#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_MASK		0x1
+#define USTORM_FCOE_TASK_AG_CTX_RULE6EN_SHIFT	3
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_MASK	0xF
+#define USTORM_FCOE_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT	4
 	__le32 dif_err_intervals;
 	__le32 dif_error_1st_interval;
 	__le32 global_cq_num;
@@ -499,18 +499,18 @@ struct e4_ustorm_fcoe_task_ag_ctx {
 };
 
 /* FCoE task context */
-struct e4_fcoe_task_context {
+struct fcoe_task_context {
 	struct ystorm_fcoe_task_st_ctx ystorm_st_context;
 	struct regpair ystorm_st_padding[2];
 	struct tdif_task_context tdif_context;
-	struct e4_ystorm_fcoe_task_ag_ctx ystorm_ag_context;
-	struct e4_tstorm_fcoe_task_ag_ctx tstorm_ag_context;
+	struct ystorm_fcoe_task_ag_ctx ystorm_ag_context;
+	struct tstorm_fcoe_task_ag_ctx tstorm_ag_context;
 	struct timers_context timer_context;
 	struct tstorm_fcoe_task_st_ctx tstorm_st_context;
 	struct regpair tstorm_st_padding[2];
-	struct e4_mstorm_fcoe_task_ag_ctx mstorm_ag_context;
+	struct mstorm_fcoe_task_ag_ctx mstorm_ag_context;
 	struct mstorm_fcoe_task_st_ctx mstorm_st_context;
-	struct e4_ustorm_fcoe_task_ag_ctx ustorm_ag_context;
+	struct ustorm_fcoe_task_ag_ctx ustorm_ag_context;
 	struct rdif_task_context rdif_context;
 };
 
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index 157019f716f1..1a60285a01e3 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -714,49 +714,49 @@ struct ystorm_iscsi_task_st_ctx {
 	union iscsi_task_hdr pdu_hdr;
 };
 
-struct e4_ystorm_iscsi_task_ag_ctx {
+struct ystorm_iscsi_task_ag_ctx {
 	u8 reserved;
 	u8 byte1;
 	__le16 word0;
 	u8 flags0;
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK	0xF
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT	0
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK		0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT		4
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK		0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT		5
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK		0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT		6
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_MASK   0x1	/* bit3 */
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_SHIFT  7
+#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK	0xF
+#define YSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT	0
+#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK		0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT		4
+#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK		0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT		5
+#define YSTORM_ISCSI_TASK_AG_CTX_VALID_MASK		0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT		6
+#define YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_MASK   0x1	/* bit3 */
+#define YSTORM_ISCSI_TASK_AG_CTX_TTT_VALID_SHIFT  7
 	u8 flags1;
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK		0x3
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT		0
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1_MASK		0x3
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT		2
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_MASK	0x3
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_SHIFT	4
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK		0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT		6
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK		0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT		7
+#define YSTORM_ISCSI_TASK_AG_CTX_CF0_MASK		0x3
+#define YSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT		0
+#define YSTORM_ISCSI_TASK_AG_CTX_CF1_MASK		0x3
+#define YSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT		2
+#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_MASK	0x3
+#define YSTORM_ISCSI_TASK_AG_CTX_CF2SPECIAL_SHIFT	4
+#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK		0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT		6
+#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK		0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT		7
 	u8 flags2;
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK		0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT		0
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK	0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT	1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK	0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT	2
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK	0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT	3
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK	0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT	4
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK	0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT	5
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK	0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT	6
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK	0x1
-#define E4_YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT	7
+#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK		0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT		0
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK	0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT	1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK	0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT	2
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK	0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT	3
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK	0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT	4
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK	0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT	5
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK	0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT	6
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK	0x1
+#define YSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT	7
 	u8 byte2;
 	__le32 TTT;
 	u8 byte3;
@@ -764,49 +764,49 @@ struct e4_ystorm_iscsi_task_ag_ctx {
 	__le16 word1;
 };
 
-struct e4_mstorm_iscsi_task_ag_ctx {
+struct mstorm_iscsi_task_ag_ctx {
 	u8 cdu_validation;
 	u8 byte1;
 	__le16 task_cid;
 	u8 flags0;
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK	0xF
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT	5
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_VALID_MASK			0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT			6
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_SHIFT	7
+#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK	0xF
+#define MSTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT	5
+#define MSTORM_ISCSI_TASK_AG_CTX_VALID_MASK			0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_VALID_SHIFT			6
+#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_FLAG_SHIFT	7
 	u8 flags1;
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_MASK	0x3
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_SHIFT	0
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1_MASK			0x3
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT			2
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2_MASK			0x3
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT			4
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_SHIFT	6
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK			0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT			7
+#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_MASK	0x3
+#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_SHIFT	0
+#define MSTORM_ISCSI_TASK_AG_CTX_CF1_MASK			0x3
+#define MSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT			2
+#define MSTORM_ISCSI_TASK_AG_CTX_CF2_MASK			0x3
+#define MSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT			4
+#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_TASK_CLEANUP_CF_EN_SHIFT	6
+#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK			0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT			7
 	u8 flags2;
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK		0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT		0
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT	1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT	2
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT	3
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT	4
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT	5
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT	6
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK	0x1
-#define E4_MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT	7
+#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK		0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT		0
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT	1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT	2
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT	3
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT	4
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT	5
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT	6
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK	0x1
+#define MSTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT	7
 	u8 byte2;
 	__le32 reg0;
 	u8 byte3;
@@ -814,56 +814,56 @@ struct e4_mstorm_iscsi_task_ag_ctx {
 	__le16 word1;
 };
 
-struct e4_ustorm_iscsi_task_ag_ctx {
+struct ustorm_iscsi_task_ag_ctx {
 	u8 reserved;
 	u8 state;
 	__le16 icid;
 	u8 flags0;
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK	0xF
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
-#define E4_USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK     0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT    5
-#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_MASK		0x3
-#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT		6
+#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_MASK	0xF
+#define USTORM_ISCSI_TASK_AG_CTX_CONNECTION_TYPE_SHIFT	0
+#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define USTORM_ISCSI_TASK_AG_CTX_EXIST_IN_QM0_SHIFT		4
+#define USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_MASK     0x1
+#define USTORM_ISCSI_TASK_AG_CTX_CONN_CLEAR_SQ_FLAG_SHIFT    5
+#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_MASK		0x3
+#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_SHIFT		6
 	u8 flags1;
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RESERVED1_MASK	0x3
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RESERVED1_SHIFT	0
-#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_MASK	0x3
-#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_SHIFT	2
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3_MASK		0x3
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT		4
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_MASK	0x3
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_SHIFT	6
+#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_MASK	0x3
+#define USTORM_ISCSI_TASK_AG_CTX_RESERVED1_SHIFT	0
+#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_MASK	0x3
+#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_SHIFT	2
+#define USTORM_ISCSI_TASK_AG_CTX_CF3_MASK		0x3
+#define USTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT		4
+#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_MASK	0x3
+#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_SHIFT	6
 	u8 flags2;
-#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_MASK	0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_SHIFT	0
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_MASK	0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_SHIFT	1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_MASK		0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_SHIFT		2
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK			0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT			3
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK	0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT	4
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_MASK	0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_SHIFT	5
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK		0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT		6
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_MASK	0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT	7
+#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_MASK	0x1
+#define USTORM_ISCSI_TASK_AG_CTX_HQ_SCANNED_CF_EN_SHIFT	0
+#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_MASK	0x1
+#define USTORM_ISCSI_TASK_AG_CTX_DISABLE_DATA_ACKED_SHIFT	1
+#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_MASK		0x1
+#define USTORM_ISCSI_TASK_AG_CTX_R2T2RECV_EN_SHIFT		2
+#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK			0x1
+#define USTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT			3
+#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_MASK	0x1
+#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_CF_EN_SHIFT	4
+#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_MASK	0x1
+#define USTORM_ISCSI_TASK_AG_CTX_CMP_DATA_TOTAL_EXP_EN_SHIFT	5
+#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK		0x1
+#define USTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT		6
+#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_MASK	0x1
+#define USTORM_ISCSI_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT	7
 	u8 flags3;
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK		0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT		0
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK		0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT		1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK		0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT		2
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK		0x1
-#define E4_USTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT		3
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_MASK		0xF
-#define E4_USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT	4
+#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK		0x1
+#define USTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT		0
+#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK		0x1
+#define USTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT		1
+#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK		0x1
+#define USTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT		2
+#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_MASK		0x1
+#define USTORM_ISCSI_TASK_AG_CTX_RULE6EN_SHIFT		3
+#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_MASK		0xF
+#define USTORM_ISCSI_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT	4
 	__le32 dif_err_intervals;
 	__le32 dif_error_1st_interval;
 	__le32 rcv_cont_len;
@@ -952,14 +952,14 @@ struct ustorm_iscsi_task_st_ctx {
 };
 
 /* iscsi task context */
-struct e4_iscsi_task_context {
+struct iscsi_task_context {
 	struct ystorm_iscsi_task_st_ctx ystorm_st_context;
-	struct e4_ystorm_iscsi_task_ag_ctx ystorm_ag_context;
+	struct ystorm_iscsi_task_ag_ctx ystorm_ag_context;
 	struct regpair ystorm_ag_padding[2];
 	struct tdif_task_context tdif_context;
-	struct e4_mstorm_iscsi_task_ag_ctx mstorm_ag_context;
+	struct mstorm_iscsi_task_ag_ctx mstorm_ag_context;
 	struct regpair mstorm_ag_padding[2];
-	struct e4_ustorm_iscsi_task_ag_ctx ustorm_ag_context;
+	struct ustorm_iscsi_task_ag_ctx ustorm_ag_context;
 	struct mstorm_iscsi_task_st_ctx mstorm_st_context;
 	struct ustorm_iscsi_task_st_ctx ustorm_st_context;
 	struct rdif_task_context rdif_context;
@@ -1431,73 +1431,73 @@ struct ystorm_iscsi_stats_drv {
 	struct regpair iscsi_tx_tcp_pkt_cnt;
 };
 
-struct e4_tstorm_iscsi_task_ag_ctx {
+struct tstorm_iscsi_task_ag_ctx {
 	u8 byte0;
 	u8 byte1;
 	__le16 word0;
 	u8 flags0;
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK	0xF
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT	0
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK		0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT		4
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK		0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT		5
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT2_MASK		0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT2_SHIFT		6
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK		0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT		7
+#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_MASK	0xF
+#define TSTORM_ISCSI_TASK_AG_CTX_NIBBLE0_SHIFT	0
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_MASK		0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT0_SHIFT		4
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_MASK		0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT1_SHIFT		5
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_MASK		0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT2_SHIFT		6
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_MASK		0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT3_SHIFT		7
 	u8 flags1;
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT	0
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT5_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_BIT5_SHIFT	1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT	2
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT	4
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT	6
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT4_SHIFT	0
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_BIT5_SHIFT	1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF0_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF0_SHIFT	2
+#define TSTORM_ISCSI_TASK_AG_CTX_CF1_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF1_SHIFT	4
+#define TSTORM_ISCSI_TASK_AG_CTX_CF2_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF2_SHIFT	6
 	u8 flags2;
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF3_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT	0
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4_SHIFT	2
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5_SHIFT	4
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6_SHIFT	6
+#define TSTORM_ISCSI_TASK_AG_CTX_CF3_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF3_SHIFT	0
+#define TSTORM_ISCSI_TASK_AG_CTX_CF4_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF4_SHIFT	2
+#define TSTORM_ISCSI_TASK_AG_CTX_CF5_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF5_SHIFT	4
+#define TSTORM_ISCSI_TASK_AG_CTX_CF6_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF6_SHIFT	6
 	u8 flags3;
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7_MASK	0x3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7_SHIFT	0
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT	2
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT	3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT	4
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT	5
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF4EN_SHIFT	6
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF5EN_SHIFT	7
+#define TSTORM_ISCSI_TASK_AG_CTX_CF7_MASK	0x3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF7_SHIFT	0
+#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF0EN_SHIFT	2
+#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF1EN_SHIFT	3
+#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF2EN_SHIFT	4
+#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF3EN_SHIFT	5
+#define TSTORM_ISCSI_TASK_AG_CTX_CF4EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF4EN_SHIFT	6
+#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF5EN_SHIFT	7
 	u8 flags4;
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6EN_MASK		0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF6EN_SHIFT		0
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7EN_MASK		0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_CF7EN_SHIFT		1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT	2
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT	3
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT	4
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT	5
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT	6
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK	0x1
-#define E4_TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT	7
+#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_MASK		0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF6EN_SHIFT		0
+#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_MASK		0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_CF7EN_SHIFT		1
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE0EN_SHIFT	2
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE1EN_SHIFT	3
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE2EN_SHIFT	4
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE3EN_SHIFT	5
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE4EN_SHIFT	6
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_MASK	0x1
+#define TSTORM_ISCSI_TASK_AG_CTX_RULE5EN_SHIFT	7
 	u8 byte2;
 	__le16 word1;
 	__le32 reg0;
diff --git a/include/linux/qed/nvmetcp_common.h b/include/linux/qed/nvmetcp_common.h
index 5a2ab0606308..cc7c7481a0e0 100644
--- a/include/linux/qed/nvmetcp_common.h
+++ b/include/linux/qed/nvmetcp_common.h
@@ -410,7 +410,7 @@ struct e5_ystorm_nvmetcp_task_ag_ctx {
 	u8 byte2;
 	u8 byte3;
 	u8 byte4;
-	u8 e4_reserved7;
+	u8 reserved7;
 };
 
 struct e5_mstorm_nvmetcp_task_ag_ctx {
@@ -445,7 +445,7 @@ struct e5_mstorm_nvmetcp_task_ag_ctx {
 	u8 byte2;
 	u8 byte3;
 	u8 byte4;
-	u8 e4_reserved7;
+	u8 reserved7;
 };
 
 struct e5_ustorm_nvmetcp_task_ag_ctx {
@@ -489,17 +489,17 @@ struct e5_ustorm_nvmetcp_task_ag_ctx {
 #define E5_USTORM_NVMETCP_TASK_AG_CTX_CMP_CONT_RCV_EXP_EN_SHIFT 7
 	u8 flags3;
 	u8 flags4;
-#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED5_MASK 0x3
-#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED5_SHIFT 0
-#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED6_MASK 0x1
-#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED6_SHIFT 2
-#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED7_MASK 0x1
-#define E5_USTORM_NVMETCP_TASK_AG_CTX_E4_RESERVED7_SHIFT 3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED5_MASK 0x3
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED5_SHIFT 0
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED6_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED6_SHIFT 2
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED7_MASK 0x1
+#define E5_USTORM_NVMETCP_TASK_AG_CTX_RESERVED7_SHIFT 3
 #define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_TYPE_MASK 0xF
 #define E5_USTORM_NVMETCP_TASK_AG_CTX_DIF_ERROR_TYPE_SHIFT 4
 	u8 byte2;
 	u8 byte3;
-	u8 e4_reserved8;
+	u8 reserved8;
 	__le32 dif_err_intervals;
 	__le32 dif_error_1st_interval;
 	__le32 rcv_cont_len;
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index f39451aaaeec..4dcd0d37a521 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -588,7 +588,7 @@ enum qed_int_mode {
 };
 
 struct qed_sb_info {
-	struct status_block_e4 *sb_virt;
+	struct status_block *sb_virt;
 	dma_addr_t sb_phys;
 	u32 sb_ack; /* Last given ack */
 	u16 igu_sb_id;
@@ -613,7 +613,6 @@ enum qed_hw_err_type {
 enum qed_dev_type {
 	QED_DEV_TYPE_BB,
 	QED_DEV_TYPE_AH,
-	QED_DEV_TYPE_E5,
 };
 
 struct qed_dev_info {
@@ -1411,7 +1410,7 @@ static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info)
 	u16 rc = 0;
 
 	prod = le32_to_cpu(sb_info->sb_virt->prod_index) &
-	       STATUS_BLOCK_E4_PROD_INDEX_MASK;
+	       STATUS_BLOCK_PROD_INDEX_MASK;
 	if (sb_info->sb_ack != prod) {
 		sb_info->sb_ack = prod;
 		rc |= QED_SB_IDX;
-- 
cgit v1.2.3


From 484563e230a8c68ab342080b41b5a5e2ce9621ef Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Mon, 4 Oct 2021 09:58:42 +0300
Subject: qed: Update common_hsi for FW ver 8.59.1.0

The common_hsi.h has been updated for FW version 8.59.1.0 with below
changes.
  - FW and Tools version.
  - New structures related to search table, packet duplication.
  - Structure for doorbell address for legacy mode without DEM.
  - Enhanced union rdma_eqe_data for RoCE Suspend Event Data.
  - New defines.

This patch also fixes the existing checkpatch warnings and few important
checks.

Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h |   2 +-
 include/linux/qed/common_hsi.h            | 113 +++++++++++++++++++++++++++---
 2 files changed, 106 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 987b086811b7..68eaef8ab6e8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -1093,7 +1093,7 @@ enum malicious_vf_error_id {
 /* Mstorm non-triggering VF zone */
 struct mstorm_non_trigger_vf_zone {
 	struct eth_mstorm_per_queue_stat eth_queue_stat;
-	struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD];
+	struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_RXQ_VF_QUAD];
 };
 
 /* Mstorm VF zone */
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 3742d1f7d1f7..827624840ee2 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
 /* QLogic qed NIC Driver
  * Copyright (c) 2015-2016  QLogic Corporation
- * Copyright (c) 2019-2020 Marvell International Ltd.
+ * Copyright (c) 2019-2021 Marvell International Ltd.
  */
 
 #ifndef _COMMON_HSI_H
@@ -47,10 +47,10 @@
 #define ISCSI_CDU_TASK_SEG_TYPE			0
 #define FCOE_CDU_TASK_SEG_TYPE			0
 #define RDMA_CDU_TASK_SEG_TYPE			1
+#define ETH_CDU_TASK_SEG_TYPE			2
 
 #define FW_ASSERT_GENERAL_ATTN_IDX		32
 
-
 /* Queue Zone sizes in bytes */
 #define TSTORM_QZONE_SIZE	8
 #define MSTORM_QZONE_SIZE	16
@@ -60,9 +60,12 @@
 #define PSTORM_QZONE_SIZE	0
 
 #define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG		7
-#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT	16
-#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE	48
-#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD	112
+#define ETH_MAX_RXQ_VF_DEFAULT 16
+#define ETH_MAX_RXQ_VF_DOUBLE 48
+#define ETH_MAX_RXQ_VF_QUAD 112
+
+#define ETH_RGSRC_CTX_SIZE			6
+#define ETH_TGSRC_CTX_SIZE			6
 
 /********************************/
 /* CORE (LIGHT L2) FW CONSTANTS */
@@ -89,8 +92,8 @@
 #define MAX_NUM_LL2_TX_STATS_COUNTERS  48
 
 #define FW_MAJOR_VERSION	8
-#define FW_MINOR_VERSION	42
-#define FW_REVISION_VERSION	2
+#define FW_MINOR_VERSION	59
+#define FW_REVISION_VERSION	1
 #define FW_ENGINEERING_VERSION	0
 
 /***********************/
@@ -112,6 +115,7 @@
 #define MAX_NUM_VFS	(MAX_NUM_VFS_K2)
 
 #define MAX_NUM_FUNCTIONS_BB	(MAX_NUM_PFS_BB + MAX_NUM_VFS_BB)
+#define MAX_NUM_FUNCTIONS_K2    (MAX_NUM_PFS_K2 + MAX_NUM_VFS_K2)
 
 #define MAX_FUNCTION_NUMBER_BB	(MAX_NUM_PFS + MAX_NUM_VFS_BB)
 #define MAX_FUNCTION_NUMBER_K2  (MAX_NUM_PFS + MAX_NUM_VFS_K2)
@@ -144,7 +148,7 @@
 #define GTT_DWORD_SIZE		BIT(GTT_DWORD_SIZE_BITS)
 
 /* Tools Version */
-#define TOOLS_VERSION	10
+#define TOOLS_VERSION 11
 
 /*****************/
 /* CDU CONSTANTS */
@@ -162,6 +166,7 @@
 #define CDU_CONTEXT_VALIDATION_CFG_USE_REGION			(3)
 #define CDU_CONTEXT_VALIDATION_CFG_USE_CID			(4)
 #define CDU_CONTEXT_VALIDATION_CFG_USE_ACTIVE			(5)
+#define CDU_CONTEXT_VALIDATION_DEFAULT_CFG			(0x3d)
 
 /*****************/
 /* DQ CONSTANTS  */
@@ -302,6 +307,9 @@
 /* PWM address mapping */
 #define DQ_PWM_OFFSET_DPM_BASE		0x0
 #define DQ_PWM_OFFSET_DPM_END		0x27
+#define DQ_PWM_OFFSET_XCM32_24ICID_BASE 0x28
+#define DQ_PWM_OFFSET_UCM32_24ICID_BASE 0x30
+#define DQ_PWM_OFFSET_TCM32_24ICID_BASE 0x38
 #define DQ_PWM_OFFSET_XCM16_BASE	0x40
 #define DQ_PWM_OFFSET_XCM32_BASE	0x44
 #define DQ_PWM_OFFSET_UCM16_BASE	0x48
@@ -325,6 +333,13 @@
 #define DQ_PWM_OFFSET_TCM_LL2_PROD_UPDATE \
 	(DQ_PWM_OFFSET_TCM32_BASE + DQ_TCM_AGG_VAL_SEL_REG9 - 4)
 
+#define DQ_PWM_OFFSET_XCM_RDMA_24B_ICID_SQ_PROD \
+	(DQ_PWM_OFFSET_XCM32_24ICID_BASE + 2)
+#define DQ_PWM_OFFSET_UCM_RDMA_24B_ICID_CQ_CONS_32BIT \
+	(DQ_PWM_OFFSET_UCM32_24ICID_BASE + 4)
+#define DQ_PWM_OFFSET_TCM_ROCE_24B_ICID_RQ_PROD \
+	(DQ_PWM_OFFSET_TCM32_24ICID_BASE + 1)
+
 #define	DQ_REGION_SHIFT			(12)
 
 /* DPM */
@@ -360,6 +375,7 @@
 
 /* Number of global Vport/QCN rate limiters */
 #define MAX_QM_GLOBAL_RLS	256
+#define COMMON_MAX_QM_GLOBAL_RLS MAX_QM_GLOBAL_RLS
 
 /* QM registers data */
 #define QM_LINE_CRD_REG_WIDTH		16
@@ -700,6 +716,13 @@ enum mf_mode {
 	MAX_MF_MODE
 };
 
+/* Per protocol packet duplication enable bit vector. If set, duplicate
+ * offloaded traffic to LL2 debug queueu.
+ */
+struct offload_pkt_dup_enable {
+	__le16 enable_vector;
+};
+
 /* Per-protocol connection types */
 enum protocol_type {
 	PROTOCOLID_TCP_ULP,
@@ -717,6 +740,12 @@ enum protocol_type {
 	MAX_PROTOCOL_TYPE
 };
 
+/* Pstorm packet duplication config */
+struct pstorm_pkt_dup_cfg {
+	struct offload_pkt_dup_enable enable;
+	__le16 reserved[3];
+};
+
 struct regpair {
 	__le32 lo;
 	__le32 hi;
@@ -728,10 +757,24 @@ struct rdma_eqe_destroy_qp {
 	u8 reserved[4];
 };
 
+/* RoCE Suspend Event Data */
+struct rdma_eqe_suspend_qp {
+	__le32 cid;
+	u8 reserved[4];
+};
+
 /* RDMA Event Data Union */
 union rdma_eqe_data {
 	struct regpair async_handle;
 	struct rdma_eqe_destroy_qp rdma_destroy_qp_data;
+	struct rdma_eqe_suspend_qp rdma_suspend_qp_data;
+};
+
+/* Tstorm packet duplication config */
+struct tstorm_pkt_dup_cfg {
+	struct offload_pkt_dup_enable enable;
+	__le16 reserved;
+	__le32 cid;
 };
 
 struct tstorm_queue_zone {
@@ -891,6 +934,15 @@ struct db_legacy_addr {
 #define DB_LEGACY_ADDR_ICID_SHIFT	5
 };
 
+/* Structure for doorbell address, in legacy mode, without DEMS */
+struct db_legacy_wo_dems_addr {
+	__le32 addr;
+#define DB_LEGACY_WO_DEMS_ADDR_RESERVED0_MASK   0x3
+#define DB_LEGACY_WO_DEMS_ADDR_RESERVED0_SHIFT  0
+#define DB_LEGACY_WO_DEMS_ADDR_ICID_MASK        0x3FFFFFFF
+#define DB_LEGACY_WO_DEMS_ADDR_ICID_SHIFT       2
+};
+
 /* Structure for doorbell address, in PWM mode */
 struct db_pwm_addr {
 	__le32 addr;
@@ -906,6 +958,31 @@ struct db_pwm_addr {
 #define DB_PWM_ADDR_RESERVED1_SHIFT	28
 };
 
+/* Parameters to RDMA firmware, passed in EDPM doorbell */
+struct db_rdma_24b_icid_dpm_params {
+	__le32 params;
+#define DB_RDMA_24B_ICID_DPM_PARAMS_SIZE_MASK   0x3F
+#define DB_RDMA_24B_ICID_DPM_PARAMS_SIZE_SHIFT  0
+#define DB_RDMA_24B_ICID_DPM_PARAMS_DPM_TYPE_MASK       0x3
+#define DB_RDMA_24B_ICID_DPM_PARAMS_DPM_TYPE_SHIFT      6
+#define DB_RDMA_24B_ICID_DPM_PARAMS_OPCODE_MASK 0xFF
+#define DB_RDMA_24B_ICID_DPM_PARAMS_OPCODE_SHIFT        8
+#define DB_RDMA_24B_ICID_DPM_PARAMS_ICID_EXT_MASK       0xFF
+#define DB_RDMA_24B_ICID_DPM_PARAMS_ICID_EXT_SHIFT      16
+#define DB_RDMA_24B_ICID_DPM_PARAMS_INV_BYTE_CNT_MASK   0x7
+#define DB_RDMA_24B_ICID_DPM_PARAMS_INV_BYTE_CNT_SHIFT  24
+#define DB_RDMA_24B_ICID_DPM_PARAMS_EXT_ICID_MODE_EN_MASK       0x1
+#define DB_RDMA_24B_ICID_DPM_PARAMS_EXT_ICID_MODE_EN_SHIFT      27
+#define DB_RDMA_24B_ICID_DPM_PARAMS_COMPLETION_FLG_MASK 0x1
+#define DB_RDMA_24B_ICID_DPM_PARAMS_COMPLETION_FLG_SHIFT        28
+#define DB_RDMA_24B_ICID_DPM_PARAMS_S_FLG_MASK  0x1
+#define DB_RDMA_24B_ICID_DPM_PARAMS_S_FLG_SHIFT 29
+#define DB_RDMA_24B_ICID_DPM_PARAMS_RESERVED1_MASK      0x1
+#define DB_RDMA_24B_ICID_DPM_PARAMS_RESERVED1_SHIFT     30
+#define DB_RDMA_24B_ICID_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK     0x1
+#define DB_RDMA_24B_ICID_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT    31
+};
+
 /* Parameters to RDMA firmware, passed in EDPM doorbell */
 struct db_rdma_dpm_params {
 	__le32 params;
@@ -1220,6 +1297,26 @@ struct rdif_task_context {
 	__le32 reserved2;
 };
 
+/* Searcher Table struct */
+struct src_entry_header {
+	__le32 flags;
+#define SRC_ENTRY_HEADER_NEXT_PTR_TYPE_MASK     0x1
+#define SRC_ENTRY_HEADER_NEXT_PTR_TYPE_SHIFT    0
+#define SRC_ENTRY_HEADER_EMPTY_MASK     0x1
+#define SRC_ENTRY_HEADER_EMPTY_SHIFT    1
+#define SRC_ENTRY_HEADER_RESERVED_MASK  0x3FFFFFFF
+#define SRC_ENTRY_HEADER_RESERVED_SHIFT 2
+	__le32 magic_number;
+	struct regpair next_ptr;
+};
+
+/* Enumeration for address type */
+enum src_header_next_ptr_type_enum {
+	e_physical_addr,
+	e_logical_addr,
+	MAX_SRC_HEADER_NEXT_PTR_TYPE_ENUM
+};
+
 /* Status block structure */
 struct status_block {
 	__le16	pi_array[PIS_PER_SB];
-- 
cgit v1.2.3


From fe40a830dcded26f012739fd6dac0da9c805bc38 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Mon, 4 Oct 2021 09:58:44 +0300
Subject: qed: Update qed_hsi.h for fw 8.59.1.0

The qed_hsi.h has been updated to support new FW version 8.59.1.0 with
changes.
 - Updates FW HSI (Hardware Software interface) structures.
 - Addition/update in function declaration and defines as per HSI.
 - Add generic infrastructure for FW error reporting as part of
   common event queue handling.
 - Move malicious VF error reporting to FW error reporting
   infrastructure.
 - Move consolidation queue initialization from FW context to ramrod
   message.

qed_hsi.h header file changes lead to change in many files to ensure
compilation.

This patch also fixes the existing checkpatch warnings and few important
checks.

Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c          |  112 +-
 drivers/net/ethernet/qlogic/qed/qed_hsi.h          | 1556 +++++++++++++++++---
 .../net/ethernet/qlogic/qed/qed_init_fw_funcs.c    |   14 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.c           |    6 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.h           |    1 -
 drivers/net/ethernet/qlogic/qed/qed_sp.h           |    8 +-
 drivers/net/ethernet/qlogic/qed/qed_sp_commands.c  |   10 +-
 drivers/net/ethernet/qlogic/qed/qed_spq.c          |   50 +-
 drivers/net/ethernet/qlogic/qed/qed_sriov.c        |  112 +-
 drivers/net/ethernet/qlogic/qed/qed_sriov.h        |   27 +-
 include/linux/qed/eth_common.h                     |    1 +
 include/linux/qed/rdma_common.h                    |    1 +
 12 files changed, 1590 insertions(+), 308 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 3db1a5512b9b..dad5cd219b0e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1397,12 +1397,13 @@ void qed_resc_free(struct qed_dev *cdev)
 			qed_rdma_info_free(p_hwfn);
 		}
 
+		qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_COMMON);
 		qed_iov_free(p_hwfn);
 		qed_l2_free(p_hwfn);
 		qed_dmae_info_free(p_hwfn);
 		qed_dcbx_info_free(p_hwfn);
 		qed_dbg_user_data_free(p_hwfn);
-		qed_fw_overlay_mem_free(p_hwfn, p_hwfn->fw_overlay_mem);
+		qed_fw_overlay_mem_free(p_hwfn, &p_hwfn->fw_overlay_mem);
 
 		/* Destroy doorbell recovery mechanism */
 		qed_db_recovery_teardown(p_hwfn);
@@ -1484,8 +1485,8 @@ static u16 qed_init_qm_get_num_pf_rls(struct qed_hwfn *p_hwfn)
 	u16 num_pf_rls, num_vfs = qed_init_qm_get_num_vfs(p_hwfn);
 
 	/* num RLs can't exceed resource amount of rls or vports */
-	num_pf_rls = (u16) min_t(u32, RESC_NUM(p_hwfn, QED_RL),
-				 RESC_NUM(p_hwfn, QED_VPORT));
+	num_pf_rls = (u16)min_t(u32, RESC_NUM(p_hwfn, QED_RL),
+				RESC_NUM(p_hwfn, QED_VPORT));
 
 	/* Make sure after we reserve there's something left */
 	if (num_pf_rls < num_vfs + NUM_DEFAULT_RLS)
@@ -1533,8 +1534,8 @@ static void qed_init_qm_params(struct qed_hwfn *p_hwfn)
 	bool four_port;
 
 	/* pq and vport bases for this PF */
-	qm_info->start_pq = (u16) RESC_START(p_hwfn, QED_PQ);
-	qm_info->start_vport = (u8) RESC_START(p_hwfn, QED_VPORT);
+	qm_info->start_pq = (u16)RESC_START(p_hwfn, QED_PQ);
+	qm_info->start_vport = (u8)RESC_START(p_hwfn, QED_VPORT);
 
 	/* rate limiting and weighted fair queueing are always enabled */
 	qm_info->vport_rl_en = true;
@@ -1629,9 +1630,9 @@ static void qed_init_qm_advance_vport(struct qed_hwfn *p_hwfn)
  */
 
 /* flags for pq init */
-#define PQ_INIT_SHARE_VPORT     (1 << 0)
-#define PQ_INIT_PF_RL           (1 << 1)
-#define PQ_INIT_VF_RL           (1 << 2)
+#define PQ_INIT_SHARE_VPORT     BIT(0)
+#define PQ_INIT_PF_RL           BIT(1)
+#define PQ_INIT_VF_RL           BIT(2)
 
 /* defines for pq init */
 #define PQ_INIT_DEFAULT_WRR_GROUP       1
@@ -2291,7 +2292,7 @@ int qed_resc_alloc(struct qed_dev *cdev)
 			goto alloc_no_mem;
 		}
 
-		rc = qed_eq_alloc(p_hwfn, (u16) n_eqes);
+		rc = qed_eq_alloc(p_hwfn, (u16)n_eqes);
 		if (rc)
 			goto alloc_err;
 
@@ -2376,6 +2377,49 @@ alloc_err:
 	return rc;
 }
 
+static int qed_fw_err_handler(struct qed_hwfn *p_hwfn,
+			      u8 opcode,
+			      u16 echo,
+			      union event_ring_data *data, u8 fw_return_code)
+{
+	if (fw_return_code != COMMON_ERR_CODE_ERROR)
+		goto eqe_unexpected;
+
+	if (data->err_data.recovery_scope == ERR_SCOPE_FUNC &&
+	    le16_to_cpu(data->err_data.entity_id) >= MAX_NUM_PFS) {
+		qed_sriov_vfpf_malicious(p_hwfn, &data->err_data);
+		return 0;
+	}
+
+eqe_unexpected:
+	DP_ERR(p_hwfn,
+	       "Skipping unexpected eqe 0x%02x, FW return code 0x%x, echo 0x%x\n",
+	       opcode, fw_return_code, echo);
+	return -EINVAL;
+}
+
+static int qed_common_eqe_event(struct qed_hwfn *p_hwfn,
+				u8 opcode,
+				__le16 echo,
+				union event_ring_data *data,
+				u8 fw_return_code)
+{
+	switch (opcode) {
+	case COMMON_EVENT_VF_PF_CHANNEL:
+	case COMMON_EVENT_VF_FLR:
+		return qed_sriov_eqe_event(p_hwfn, opcode, echo, data,
+					   fw_return_code);
+	case COMMON_EVENT_FW_ERROR:
+		return qed_fw_err_handler(p_hwfn, opcode,
+					  le16_to_cpu(echo), data,
+					  fw_return_code);
+	default:
+		DP_INFO(p_hwfn->cdev, "Unknown eqe event 0x%02x, echo 0x%x\n",
+			opcode, echo);
+		return -EINVAL;
+	}
+}
+
 void qed_resc_setup(struct qed_dev *cdev)
 {
 	int i;
@@ -2404,6 +2448,8 @@ void qed_resc_setup(struct qed_dev *cdev)
 
 		qed_l2_setup(p_hwfn);
 		qed_iov_setup(p_hwfn);
+		qed_spq_register_async_cb(p_hwfn, PROTOCOLID_COMMON,
+					  qed_common_eqe_event);
 #ifdef CONFIG_QED_LL2
 		if (p_hwfn->using_ll2)
 			qed_ll2_setup(p_hwfn);
@@ -2593,7 +2639,7 @@ static void qed_init_cache_line_size(struct qed_hwfn *p_hwfn,
 			cache_line_size);
 	}
 
-	if (L1_CACHE_BYTES > wr_mbs)
+	if (wr_mbs < L1_CACHE_BYTES)
 		DP_INFO(p_hwfn,
 			"The cache line size for padding is suboptimal for performance [OS cache line size 0x%x, wr mbs 0x%x]\n",
 			L1_CACHE_BYTES, wr_mbs);
@@ -2609,13 +2655,21 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 			      struct qed_ptt *p_ptt, int hw_mode)
 {
 	struct qed_qm_info *qm_info = &p_hwfn->qm_info;
-	struct qed_qm_common_rt_init_params params;
+	struct qed_qm_common_rt_init_params *params;
 	struct qed_dev *cdev = p_hwfn->cdev;
 	u8 vf_id, max_num_vfs;
 	u16 num_pfs, pf_id;
 	u32 concrete_fid;
 	int rc = 0;
 
+	params = kzalloc(sizeof(*params), GFP_KERNEL);
+	if (!params) {
+		DP_NOTICE(p_hwfn->cdev,
+			  "Failed to allocate common init params\n");
+
+		return -ENOMEM;
+	}
+
 	qed_init_cau_rt_data(cdev);
 
 	/* Program GTT windows */
@@ -2628,16 +2682,15 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 			qm_info->pf_wfq_en = true;
 	}
 
-	memset(&params, 0, sizeof(params));
-	params.max_ports_per_engine = p_hwfn->cdev->num_ports_in_engine;
-	params.max_phys_tcs_per_port = qm_info->max_phys_tcs_per_port;
-	params.pf_rl_en = qm_info->pf_rl_en;
-	params.pf_wfq_en = qm_info->pf_wfq_en;
-	params.global_rl_en = qm_info->vport_rl_en;
-	params.vport_wfq_en = qm_info->vport_wfq_en;
-	params.port_params = qm_info->qm_port_params;
+	params->max_ports_per_engine = p_hwfn->cdev->num_ports_in_engine;
+	params->max_phys_tcs_per_port = qm_info->max_phys_tcs_per_port;
+	params->pf_rl_en = qm_info->pf_rl_en;
+	params->pf_wfq_en = qm_info->pf_wfq_en;
+	params->global_rl_en = qm_info->vport_rl_en;
+	params->vport_wfq_en = qm_info->vport_wfq_en;
+	params->port_params = qm_info->qm_port_params;
 
-	qed_qm_common_rt_init(p_hwfn, &params);
+	qed_qm_common_rt_init(p_hwfn, params);
 
 	qed_cxt_hw_init_common(p_hwfn);
 
@@ -2645,7 +2698,7 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 
 	rc = qed_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode);
 	if (rc)
-		return rc;
+		goto out;
 
 	qed_wr(p_hwfn, p_ptt, PSWRQ2_REG_L2P_VALIDATE_VFID, 0);
 	qed_wr(p_hwfn, p_ptt, PGLUE_B_REG_USE_CLIENTID_IN_TAG, 1);
@@ -2664,7 +2717,7 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 	max_num_vfs = QED_IS_AH(cdev) ? MAX_NUM_VFS_K2 : MAX_NUM_VFS_BB;
 	for (vf_id = 0; vf_id < max_num_vfs; vf_id++) {
 		concrete_fid = qed_vfid_to_concrete(p_hwfn, vf_id);
-		qed_fid_pretend(p_hwfn, p_ptt, (u16) concrete_fid);
+		qed_fid_pretend(p_hwfn, p_ptt, (u16)concrete_fid);
 		qed_wr(p_hwfn, p_ptt, CCFC_REG_STRONG_ENABLE_VF, 0x1);
 		qed_wr(p_hwfn, p_ptt, CCFC_REG_WEAK_ENABLE_VF, 0x0);
 		qed_wr(p_hwfn, p_ptt, TCFC_REG_STRONG_ENABLE_VF, 0x1);
@@ -2673,6 +2726,9 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn,
 	/* pretend to original PF */
 	qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
 
+out:
+	kfree(params);
+
 	return rc;
 }
 
@@ -2785,7 +2841,7 @@ qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 			qed_rdma_dpm_bar(p_hwfn, p_ptt);
 	}
 
-	p_hwfn->wid_count = (u16) n_cpus;
+	p_hwfn->wid_count = (u16)n_cpus;
 
 	DP_INFO(p_hwfn,
 		"doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s, page_size=%lu\n",
@@ -3504,8 +3560,8 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn)
 static void get_function_id(struct qed_hwfn *p_hwfn)
 {
 	/* ME Register */
-	p_hwfn->hw_info.opaque_fid = (u16) REG_RD(p_hwfn,
-						  PXP_PF_ME_OPAQUE_ADDR);
+	p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn,
+						 PXP_PF_ME_OPAQUE_ADDR);
 
 	p_hwfn->hw_info.concrete_fid = REG_RD(p_hwfn, PXP_PF_ME_CONCRETE_ADDR);
 
@@ -3671,12 +3727,14 @@ u32 qed_get_hsi_def_val(struct qed_dev *cdev, enum qed_hsi_def_type type)
 
 	return qed_hsi_def_val[type][chip_id];
 }
+
 static int
 qed_hw_set_soft_resc_size(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 resc_max_val, mcp_resp;
 	u8 res_id;
 	int rc;
+
 	for (res_id = 0; res_id < QED_MAX_RESC; res_id++) {
 		switch (res_id) {
 		case QED_LL2_RAM_QUEUE:
@@ -3922,7 +3980,7 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	 * resources allocation queries should be atomic. Since several PFs can
 	 * run in parallel - a resource lock is needed.
 	 * If either the resource lock or resource set value commands are not
-	 * supported - skip the the max values setting, release the lock if
+	 * supported - skip the max values setting, release the lock if
 	 * needed, and proceed to the queries. Other failures, including a
 	 * failure to acquire the lock, will cause this function to fail.
 	 */
@@ -4776,7 +4834,7 @@ int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, u16 src_id, u16 *dst_id)
 	if (src_id >= RESC_NUM(p_hwfn, QED_L2_QUEUE)) {
 		u16 min, max;
 
-		min = (u16) RESC_START(p_hwfn, QED_L2_QUEUE);
+		min = (u16)RESC_START(p_hwfn, QED_L2_QUEUE);
 		max = min + RESC_NUM(p_hwfn, QED_L2_QUEUE);
 		DP_NOTICE(p_hwfn,
 			  "l2_queue id [%d] is not valid, available indices [%d - %d]\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 68eaef8ab6e8..f2cedbd9489c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
 /* QLogic qed NIC Driver
  * Copyright (c) 2015-2017  QLogic Corporation
- * Copyright (c) 2019-2020 Marvell International Ltd.
+ * Copyright (c) 2019-2021 Marvell International Ltd.
  */
 
 #ifndef _QED_HSI_H
@@ -38,7 +38,7 @@ enum common_event_opcode {
 	COMMON_EVENT_VF_PF_CHANNEL,
 	COMMON_EVENT_VF_FLR,
 	COMMON_EVENT_PF_UPDATE,
-	COMMON_EVENT_MALICIOUS_VF,
+	COMMON_EVENT_FW_ERROR,
 	COMMON_EVENT_RL_UPDATE,
 	COMMON_EVENT_EMPTY,
 	MAX_COMMON_EVENT_OPCODE
@@ -84,6 +84,13 @@ enum core_l4_pseudo_checksum_mode {
 	MAX_CORE_L4_PSEUDO_CHECKSUM_MODE
 };
 
+/* LL2 SP error code */
+enum core_ll2_error_code {
+	LL2_OK = 0,
+	LL2_ERROR,
+	MAX_CORE_LL2_ERROR_CODE
+};
+
 /* Light-L2 RX Producers in Tstorm RAM */
 struct core_ll2_port_stats {
 	struct regpair gsi_invalid_hdr;
@@ -123,6 +130,15 @@ struct core_ll2_ustorm_per_queue_stat {
 	struct regpair rcv_bcast_pkts;
 };
 
+struct core_ll2_rx_per_queue_stat {
+	struct core_ll2_tstorm_per_queue_stat tstorm_stat;
+	struct core_ll2_ustorm_per_queue_stat ustorm_stat;
+};
+
+struct core_ll2_tx_per_queue_stat {
+	struct core_ll2_pstorm_per_queue_stat pstorm_stat;
+};
+
 /* Structure for doorbell data, in PWM mode, for RX producers update. */
 struct core_pwm_prod_update_data {
 	__le16 icid; /* internal CID */
@@ -135,6 +151,15 @@ struct core_pwm_prod_update_data {
 	struct core_ll2_rx_prod prod; /* Producers */
 };
 
+/* Ramrod data for rx/tx queue statistics query ramrod */
+struct core_queue_stats_query_ramrod_data {
+	u8 rx_stat;
+	u8 tx_stat;
+	__le16 reserved[3];
+	struct regpair rx_stat_addr;
+	struct regpair tx_stat_addr;
+};
+
 /* Core Ramrod Command IDs (light L2) */
 enum core_ramrod_cmd_id {
 	CORE_RAMROD_UNUSED,
@@ -210,7 +235,8 @@ struct core_rx_fast_path_cqe {
 	__le16 vlan;
 	struct core_rx_cqe_opaque_data opaque_data;
 	struct parsing_err_flags err_flags;
-	__le16 reserved0;
+	u8 packet_source;
+	u8 reserved0;
 	__le32 reserved1[3];
 };
 
@@ -226,7 +252,8 @@ struct core_rx_gsi_offload_cqe {
 	__le16 qp_id;
 	__le32 src_qp;
 	struct core_rx_cqe_opaque_data opaque_data;
-	__le32 reserved;
+	u8 packet_source;
+	u8 reserved[3];
 };
 
 /* Core RX CQE for Light L2 */
@@ -245,6 +272,15 @@ union core_rx_cqe_union {
 	struct core_rx_slow_path_cqe rx_cqe_sp;
 };
 
+/* RX packet source. */
+enum core_rx_pkt_source {
+	CORE_RX_PKT_SOURCE_NETWORK = 0,
+	CORE_RX_PKT_SOURCE_LB,
+	CORE_RX_PKT_SOURCE_TX,
+	CORE_RX_PKT_SOURCE_LL2_TX,
+	MAX_CORE_RX_PKT_SOURCE
+};
+
 /* Ramrod data for rx queue start ramrod */
 struct core_rx_start_ramrod_data {
 	struct regpair bd_base;
@@ -362,7 +398,7 @@ struct core_tx_update_ramrod_data {
 	u8 update_qm_pq_id_flg;
 	u8 reserved0;
 	__le16 qm_pq_id;
-	__le32 reserved1;
+	__le32 reserved1[1];
 };
 
 /* Enum flag for what type of dcb data to update */
@@ -386,12 +422,10 @@ struct pstorm_core_conn_st_ctx {
 
 /* Core Slowpath Connection storm context of Xstorm */
 struct xstorm_core_conn_st_ctx {
-	__le32 spq_base_lo;
-	__le32 spq_base_hi;
-	struct regpair consolid_base_addr;
+	struct regpair spq_base_addr;
+	__le32 reserved0[2];
 	__le16 spq_cons;
-	__le16 consolid_cons;
-	__le32 reserved0[55];
+	__le16 reserved1[111];
 };
 
 struct xstorm_core_conn_ag_ctx {
@@ -930,12 +964,12 @@ struct eth_rx_rate_limit {
 
 /* Update RSS indirection table entry command */
 struct eth_tstorm_rss_update_data {
-	u8 valid;
 	u8 vport_id;
 	u8 ind_table_index;
-	u8 reserved;
 	__le16 ind_table_value;
 	__le16 reserved1;
+	u8 reserved;
+	u8 valid;
 };
 
 struct eth_ustorm_per_pf_stat {
@@ -967,19 +1001,20 @@ struct vf_pf_channel_eqe_data {
 	struct regpair msg_addr;
 };
 
-/* Event Ring malicious VF data */
-struct malicious_vf_eqe_data {
-	u8 vf_id;
-	u8 err_id;
-	__le16 reserved[3];
-};
-
 /* Event Ring initial cleanup data */
 struct initial_cleanup_eqe_data {
 	u8 vf_id;
 	u8 reserved[7];
 };
 
+/* FW error data */
+struct fw_err_data {
+	u8 recovery_scope;
+	u8 err_id;
+	__le16 entity_id;
+	u8 reserved[4];
+};
+
 /* Event Data Union */
 union event_ring_data {
 	u8 bytes[8];
@@ -987,8 +1022,8 @@ union event_ring_data {
 	struct iscsi_eqe_data iscsi_info;
 	struct iscsi_connect_done_results iscsi_conn_done_info;
 	union rdma_eqe_data rdma_data;
-	struct malicious_vf_eqe_data malicious_vf;
 	struct initial_cleanup_eqe_data vf_init_cleanup;
+	struct fw_err_data err_data;
 };
 
 /* Event Ring Entry */
@@ -1042,6 +1077,15 @@ struct hsi_fp_ver_struct {
 	u8 major_ver_arr[2];
 };
 
+/* Integration Phase */
+enum integ_phase {
+	INTEG_PHASE_BB_A0_LATEST = 3,
+	INTEG_PHASE_BB_B0_NO_MCP = 10,
+	INTEG_PHASE_BB_B0_WITH_MCP = 11,
+	MAX_INTEG_PHASE
+};
+
+/* Ports mode */
 enum iwarp_ll2_tx_queues {
 	IWARP_LL2_IN_ORDER_TX_QUEUE = 1,
 	IWARP_LL2_ALIGNED_TX_QUEUE,
@@ -1050,9 +1094,9 @@ enum iwarp_ll2_tx_queues {
 	MAX_IWARP_LL2_TX_QUEUES
 };
 
-/* Malicious VF error ID */
-enum malicious_vf_error_id {
-	MALICIOUS_VF_NO_ERROR,
+/* Function error ID */
+enum func_err_id {
+	FUNC_NO_ERROR,
 	VF_PF_CHANNEL_NOT_READY,
 	VF_ZONE_MSG_NOT_VALID,
 	VF_ZONE_FUNC_NOT_ENABLED,
@@ -1087,7 +1131,27 @@ enum malicious_vf_error_id {
 	CORE_PACKET_SIZE_TOO_LARGE,
 	CORE_ILLEGAL_BD_FLAGS,
 	CORE_GSI_PACKET_VIOLATION,
-	MAX_MALICIOUS_VF_ERROR_ID,
+	MAX_FUNC_ERR_ID
+};
+
+/* FW error handling mode */
+enum fw_err_mode {
+	FW_ERR_FATAL_ASSERT,
+	FW_ERR_DRV_REPORT,
+	MAX_FW_ERR_MODE
+};
+
+/* FW error recovery scope */
+enum fw_err_recovery_scope {
+	ERR_SCOPE_INVALID,
+	ERR_SCOPE_TX_Q,
+	ERR_SCOPE_RX_Q,
+	ERR_SCOPE_QP,
+	ERR_SCOPE_VPORT,
+	ERR_SCOPE_FUNC,
+	ERR_SCOPE_PORT,
+	ERR_SCOPE_ENGINE,
+	MAX_FW_ERR_RECOVERY_SCOPE
 };
 
 /* Mstorm non-triggering VF zone */
@@ -1148,7 +1212,7 @@ struct pf_start_tunnel_config {
 /* Ramrod data for PF start ramrod */
 struct pf_start_ramrod_data {
 	struct regpair event_ring_pbl_addr;
-	struct regpair consolid_q_pbl_addr;
+	struct regpair consolid_q_pbl_base_addr;
 	struct pf_start_tunnel_config tunnel_config;
 	__le16 event_ring_sb_id;
 	u8 base_vf_id;
@@ -1166,6 +1230,9 @@ struct pf_start_ramrod_data {
 	u8 reserved0;
 	struct hsi_fp_ver_struct hsi_fp_ver;
 	struct outer_tag_config_struct outer_tag_config;
+	u8 pf_fp_err_mode;
+	u8 consolid_q_num_pages;
+	u8 reserved[6];
 };
 
 /* Data for port update ramrod */
@@ -1230,6 +1297,13 @@ enum ports_mode {
 	MAX_PORTS_MODE
 };
 
+/* Protocol-common error code */
+enum protocol_common_error_code {
+	COMMON_ERR_CODE_OK = 0,
+	COMMON_ERR_CODE_ERROR,
+	MAX_PROTOCOL_COMMON_ERROR_CODE
+};
+
 /* use to index in hsi_fp_[major|minor]_ver_arr per protocol */
 enum protocol_version_array_key {
 	ETH_VER_KEY = 0,
@@ -1704,6 +1778,7 @@ struct igu_msix_vector {
 #define IGU_MSIX_VECTOR_RESERVED1_MASK		0xFF
 #define IGU_MSIX_VECTOR_RESERVED1_SHIFT		24
 };
+
 /* per encapsulation type enabling flags */
 struct prs_reg_encapsulation_type_en {
 	u8 flags;
@@ -1881,6 +1956,9 @@ struct init_nig_pri_tc_map_req {
 
 /* QM per global RL init parameters */
 struct init_qm_global_rl_params {
+	u8 type;
+	u8 reserved0;
+	u16 reserved1;
 	u32 rate_limit;
 };
 
@@ -1895,18 +1973,33 @@ struct init_qm_port_params {
 
 /* QM per-PQ init parameters */
 struct init_qm_pq_params {
-	u8 vport_id;
+	u16 vport_id;
+	u16 rl_id;
+	u8 rl_valid;
 	u8 tc_id;
 	u8 wrr_group;
-	u8 rl_valid;
-	u16 rl_id;
 	u8 port_id;
-	u8 reserved;
+};
+
+/* QM per RL init parameters */
+struct init_qm_rl_params {
+	u32 vport_rl;
+	u8 vport_rl_type;
+	u8 reserved[3];
+};
+
+/* QM Rate Limiter types */
+enum init_qm_rl_type {
+	QM_RL_TYPE_NORMAL,
+	QM_RL_TYPE_QCN,
+	MAX_INIT_QM_RL_TYPE
 };
 
 /* QM per-vport init parameters */
 struct init_qm_vport_params {
 	u16 wfq;
+	u16 reserved;
+	u16 tc_wfq[NUM_OF_TCS];
 	u16 first_tx_pq_id[NUM_OF_TCS];
 };
 
@@ -1965,14 +2058,14 @@ struct fw_info_location {
 };
 
 enum init_modes {
-	MODE_RESERVED,
+	MODE_BB_A0_DEPRECATED,
 	MODE_BB,
 	MODE_K2,
 	MODE_ASIC,
-	MODE_RESERVED2,
-	MODE_RESERVED3,
-	MODE_RESERVED4,
-	MODE_RESERVED5,
+	MODE_EMUL_REDUCED,
+	MODE_EMUL_FULL,
+	MODE_FPGA,
+	MODE_CHIPSIM,
 	MODE_SF,
 	MODE_MF_SD,
 	MODE_MF_SI,
@@ -1980,8 +2073,8 @@ enum init_modes {
 	MODE_PORTS_PER_ENG_2,
 	MODE_PORTS_PER_ENG_4,
 	MODE_100G,
-	MODE_RESERVED6,
-	MODE_RESERVED7,
+	MODE_SKIP_PRAM_INIT,
+	MODE_EMUL_MAC,
 	MAX_INIT_MODES
 };
 
@@ -2282,6 +2375,15 @@ struct iro {
 /* Win 13 */
 #define GTT_BAR0_MAP_REG_PSDM_RAM	0x01a000UL
 
+/* Returns the VOQ based on port and TC */
+#define VOQ(port, tc, max_phys_tcs_per_port)   ((tc) ==                       \
+						PURE_LB_TC ? NUM_OF_PHYS_TCS *\
+						MAX_NUM_PORTS_BB +            \
+						(port) : (port) *             \
+						(max_phys_tcs_per_port) + (tc))
+
+struct init_qm_pq_params;
+
 /**
  * qed_qm_pf_mem_size(): Prepare QM ILT sizes.
  *
@@ -2308,8 +2410,19 @@ struct qed_qm_common_rt_init_params {
 	bool global_rl_en;
 	bool vport_wfq_en;
 	struct init_qm_port_params *port_params;
+	struct init_qm_global_rl_params
+	global_rl_params[COMMON_MAX_QM_GLOBAL_RLS];
 };
 
+/**
+ * qed_qm_common_rt_init(): Prepare QM runtime init values for the
+ *                          engine phase.
+ *
+ * @p_hwfn: HW device data.
+ * @p_params: Parameters.
+ *
+ * Return: 0 on success, -1 on error.
+ */
 int qed_qm_common_rt_init(struct qed_hwfn *p_hwfn,
 			  struct qed_qm_common_rt_init_params *p_params);
 
@@ -2326,15 +2439,28 @@ struct qed_qm_pf_rt_init_params {
 	u16 num_vf_pqs;
 	u16 start_vport;
 	u16 num_vports;
+	u16 start_rl;
+	u16 num_rls;
 	u16 pf_wfq;
 	u32 pf_rl;
+	u32 link_speed;
 	struct init_qm_pq_params *pq_params;
 	struct init_qm_vport_params *vport_params;
+	struct init_qm_rl_params *rl_params;
 };
 
+/**
+ * qed_qm_pf_rt_init(): Prepare QM runtime init values for the PF phase.
+ *
+ * @p_hwfn:  HW device data.
+ * @p_ptt: Ptt window used for writing the registers
+ * @p_params: Parameters.
+ *
+ * Return: 0 on success, -1 on error.
+ */
 int qed_qm_pf_rt_init(struct qed_hwfn *p_hwfn,
-	struct qed_ptt *p_ptt,
-	struct qed_qm_pf_rt_init_params *p_params);
+		      struct qed_ptt *p_ptt,
+		      struct qed_qm_pf_rt_init_params *p_params);
 
 /**
  * qed_init_pf_wfq(): Initializes the WFQ weight of the specified PF.
@@ -2378,6 +2504,22 @@ int qed_init_vport_wfq(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
 		       u16 first_tx_pq_id[NUM_OF_TCS], u16 wfq);
 
+/**
+ * qed_init_vport_tc_wfq(): Initializes the WFQ weight of the specified
+ *                          VPORT and TC.
+ *
+ * @p_hwfn: HW device data.
+ * @p_ptt: Ptt window used for writing the registers.
+ * @first_tx_pq_id: The first Tx PQ ID associated with the VPORT and TC.
+ *                  (filled by qed_qm_pf_rt_init).
+ * @weight: VPORT+TC WFQ weight.
+ *
+ * Return: 0 on success, -1 on error.
+ */
+int qed_init_vport_tc_wfq(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  u16 first_tx_pq_id, u16 weight);
+
 /**
  * qed_init_global_rl():  Initializes the rate limit of the specified
  * rate limiter.
@@ -2386,12 +2528,14 @@ int qed_init_vport_wfq(struct qed_hwfn *p_hwfn,
  * @p_ptt: Ptt window used for writing the registers.
  * @rl_id: RL ID.
  * @rate_limit: Rate limit in Mb/sec units
+ * @vport_rl_type: Vport RL type.
  *
  * Return: 0 on success, -1 on error.
  */
 int qed_init_global_rl(struct qed_hwfn *p_hwfn,
 		       struct qed_ptt *p_ptt,
-		       u16 rl_id, u32 rate_limit);
+		       u16 rl_id, u32 rate_limit,
+		       enum init_qm_rl_type vport_rl_type);
 
 /**
  * qed_send_qm_stop_cmd(): Sends a stop command to the QM.
@@ -2627,7 +2771,19 @@ void qed_fw_overlay_init_ram(struct qed_hwfn *p_hwfn,
  * Return: Void.
  */
 void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn,
-			     struct phys_mem_desc *fw_overlay_mem);
+			     struct phys_mem_desc **fw_overlay_mem);
+
+#define PCICFG_OFFSET					0x2000
+#define GRC_CONFIG_REG_PF_INIT_VF			0x624
+
+/* First VF_NUM for PF is encoded in this register.
+ * The number of VFs assigned to a PF is assumed to be a multiple of 8.
+ * Software should program these bits based on Total Number of VFs programmed
+ * for each PF.
+ * Since registers from 0x000-0x7ff are spilt across functions, each PF will
+ * have the same location for the same 4 bits
+ */
+#define GRC_CR_PF_INIT_VF_PF_FIRST_VF_NUM_MASK		0xff
 
 /* Runtime array offsets */
 #define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET				0
@@ -2958,116 +3114,118 @@ void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn,
 #define QM_REG_TXPQMAP_RT_SIZE						512
 #define QM_REG_WFQVPWEIGHT_RT_OFFSET					31556
 #define QM_REG_WFQVPWEIGHT_RT_SIZE					512
-#define QM_REG_WFQVPCRD_RT_OFFSET					32068
+#define QM_REG_WFQVPUPPERBOUND_RT_OFFSET				32068
+#define QM_REG_WFQVPUPPERBOUND_RT_SIZE					512
+#define QM_REG_WFQVPCRD_RT_OFFSET					32580
 #define QM_REG_WFQVPCRD_RT_SIZE						512
-#define QM_REG_WFQVPMAP_RT_OFFSET					32580
+#define QM_REG_WFQVPMAP_RT_OFFSET					33092
 #define QM_REG_WFQVPMAP_RT_SIZE						512
-#define QM_REG_PTRTBLTX_RT_OFFSET					33092
+#define QM_REG_PTRTBLTX_RT_OFFSET					33604
 #define QM_REG_PTRTBLTX_RT_SIZE						1024
-#define QM_REG_WFQPFCRD_MSB_RT_OFFSET					34116
+#define QM_REG_WFQPFCRD_MSB_RT_OFFSET					34628
 #define QM_REG_WFQPFCRD_MSB_RT_SIZE					160
-#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET				34276
-#define NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET				34277
-#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET				34278
-#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET				34279
-#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET				34280
-#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET				34281
-#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET			34282
-#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET				34283
+#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET				34788
+#define NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET				34789
+#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET				34790
+#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET				34791
+#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET				34792
+#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET				34793
+#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET			34794
+#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET				34795
 #define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE					4
-#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET				34287
+#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET				34799
 #define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE				4
-#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET				34291
+#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET				34803
 #define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE				32
-#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET				34323
+#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET				34835
 #define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE				16
-#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET				34339
+#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET				34851
 #define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE				16
-#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET			34355
+#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET			34867
 #define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE			16
-#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET			34371
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET			34883
 #define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE				16
-#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET					34387
-#define NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET				34388
+#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET					34899
+#define NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET				34900
 #define NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE				8
-#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET				34396
-#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET				34397
-#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET				34398
-#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET				34399
-#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET				34400
-#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET				34401
-#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET				34402
-#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET			34403
-#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET			34404
-#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET			34405
-#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET			34406
-#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET				34407
-#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET				34408
-#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET				34409
-#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET				34410
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET			34411
-#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET				34412
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET			34413
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET			34414
-#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET				34415
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET			34416
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET			34417
-#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET				34418
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET			34419
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET			34420
-#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET				34421
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET			34422
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET			34423
-#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET				34424
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET			34425
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET			34426
-#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET				34427
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET			34428
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET			34429
-#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET				34430
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET			34431
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET			34432
-#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET				34433
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET			34434
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET			34435
-#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET				34436
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET			34437
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET			34438
-#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET				34439
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET			34440
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET			34441
-#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET				34442
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET			34443
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET			34444
-#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET				34445
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET			34446
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET			34447
-#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET				34448
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET			34449
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET			34450
-#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET				34451
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET			34452
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET			34453
-#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET				34454
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET			34455
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET			34456
-#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET				34457
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET			34458
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET			34459
-#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET				34460
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET			34461
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET			34462
-#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET				34463
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET			34464
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET			34465
-#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET				34466
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET			34467
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET			34468
-#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET				34469
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET			34470
-#define XCM_REG_CON_PHY_Q3_RT_OFFSET					34471
-
-#define RUNTIME_ARRAY_SIZE 34472
+#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET				34908
+#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET				34909
+#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET				34910
+#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET				34911
+#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET				34912
+#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET				34913
+#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET				34914
+#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET			34915
+#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET			34916
+#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET			34917
+#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET			34918
+#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET				34919
+#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET				34920
+#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET				34921
+#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET				34922
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET			34923
+#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET				34924
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET			34925
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET			34926
+#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET				34927
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET			34928
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET			34929
+#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET				34930
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET			34931
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET			34932
+#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET				34933
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET			34934
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET			34935
+#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET				34936
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET			34937
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET			34938
+#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET				34939
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET			34940
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET			34941
+#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET				34942
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET			34943
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET			34944
+#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET				34945
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET			34946
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET			34947
+#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET				34948
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET			34949
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET			34950
+#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET				34951
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET			34952
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET			34953
+#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET				34954
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET			34955
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET			34956
+#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET				34957
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET			34958
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET			34959
+#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET				34960
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET			34961
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET			34962
+#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET				34963
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET			34964
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET			34965
+#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET				34966
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET			34967
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET			34968
+#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET				34969
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET			34970
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET			34971
+#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET				34972
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET			34973
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET			34974
+#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET				34975
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET			34976
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET			34977
+#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET				34978
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET			34979
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET			34980
+#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET				34981
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET			34982
+#define XCM_REG_CON_PHY_Q3_RT_OFFSET					34983
+
+#define RUNTIME_ARRAY_SIZE						34984
 
 /* Init Callbacks */
 #define DMAE_READY_CB	0
@@ -3749,7 +3907,7 @@ enum eth_ramrod_cmd_id {
 	ETH_RAMROD_RX_ADD_UDP_FILTER,
 	ETH_RAMROD_RX_DELETE_UDP_FILTER,
 	ETH_RAMROD_RX_CREATE_GFT_ACTION,
-	ETH_RAMROD_GFT_UPDATE_FILTER,
+	ETH_RAMROD_RX_UPDATE_GFT_FILTER,
 	ETH_RAMROD_TX_QUEUE_UPDATE,
 	ETH_RAMROD_RGFS_FILTER_ADD,
 	ETH_RAMROD_RGFS_FILTER_DEL,
@@ -3833,10 +3991,12 @@ struct eth_vport_rss_config {
 	u8 update_rss_ind_table;
 	u8 update_rss_capabilities;
 	u8 tbl_size;
-	__le32 reserved2[2];
+	u8 ind_table_mask_valid;
+	u8 reserved2[3];
 	__le16 indirection_table[ETH_RSS_IND_TABLE_ENTRIES_NUM];
+	__le32 ind_table_mask[ETH_RSS_IND_TABLE_MASK_SIZE_REGS];
 	__le32 rss_key[ETH_RSS_KEY_SIZE_REGS];
-	__le32 reserved3[2];
+	__le32 reserved3;
 };
 
 /* eth vport RSS mode */
@@ -3911,8 +4071,20 @@ enum gft_filter_update_action {
 	MAX_GFT_FILTER_UPDATE_ACTION
 };
 
+/* Ramrod data for rx create gft action */
+struct rx_create_gft_action_ramrod_data {
+	u8 vport_id;
+	u8 reserved[7];
+};
+
+/* Ramrod data for rx create openflow action */
+struct rx_create_openflow_action_ramrod_data {
+	u8 vport_id;
+	u8 reserved[7];
+};
+
 /* Ramrod data for rx add openflow filter */
-struct rx_add_openflow_filter_data {
+struct rx_openflow_filter_ramrod_data {
 	__le16 action_icid;
 	u8 priority;
 	u8 reserved0;
@@ -3935,18 +4107,6 @@ struct rx_add_openflow_filter_data {
 	__le16 l4_src_port;
 };
 
-/* Ramrod data for rx create gft action */
-struct rx_create_gft_action_data {
-	u8 vport_id;
-	u8 reserved[7];
-};
-
-/* Ramrod data for rx create openflow action */
-struct rx_create_openflow_action_data {
-	u8 vport_id;
-	u8 reserved[7];
-};
-
 /* Ramrod data for rx queue start ramrod */
 struct rx_queue_start_ramrod_data {
 	__le16 rx_queue_id;
@@ -4005,7 +4165,7 @@ struct rx_queue_update_ramrod_data {
 };
 
 /* Ramrod data for rx Add UDP Filter */
-struct rx_udp_filter_data {
+struct rx_udp_filter_ramrod_data {
 	__le16 action_icid;
 	__le16 vlan_id;
 	u8 ip_type;
@@ -4021,7 +4181,7 @@ struct rx_udp_filter_data {
 /* Add or delete GFT filter - filter is packet header of type of packet wished
  * to pass certain FW flow.
  */
-struct rx_update_gft_filter_data {
+struct rx_update_gft_filter_ramrod_data {
 	struct regpair pkt_hdr_addr;
 	__le16 pkt_hdr_length;
 	__le16 action_icid;
@@ -4061,7 +4221,8 @@ struct tx_queue_start_ramrod_data {
 	u8 pxp_tph_valid_bd;
 	u8 pxp_tph_valid_pkt;
 	__le16 pxp_st_index;
-	__le16 comp_agg_size;
+	u8 comp_agg_size;
+	u8 reserved3;
 	__le16 queue_zone_id;
 	__le16 reserved2;
 	__le16 pbl_size;
@@ -4182,7 +4343,12 @@ struct vport_update_ramrod_data_cmn {
 	u8 ctl_frame_ethtype_check_en;
 	u8 update_in_to_in_pri_map_mode;
 	u8 in_to_in_pri_map[8];
-	u8 reserved[6];
+	u8 update_tx_dst_port_mode_flg;
+	u8 tx_dst_port_mode_config;
+	u8 dst_vport_id;
+	u8 tx_dst_port_mode;
+	u8 dst_vport_id_valid;
+	u8 reserved[1];
 };
 
 struct vport_update_ramrod_mcast {
@@ -4716,7 +4882,6 @@ struct gft_cam_line_mapped {
 #define GFT_CAM_LINE_MAPPED_RESERVED1_SHIFT			29
 };
 
-
 /* Used in gft_profile_key: Indication for ip version */
 enum gft_profile_ip_version {
 	GFT_PROFILE_IPV4 = 0,
@@ -5077,6 +5242,843 @@ struct rdma_task_context {
 	struct ustorm_rdma_task_ag_ctx ustorm_ag_context;
 };
 
+#define TOE_MAX_RAMROD_PER_PF			8
+#define TOE_TX_PAGE_SIZE_BYTES			4096
+#define TOE_GRQ_PAGE_SIZE_BYTES			4096
+#define TOE_RX_CQ_PAGE_SIZE_BYTES		4096
+
+#define TOE_RX_MAX_RSS_CHAINS			64
+#define TOE_TX_MAX_TSS_CHAINS			64
+#define TOE_RSS_INDIRECTION_TABLE_SIZE		128
+
+/* The toe storm context of Mstorm */
+struct mstorm_toe_conn_st_ctx {
+	__le32 reserved[24];
+};
+
+/* The toe storm context of Pstorm */
+struct pstorm_toe_conn_st_ctx {
+	__le32 reserved[36];
+};
+
+/* The toe storm context of Ystorm */
+struct ystorm_toe_conn_st_ctx {
+	__le32 reserved[8];
+};
+
+/* The toe storm context of Xstorm */
+struct xstorm_toe_conn_st_ctx {
+	__le32 reserved[44];
+};
+
+struct ystorm_toe_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define YSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define YSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
+#define YSTORM_TOE_CONN_AG_CTX_BIT1_MASK			0x1
+#define YSTORM_TOE_CONN_AG_CTX_BIT1_SHIFT			1
+#define YSTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_MASK		0x3
+#define YSTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_SHIFT		2
+#define YSTORM_TOE_CONN_AG_CTX_RESET_RECEIVED_CF_MASK		0x3
+#define YSTORM_TOE_CONN_AG_CTX_RESET_RECEIVED_CF_SHIFT		4
+#define YSTORM_TOE_CONN_AG_CTX_CF2_MASK				0x3
+#define YSTORM_TOE_CONN_AG_CTX_CF2_SHIFT			6
+	u8 flags1;
+#define YSTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_EN_MASK		0x1
+#define YSTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_EN_SHIFT		0
+#define YSTORM_TOE_CONN_AG_CTX_RESET_RECEIVED_CF_EN_MASK	0x1
+#define YSTORM_TOE_CONN_AG_CTX_RESET_RECEIVED_CF_EN_SHIFT	1
+#define YSTORM_TOE_CONN_AG_CTX_CF2EN_MASK			0x1
+#define YSTORM_TOE_CONN_AG_CTX_CF2EN_SHIFT			2
+#define YSTORM_TOE_CONN_AG_CTX_REL_SEQ_EN_MASK			0x1
+#define YSTORM_TOE_CONN_AG_CTX_REL_SEQ_EN_SHIFT			3
+#define YSTORM_TOE_CONN_AG_CTX_RULE1EN_MASK			0x1
+#define YSTORM_TOE_CONN_AG_CTX_RULE1EN_SHIFT			4
+#define YSTORM_TOE_CONN_AG_CTX_RULE2EN_MASK			0x1
+#define YSTORM_TOE_CONN_AG_CTX_RULE2EN_SHIFT			5
+#define YSTORM_TOE_CONN_AG_CTX_RULE3EN_MASK			0x1
+#define YSTORM_TOE_CONN_AG_CTX_RULE3EN_SHIFT			6
+#define YSTORM_TOE_CONN_AG_CTX_CONS_PROD_EN_MASK		0x1
+#define YSTORM_TOE_CONN_AG_CTX_CONS_PROD_EN_SHIFT		7
+	u8 completion_opcode;
+	u8 byte3;
+	__le16 word0;
+	__le32 rel_seq;
+	__le32 rel_seq_threshold;
+	__le16 app_prod;
+	__le16 app_cons;
+	__le16 word3;
+	__le16 word4;
+	__le32 reg2;
+	__le32 reg3;
+};
+
+struct xstorm_toe_conn_ag_ctx {
+	u8 reserved0;
+	u8 state;
+	u8 flags0;
+#define XSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
+#define XSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM1_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM1_SHIFT		1
+#define XSTORM_TOE_CONN_AG_CTX_RESERVED1_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RESERVED1_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM3_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM3_SHIFT		3
+#define XSTORM_TOE_CONN_AG_CTX_TX_DEC_RULE_RES_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_TX_DEC_RULE_RES_SHIFT		4
+#define XSTORM_TOE_CONN_AG_CTX_RESERVED2_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RESERVED2_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_BIT6_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT6_SHIFT			6
+#define XSTORM_TOE_CONN_AG_CTX_BIT7_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT7_SHIFT			7
+	u8 flags1;
+#define XSTORM_TOE_CONN_AG_CTX_BIT8_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT8_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_BIT9_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT9_SHIFT			1
+#define XSTORM_TOE_CONN_AG_CTX_BIT10_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT10_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_BIT11_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT11_SHIFT			3
+#define XSTORM_TOE_CONN_AG_CTX_BIT12_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT12_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_BIT13_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT13_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_BIT14_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT14_SHIFT			6
+#define XSTORM_TOE_CONN_AG_CTX_BIT15_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT15_SHIFT			7
+	u8 flags2;
+#define XSTORM_TOE_CONN_AG_CTX_CF0_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF0_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_CF1_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF1_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_CF2_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF2_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define XSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT		6
+	u8 flags3;
+#define XSTORM_TOE_CONN_AG_CTX_CF4_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF4_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_CF5_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF5_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_CF6_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF6_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_CF7_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF7_SHIFT			6
+	u8 flags4;
+#define XSTORM_TOE_CONN_AG_CTX_CF8_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF8_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_CF9_MASK				0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF9_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_CF10_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF10_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_CF11_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF11_SHIFT			6
+	u8 flags5;
+#define XSTORM_TOE_CONN_AG_CTX_CF12_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF12_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_CF13_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF13_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_CF14_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF14_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_CF15_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF15_SHIFT			6
+	u8 flags6;
+#define XSTORM_TOE_CONN_AG_CTX_CF16_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF16_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_CF17_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF17_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_CF18_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF18_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_DQ_FLUSH_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_DQ_FLUSH_SHIFT			6
+	u8 flags7;
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q1_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q1_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_SLOW_PATH_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_SLOW_PATH_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_CF0EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF0EN_SHIFT			6
+#define XSTORM_TOE_CONN_AG_CTX_CF1EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF1EN_SHIFT			7
+	u8 flags8;
+#define XSTORM_TOE_CONN_AG_CTX_CF2EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF2EN_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT		1
+#define XSTORM_TOE_CONN_AG_CTX_CF4EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF4EN_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_CF5EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF5EN_SHIFT			3
+#define XSTORM_TOE_CONN_AG_CTX_CF6EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF6EN_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_CF7EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF7EN_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_CF8EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF8EN_SHIFT			6
+#define XSTORM_TOE_CONN_AG_CTX_CF9EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF9EN_SHIFT			7
+	u8 flags9;
+#define XSTORM_TOE_CONN_AG_CTX_CF10EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF10EN_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_CF11EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF11EN_SHIFT			1
+#define XSTORM_TOE_CONN_AG_CTX_CF12EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF12EN_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_CF13EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF13EN_SHIFT			3
+#define XSTORM_TOE_CONN_AG_CTX_CF14EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF14EN_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_CF15EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF15EN_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_CF16EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF16EN_SHIFT			6
+#define XSTORM_TOE_CONN_AG_CTX_CF17EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF17EN_SHIFT			7
+	u8 flags10;
+#define XSTORM_TOE_CONN_AG_CTX_CF18EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF18EN_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_DQ_FLUSH_EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_DQ_FLUSH_EN_SHIFT		1
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		2
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q1_EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_FLUSH_Q1_EN_SHIFT		3
+#define XSTORM_TOE_CONN_AG_CTX_SLOW_PATH_EN_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_SLOW_PATH_EN_SHIFT		4
+#define XSTORM_TOE_CONN_AG_CTX_CF23EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_CF23EN_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE0EN_SHIFT			6
+#define XSTORM_TOE_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_MASK	0x1
+#define XSTORM_TOE_CONN_AG_CTX_MORE_TO_SEND_RULE_EN_SHIFT	7
+	u8 flags11;
+#define XSTORM_TOE_CONN_AG_CTX_TX_BLOCKED_EN_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_TX_BLOCKED_EN_SHIFT		0
+#define XSTORM_TOE_CONN_AG_CTX_RULE3EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE3EN_SHIFT			1
+#define XSTORM_TOE_CONN_AG_CTX_RESERVED3_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RESERVED3_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_RULE5EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE5EN_SHIFT			3
+#define XSTORM_TOE_CONN_AG_CTX_RULE6EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE6EN_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_RULE7EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE7EN_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED1_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED1_SHIFT		6
+#define XSTORM_TOE_CONN_AG_CTX_RULE9EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE9EN_SHIFT			7
+	u8 flags12;
+#define XSTORM_TOE_CONN_AG_CTX_RULE10EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE10EN_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_RULE11EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE11EN_SHIFT			1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED2_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED2_SHIFT		2
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED3_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED3_SHIFT		3
+#define XSTORM_TOE_CONN_AG_CTX_RULE14EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE14EN_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_RULE15EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE15EN_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_RULE16EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE16EN_SHIFT			6
+#define XSTORM_TOE_CONN_AG_CTX_RULE17EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE17EN_SHIFT			7
+	u8 flags13;
+#define XSTORM_TOE_CONN_AG_CTX_RULE18EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE18EN_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_RULE19EN_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_RULE19EN_SHIFT			1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED4_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED4_SHIFT		2
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED5_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED5_SHIFT		3
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED6_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED6_SHIFT		4
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED7_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED7_SHIFT		5
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED8_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED8_SHIFT		6
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED9_MASK		0x1
+#define XSTORM_TOE_CONN_AG_CTX_A0_RESERVED9_SHIFT		7
+	u8 flags14;
+#define XSTORM_TOE_CONN_AG_CTX_BIT16_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT16_SHIFT			0
+#define XSTORM_TOE_CONN_AG_CTX_BIT17_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT17_SHIFT			1
+#define XSTORM_TOE_CONN_AG_CTX_BIT18_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT18_SHIFT			2
+#define XSTORM_TOE_CONN_AG_CTX_BIT19_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT19_SHIFT			3
+#define XSTORM_TOE_CONN_AG_CTX_BIT20_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT20_SHIFT			4
+#define XSTORM_TOE_CONN_AG_CTX_BIT21_MASK			0x1
+#define XSTORM_TOE_CONN_AG_CTX_BIT21_SHIFT			5
+#define XSTORM_TOE_CONN_AG_CTX_CF23_MASK			0x3
+#define XSTORM_TOE_CONN_AG_CTX_CF23_SHIFT			6
+	u8 byte2;
+	__le16 physical_q0;
+	__le16 physical_q1;
+	__le16 word2;
+	__le16 word3;
+	__le16 bd_prod;
+	__le16 word5;
+	__le16 word6;
+	u8 byte3;
+	u8 byte4;
+	u8 byte5;
+	u8 byte6;
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 more_to_send_seq;
+	__le32 local_adv_wnd_seq;
+	__le32 reg5;
+	__le32 reg6;
+	__le16 word7;
+	__le16 word8;
+	__le16 word9;
+	__le16 word10;
+	__le32 reg7;
+	__le32 reg8;
+	__le32 reg9;
+	u8 byte7;
+	u8 byte8;
+	u8 byte9;
+	u8 byte10;
+	u8 byte11;
+	u8 byte12;
+	u8 byte13;
+	u8 byte14;
+	u8 byte15;
+	u8 e5_reserved;
+	__le16 word11;
+	__le32 reg10;
+	__le32 reg11;
+	__le32 reg12;
+	__le32 reg13;
+	__le32 reg14;
+	__le32 reg15;
+	__le32 reg16;
+	__le32 reg17;
+};
+
+struct tstorm_toe_conn_ag_ctx {
+	u8 reserved0;
+	u8 byte1;
+	u8 flags0;
+#define TSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define TSTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
+#define TSTORM_TOE_CONN_AG_CTX_BIT1_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_BIT1_SHIFT			1
+#define TSTORM_TOE_CONN_AG_CTX_BIT2_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_BIT2_SHIFT			2
+#define TSTORM_TOE_CONN_AG_CTX_BIT3_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_BIT3_SHIFT			3
+#define TSTORM_TOE_CONN_AG_CTX_BIT4_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_BIT4_SHIFT			4
+#define TSTORM_TOE_CONN_AG_CTX_BIT5_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_BIT5_SHIFT			5
+#define TSTORM_TOE_CONN_AG_CTX_TIMEOUT_CF_MASK			0x3
+#define TSTORM_TOE_CONN_AG_CTX_TIMEOUT_CF_SHIFT			6
+	u8 flags1;
+#define TSTORM_TOE_CONN_AG_CTX_CF1_MASK				0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF1_SHIFT			0
+#define TSTORM_TOE_CONN_AG_CTX_CF2_MASK				0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF2_SHIFT			2
+#define TSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define TSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT		4
+#define TSTORM_TOE_CONN_AG_CTX_CF4_MASK				0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF4_SHIFT			6
+	u8 flags2;
+#define TSTORM_TOE_CONN_AG_CTX_CF5_MASK				0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF5_SHIFT			0
+#define TSTORM_TOE_CONN_AG_CTX_CF6_MASK				0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF6_SHIFT			2
+#define TSTORM_TOE_CONN_AG_CTX_CF7_MASK				0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF7_SHIFT			4
+#define TSTORM_TOE_CONN_AG_CTX_CF8_MASK				0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF8_SHIFT			6
+	u8 flags3;
+#define TSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_MASK			0x3
+#define TSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_SHIFT			0
+#define TSTORM_TOE_CONN_AG_CTX_CF10_MASK			0x3
+#define TSTORM_TOE_CONN_AG_CTX_CF10_SHIFT			2
+#define TSTORM_TOE_CONN_AG_CTX_TIMEOUT_CF_EN_MASK		0x1
+#define TSTORM_TOE_CONN_AG_CTX_TIMEOUT_CF_EN_SHIFT		4
+#define TSTORM_TOE_CONN_AG_CTX_CF1EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF1EN_SHIFT			5
+#define TSTORM_TOE_CONN_AG_CTX_CF2EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF2EN_SHIFT			6
+#define TSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK		0x1
+#define TSTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT		7
+	u8 flags4;
+#define TSTORM_TOE_CONN_AG_CTX_CF4EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF4EN_SHIFT			0
+#define TSTORM_TOE_CONN_AG_CTX_CF5EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF5EN_SHIFT			1
+#define TSTORM_TOE_CONN_AG_CTX_CF6EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF6EN_SHIFT			2
+#define TSTORM_TOE_CONN_AG_CTX_CF7EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF7EN_SHIFT			3
+#define TSTORM_TOE_CONN_AG_CTX_CF8EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF8EN_SHIFT			4
+#define TSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_FLUSH_Q0_EN_SHIFT		5
+#define TSTORM_TOE_CONN_AG_CTX_CF10EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_CF10EN_SHIFT			6
+#define TSTORM_TOE_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE0EN_SHIFT			7
+	u8 flags5;
+#define TSTORM_TOE_CONN_AG_CTX_RULE1EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE1EN_SHIFT			0
+#define TSTORM_TOE_CONN_AG_CTX_RULE2EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE2EN_SHIFT			1
+#define TSTORM_TOE_CONN_AG_CTX_RULE3EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE3EN_SHIFT			2
+#define TSTORM_TOE_CONN_AG_CTX_RULE4EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE4EN_SHIFT			3
+#define TSTORM_TOE_CONN_AG_CTX_RULE5EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE5EN_SHIFT			4
+#define TSTORM_TOE_CONN_AG_CTX_RULE6EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE6EN_SHIFT			5
+#define TSTORM_TOE_CONN_AG_CTX_RULE7EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE7EN_SHIFT			6
+#define TSTORM_TOE_CONN_AG_CTX_RULE8EN_MASK			0x1
+#define TSTORM_TOE_CONN_AG_CTX_RULE8EN_SHIFT			7
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 reg3;
+	__le32 reg4;
+	__le32 reg5;
+	__le32 reg6;
+	__le32 reg7;
+	__le32 reg8;
+	u8 byte2;
+	u8 byte3;
+	__le16 word0;
+};
+
+struct ustorm_toe_conn_ag_ctx {
+	u8 reserved;
+	u8 byte1;
+	u8 flags0;
+#define USTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_MASK		0x1
+#define USTORM_TOE_CONN_AG_CTX_EXIST_IN_QM0_SHIFT		0
+#define USTORM_TOE_CONN_AG_CTX_BIT1_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_BIT1_SHIFT			1
+#define USTORM_TOE_CONN_AG_CTX_CF0_MASK				0x3
+#define USTORM_TOE_CONN_AG_CTX_CF0_SHIFT			2
+#define USTORM_TOE_CONN_AG_CTX_CF1_MASK				0x3
+#define USTORM_TOE_CONN_AG_CTX_CF1_SHIFT			4
+#define USTORM_TOE_CONN_AG_CTX_PUSH_TIMER_CF_MASK		0x3
+#define USTORM_TOE_CONN_AG_CTX_PUSH_TIMER_CF_SHIFT		6
+	u8 flags1;
+#define USTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_MASK		0x3
+#define USTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_SHIFT		0
+#define USTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_MASK		0x3
+#define USTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_SHIFT		2
+#define USTORM_TOE_CONN_AG_CTX_DQ_CF_MASK			0x3
+#define USTORM_TOE_CONN_AG_CTX_DQ_CF_SHIFT			4
+#define USTORM_TOE_CONN_AG_CTX_CF6_MASK				0x3
+#define USTORM_TOE_CONN_AG_CTX_CF6_SHIFT			6
+	u8 flags2;
+#define USTORM_TOE_CONN_AG_CTX_CF0EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_CF0EN_SHIFT			0
+#define USTORM_TOE_CONN_AG_CTX_CF1EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_CF1EN_SHIFT			1
+#define USTORM_TOE_CONN_AG_CTX_PUSH_TIMER_CF_EN_MASK		0x1
+#define USTORM_TOE_CONN_AG_CTX_PUSH_TIMER_CF_EN_SHIFT		2
+#define USTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_EN_MASK		0x1
+#define USTORM_TOE_CONN_AG_CTX_TIMER_STOP_ALL_EN_SHIFT		3
+#define USTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_EN_MASK		0x1
+#define USTORM_TOE_CONN_AG_CTX_SLOW_PATH_CF_EN_SHIFT		4
+#define USTORM_TOE_CONN_AG_CTX_DQ_CF_EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_DQ_CF_EN_SHIFT			5
+#define USTORM_TOE_CONN_AG_CTX_CF6EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_CF6EN_SHIFT			6
+#define USTORM_TOE_CONN_AG_CTX_RULE0EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE0EN_SHIFT			7
+	u8 flags3;
+#define USTORM_TOE_CONN_AG_CTX_RULE1EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE1EN_SHIFT			0
+#define USTORM_TOE_CONN_AG_CTX_RULE2EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE2EN_SHIFT			1
+#define USTORM_TOE_CONN_AG_CTX_RULE3EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE3EN_SHIFT			2
+#define USTORM_TOE_CONN_AG_CTX_RULE4EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE4EN_SHIFT			3
+#define USTORM_TOE_CONN_AG_CTX_RULE5EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE5EN_SHIFT			4
+#define USTORM_TOE_CONN_AG_CTX_RULE6EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE6EN_SHIFT			5
+#define USTORM_TOE_CONN_AG_CTX_RULE7EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE7EN_SHIFT			6
+#define USTORM_TOE_CONN_AG_CTX_RULE8EN_MASK			0x1
+#define USTORM_TOE_CONN_AG_CTX_RULE8EN_SHIFT			7
+	u8 byte2;
+	u8 byte3;
+	__le16 word0;
+	__le16 word1;
+	__le32 reg0;
+	__le32 reg1;
+	__le32 reg2;
+	__le32 reg3;
+	__le16 word2;
+	__le16 word3;
+};
+
+/* The toe storm context of Tstorm */
+struct tstorm_toe_conn_st_ctx {
+	__le32 reserved[16];
+};
+
+/* The toe storm context of Ustorm */
+struct ustorm_toe_conn_st_ctx {
+	__le32 reserved[52];
+};
+
+/* toe connection context */
+struct toe_conn_context {
+	struct ystorm_toe_conn_st_ctx ystorm_st_context;
+	struct pstorm_toe_conn_st_ctx pstorm_st_context;
+	struct regpair pstorm_st_padding[2];
+	struct xstorm_toe_conn_st_ctx xstorm_st_context;
+	struct regpair xstorm_st_padding[2];
+	struct ystorm_toe_conn_ag_ctx ystorm_ag_context;
+	struct xstorm_toe_conn_ag_ctx xstorm_ag_context;
+	struct tstorm_toe_conn_ag_ctx tstorm_ag_context;
+	struct regpair tstorm_ag_padding[2];
+	struct timers_context timer_context;
+	struct ustorm_toe_conn_ag_ctx ustorm_ag_context;
+	struct tstorm_toe_conn_st_ctx tstorm_st_context;
+	struct mstorm_toe_conn_st_ctx mstorm_st_context;
+	struct ustorm_toe_conn_st_ctx ustorm_st_context;
+};
+
+/* toe init ramrod header */
+struct toe_init_ramrod_header {
+	u8 first_rss;
+	u8 num_rss;
+	u8 reserved[6];
+};
+
+/* toe pf init parameters */
+struct toe_pf_init_params {
+	__le32 push_timeout;
+	__le16 grq_buffer_size;
+	__le16 grq_sb_id;
+	u8 grq_sb_index;
+	u8 max_seg_retransmit;
+	u8 doubt_reachability;
+	u8 ll2_rx_queue_id;
+	__le16 grq_fetch_threshold;
+	u8 reserved1[2];
+	struct regpair grq_page_addr;
+};
+
+/* toe tss parameters */
+struct toe_tss_params {
+	struct regpair curr_page_addr;
+	struct regpair next_page_addr;
+	u8 reserved0;
+	u8 status_block_index;
+	__le16 status_block_id;
+	__le16 reserved1[2];
+};
+
+/* toe rss parameters */
+struct toe_rss_params {
+	struct regpair curr_page_addr;
+	struct regpair next_page_addr;
+	u8 reserved0;
+	u8 status_block_index;
+	__le16 status_block_id;
+	__le16 reserved1[2];
+};
+
+/* toe init ramrod data */
+struct toe_init_ramrod_data {
+	struct toe_init_ramrod_header hdr;
+	struct tcp_init_params tcp_params;
+	struct toe_pf_init_params pf_params;
+	struct toe_tss_params tss_params[TOE_TX_MAX_TSS_CHAINS];
+	struct toe_rss_params rss_params[TOE_RX_MAX_RSS_CHAINS];
+};
+
+/* toe offload parameters */
+struct toe_offload_params {
+	struct regpair tx_bd_page_addr;
+	struct regpair tx_app_page_addr;
+	__le32 more_to_send_seq;
+	__le16 rcv_indication_size;
+	u8 rss_tss_id;
+	u8 ignore_grq_push;
+	struct regpair rx_db_data_ptr;
+};
+
+/* TOE offload ramrod data - DMAed by firmware */
+struct toe_offload_ramrod_data {
+	struct tcp_offload_params tcp_ofld_params;
+	struct toe_offload_params toe_ofld_params;
+};
+
+/* TOE ramrod command IDs */
+enum toe_ramrod_cmd_id {
+	TOE_RAMROD_UNUSED,
+	TOE_RAMROD_FUNC_INIT,
+	TOE_RAMROD_INITATE_OFFLOAD,
+	TOE_RAMROD_FUNC_CLOSE,
+	TOE_RAMROD_SEARCHER_DELETE,
+	TOE_RAMROD_TERMINATE,
+	TOE_RAMROD_QUERY,
+	TOE_RAMROD_UPDATE,
+	TOE_RAMROD_EMPTY,
+	TOE_RAMROD_RESET_SEND,
+	TOE_RAMROD_INVALIDATE,
+	MAX_TOE_RAMROD_CMD_ID
+};
+
+/* Toe RQ buffer descriptor */
+struct toe_rx_bd {
+	struct regpair addr;
+	__le16 size;
+	__le16 flags;
+#define TOE_RX_BD_START_MASK		0x1
+#define TOE_RX_BD_START_SHIFT		0
+#define TOE_RX_BD_END_MASK		0x1
+#define TOE_RX_BD_END_SHIFT		1
+#define TOE_RX_BD_NO_PUSH_MASK		0x1
+#define TOE_RX_BD_NO_PUSH_SHIFT		2
+#define TOE_RX_BD_SPLIT_MASK		0x1
+#define TOE_RX_BD_SPLIT_SHIFT		3
+#define TOE_RX_BD_RESERVED0_MASK	0xFFF
+#define TOE_RX_BD_RESERVED0_SHIFT	4
+	__le32 reserved1;
+};
+
+/* TOE RX completion queue opcodes (opcode 0 is illegal) */
+enum toe_rx_cmp_opcode {
+	TOE_RX_CMP_OPCODE_GA = 1,
+	TOE_RX_CMP_OPCODE_GR = 2,
+	TOE_RX_CMP_OPCODE_GNI = 3,
+	TOE_RX_CMP_OPCODE_GAIR = 4,
+	TOE_RX_CMP_OPCODE_GAIL = 5,
+	TOE_RX_CMP_OPCODE_GRI = 6,
+	TOE_RX_CMP_OPCODE_GJ = 7,
+	TOE_RX_CMP_OPCODE_DGI = 8,
+	TOE_RX_CMP_OPCODE_CMP = 9,
+	TOE_RX_CMP_OPCODE_REL = 10,
+	TOE_RX_CMP_OPCODE_SKP = 11,
+	TOE_RX_CMP_OPCODE_URG = 12,
+	TOE_RX_CMP_OPCODE_RT_TO = 13,
+	TOE_RX_CMP_OPCODE_KA_TO = 14,
+	TOE_RX_CMP_OPCODE_MAX_RT = 15,
+	TOE_RX_CMP_OPCODE_DBT_RE = 16,
+	TOE_RX_CMP_OPCODE_SYN = 17,
+	TOE_RX_CMP_OPCODE_OPT_ERR = 18,
+	TOE_RX_CMP_OPCODE_FW2_TO = 19,
+	TOE_RX_CMP_OPCODE_2WY_CLS = 20,
+	TOE_RX_CMP_OPCODE_RST_RCV = 21,
+	TOE_RX_CMP_OPCODE_FIN_RCV = 22,
+	TOE_RX_CMP_OPCODE_FIN_UPL = 23,
+	TOE_RX_CMP_OPCODE_INIT = 32,
+	TOE_RX_CMP_OPCODE_RSS_UPDATE = 33,
+	TOE_RX_CMP_OPCODE_CLOSE = 34,
+	TOE_RX_CMP_OPCODE_INITIATE_OFFLOAD = 80,
+	TOE_RX_CMP_OPCODE_SEARCHER_DELETE = 81,
+	TOE_RX_CMP_OPCODE_TERMINATE = 82,
+	TOE_RX_CMP_OPCODE_QUERY = 83,
+	TOE_RX_CMP_OPCODE_RESET_SEND = 84,
+	TOE_RX_CMP_OPCODE_INVALIDATE = 85,
+	TOE_RX_CMP_OPCODE_EMPTY = 86,
+	TOE_RX_CMP_OPCODE_UPDATE = 87,
+	MAX_TOE_RX_CMP_OPCODE
+};
+
+/* TOE rx ooo completion data */
+struct toe_rx_cqe_ooo_params {
+	__le32 nbytes;
+	__le16 grq_buff_id;
+	u8 isle_num;
+	u8 reserved0;
+};
+
+/* TOE rx in order completion data */
+struct toe_rx_cqe_in_order_params {
+	__le32 nbytes;
+	__le16 grq_buff_id;
+	__le16 reserved1;
+};
+
+/* Union for TOE rx completion data */
+union toe_rx_cqe_data_union {
+	struct toe_rx_cqe_ooo_params ooo_params;
+	struct toe_rx_cqe_in_order_params in_order_params;
+	struct regpair raw_data;
+};
+
+/* TOE rx completion element */
+struct toe_rx_cqe {
+	__le16 icid;
+	u8 completion_opcode;
+	u8 reserved0;
+	__le32 reserved1;
+	union toe_rx_cqe_data_union data;
+};
+
+/* toe RX doorbel data */
+struct toe_rx_db_data {
+	__le32 local_adv_wnd_seq;
+	__le32 reserved[3];
+};
+
+/* Toe GRQ buffer descriptor */
+struct toe_rx_grq_bd {
+	struct regpair addr;
+	__le16 buff_id;
+	__le16 reserved0;
+	__le32 reserved1;
+};
+
+/* Toe transmission application buffer descriptor */
+struct toe_tx_app_buff_desc {
+	__le32 next_buffer_start_seq;
+	__le32 reserved;
+};
+
+/* Toe transmission application buffer descriptor page pointer */
+struct toe_tx_app_buff_page_pointer {
+	struct regpair next_page_addr;
+};
+
+/* Toe transmission buffer descriptor */
+struct toe_tx_bd {
+	struct regpair addr;
+	__le16 size;
+	__le16 flags;
+#define TOE_TX_BD_PUSH_MASK		0x1
+#define TOE_TX_BD_PUSH_SHIFT		0
+#define TOE_TX_BD_NOTIFY_MASK		0x1
+#define TOE_TX_BD_NOTIFY_SHIFT		1
+#define TOE_TX_BD_LARGE_IO_MASK		0x1
+#define TOE_TX_BD_LARGE_IO_SHIFT	2
+#define TOE_TX_BD_BD_CONS_MASK		0x1FFF
+#define TOE_TX_BD_BD_CONS_SHIFT		3
+	__le32 next_bd_start_seq;
+};
+
+/* TOE completion opcodes */
+enum toe_tx_cmp_opcode {
+	TOE_TX_CMP_OPCODE_DATA,
+	TOE_TX_CMP_OPCODE_TERMINATE,
+	TOE_TX_CMP_OPCODE_EMPTY,
+	TOE_TX_CMP_OPCODE_RESET_SEND,
+	TOE_TX_CMP_OPCODE_INVALIDATE,
+	TOE_TX_CMP_OPCODE_RST_RCV,
+	MAX_TOE_TX_CMP_OPCODE
+};
+
+/* Toe transmission completion element */
+struct toe_tx_cqe {
+	__le16 icid;
+	u8 opcode;
+	u8 reserved;
+	__le32 size;
+};
+
+/* Toe transmission page pointer bd */
+struct toe_tx_page_pointer_bd {
+	struct regpair next_page_addr;
+	struct regpair prev_page_addr;
+};
+
+/* Toe transmission completion element page pointer */
+struct toe_tx_page_pointer_cqe {
+	struct regpair next_page_addr;
+};
+
+/* toe update parameters */
+struct toe_update_params {
+	__le16 flags;
+#define TOE_UPDATE_PARAMS_RCV_INDICATION_SIZE_CHANGED_MASK	0x1
+#define TOE_UPDATE_PARAMS_RCV_INDICATION_SIZE_CHANGED_SHIFT	0
+#define TOE_UPDATE_PARAMS_RESERVED_MASK				0x7FFF
+#define TOE_UPDATE_PARAMS_RESERVED_SHIFT			1
+	__le16 rcv_indication_size;
+	__le16 reserved1[2];
+};
+
+/* TOE update ramrod data - DMAed by firmware */
+struct toe_update_ramrod_data {
+	struct tcp_update_params tcp_upd_params;
+	struct toe_update_params toe_upd_params;
+};
+
+struct mstorm_toe_conn_ag_ctx {
+	u8 byte0;
+	u8 byte1;
+	u8 flags0;
+#define MSTORM_TOE_CONN_AG_CTX_BIT0_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_BIT0_SHIFT	0
+#define MSTORM_TOE_CONN_AG_CTX_BIT1_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_BIT1_SHIFT	1
+#define MSTORM_TOE_CONN_AG_CTX_CF0_MASK		0x3
+#define MSTORM_TOE_CONN_AG_CTX_CF0_SHIFT	2
+#define MSTORM_TOE_CONN_AG_CTX_CF1_MASK		0x3
+#define MSTORM_TOE_CONN_AG_CTX_CF1_SHIFT	4
+#define MSTORM_TOE_CONN_AG_CTX_CF2_MASK		0x3
+#define MSTORM_TOE_CONN_AG_CTX_CF2_SHIFT	6
+	u8 flags1;
+#define MSTORM_TOE_CONN_AG_CTX_CF0EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_CF0EN_SHIFT	0
+#define MSTORM_TOE_CONN_AG_CTX_CF1EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_CF1EN_SHIFT	1
+#define MSTORM_TOE_CONN_AG_CTX_CF2EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_CF2EN_SHIFT	2
+#define MSTORM_TOE_CONN_AG_CTX_RULE0EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_RULE0EN_SHIFT	3
+#define MSTORM_TOE_CONN_AG_CTX_RULE1EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_RULE1EN_SHIFT	4
+#define MSTORM_TOE_CONN_AG_CTX_RULE2EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_RULE2EN_SHIFT	5
+#define MSTORM_TOE_CONN_AG_CTX_RULE3EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_RULE3EN_SHIFT	6
+#define MSTORM_TOE_CONN_AG_CTX_RULE4EN_MASK	0x1
+#define MSTORM_TOE_CONN_AG_CTX_RULE4EN_SHIFT	7
+	__le16 word0;
+	__le16 word1;
+	__le32 reg0;
+	__le32 reg1;
+};
+
+/* TOE doorbell data */
+struct toe_db_data {
+	u8 params;
+#define TOE_DB_DATA_DEST_MASK			0x3
+#define TOE_DB_DATA_DEST_SHIFT			0
+#define TOE_DB_DATA_AGG_CMD_MASK		0x3
+#define TOE_DB_DATA_AGG_CMD_SHIFT		2
+#define TOE_DB_DATA_BYPASS_EN_MASK		0x1
+#define TOE_DB_DATA_BYPASS_EN_SHIFT		4
+#define TOE_DB_DATA_RESERVED_MASK		0x1
+#define TOE_DB_DATA_RESERVED_SHIFT		5
+#define TOE_DB_DATA_AGG_VAL_SEL_MASK		0x3
+#define TOE_DB_DATA_AGG_VAL_SEL_SHIFT		6
+	u8 agg_flags;
+	__le16 bd_prod;
+};
+
 /* rdma function init ramrod data */
 struct rdma_close_func_ramrod_data {
 	u8 cnq_start_offset;
@@ -5148,6 +6150,8 @@ enum rdma_event_opcode {
 	RDMA_EVENT_CREATE_SRQ,
 	RDMA_EVENT_MODIFY_SRQ,
 	RDMA_EVENT_DESTROY_SRQ,
+	RDMA_EVENT_START_NAMESPACE_TRACKING,
+	RDMA_EVENT_STOP_NAMESPACE_TRACKING,
 	MAX_RDMA_EVENT_OPCODE
 };
 
@@ -5172,18 +6176,33 @@ struct rdma_init_func_hdr {
 	u8 relaxed_ordering;
 	__le16 first_reg_srq_id;
 	__le32 reg_srq_base_addr;
-	u8 searcher_mode;
-	u8 pvrdma_mode;
+	u8 flags;
+#define RDMA_INIT_FUNC_HDR_SEARCHER_MODE_MASK		0x1
+#define RDMA_INIT_FUNC_HDR_SEARCHER_MODE_SHIFT		0
+#define RDMA_INIT_FUNC_HDR_PVRDMA_MODE_MASK		0x1
+#define RDMA_INIT_FUNC_HDR_PVRDMA_MODE_SHIFT		1
+#define RDMA_INIT_FUNC_HDR_DPT_MODE_MASK		0x1
+#define RDMA_INIT_FUNC_HDR_DPT_MODE_SHIFT		2
+#define RDMA_INIT_FUNC_HDR_RESERVED0_MASK		0x1F
+#define RDMA_INIT_FUNC_HDR_RESERVED0_SHIFT		3
+	u8 dpt_byte_threshold_log;
+	u8 dpt_common_queue_id;
 	u8 max_num_ns_log;
-	u8 reserved;
 };
 
 /* rdma function init ramrod data */
 struct rdma_init_func_ramrod_data {
 	struct rdma_init_func_hdr params_header;
+	struct rdma_cnq_params dptq_params;
 	struct rdma_cnq_params cnq_params[NUM_OF_GLOBAL_QUEUES];
 };
 
+/* rdma namespace tracking ramrod data */
+struct rdma_namespace_tracking_ramrod_data {
+	u8 name_space;
+	u8 reserved[7];
+};
+
 /* RDMA ramrod command IDs */
 enum rdma_ramrod_cmd_id {
 	RDMA_RAMROD_UNUSED,
@@ -5197,6 +6216,8 @@ enum rdma_ramrod_cmd_id {
 	RDMA_RAMROD_CREATE_SRQ,
 	RDMA_RAMROD_MODIFY_SRQ,
 	RDMA_RAMROD_DESTROY_SRQ,
+	RDMA_RAMROD_START_NS_TRACKING,
+	RDMA_RAMROD_STOP_NS_TRACKING,
 	MAX_RDMA_RAMROD_CMD_ID
 };
 
@@ -5918,8 +6939,10 @@ struct roce_create_qp_req_ramrod_data {
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE_SHIFT			0
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_VF_ID_VALID_MASK			0x1
 #define ROCE_CREATE_QP_REQ_RAMROD_DATA_VF_ID_VALID_SHIFT		1
-#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_MASK			0x3F
-#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_SHIFT			2
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_FORCE_LB_MASK			0x1
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_FORCE_LB_SHIFT			2
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_MASK			0x1F
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_SHIFT			3
 	u8 name_space;
 	u8 reserved3[3];
 	__le16 regular_latency_phy_queue;
@@ -5951,8 +6974,10 @@ struct roce_create_qp_resp_ramrod_data {
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG_SHIFT            16
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_VF_ID_VALID_MASK	0x1
 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_VF_ID_VALID_SHIFT	17
-#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_MASK		0x3FFF
-#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_SHIFT		18
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_FORCE_LB_MASK			0x1
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_FORCE_LB_SHIFT			18
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_MASK			0x1FFF
+#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_SHIFT			19
 	__le16 xrc_domain;
 	u8 max_ird;
 	u8 traffic_class;
@@ -5989,10 +7014,85 @@ struct roce_create_qp_resp_ramrod_data {
 	u8 reserved3[3];
 };
 
+/* RoCE Create Suspended qp requester runtime ramrod data */
+struct roce_create_suspended_qp_req_runtime_ramrod_data {
+	__le32 flags;
+#define ROCE_CREATE_SUSPENDED_QP_REQ_RUNTIME_RAMROD_DATA_ERR_FLG_MASK 0x1
+#define ROCE_CREATE_SUSPENDED_QP_REQ_RUNTIME_RAMROD_DATA_ERR_FLG_SHIFT 0
+#define ROCE_CREATE_SUSPENDED_QP_REQ_RUNTIME_RAMROD_DATA_RESERVED0_MASK \
+								 0x7FFFFFFF
+#define ROCE_CREATE_SUSPENDED_QP_REQ_RUNTIME_RAMROD_DATA_RESERVED0_SHIFT 1
+	__le32 send_msg_psn;
+	__le32 inflight_sends;
+	__le32 ssn;
+};
+
+/* RoCE Create Suspended QP requester ramrod data */
+struct roce_create_suspended_qp_req_ramrod_data {
+	struct roce_create_qp_req_ramrod_data qp_params;
+	struct roce_create_suspended_qp_req_runtime_ramrod_data
+	 qp_runtime_params;
+};
+
+/* RoCE Create Suspended QP responder runtime params */
+struct roce_create_suspended_qp_resp_runtime_params {
+	__le32 flags;
+#define ROCE_CREATE_SUSPENDED_QP_RESP_RUNTIME_PARAMS_ERR_FLG_MASK 0x1
+#define ROCE_CREATE_SUSPENDED_QP_RESP_RUNTIME_PARAMS_ERR_FLG_SHIFT 0
+#define ROCE_CREATE_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RDMA_ACTIVE_MASK 0x1
+#define ROCE_CREATE_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RDMA_ACTIVE_SHIFT 1
+#define ROCE_CREATE_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RESERVED0_MASK 0x3FFFFFFF
+#define ROCE_CREATE_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RESERVED0_SHIFT 2
+	__le32 receive_msg_psn;
+	__le32 inflight_receives;
+	__le32 rmsn;
+	__le32 rdma_key;
+	struct regpair rdma_va;
+	__le32 rdma_length;
+	__le32 num_rdb_entries;
+	__le32 resreved;
+};
+
+/* RoCE RDB array entry */
+struct roce_resp_qp_rdb_entry {
+	struct regpair atomic_data;
+	struct regpair va;
+	__le32 psn;
+	__le32 rkey;
+	__le32 byte_count;
+	u8 op_type;
+	u8 reserved[3];
+};
+
+/* RoCE Create Suspended QP responder runtime ramrod data */
+struct roce_create_suspended_qp_resp_runtime_ramrod_data {
+	struct roce_create_suspended_qp_resp_runtime_params params;
+	struct roce_resp_qp_rdb_entry
+	 rdb_array_entries[RDMA_MAX_IRQ_ELEMS_IN_PAGE];
+};
+
+/* RoCE Create Suspended QP responder ramrod data */
+struct roce_create_suspended_qp_resp_ramrod_data {
+	struct roce_create_qp_resp_ramrod_data
+	 qp_params;
+	struct roce_create_suspended_qp_resp_runtime_ramrod_data
+	 qp_runtime_params;
+};
+
+/* RoCE create ud qp ramrod data */
+struct roce_create_ud_qp_ramrod_data {
+	__le16 local_mac_addr[3];
+	__le16 vlan_id;
+	__le32 src_qp_id;
+	u8 name_space;
+	u8 reserved[3];
+};
+
 /* roce DCQCN received statistics */
 struct roce_dcqcn_received_stats {
 	struct regpair ecn_pkt_rcv;
 	struct regpair cnp_pkt_rcv;
+	struct regpair cnp_pkt_reject;
 };
 
 /* roce DCQCN sent statistics */
@@ -6024,6 +7124,12 @@ struct roce_destroy_qp_resp_ramrod_data {
 	__le32 reserved;
 };
 
+/* RoCE destroy ud qp ramrod data */
+struct roce_destroy_ud_qp_ramrod_data {
+	__le32 src_qp_id;
+	__le32 reserved;
+};
+
 /* roce error statistics */
 struct roce_error_stats {
 	__le32 resp_remote_access_errors;
@@ -6046,13 +7152,21 @@ struct roce_events_stats {
 
 /* roce slow path EQ cmd IDs */
 enum roce_event_opcode {
-	ROCE_EVENT_CREATE_QP = 11,
+	ROCE_EVENT_CREATE_QP = 13,
 	ROCE_EVENT_MODIFY_QP,
 	ROCE_EVENT_QUERY_QP,
 	ROCE_EVENT_DESTROY_QP,
 	ROCE_EVENT_CREATE_UD_QP,
 	ROCE_EVENT_DESTROY_UD_QP,
 	ROCE_EVENT_FUNC_UPDATE,
+	ROCE_EVENT_SUSPEND_QP,
+	ROCE_EVENT_QUERY_SUSPENDED_QP,
+	ROCE_EVENT_CREATE_SUSPENDED_QP,
+	ROCE_EVENT_RESUME_QP,
+	ROCE_EVENT_SUSPEND_UD_QP,
+	ROCE_EVENT_RESUME_UD_QP,
+	ROCE_EVENT_CREATE_SUSPENDED_UD_QP,
+	ROCE_EVENT_FLUSH_DPT_QP,
 	MAX_ROCE_EVENT_OPCODE
 };
 
@@ -6080,6 +7194,18 @@ struct roce_init_func_ramrod_data {
 	struct roce_init_func_params roce;
 };
 
+/* roce_ll2_cqe_data */
+struct roce_ll2_cqe_data {
+	u8 name_space;
+	u8 flags;
+#define ROCE_LL2_CQE_DATA_QP_SUSPENDED_MASK	0x1
+#define ROCE_LL2_CQE_DATA_QP_SUSPENDED_SHIFT	0
+#define ROCE_LL2_CQE_DATA_RESERVED0_MASK	0x7F
+#define ROCE_LL2_CQE_DATA_RESERVED0_SHIFT	1
+	u8 reserved1[2];
+	__le32 cid;
+};
+
 /* roce modify qp requester ramrod data */
 struct roce_modify_qp_req_ramrod_data {
 	__le16 flags;
@@ -6107,8 +7233,10 @@ struct roce_modify_qp_req_ramrod_data {
 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PRI_SHIFT			10
 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PHYSICAL_QUEUE_FLG_MASK		0x1
 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_PHYSICAL_QUEUE_FLG_SHIFT		13
-#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_RESERVED1_MASK			0x3
-#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_RESERVED1_SHIFT			14
+#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_FORCE_LB_MASK			0x1
+#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_FORCE_LB_SHIFT			14
+#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_RESERVED1_MASK			0x1
+#define ROCE_MODIFY_QP_REQ_RAMROD_DATA_RESERVED1_SHIFT			15
 	u8 fields;
 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_MASK	0xF
 #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_SHIFT	0
@@ -6154,8 +7282,10 @@ struct roce_modify_qp_resp_ramrod_data {
 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG_SHIFT		9
 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PHYSICAL_QUEUE_FLG_MASK		0x1
 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PHYSICAL_QUEUE_FLG_SHIFT	10
-#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RESERVED1_MASK			0x1F
-#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RESERVED1_SHIFT			11
+#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_FORCE_LB_MASK			0x1
+#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_FORCE_LB_SHIFT			11
+#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RESERVED1_MASK			0xF
+#define ROCE_MODIFY_QP_RESP_RAMROD_DATA_RESERVED1_SHIFT			12
 	u8 fields;
 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PRI_MASK		0x7
 #define ROCE_MODIFY_QP_RESP_RAMROD_DATA_PRI_SHIFT		0
@@ -6206,18 +7336,84 @@ struct roce_query_qp_resp_ramrod_data {
 	struct regpair output_params_addr;
 };
 
+/* RoCE Query Suspended QP requester output params */
+struct roce_query_suspended_qp_req_output_params {
+	__le32 psn;
+	__le32 flags;
+#define ROCE_QUERY_SUSPENDED_QP_REQ_OUTPUT_PARAMS_ERR_FLG_MASK		0x1
+#define ROCE_QUERY_SUSPENDED_QP_REQ_OUTPUT_PARAMS_ERR_FLG_SHIFT		0
+#define ROCE_QUERY_SUSPENDED_QP_REQ_OUTPUT_PARAMS_RESERVED0_MASK 0x7FFFFFFF
+#define ROCE_QUERY_SUSPENDED_QP_REQ_OUTPUT_PARAMS_RESERVED0_SHIFT	1
+	__le32 send_msg_psn;
+	__le32 inflight_sends;
+	__le32 ssn;
+	__le32 reserved;
+};
+
+/* RoCE Query Suspended QP requester ramrod data */
+struct roce_query_suspended_qp_req_ramrod_data {
+	struct regpair output_params_addr;
+};
+
+/* RoCE Query Suspended QP responder runtime params */
+struct roce_query_suspended_qp_resp_runtime_params {
+	__le32 psn;
+	__le32 flags;
+#define ROCE_QUERY_SUSPENDED_QP_RESP_RUNTIME_PARAMS_ERR_FLG_MASK 0x1
+#define ROCE_QUERY_SUSPENDED_QP_RESP_RUNTIME_PARAMS_ERR_FLG_SHIFT 0
+#define ROCE_QUERY_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RDMA_ACTIVE_MASK 0x1
+#define ROCE_QUERY_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RDMA_ACTIVE_SHIFT 1
+#define ROCE_QUERY_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RESERVED0_MASK 0x3FFFFFFF
+#define ROCE_QUERY_SUSPENDED_QP_RESP_RUNTIME_PARAMS_RESERVED0_SHIFT 2
+	__le32 receive_msg_psn;
+	__le32 inflight_receives;
+	__le32 rmsn;
+	__le32 rdma_key;
+	struct regpair rdma_va;
+	__le32 rdma_length;
+	__le32 num_rdb_entries;
+};
+
+/* RoCE Query Suspended QP responder output params */
+struct roce_query_suspended_qp_resp_output_params {
+	struct roce_query_suspended_qp_resp_runtime_params runtime_params;
+	struct roce_resp_qp_rdb_entry
+	 rdb_array_entries[RDMA_MAX_IRQ_ELEMS_IN_PAGE];
+};
+
+/* RoCE Query Suspended QP responder ramrod data */
+struct roce_query_suspended_qp_resp_ramrod_data {
+	struct regpair output_params_addr;
+};
+
 /* ROCE ramrod command IDs */
 enum roce_ramrod_cmd_id {
-	ROCE_RAMROD_CREATE_QP = 11,
+	ROCE_RAMROD_CREATE_QP = 13,
 	ROCE_RAMROD_MODIFY_QP,
 	ROCE_RAMROD_QUERY_QP,
 	ROCE_RAMROD_DESTROY_QP,
 	ROCE_RAMROD_CREATE_UD_QP,
 	ROCE_RAMROD_DESTROY_UD_QP,
 	ROCE_RAMROD_FUNC_UPDATE,
+	ROCE_RAMROD_SUSPEND_QP,
+	ROCE_RAMROD_QUERY_SUSPENDED_QP,
+	ROCE_RAMROD_CREATE_SUSPENDED_QP,
+	ROCE_RAMROD_RESUME_QP,
+	ROCE_RAMROD_SUSPEND_UD_QP,
+	ROCE_RAMROD_RESUME_UD_QP,
+	ROCE_RAMROD_CREATE_SUSPENDED_UD_QP,
+	ROCE_RAMROD_FLUSH_DPT_QP,
 	MAX_ROCE_RAMROD_CMD_ID
 };
 
+/* ROCE RDB array entry type */
+enum roce_resp_qp_rdb_entry_type {
+	ROCE_QP_RDB_ENTRY_RDMA_RESPONSE = 0,
+	ROCE_QP_RDB_ENTRY_ATOMIC_RESPONSE = 1,
+	ROCE_QP_RDB_ENTRY_INVALID = 2,
+	MAX_ROCE_RESP_QP_RDB_ENTRY_TYPE
+};
+
 /* RoCE func init ramrod data */
 struct roce_update_func_params {
 	u8 cnp_vlan_priority;
@@ -7968,8 +9164,8 @@ enum iwarp_eqe_async_opcode {
 	IWARP_EVENT_TYPE_ASYNC_EXCEPTION_DETECTED,
 	IWARP_EVENT_TYPE_ASYNC_QP_IN_ERROR_STATE,
 	IWARP_EVENT_TYPE_ASYNC_CQ_OVERFLOW,
-	IWARP_EVENT_TYPE_ASYNC_SRQ_EMPTY,
 	IWARP_EVENT_TYPE_ASYNC_SRQ_LIMIT,
+	IWARP_EVENT_TYPE_ASYNC_SRQ_EMPTY,
 	MAX_IWARP_EQE_ASYNC_OPCODE
 };
 
@@ -7987,8 +9183,7 @@ struct iwarp_eqe_data_tcp_async_completion {
 
 /* iWARP completion queue types */
 enum iwarp_eqe_sync_opcode {
-	IWARP_EVENT_TYPE_TCP_OFFLOAD =
-	11,
+	IWARP_EVENT_TYPE_TCP_OFFLOAD = 13,
 	IWARP_EVENT_TYPE_MPA_OFFLOAD,
 	IWARP_EVENT_TYPE_MPA_OFFLOAD_SEND_RTR,
 	IWARP_EVENT_TYPE_CREATE_QP,
@@ -8020,8 +9215,6 @@ enum iwarp_fw_return_code {
 	IWARP_EXCEPTION_DETECTED_LLP_RESET,
 	IWARP_EXCEPTION_DETECTED_IRQ_FULL,
 	IWARP_EXCEPTION_DETECTED_RQ_EMPTY,
-	IWARP_EXCEPTION_DETECTED_SRQ_EMPTY,
-	IWARP_EXCEPTION_DETECTED_SRQ_LIMIT,
 	IWARP_EXCEPTION_DETECTED_LLP_TIMEOUT,
 	IWARP_EXCEPTION_DETECTED_REMOTE_PROTECTION_ERROR,
 	IWARP_EXCEPTION_DETECTED_CQ_OVERFLOW,
@@ -8115,9 +9308,10 @@ struct iwarp_mpa_offload_ramrod_data {
 	struct regpair async_eqe_output_buf;
 	struct regpair handle_for_async;
 	struct regpair shared_queue_addr;
+	__le32 additional_setup_time;
 	__le16 rcv_wnd;
 	u8 stats_counter_id;
-	u8 reserved3[13];
+	u8 reserved3[9];
 };
 
 /* iWARP TCP connection offload params passed by driver to FW */
@@ -8125,11 +9319,13 @@ struct iwarp_offload_params {
 	struct mpa_ulp_buffer incoming_ulp_buffer;
 	struct regpair async_eqe_output_buf;
 	struct regpair handle_for_async;
+	__le32 additional_setup_time;
 	__le16 physical_q0;
 	__le16 physical_q1;
 	u8 stats_counter_id;
 	u8 mpa_mode;
-	u8 reserved[10];
+	u8 src_vport_id;
+	u8 reserved[5];
 };
 
 /* iWARP query QP output params */
@@ -8149,7 +9345,7 @@ struct iwarp_query_qp_ramrod_data {
 
 /* iWARP Ramrod Command IDs */
 enum iwarp_ramrod_cmd_id {
-	IWARP_RAMROD_CMD_ID_TCP_OFFLOAD = 11,
+	IWARP_RAMROD_CMD_ID_TCP_OFFLOAD = 13,
 	IWARP_RAMROD_CMD_ID_MPA_OFFLOAD,
 	IWARP_RAMROD_CMD_ID_MPA_OFFLOAD_SEND_RTR,
 	IWARP_RAMROD_CMD_ID_CREATE_QP,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
index 7dad91049cc0..fb90ad4a9d1f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -920,7 +920,8 @@ int qed_init_vport_wfq(struct qed_hwfn *p_hwfn,
 }
 
 int qed_init_global_rl(struct qed_hwfn *p_hwfn,
-		       struct qed_ptt *p_ptt, u16 rl_id, u32 rate_limit)
+		       struct qed_ptt *p_ptt, u16 rl_id, u32 rate_limit,
+		       enum init_qm_rl_type vport_rl_type)
 {
 	u32 inc_val;
 
@@ -1645,7 +1646,7 @@ struct phys_mem_desc *qed_fw_overlay_mem_alloc(struct qed_hwfn *p_hwfn,
 
 	/* If memory allocation has failed, free all allocated memory */
 	if (buf_offset < buf_size) {
-		qed_fw_overlay_mem_free(p_hwfn, allocated_mem);
+		qed_fw_overlay_mem_free(p_hwfn, &allocated_mem);
 		return NULL;
 	}
 
@@ -1679,16 +1680,16 @@ void qed_fw_overlay_init_ram(struct qed_hwfn *p_hwfn,
 }
 
 void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn,
-			     struct phys_mem_desc *fw_overlay_mem)
+			     struct phys_mem_desc **fw_overlay_mem)
 {
 	u8 storm_id;
 
-	if (!fw_overlay_mem)
+	if (!fw_overlay_mem || !(*fw_overlay_mem))
 		return;
 
 	for (storm_id = 0; storm_id < NUM_STORMS; storm_id++) {
 		struct phys_mem_desc *storm_mem_desc =
-		    (struct phys_mem_desc *)fw_overlay_mem + storm_id;
+		    (struct phys_mem_desc *)*fw_overlay_mem + storm_id;
 
 		/* Free Storm's physical memory */
 		if (storm_mem_desc->virt_addr)
@@ -1699,5 +1700,6 @@ void qed_fw_overlay_mem_free(struct qed_hwfn *p_hwfn,
 	}
 
 	/* Free allocated virtual memory */
-	kfree(fw_overlay_mem);
+	kfree(*fw_overlay_mem);
+	*fw_overlay_mem = NULL;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 991bf4313da6..9b3850712797 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -38,7 +38,6 @@
 #include "qed_sp.h"
 #include "qed_sriov.h"
 
-
 #define QED_MAX_SGES_NUM 16
 #define CRC32_POLY 0x1edc6f41
 
@@ -1112,7 +1111,6 @@ qed_eth_pf_tx_queue_start(struct qed_hwfn *p_hwfn,
 {
 	int rc;
 
-
 	rc = qed_eth_txq_start_ramrod(p_hwfn, p_cid,
 				      pbl_addr, pbl_size,
 				      qed_get_cm_pq_idx_mcos(p_hwfn, tc));
@@ -2011,7 +2009,7 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
 				struct qed_spq_comp_cb *p_cb,
 				struct qed_ntuple_filter_params *p_params)
 {
-	struct rx_update_gft_filter_data *p_ramrod = NULL;
+	struct rx_update_gft_filter_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
 	u16 abs_rx_q_id = 0;
@@ -2032,7 +2030,7 @@ qed_configure_rfs_ntuple_filter(struct qed_hwfn *p_hwfn,
 	}
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
-				 ETH_RAMROD_GFT_UPDATE_FILTER,
+				 ETH_RAMROD_RX_UPDATE_GFT_FILTER,
 				 PROTOCOLID_ETH, &init_data);
 	if (rc)
 		return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index 2ab7f3f0cf6c..a538cf478c14 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -146,7 +146,6 @@ struct qed_sp_vport_start_params {
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 			   struct qed_sp_vport_start_params *p_params);
 
-
 struct qed_filter_accept_flags {
 	u8	update_rx_mode_config;
 	u8	update_tx_mode_config;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index c5a38f3c92b0..4fb02a5579ee 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -23,9 +23,9 @@ enum spq_mode {
 };
 
 struct qed_spq_comp_cb {
-	void	(*function)(struct qed_hwfn *,
-			    void *,
-			    union event_ring_data *,
+	void	(*function)(struct qed_hwfn *p_hwfn,
+			    void *cookie,
+			    union event_ring_data *data,
 			    u8 fw_return_code);
 	void	*cookie;
 };
@@ -53,7 +53,7 @@ union ramrod_data {
 	struct tx_queue_stop_ramrod_data tx_queue_stop;
 	struct vport_start_ramrod_data vport_start;
 	struct vport_stop_ramrod_data vport_stop;
-	struct rx_update_gft_filter_data rx_update_gft;
+	struct rx_update_gft_filter_ramrod_data rx_update_gft;
 	struct vport_update_ramrod_data vport_update;
 	struct core_rx_start_ramrod_data core_rx_queue_start;
 	struct core_rx_stop_ramrod_data core_rx_queue_stop;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index b4ed54ffef9b..648176dfb871 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -369,8 +369,12 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 		       qed_chain_get_pbl_phys(&p_hwfn->p_eq->chain));
 	page_cnt = (u8)qed_chain_get_page_cnt(&p_hwfn->p_eq->chain);
 	p_ramrod->event_ring_num_pages = page_cnt;
-	DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
+
+	/* Place consolidation queue address in ramrod */
+	DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_base_addr,
 		       qed_chain_get_pbl_phys(&p_hwfn->p_consq->chain));
+	page_cnt = (u8)qed_chain_get_page_cnt(&p_hwfn->p_consq->chain);
+	p_ramrod->consolid_q_num_pages = page_cnt;
 
 	qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
@@ -401,8 +405,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 	if (p_hwfn->cdev->p_iov_info) {
 		struct qed_hw_sriov_info *p_iov = p_hwfn->cdev->p_iov_info;
 
-		p_ramrod->base_vf_id = (u8) p_iov->first_vf_in_pf;
-		p_ramrod->num_vfs = (u8) p_iov->total_vfs;
+		p_ramrod->base_vf_id = (u8)p_iov->first_vf_in_pf;
+		p_ramrod->num_vfs = (u8)p_iov->total_vfs;
 	}
 	p_ramrod->hsi_fp_ver.major_ver_arr[ETH_VER_KEY] = ETH_HSI_VER_MAJOR;
 	p_ramrod->hsi_fp_ver.minor_ver_arr[ETH_VER_KEY] = ETH_HSI_VER_MINOR;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 8bef53ca7597..65dbc08196b7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -32,8 +32,8 @@
 #include "qed_rdma.h"
 
 /***************************************************************************
-* Structures & Definitions
-***************************************************************************/
+ * Structures & Definitions
+ ***************************************************************************/
 
 #define SPQ_HIGH_PRI_RESERVE_DEFAULT    (1)
 
@@ -43,8 +43,8 @@
 #define SPQ_BLOCK_SLEEP_MS              (5)
 
 /***************************************************************************
-* Blocking Imp. (BLOCK/EBLOCK mode)
-***************************************************************************/
+ * Blocking Imp. (BLOCK/EBLOCK mode)
+ ***************************************************************************/
 static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn,
 				void *cookie,
 				union event_ring_data *data, u8 fw_return_code)
@@ -150,8 +150,8 @@ err:
 }
 
 /***************************************************************************
-* SPQ entries inner API
-***************************************************************************/
+ * SPQ entries inner API
+ ***************************************************************************/
 static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
 			      struct qed_spq_entry *p_ent)
 {
@@ -185,8 +185,8 @@ static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn,
 }
 
 /***************************************************************************
-* HSI access
-***************************************************************************/
+ * HSI access
+ ***************************************************************************/
 static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
 				  struct qed_spq *p_spq)
 {
@@ -218,13 +218,10 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn,
 	physical_q = qed_get_cm_pq_idx(p_hwfn, PQ_FLAGS_LB);
 	p_cxt->xstorm_ag_context.physical_q0 = cpu_to_le16(physical_q);
 
-	p_cxt->xstorm_st_context.spq_base_lo =
+	p_cxt->xstorm_st_context.spq_base_addr.lo =
 		DMA_LO_LE(p_spq->chain.p_phys_addr);
-	p_cxt->xstorm_st_context.spq_base_hi =
+	p_cxt->xstorm_st_context.spq_base_addr.hi =
 		DMA_HI_LE(p_spq->chain.p_phys_addr);
-
-	DMA_REGPAIR_LE(p_cxt->xstorm_st_context.consolid_base_addr,
-		       p_hwfn->p_consq->chain.p_phys_addr);
 }
 
 static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
@@ -266,8 +263,8 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn,
 }
 
 /***************************************************************************
-* Asynchronous events
-***************************************************************************/
+ * Asynchronous events
+ ***************************************************************************/
 static int
 qed_async_event_completion(struct qed_hwfn *p_hwfn,
 			   struct event_ring_entry *p_eqe)
@@ -312,8 +309,8 @@ qed_spq_unregister_async_cb(struct qed_hwfn *p_hwfn,
 }
 
 /***************************************************************************
-* EQ API
-***************************************************************************/
+ * EQ API
+ ***************************************************************************/
 void qed_eq_prod_update(struct qed_hwfn *p_hwfn, u16 prod)
 {
 	u32 addr = GTT_BAR0_MAP_REG_USDM_RAM +
@@ -434,8 +431,8 @@ void qed_eq_free(struct qed_hwfn *p_hwfn)
 }
 
 /***************************************************************************
-* CQE API - manipulate EQ functionality
-***************************************************************************/
+ * CQE API - manipulate EQ functionality
+ ***************************************************************************/
 static int qed_cqe_completion(struct qed_hwfn *p_hwfn,
 			      struct eth_slow_path_rx_cqe *cqe,
 			      enum protocol_type protocol)
@@ -465,8 +462,8 @@ int qed_eth_cqe_completion(struct qed_hwfn *p_hwfn,
 }
 
 /***************************************************************************
-* Slow hwfn Queue (spq)
-***************************************************************************/
+ * Slow hwfn Queue (spq)
+ ***************************************************************************/
 void qed_spq_setup(struct qed_hwfn *p_hwfn)
 {
 	struct qed_spq *p_spq = p_hwfn->p_spq;
@@ -549,7 +546,7 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn)
 	int ret;
 
 	/* SPQ struct */
-	p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL);
+	p_spq = kzalloc(sizeof(*p_spq), GFP_KERNEL);
 	if (!p_spq)
 		return -ENOMEM;
 
@@ -677,7 +674,6 @@ static int qed_spq_add_entry(struct qed_hwfn *p_hwfn,
 	struct qed_spq *p_spq = p_hwfn->p_spq;
 
 	if (p_ent->queue == &p_spq->unlimited_pending) {
-
 		if (list_empty(&p_spq->free_pool)) {
 			list_add_tail(&p_ent->list, &p_spq->unlimited_pending);
 			p_spq->unlimited_pending_count++;
@@ -726,8 +722,8 @@ static int qed_spq_add_entry(struct qed_hwfn *p_hwfn,
 }
 
 /***************************************************************************
-* Accessor
-***************************************************************************/
+ * Accessor
+ ***************************************************************************/
 u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn)
 {
 	if (!p_hwfn->p_spq)
@@ -736,8 +732,8 @@ u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn)
 }
 
 /***************************************************************************
-* Posting new Ramrods
-***************************************************************************/
+ * Posting new Ramrods
+ ***************************************************************************/
 static int qed_spq_post_list(struct qed_hwfn *p_hwfn,
 			     struct list_head *head, u32 keep_reserve)
 {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 2a67b1308fe0..9556a2c4d3a6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -20,12 +20,13 @@
 #include "qed_sp.h"
 #include "qed_sriov.h"
 #include "qed_vf.h"
-static int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
-			       u8 opcode,
-			       __le16 echo,
-			       union event_ring_data *data, u8 fw_return_code);
 static int qed_iov_bulletin_set_mac(struct qed_hwfn *p_hwfn, u8 *mac, int vfid);
 
+static u16 qed_vf_from_entity_id(__le16 entity_id)
+{
+	return le16_to_cpu(entity_id) - MAX_NUM_PFS;
+}
+
 static u8 qed_vf_calculate_legacy(struct qed_vf_info *p_vf)
 {
 	u8 legacy = 0;
@@ -170,8 +171,8 @@ static struct qed_vf_info *qed_iov_get_vf_info(struct qed_hwfn *p_hwfn,
 				  b_enabled_only, false))
 		vf = &p_hwfn->pf_iov_info->vfs_array[relative_vf_id];
 	else
-		DP_ERR(p_hwfn, "qed_iov_get_vf_info: VF[%d] is not enabled\n",
-		       relative_vf_id);
+		DP_ERR(p_hwfn, "%s: VF[%d] is not enabled\n",
+		       __func__, relative_vf_id);
 
 	return vf;
 }
@@ -309,7 +310,7 @@ static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn,
 	struct qed_dmae_params params;
 	struct qed_vf_info *p_vf;
 
-	p_vf = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	p_vf = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!p_vf)
 		return -EINVAL;
 
@@ -421,7 +422,7 @@ static void qed_iov_setup_vfdb(struct qed_hwfn *p_hwfn)
 	bulletin_p = p_iov_info->bulletins_phys;
 	if (!p_req_virt_addr || !p_reply_virt_addr || !p_bulletin_virt) {
 		DP_ERR(p_hwfn,
-		       "qed_iov_setup_vfdb called without allocating mem first\n");
+		       "%s called without allocating mem first\n", __func__);
 		return;
 	}
 
@@ -465,7 +466,7 @@ static int qed_iov_allocate_vfdb(struct qed_hwfn *p_hwfn)
 	num_vfs = p_hwfn->cdev->p_iov_info->total_vfs;
 
 	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-		   "qed_iov_allocate_vfdb for %d VFs\n", num_vfs);
+		   "%s for %d VFs\n", __func__, num_vfs);
 
 	/* Allocate PF Mailbox buffer (per-VF) */
 	p_iov_info->mbx_msg_size = sizeof(union vfpf_tlvs) * num_vfs;
@@ -501,10 +502,10 @@ static int qed_iov_allocate_vfdb(struct qed_hwfn *p_hwfn)
 		   QED_MSG_IOV,
 		   "PF's Requests mailbox [%p virt 0x%llx phys],  Response mailbox [%p virt 0x%llx phys] Bulletins [%p virt 0x%llx phys]\n",
 		   p_iov_info->mbx_msg_virt_addr,
-		   (u64) p_iov_info->mbx_msg_phys_addr,
+		   (u64)p_iov_info->mbx_msg_phys_addr,
 		   p_iov_info->mbx_reply_virt_addr,
-		   (u64) p_iov_info->mbx_reply_phys_addr,
-		   p_iov_info->p_bulletins, (u64) p_iov_info->bulletins_phys);
+		   (u64)p_iov_info->mbx_reply_phys_addr,
+		   p_iov_info->p_bulletins, (u64)p_iov_info->bulletins_phys);
 
 	return 0;
 }
@@ -609,7 +610,7 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
 	if (rc)
 		return rc;
 
-	/* We want PF IOV to be synonemous with the existance of p_iov_info;
+	/* We want PF IOV to be synonemous with the existence of p_iov_info;
 	 * In case the capability is published but there are no VFs, simply
 	 * de-allocate the struct.
 	 */
@@ -715,12 +716,12 @@ static void qed_iov_vf_igu_reset(struct qed_hwfn *p_hwfn,
 	int i;
 
 	/* Set VF masks and configuration - pretend */
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) vf->concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)vf->concrete_fid);
 
 	qed_wr(p_hwfn, p_ptt, IGU_REG_STATISTIC_NUM_VF_MSG_SENT, 0);
 
 	/* unpretend */
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) p_hwfn->hw_info.concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)p_hwfn->hw_info.concrete_fid);
 
 	/* iterate over all queues, clear sb consumer */
 	for (i = 0; i < vf->num_sbs; i++)
@@ -735,7 +736,7 @@ static void qed_iov_vf_igu_set_int(struct qed_hwfn *p_hwfn,
 {
 	u32 igu_vf_conf;
 
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) vf->concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)vf->concrete_fid);
 
 	igu_vf_conf = qed_rd(p_hwfn, p_ptt, IGU_REG_VF_CONFIGURATION);
 
@@ -747,7 +748,7 @@ static void qed_iov_vf_igu_set_int(struct qed_hwfn *p_hwfn,
 	qed_wr(p_hwfn, p_ptt, IGU_REG_VF_CONFIGURATION, igu_vf_conf);
 
 	/* unpretend */
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) p_hwfn->hw_info.concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)p_hwfn->hw_info.concrete_fid);
 }
 
 static int
@@ -808,7 +809,7 @@ static int qed_iov_enable_vf_access(struct qed_hwfn *p_hwfn,
 	if (rc)
 		return rc;
 
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) vf->concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)vf->concrete_fid);
 
 	SET_FIELD(igu_vf_conf, IGU_VF_CONF_PARENT, p_hwfn->rel_pf_id);
 	STORE_RT_REG(p_hwfn, IGU_REG_VF_CONFIGURATION_RT_OFFSET, igu_vf_conf);
@@ -817,7 +818,7 @@ static int qed_iov_enable_vf_access(struct qed_hwfn *p_hwfn,
 		     p_hwfn->hw_info.hw_mode);
 
 	/* unpretend */
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) p_hwfn->hw_info.concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)p_hwfn->hw_info.concrete_fid);
 
 	vf->state = VF_FREE;
 
@@ -905,7 +906,7 @@ static u8 qed_iov_alloc_vf_igu_sbs(struct qed_hwfn *p_hwfn,
 				  p_block->igu_sb_id * sizeof(u64), 2, NULL);
 	}
 
-	vf->num_sbs = (u8) num_rx_queues;
+	vf->num_sbs = (u8)num_rx_queues;
 
 	return vf->num_sbs;
 }
@@ -989,7 +990,7 @@ static int qed_iov_init_hw_for_vf(struct qed_hwfn *p_hwfn,
 
 	vf = qed_iov_get_vf_info(p_hwfn, p_params->rel_vf_id, false);
 	if (!vf) {
-		DP_ERR(p_hwfn, "qed_iov_init_hw_for_vf : vf is NULL\n");
+		DP_ERR(p_hwfn, "%s : vf is NULL\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1093,7 +1094,7 @@ static int qed_iov_release_hw_for_vf(struct qed_hwfn *p_hwfn,
 
 	vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, true);
 	if (!vf) {
-		DP_ERR(p_hwfn, "qed_iov_release_hw_for_vf : vf is NULL\n");
+		DP_ERR(p_hwfn, "%s : vf is NULL\n", __func__);
 		return -EINVAL;
 	}
 
@@ -1546,7 +1547,7 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn,
 	memset(resp, 0, sizeof(*resp));
 
 	/* Write the PF version so that VF would know which version
-	 * is supported - might be later overriden. This guarantees that
+	 * is supported - might be later overridden. This guarantees that
 	 * VF could recognize legacy PF based on lack of versions in reply.
 	 */
 	pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR;
@@ -1898,7 +1899,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
 	int sb_id;
 	int rc;
 
-	vf_info = qed_iov_get_vf_info(p_hwfn, (u16) vf->relative_vf_id, true);
+	vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vf->relative_vf_id, true);
 	if (!vf_info) {
 		DP_NOTICE(p_hwfn->cdev,
 			  "Failed to get VF info, invalid vfid [%d]\n",
@@ -1958,7 +1959,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn,
 	rc = qed_sp_eth_vport_start(p_hwfn, &params);
 	if (rc) {
 		DP_ERR(p_hwfn,
-		       "qed_iov_vf_mbx_start_vport returned error %d\n", rc);
+		       "%s returned error %d\n", __func__, rc);
 		status = PFVF_STATUS_FAILURE;
 	} else {
 		vf->vport_instance++;
@@ -1994,8 +1995,8 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn,
 
 	rc = qed_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id);
 	if (rc) {
-		DP_ERR(p_hwfn, "qed_iov_vf_mbx_stop_vport returned error %d\n",
-		       rc);
+		DP_ERR(p_hwfn, "%s returned error %d\n",
+		       __func__, rc);
 		status = PFVF_STATUS_FAILURE;
 	}
 
@@ -3031,7 +3032,7 @@ static void qed_iov_vf_mbx_vport_update(struct qed_hwfn *p_hwfn,
 		goto out;
 	}
 	p_rss_params = vzalloc(sizeof(*p_rss_params));
-	if (p_rss_params == NULL) {
+	if (!p_rss_params) {
 		status = PFVF_STATUS_FAILURE;
 		goto out;
 	}
@@ -3551,6 +3552,7 @@ out:
 	qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_COALESCE_UPDATE,
 			     sizeof(struct pfvf_def_resp_tlv), status);
 }
+
 static int
 qed_iov_vf_flr_poll_dorq(struct qed_hwfn *p_hwfn,
 			 struct qed_vf_info *p_vf, struct qed_ptt *p_ptt)
@@ -3558,7 +3560,7 @@ qed_iov_vf_flr_poll_dorq(struct qed_hwfn *p_hwfn,
 	int cnt;
 	u32 val;
 
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) p_vf->concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)p_vf->concrete_fid);
 
 	for (cnt = 0; cnt < 50; cnt++) {
 		val = qed_rd(p_hwfn, p_ptt, DORQ_REG_VF_USAGE_CNT);
@@ -3566,7 +3568,7 @@ qed_iov_vf_flr_poll_dorq(struct qed_hwfn *p_hwfn,
 			break;
 		msleep(20);
 	}
-	qed_fid_pretend(p_hwfn, p_ptt, (u16) p_hwfn->hw_info.concrete_fid);
+	qed_fid_pretend(p_hwfn, p_ptt, (u16)p_hwfn->hw_info.concrete_fid);
 
 	if (cnt == 50) {
 		DP_ERR(p_hwfn,
@@ -3843,7 +3845,7 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
 	struct qed_iov_vf_mbx *mbx;
 	struct qed_vf_info *p_vf;
 
-	p_vf = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	p_vf = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!p_vf)
 		return;
 
@@ -3980,7 +3982,7 @@ static void qed_iov_pf_get_pending_events(struct qed_hwfn *p_hwfn, u64 *events)
 static struct qed_vf_info *qed_sriov_get_vf_from_absid(struct qed_hwfn *p_hwfn,
 						       u16 abs_vfid)
 {
-	u8 min = (u8) p_hwfn->cdev->p_iov_info->first_vf_in_pf;
+	u8 min = (u8)p_hwfn->cdev->p_iov_info->first_vf_in_pf;
 
 	if (!_qed_iov_pf_sanity_check(p_hwfn, (int)abs_vfid - min, false)) {
 		DP_VERBOSE(p_hwfn,
@@ -3990,7 +3992,7 @@ static struct qed_vf_info *qed_sriov_get_vf_from_absid(struct qed_hwfn *p_hwfn,
 		return NULL;
 	}
 
-	return &p_hwfn->pf_iov_info->vfs_array[(u8) abs_vfid - min];
+	return &p_hwfn->pf_iov_info->vfs_array[(u8)abs_vfid - min];
 }
 
 static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn,
@@ -4014,13 +4016,13 @@ static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
-static void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn,
-				     struct malicious_vf_eqe_data *p_data)
+void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn,
+			      struct fw_err_data *p_data)
 {
 	struct qed_vf_info *p_vf;
 
-	p_vf = qed_sriov_get_vf_from_absid(p_hwfn, p_data->vf_id);
-
+	p_vf = qed_sriov_get_vf_from_absid(p_hwfn, qed_vf_from_entity_id
+					   (p_data->entity_id));
 	if (!p_vf)
 		return;
 
@@ -4037,16 +4039,13 @@ static void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn,
 	}
 }
 
-static int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, u8 opcode, __le16 echo,
-			       union event_ring_data *data, u8 fw_return_code)
+int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, u8 opcode, __le16 echo,
+			union event_ring_data *data, u8 fw_return_code)
 {
 	switch (opcode) {
 	case COMMON_EVENT_VF_PF_CHANNEL:
 		return qed_sriov_vfpf_msg(p_hwfn, le16_to_cpu(echo),
 					  &data->vf_pf_channel.msg_addr);
-	case COMMON_EVENT_MALICIOUS_VF:
-		qed_sriov_vfpf_malicious(p_hwfn, &data->malicious_vf);
-		return 0;
 	default:
 		DP_INFO(p_hwfn->cdev, "Unknown sriov eqe event 0x%02x\n",
 			opcode);
@@ -4076,7 +4075,7 @@ static int qed_iov_copy_vf_msg(struct qed_hwfn *p_hwfn, struct qed_ptt *ptt,
 	struct qed_dmae_params params;
 	struct qed_vf_info *vf_info;
 
-	vf_info = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!vf_info)
 		return -EINVAL;
 
@@ -4177,7 +4176,7 @@ static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
 	struct qed_vf_info *vf_info;
 	u64 feature;
 
-	vf_info = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!vf_info) {
 		DP_NOTICE(p_hwfn->cdev,
 			  "Can not set forced MAC, invalid vfid [%d]\n", vfid);
@@ -4227,7 +4226,7 @@ static bool qed_iov_vf_has_vport_instance(struct qed_hwfn *p_hwfn, int vfid)
 {
 	struct qed_vf_info *p_vf_info;
 
-	p_vf_info = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	p_vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!p_vf_info)
 		return false;
 
@@ -4238,7 +4237,7 @@ static bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid)
 {
 	struct qed_vf_info *p_vf_info;
 
-	p_vf_info = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	p_vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!p_vf_info)
 		return true;
 
@@ -4249,7 +4248,7 @@ static bool qed_iov_spoofchk_get(struct qed_hwfn *p_hwfn, int vfid)
 {
 	struct qed_vf_info *vf_info;
 
-	vf_info = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!vf_info)
 		return false;
 
@@ -4267,7 +4266,7 @@ static int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val)
 		goto out;
 	}
 
-	vf = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	vf = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!vf)
 		goto out;
 
@@ -4346,7 +4345,8 @@ static int qed_iov_configure_tx_rate(struct qed_hwfn *p_hwfn,
 		return rc;
 
 	rl_id = abs_vp_id;	/* The "rl_id" is set as the "vport_id" */
-	return qed_init_global_rl(p_hwfn, p_ptt, rl_id, (u32)val);
+	return qed_init_global_rl(p_hwfn, p_ptt, rl_id, (u32)val,
+				  QM_RL_TYPE_NORMAL);
 }
 
 static int
@@ -4377,7 +4377,7 @@ static int qed_iov_get_vf_min_rate(struct qed_hwfn *p_hwfn, int vfid)
 	struct qed_wfq_data *vf_vp_wfq;
 	struct qed_vf_info *vf_info;
 
-	vf_info = qed_iov_get_vf_info(p_hwfn, (u16) vfid, true);
+	vf_info = qed_iov_get_vf_info(p_hwfn, (u16)vfid, true);
 	if (!vf_info)
 		return 0;
 
@@ -4396,8 +4396,10 @@ static int qed_iov_get_vf_min_rate(struct qed_hwfn *p_hwfn, int vfid)
  */
 void qed_schedule_iov(struct qed_hwfn *hwfn, enum qed_iov_wq_flag flag)
 {
+	/* Memory barrier for setting atomic bit */
 	smp_mb__before_atomic();
 	set_bit(flag, &hwfn->iov_task_flags);
+	/* Memory barrier after setting atomic bit */
 	smp_mb__after_atomic();
 	DP_VERBOSE(hwfn, QED_MSG_IOV, "Scheduling iov task [Flag: %d]\n", flag);
 	queue_delayed_work(hwfn->iov_wq, &hwfn->iov_task, 0);
@@ -4408,8 +4410,8 @@ void qed_vf_start_iov_wq(struct qed_dev *cdev)
 	int i;
 
 	for_each_hwfn(cdev, i)
-	    queue_delayed_work(cdev->hwfns[i].iov_wq,
-			       &cdev->hwfns[i].iov_task, 0);
+		queue_delayed_work(cdev->hwfns[i].iov_wq,
+				   &cdev->hwfns[i].iov_task, 0);
 }
 
 int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
@@ -4417,8 +4419,8 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 	int i, j;
 
 	for_each_hwfn(cdev, i)
-	    if (cdev->hwfns[i].iov_wq)
-		flush_workqueue(cdev->hwfns[i].iov_wq);
+		if (cdev->hwfns[i].iov_wq)
+			flush_workqueue(cdev->hwfns[i].iov_wq);
 
 	/* Mark VFs for disablement */
 	qed_iov_set_vfs_to_disable(cdev, true);
@@ -5011,7 +5013,7 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn)
 	}
 
 	qed_for_each_vf(hwfn, i)
-	    qed_iov_post_vf_bulletin(hwfn, i, ptt);
+		qed_iov_post_vf_bulletin(hwfn, i, ptt);
 
 	qed_ptt_release(hwfn, ptt);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index 9f81295c6f45..1edf9c44dc67 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -142,7 +142,7 @@ struct qed_vf_queue {
 
 enum vf_state {
 	VF_FREE = 0,		/* VF ready to be acquired holds no resc */
-	VF_ACQUIRED,		/* VF, acquired, but not initalized */
+	VF_ACQUIRED,		/* VF, acquired, but not initialized */
 	VF_ENABLED,		/* VF, Enabled */
 	VF_RESET,		/* VF, FLR'd, pending cleanup */
 	VF_STOPPED		/* VF, Stopped */
@@ -313,6 +313,31 @@ void *qed_add_tlv(struct qed_hwfn *p_hwfn, u8 **offset, u16 type, u16 length);
  */
 void qed_dp_tlv_list(struct qed_hwfn *p_hwfn, void *tlvs_list);
 
+/**
+ * qed_sriov_vfpf_malicious(): Handle malicious VF/PF.
+ *
+ * @p_hwfn: HW device data.
+ * @p_data: Pointer to data.
+ *
+ * Return: Void.
+ */
+void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn,
+			      struct fw_err_data *p_data);
+
+/**
+ * qed_sriov_eqe_event(): Callback for SRIOV events.
+ *
+ * @p_hwfn: HW device data.
+ * @opcode: Opcode.
+ * @echo: Echo.
+ * @data: data
+ * @fw_return_code: FW return code.
+ *
+ * Return: Int.
+ */
+int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn, u8 opcode, __le16 echo,
+			union event_ring_data *data, u8  fw_return_code);
+
 /**
  * qed_iov_alloc(): allocate sriov related resources
  *
diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h
index cd1207ad4ada..c84e08bc6802 100644
--- a/include/linux/qed/eth_common.h
+++ b/include/linux/qed/eth_common.h
@@ -67,6 +67,7 @@
 /* Ethernet vport update constants */
 #define ETH_FILTER_RULES_COUNT		10
 #define ETH_RSS_IND_TABLE_ENTRIES_NUM	128
+#define ETH_RSS_IND_TABLE_MASK_SIZE_REGS    (ETH_RSS_IND_TABLE_ENTRIES_NUM / 32)
 #define ETH_RSS_KEY_SIZE_REGS		10
 #define ETH_RSS_ENGINE_NUM_K2		207
 #define ETH_RSS_ENGINE_NUM_BB		127
diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h
index bab078b25834..6dfed163ab6c 100644
--- a/include/linux/qed/rdma_common.h
+++ b/include/linux/qed/rdma_common.h
@@ -27,6 +27,7 @@
 #define RDMA_MAX_PDS			(64 * 1024)
 #define RDMA_MAX_XRC_SRQS                       (1024)
 #define RDMA_MAX_SRQS                           (32 * 1024)
+#define RDMA_MAX_IRQ_ELEMS_IN_PAGE      (128)
 
 #define RDMA_NUM_STATISTIC_COUNTERS	MAX_NUM_VPORTS
 #define RDMA_NUM_STATISTIC_COUNTERS_K2	MAX_NUM_VPORTS_K2
-- 
cgit v1.2.3


From 3a6f5d0cbda37f24f60ca778bd1675125b7d594f Mon Sep 17 00:00:00 2001
From: Nikolay Assa <nassa@marvell.com>
Date: Mon, 4 Oct 2021 09:58:49 +0300
Subject: qed: Update TCP silly-window-syndrome timeout for iwarp, scsi

Update TCP silly-window-syndrome timeout, for the cases where
initiator's small TCP window size prevents FW from transmitting
packets on the connection. Timeout causes FW to retransmit
window probes if needed, preventing I/O stall if initiator ignores
first window probe.

Reviewed-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Nikolay Assa <nassa@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 1 +
 drivers/scsi/qedi/qedi_main.c               | 1 +
 include/linux/qed/qed_if.h                  | 2 ++
 3 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 186d0048a9d1..b2c0e522e51f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -114,6 +114,7 @@ qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
 	    RESC_START(p_hwfn, QED_LL2_RAM_QUEUE) +
 	    p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle;
 
+	p_ramrod->tcp.tx_sws_timer = cpu_to_le16(QED_TX_SWS_TIMER_DFLT);
 	p_ramrod->tcp.max_fin_rt = QED_IWARP_MAX_FIN_RT_DEFAULT;
 
 	return;
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index fe36ddb82aef..8c2f32655424 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -866,6 +866,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi)
 	qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues;
 	qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug;
 	qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000;
+	qedi->pf_params.iscsi_pf_params.tx_sws_timer = QED_TX_SWS_TIMER_DFLT;
 	qedi->pf_params.iscsi_pf_params.max_fin_rt = 2;
 
 	for (log_page_size = 0 ; log_page_size < 32 ; log_page_size++) {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 4dcd0d37a521..07f8d19421ab 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -24,6 +24,8 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <net/devlink.h>
 
+#define QED_TX_SWS_TIMER_DFLT  500
+
 enum dcbx_protocol_type {
 	DCBX_PROTOCOL_ISCSI,
 	DCBX_PROTOCOL_FCOE,
-- 
cgit v1.2.3


From a64aa0a8b991e3cae61ee8322cf936c00068a5c1 Mon Sep 17 00:00:00 2001
From: Prabhakar Kushwaha <pkushwaha@marvell.com>
Date: Mon, 4 Oct 2021 09:58:50 +0300
Subject: qed: Update the TCP active termination 2 MSL timer ("TIME_WAIT")

Initialize 2 MSL timeout value used for the TCP TIME_WAIT state to
non-zero default.

This patch also removes magic number from qedi/qedi_main.c.

Reviewed-by: Manish Rangankar <mrangankar@marvell.com>
Signed-off-by: Nikolay Assa <nassa@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_iwarp.c | 1 +
 drivers/scsi/qedi/qedi_main.c               | 2 +-
 include/linux/qed/qed_if.h                  | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index b2c0e522e51f..1d1d4caad680 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -115,6 +115,7 @@ qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
 	    p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle;
 
 	p_ramrod->tcp.tx_sws_timer = cpu_to_le16(QED_TX_SWS_TIMER_DFLT);
+	p_ramrod->tcp.two_msl_timer = cpu_to_le32(QED_TWO_MSL_TIMER_DFLT);
 	p_ramrod->tcp.max_fin_rt = QED_IWARP_MAX_FIN_RT_DEFAULT;
 
 	return;
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 8c2f32655424..1dec814d8788 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -865,7 +865,7 @@ static int qedi_set_iscsi_pf_param(struct qedi_ctx *qedi)
 	qedi->pf_params.iscsi_pf_params.num_uhq_pages_in_ring = num_sq_pages;
 	qedi->pf_params.iscsi_pf_params.num_queues = qedi->num_queues;
 	qedi->pf_params.iscsi_pf_params.debug_mode = qedi_fw_debug;
-	qedi->pf_params.iscsi_pf_params.two_msl_timer = 4000;
+	qedi->pf_params.iscsi_pf_params.two_msl_timer = QED_TWO_MSL_TIMER_DFLT;
 	qedi->pf_params.iscsi_pf_params.tx_sws_timer = QED_TX_SWS_TIMER_DFLT;
 	qedi->pf_params.iscsi_pf_params.max_fin_rt = 2;
 
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 07f8d19421ab..ad220d5da18f 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -25,6 +25,7 @@
 #include <net/devlink.h>
 
 #define QED_TX_SWS_TIMER_DFLT  500
+#define QED_TWO_MSL_TIMER_DFLT 4000
 
 enum dcbx_protocol_type {
 	DCBX_PROTOCOL_ISCSI,
-- 
cgit v1.2.3


From a2c27a61b4335344c1bacaade61e9b2dc6e12a76 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Mon, 4 Oct 2021 12:03:28 +0100
Subject: net: phylink: add phylink_set_10g_modes() helper

Add a helper for setting 10Gigabit modes, so we have one central
place that sets all appropriate 10G modes for a driver.

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 11 +++++++++++
 include/linux/phylink.h   |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 5a58c77d0002..b32774fd65f8 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -132,6 +132,17 @@ void phylink_set_port_modes(unsigned long *mask)
 }
 EXPORT_SYMBOL_GPL(phylink_set_port_modes);
 
+void phylink_set_10g_modes(unsigned long *mask)
+{
+	phylink_set(mask, 10000baseT_Full);
+	phylink_set(mask, 10000baseCR_Full);
+	phylink_set(mask, 10000baseSR_Full);
+	phylink_set(mask, 10000baseLR_Full);
+	phylink_set(mask, 10000baseLRM_Full);
+	phylink_set(mask, 10000baseER_Full);
+}
+EXPORT_SYMBOL_GPL(phylink_set_10g_modes);
+
 static int phylink_is_empty_linkmode(const unsigned long *linkmode)
 {
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(tmp) = { 0, };
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 237291196ce2..f7b5ed06a815 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -484,6 +484,7 @@ int phylink_speed_up(struct phylink *pl);
 #define phylink_test(bm, mode)	__phylink_do_bit(test_bit, bm, mode)
 
 void phylink_set_port_modes(unsigned long *bits);
+void phylink_set_10g_modes(unsigned long *mask);
 void phylink_helper_basex_speed(struct phylink_link_state *state);
 
 void phylink_mii_c22_pcs_get_state(struct mdio_device *pcs,
-- 
cgit v1.2.3


From 571e5c0efcb29c5dac8cf2949d3eed84ec43056c Mon Sep 17 00:00:00 2001
From: Richard Guy Briggs <rgb@redhat.com>
Date: Wed, 19 May 2021 16:00:22 -0400
Subject: audit: add OPENAT2 record to list "how" info

Since the openat2(2) syscall uses a struct open_how pointer to communicate
its parameters they are not usefully recorded by the audit SYSCALL record's
four existing arguments.

Add a new audit record type OPENAT2 that reports the parameters in its
third argument, struct open_how with fields oflag, mode and resolve.

The new record in the context of an event would look like:
time->Wed Mar 17 16:28:53 2021
type=PROCTITLE msg=audit(1616012933.531:184): proctitle=
  73797363616C6C735F66696C652F6F70656E617432002F746D702F61756469742D
  7465737473756974652D737641440066696C652D6F70656E617432
type=PATH msg=audit(1616012933.531:184): item=1 name="file-openat2"
  inode=29 dev=00:1f mode=0100600 ouid=0 ogid=0 rdev=00:00
  obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE
  cap_fp=0 cap_fi=0 cap_fe=0 cap_fver=0 cap_frootid=0
type=PATH msg=audit(1616012933.531:184):
  item=0 name="/root/rgb/git/audit-testsuite/tests"
  inode=25 dev=00:1f mode=040700 ouid=0 ogid=0 rdev=00:00
  obj=unconfined_u:object_r:user_tmp_t:s0 nametype=PARENT
  cap_fp=0 cap_fi=0 cap_fe=0 cap_fver=0 cap_frootid=0
type=CWD msg=audit(1616012933.531:184):
  cwd="/root/rgb/git/audit-testsuite/tests"
type=OPENAT2 msg=audit(1616012933.531:184):
  oflag=0100302 mode=0600 resolve=0xa
type=SYSCALL msg=audit(1616012933.531:184): arch=c000003e syscall=437
  success=yes exit=4 a0=3 a1=7ffe315f1c53 a2=7ffe315f1550 a3=18
  items=2 ppid=528 pid=540 auid=0 uid=0 gid=0 euid=0 suid=0
  fsuid=0 egid=0 sgid=0 fsgid=0 tty=ttyS0 ses=1 comm="openat2"
  exe="/root/rgb/git/audit-testsuite/tests/syscalls_file/openat2"
  subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
  key="testsuite-1616012933-bjAUcEPO"

Link: https://lore.kernel.org/r/d23fbb89186754487850367224b060e26f9b7181.1621363275.git.rgb@redhat.com
Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
Acked-by: Christian Brauner <christian.brauner@ubuntu.com>
[PM: tweak subject, wrap example, move AUDIT_OPENAT2 to 1337]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/open.c                  |  2 ++
 include/linux/audit.h      | 10 ++++++++++
 include/uapi/linux/audit.h |  1 +
 kernel/audit.h             |  2 ++
 kernel/auditsc.c           | 18 +++++++++++++++++-
 5 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/open.c b/fs/open.c
index daa324606a41..a7f6cab81267 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1248,6 +1248,8 @@ SYSCALL_DEFINE4(openat2, int, dfd, const char __user *, filename,
 	if (err)
 		return err;
 
+	audit_openat2_how(&tmp);
+
 	/* O_LARGEFILE is only allowed for non-O_PATH. */
 	if (!(tmp.flags & O_PATH) && force_o_largefile())
 		tmp.flags |= O_LARGEFILE;
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 5fbeeeb6b726..0e0eb4c0fa10 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -399,6 +399,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
 				  const struct cred *old);
 extern void __audit_log_capset(const struct cred *new, const struct cred *old);
 extern void __audit_mmap_fd(int fd, int flags);
+extern void __audit_openat2_how(struct open_how *how);
 extern void __audit_log_kern_module(char *name);
 extern void __audit_fanotify(unsigned int response);
 extern void __audit_tk_injoffset(struct timespec64 offset);
@@ -495,6 +496,12 @@ static inline void audit_mmap_fd(int fd, int flags)
 		__audit_mmap_fd(fd, flags);
 }
 
+static inline void audit_openat2_how(struct open_how *how)
+{
+	if (unlikely(!audit_dummy_context()))
+		__audit_openat2_how(how);
+}
+
 static inline void audit_log_kern_module(char *name)
 {
 	if (!audit_dummy_context())
@@ -646,6 +653,9 @@ static inline void audit_log_capset(const struct cred *new,
 static inline void audit_mmap_fd(int fd, int flags)
 { }
 
+static inline void audit_openat2_how(struct open_how *how)
+{ }
+
 static inline void audit_log_kern_module(char *name)
 {
 }
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index daa481729e9b..afa2472ad5d6 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -118,6 +118,7 @@
 #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
 #define AUDIT_BPF		1334	/* BPF subsystem */
 #define AUDIT_EVENT_LISTENER	1335	/* Task joined multicast read socket */
+#define AUDIT_OPENAT2		1337	/* Record showing openat2 how args */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/kernel/audit.h b/kernel/audit.h
index d6a2c899a8db..3b64a97f6091 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -14,6 +14,7 @@
 #include <linux/skbuff.h>
 #include <uapi/linux/mqueue.h>
 #include <linux/tty.h>
+#include <uapi/linux/openat2.h> // struct open_how
 
 /* AUDIT_NAMES is the number of slots we reserve in the audit_context
  * for saving names from getname().  If we get more names we will allocate
@@ -188,6 +189,7 @@ struct audit_context {
 			int			fd;
 			int			flags;
 		} mmap;
+		struct open_how openat2;
 		struct {
 			int			argc;
 		} execve;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 8c4335a35274..a4ba53f5354e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -63,7 +63,7 @@
 #include <linux/fsnotify_backend.h>
 #include <uapi/linux/limits.h>
 #include <uapi/linux/netfilter/nf_tables.h>
-#include <uapi/linux/openat2.h>
+#include <uapi/linux/openat2.h> // struct open_how
 
 #include "audit.h"
 
@@ -1306,6 +1306,12 @@ static void show_special(struct audit_context *context, int *call_panic)
 		audit_log_format(ab, "fd=%d flags=0x%x", context->mmap.fd,
 				 context->mmap.flags);
 		break;
+	case AUDIT_OPENAT2:
+		audit_log_format(ab, "oflag=0%llo mode=0%llo resolve=0x%llx",
+				 context->openat2.flags,
+				 context->openat2.mode,
+				 context->openat2.resolve);
+		break;
 	case AUDIT_EXECVE:
 		audit_log_execve_info(context, &ab);
 		break;
@@ -2536,6 +2542,16 @@ void __audit_mmap_fd(int fd, int flags)
 	context->type = AUDIT_MMAP;
 }
 
+void __audit_openat2_how(struct open_how *how)
+{
+	struct audit_context *context = audit_context();
+
+	context->openat2.flags = how->flags;
+	context->openat2.mode = how->mode;
+	context->openat2.resolve = how->resolve;
+	context->type = AUDIT_OPENAT2;
+}
+
 void __audit_log_kern_module(char *name)
 {
 	struct audit_context *context = audit_context();
-- 
cgit v1.2.3


From 5249001d69a223811ad654884c6115d55158fd51 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Wed, 1 Sep 2021 19:05:02 +0300
Subject: net/mlx5: Bridge, mark reg_c1 when pushing VLAN

On ingress VLAN push also assign value 0x7FE to reg_c1 tunnel id+opts
bits (tunnel id 0, which is not a valid tunnel id, and option 0x7FE which
was reserved by one of previous patches in the series). In following patch
the reg value is matched on egress miss to restore the packet to its
original state by removing the VLAN before passing it to the software data
path.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/esw/bridge.c   | 40 +++++++++++++++++++++-
 .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h  |  1 +
 include/linux/mlx5/eswitch.h                       |  9 +++++
 3 files changed, 49 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
index 8361dfc0bf1a..439b67b4380f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
@@ -465,8 +465,10 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char
 		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num));
 
 	if (vlan && vlan->pkt_reformat_push) {
-		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+			MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 		flow_act.pkt_reformat = vlan->pkt_reformat_push;
+		flow_act.modify_hdr = vlan->pkt_mod_hdr_push_mark;
 	} else if (vlan) {
 		MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria,
 				 outer_headers.cvlan_tag);
@@ -845,6 +847,33 @@ mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_
 	vlan->pkt_reformat_pop = NULL;
 }
 
+static int
+mlx5_esw_bridge_vlan_push_mark_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+	struct mlx5_modify_hdr *pkt_mod_hdr;
+
+	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_1);
+	MLX5_SET(set_action_in, action, offset, 8);
+	MLX5_SET(set_action_in, action, length, ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS);
+	MLX5_SET(set_action_in, action, data, ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN);
+
+	pkt_mod_hdr = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_FDB, 1, action);
+	if (IS_ERR(pkt_mod_hdr))
+		return PTR_ERR(pkt_mod_hdr);
+
+	vlan->pkt_mod_hdr_push_mark = pkt_mod_hdr;
+	return 0;
+}
+
+static void
+mlx5_esw_bridge_vlan_push_mark_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw)
+{
+	mlx5_modify_header_dealloc(esw->dev, vlan->pkt_mod_hdr_push_mark);
+	vlan->pkt_mod_hdr_push_mark = NULL;
+}
+
 static struct mlx5_esw_bridge_vlan *
 mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port,
 			    struct mlx5_eswitch *esw)
@@ -864,6 +893,10 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por
 		err = mlx5_esw_bridge_vlan_push_create(vlan, esw);
 		if (err)
 			goto err_vlan_push;
+
+		err = mlx5_esw_bridge_vlan_push_mark_create(vlan, esw);
+		if (err)
+			goto err_vlan_push_mark;
 	}
 	if (flags & BRIDGE_VLAN_INFO_UNTAGGED) {
 		err = mlx5_esw_bridge_vlan_pop_create(vlan, esw);
@@ -882,6 +915,9 @@ err_xa_insert:
 	if (vlan->pkt_reformat_pop)
 		mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
 err_vlan_pop:
+	if (vlan->pkt_mod_hdr_push_mark)
+		mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
+err_vlan_push_mark:
 	if (vlan->pkt_reformat_push)
 		mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
 err_vlan_push:
@@ -908,6 +944,8 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan,
 
 	if (vlan->pkt_reformat_pop)
 		mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw);
+	if (vlan->pkt_mod_hdr_push_mark)
+		mlx5_esw_bridge_vlan_push_mark_cleanup(vlan, esw);
 	if (vlan->pkt_reformat_push)
 		mlx5_esw_bridge_vlan_push_cleanup(vlan, esw);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
index 52964a82d6a6..878311fe950a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
@@ -49,6 +49,7 @@ struct mlx5_esw_bridge_vlan {
 	struct list_head fdb_list;
 	struct mlx5_pkt_reformat *pkt_reformat_push;
 	struct mlx5_pkt_reformat *pkt_reformat_pop;
+	struct mlx5_modify_hdr *pkt_mod_hdr_push_mark;
 };
 
 struct mlx5_esw_bridge_port {
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 4ab5c1fc1270..97afcea39a7b 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -130,11 +130,20 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 #define ESW_TUN_OPTS_MASK GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, ESW_TUN_OPTS_OFFSET)
 #define ESW_TUN_MASK GENMASK(31 - ESW_RESERVED_BITS, ESW_TUN_OFFSET)
 #define ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT 0 /* 0 is not a valid tunnel id */
+#define ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT
 /* 0x7FF is a reserved mapping */
 #define ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT GENMASK(ESW_TUN_OPTS_BITS - 1, 0)
 #define ESW_TUN_SLOW_TABLE_GOTO_VPORT ((ESW_TUN_ID_SLOW_TABLE_GOTO_VPORT << ESW_TUN_OPTS_BITS) | \
 				       ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT)
 #define ESW_TUN_SLOW_TABLE_GOTO_VPORT_MARK ESW_TUN_OPTS_MASK
+/* 0x7FE is a reserved mapping for bridge ingress push vlan mark */
+#define ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN (ESW_TUN_OPTS_SLOW_TABLE_GOTO_VPORT - 1)
+#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN ((ESW_TUN_ID_BRIDGE_INGRESS_PUSH_VLAN << \
+					   ESW_TUN_OPTS_BITS) | \
+					  ESW_TUN_OPTS_BRIDGE_INGRESS_PUSH_VLAN)
+#define ESW_TUN_BRIDGE_INGRESS_PUSH_VLAN_MARK \
+	GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
+		ESW_TUN_OPTS_OFFSET + 1)
 
 u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 3663ad34bc707fc85492f4d83a313f5df84718d4 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Thu, 19 Aug 2021 16:18:57 +0300
Subject: net/mlx5: Shift control IRQ to the last index

Control IRQ is the first IRQ vector. This complicates handling of
completion irqs as we need to offset them by one.
in the next patch, there are scenarios where completion and control EQs
will share the same irq. for example: functions with single IRQ. To ease
such scenarios, we shift control IRQ to the end of the irq array.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/infiniband/hw/mlx5/odp.c                   |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  9 +++++----
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h |  2 --
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c  | 10 +++++-----
 include/linux/mlx5/driver.h                        |  2 ++
 5 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d0d98e584ebc..81147d774dd2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1559,6 +1559,7 @@ int mlx5r_odp_create_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
 
 	eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
 	param = (struct mlx5_eq_param) {
+		.irq_index = MLX5_IRQ_EQ_CTRL,
 		.nent = MLX5_IB_NUM_PF_EQE,
 	};
 	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 605c8ecc3610..792e0d6aa861 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -632,6 +632,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 	mlx5_eq_notifier_register(dev, &table->cq_err_nb);
 
 	param = (struct mlx5_eq_param) {
+		.irq_index = MLX5_IRQ_EQ_CTRL,
 		.nent = MLX5_NUM_CMD_EQE,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
 	};
@@ -644,6 +645,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 
 	param = (struct mlx5_eq_param) {
+		.irq_index = MLX5_IRQ_EQ_CTRL,
 		.nent = MLX5_NUM_ASYNC_EQE,
 	};
 
@@ -653,6 +655,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 		goto err2;
 
 	param = (struct mlx5_eq_param) {
+		.irq_index = MLX5_IRQ_EQ_CTRL,
 		.nent = /* TODO: sriov max_vf + */ 1,
 		.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
 	};
@@ -806,8 +809,8 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 	ncomp_eqs = table->num_comp_eqs;
 	nent = MLX5_COMP_EQ_SIZE;
 	for (i = 0; i < ncomp_eqs; i++) {
-		int vecidx = i + MLX5_IRQ_VEC_COMP_BASE;
 		struct mlx5_eq_param param = {};
+		int vecidx = i;
 
 		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
 		if (!eq) {
@@ -953,9 +956,7 @@ static int set_rmap(struct mlx5_core_dev *mdev)
 		goto err_out;
 	}
 
-	vecidx = MLX5_IRQ_VEC_COMP_BASE;
-	for (; vecidx < eq_table->num_comp_eqs + MLX5_IRQ_VEC_COMP_BASE;
-	     vecidx++) {
+	for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
 		err = irq_cpu_rmap_add(eq_table->rmap,
 				       pci_irq_vector(mdev->pdev, vecidx));
 		if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index abd024173c42..8116815663a7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -8,8 +8,6 @@
 
 #define MLX5_COMP_EQS_PER_SF 8
 
-#define MLX5_IRQ_EQ_CTRL (0)
-
 struct mlx5_irq;
 
 int mlx5_irq_table_init(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 763c83a02380..a66144b54fc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -194,15 +194,14 @@ static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 	snprintf(name, MLX5_MAX_IRQ_NAME, "%s%d", pool->name, vecidx);
 }
 
-static void irq_set_name(char *name, int vecidx)
+static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 {
-	if (vecidx == 0) {
+	if (vecidx == pool->xa_num_irqs.max) {
 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
 		return;
 	}
 
-	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d",
-		 vecidx - MLX5_IRQ_VEC_COMP_BASE);
+	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 }
 
 static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
@@ -217,7 +216,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
 		return ERR_PTR(-ENOMEM);
 	irq->irqn = pci_irq_vector(dev->pdev, i);
 	if (!pool->name[0])
-		irq_set_name(name, i);
+		irq_set_name(pool, name, i);
 	else
 		irq_sf_set_name(pool, name, i);
 	ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
@@ -440,6 +439,7 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
 	}
 pf_irq:
 	pool = irq_table->pf_pool;
+	vecidx = (vecidx == MLX5_IRQ_EQ_CTRL) ? pool->xa_num_irqs.max : vecidx;
 	irq = irq_pool_request_vector(pool, vecidx, affinity);
 out:
 	if (IS_ERR(irq))
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373..0ca719c00824 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -59,6 +59,8 @@
 
 #define MLX5_ADEV_NAME "mlx5_core"
 
+#define MLX5_IRQ_EQ_CTRL (U8_MAX)
+
 enum {
 	MLX5_BOARD_ID_LEN = 64,
 };
-- 
cgit v1.2.3


From f891b7cdbdcda116fd26bbd706f91bd58567aa17 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Sun, 1 Aug 2021 12:08:49 +0300
Subject: net/mlx5: Enable single IRQ for PCI Function

Prior to this patch the driver requires two IRQs to function properly,
one required IRQ for control and at least one required IRQ for IO.

This requirement can be relaxed to one as the driver now allows
sharing of IRQs, so control and IO EQs can share the same irq.

This is needed for high scale amount of VFs.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 26 +++++++++++++++++------
 include/linux/mlx5/eq.h                           |  1 -
 2 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index a66144b54fc8..830444f927d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -196,6 +196,12 @@ static void irq_sf_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 
 static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 {
+	if (!pool->xa_num_irqs.max) {
+		/* in case we only have a single irq for the device */
+		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_combined%d", vecidx);
+		return;
+	}
+
 	if (vecidx == pool->xa_num_irqs.max) {
 		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
 		return;
@@ -204,6 +210,11 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
 	snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 }
 
+static bool irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
+{
+	return !strncmp("mlx5_sf", pool->name, strlen("mlx5_sf"));
+}
+
 static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
 {
 	struct mlx5_core_dev *dev = pool->dev;
@@ -215,7 +226,7 @@ static struct mlx5_irq *irq_request(struct mlx5_irq_pool *pool, int i)
 	if (!irq)
 		return ERR_PTR(-ENOMEM);
 	irq->irqn = pci_irq_vector(dev->pdev, i);
-	if (!pool->name[0])
+	if (!irq_pool_is_sf_pool(pool))
 		irq_set_name(pool, name, i);
 	else
 		irq_sf_set_name(pool, name, i);
@@ -385,6 +396,9 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
 	if (IS_ERR(irq) || !affinity)
 		goto unlock;
 	cpumask_copy(irq->mask, affinity);
+	if (!irq_pool_is_sf_pool(pool) && !pool->xa_num_irqs.max &&
+	    cpumask_empty(irq->mask))
+		cpumask_set_cpu(0, irq->mask);
 	irq_set_affinity_hint(irq->irqn, irq->mask);
 unlock:
 	mutex_unlock(&pool->lock);
@@ -577,6 +591,8 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
 
 int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
 {
+	if (!table->pf_pool->xa_num_irqs.max)
+		return 1;
 	return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
 }
 
@@ -592,19 +608,15 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
 	if (mlx5_core_is_sf(dev))
 		return 0;
 
-	pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() +
-		 MLX5_IRQ_VEC_COMP_BASE;
+	pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
 	pf_vec = min_t(int, pf_vec, num_eqs);
-	if (pf_vec <= MLX5_IRQ_VEC_COMP_BASE)
-		return -ENOMEM;
 
 	total_vec = pf_vec;
 	if (mlx5_sf_max_functions(dev))
 		total_vec += MLX5_IRQ_CTRL_SF_MAX +
 			MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
 
-	total_vec = pci_alloc_irq_vectors(dev->pdev, MLX5_IRQ_VEC_COMP_BASE + 1,
-					  total_vec, PCI_IRQ_MSIX);
+	total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
 	if (total_vec < 0)
 		return total_vec;
 	pf_vec = min(pf_vec, total_vec);
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index cea6ecb4b73e..ea3ff5a8ced3 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -4,7 +4,6 @@
 #ifndef MLX5_CORE_EQ_H
 #define MLX5_CORE_EQ_H
 
-#define MLX5_IRQ_VEC_COMP_BASE 1
 #define MLX5_NUM_CMD_EQE   (32)
 #define MLX5_NUM_ASYNC_EQE (0x1000)
 #define MLX5_NUM_SPARE_EQE (0x80)
-- 
cgit v1.2.3


From e68ce0faf29c7c268666e11e95bf27dca97d28b0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 2 Sep 2021 14:47:48 +0200
Subject: mfd: hi6421-spmi-pmic: Cleanup drvdata to only include regmap

There are lots of fields in struct hi6421_spmi_pmic that aren't
used. As a matter of fact, only regmap is needed.

So, drop the struct as a whole, and set regmap as the drvdata.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/1828cb783b1ebca0b98bf0b3077d8701adb228f7.1630586862.git.mchehab+huawei@kernel.org
---
 drivers/mfd/hi6421-spmi-pmic.c           | 16 +++++-----------
 drivers/misc/hi6421v600-irq.c            |  9 ++++-----
 drivers/regulator/hi6421v600-regulator.c | 10 +++++-----
 include/linux/mfd/hi6421-spmi-pmic.h     | 25 -------------------------
 4 files changed, 14 insertions(+), 46 deletions(-)
 delete mode 100644 include/linux/mfd/hi6421-spmi-pmic.h

(limited to 'include/linux')

diff --git a/drivers/mfd/hi6421-spmi-pmic.c b/drivers/mfd/hi6421-spmi-pmic.c
index 4f136826681b..c9c0c3d7011f 100644
--- a/drivers/mfd/hi6421-spmi-pmic.c
+++ b/drivers/mfd/hi6421-spmi-pmic.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/mfd/core.h>
-#include <linux/mfd/hi6421-spmi-pmic.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -30,19 +29,14 @@ static const struct regmap_config regmap_config = {
 static int hi6421_spmi_pmic_probe(struct spmi_device *sdev)
 {
 	struct device *dev = &sdev->dev;
+	struct regmap *regmap;
 	int ret;
-	struct hi6421_spmi_pmic *ddata;
-	ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
-	if (!ddata)
-		return -ENOMEM;
 
-	ddata->regmap = devm_regmap_init_spmi_ext(sdev, &regmap_config);
-	if (IS_ERR(ddata->regmap))
-		return PTR_ERR(ddata->regmap);
+	regmap = devm_regmap_init_spmi_ext(sdev, &regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
 
-	ddata->dev = dev;
-
-	dev_set_drvdata(&sdev->dev, ddata);
+	dev_set_drvdata(&sdev->dev, regmap);
 
 	ret = devm_mfd_add_devices(&sdev->dev, PLATFORM_DEVID_NONE,
 				   hi6421v600_devs, ARRAY_SIZE(hi6421v600_devs),
diff --git a/drivers/misc/hi6421v600-irq.c b/drivers/misc/hi6421v600-irq.c
index 08535e97ff43..1c763796cf1f 100644
--- a/drivers/misc/hi6421v600-irq.c
+++ b/drivers/misc/hi6421v600-irq.c
@@ -10,7 +10,6 @@
 #include <linux/bitops.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/mfd/hi6421-spmi-pmic.h>
 #include <linux/module.h>
 #include <linux/of_gpio.h>
 #include <linux/platform_device.h>
@@ -220,7 +219,7 @@ static int hi6421v600_irq_probe(struct platform_device *pdev)
 	struct platform_device *pmic_pdev;
 	struct device *dev = &pdev->dev;
 	struct hi6421v600_irq *priv;
-	struct hi6421_spmi_pmic *pmic;
+	struct regmap *regmap;
 	unsigned int virq;
 	int i, ret;
 
@@ -229,8 +228,8 @@ static int hi6421v600_irq_probe(struct platform_device *pdev)
 	 * which should first set drvdata. If this doesn't happen, hit
 	 * a warn on and return.
 	 */
-	pmic = dev_get_drvdata(pmic_dev);
-	if (WARN_ON(!pmic))
+	regmap = dev_get_drvdata(pmic_dev);
+	if (WARN_ON(!regmap))
 		return -ENODEV;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -238,7 +237,7 @@ static int hi6421v600_irq_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv->dev = dev;
-	priv->regmap = pmic->regmap;
+	priv->regmap = regmap;
 
 	spin_lock_init(&priv->lock);
 
diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c
index 662d87ae61cb..4671678f6b19 100644
--- a/drivers/regulator/hi6421v600-regulator.c
+++ b/drivers/regulator/hi6421v600-regulator.c
@@ -9,8 +9,8 @@
 // Guodong Xu <guodong.xu@linaro.org>
 
 #include <linux/delay.h>
-#include <linux/mfd/hi6421-spmi-pmic.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/regulator/driver.h>
@@ -237,7 +237,7 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev)
 	struct hi6421_spmi_reg_priv *priv;
 	struct hi6421_spmi_reg_info *info;
 	struct device *dev = &pdev->dev;
-	struct hi6421_spmi_pmic *pmic;
+	struct regmap *regmap;
 	struct regulator_dev *rdev;
 	int i;
 
@@ -246,8 +246,8 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev)
 	 * which should first set drvdata. If this doesn't happen, hit
 	 * a warn on and return.
 	 */
-	pmic = dev_get_drvdata(pmic_dev);
-	if (WARN_ON(!pmic))
+	regmap = dev_get_drvdata(pmic_dev);
+	if (WARN_ON(!regmap))
 		return -ENODEV;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -261,7 +261,7 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev)
 
 		config.dev = pdev->dev.parent;
 		config.driver_data = priv;
-		config.regmap = pmic->regmap;
+		config.regmap = regmap;
 
 		rdev = devm_regulator_register(dev, &info->desc, &config);
 		if (IS_ERR(rdev)) {
diff --git a/include/linux/mfd/hi6421-spmi-pmic.h b/include/linux/mfd/hi6421-spmi-pmic.h
deleted file mode 100644
index e5b8dbf828b6..000000000000
--- a/include/linux/mfd/hi6421-spmi-pmic.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Header file for device driver Hi6421 PMIC
- *
- * Copyright (c) 2013 Linaro Ltd.
- * Copyright (C) 2011 Hisilicon.
- * Copyright (c) 2020-2021 Huawei Technologies Co., Ltd
- *
- * Guodong Xu <guodong.xu@linaro.org>
- */
-
-#ifndef	__HISI_PMIC_H
-#define	__HISI_PMIC_H
-
-#include <linux/irqdomain.h>
-#include <linux/regmap.h>
-
-struct hi6421_spmi_pmic {
-	struct resource				*res;
-	struct device				*dev;
-	void __iomem				*regs;
-	struct regmap				*regmap;
-};
-
-#endif		/* __HISI_PMIC_H */
-- 
cgit v1.2.3


From c1baf6c591e6901d3422d7a0d0d32ccf29883edf Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Sun, 12 Sep 2021 21:17:15 +0300
Subject: usb: phy: tegra: Support OTG mode programming

Support programming USB PHY into OTG mode.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Link: https://lore.kernel.org/r/20210912181718.1328-5-digetx@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-tegra-usb.c   | 198 +++++++++++++++++++++++++++++++++++++-
 include/linux/usb/tegra_usb_phy.h |   5 +
 2 files changed, 198 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/phy/phy-tegra-usb.c b/drivers/usb/phy/phy-tegra-usb.c
index c0f432d509aa..68cd4b68e3a2 100644
--- a/drivers/usb/phy/phy-tegra-usb.c
+++ b/drivers/usb/phy/phy-tegra-usb.c
@@ -63,6 +63,10 @@
 #define   A_VBUS_VLD_WAKEUP_EN			BIT(30)
 
 #define USB_PHY_VBUS_WAKEUP_ID			0x408
+#define   ID_INT_EN				BIT(0)
+#define   ID_CHG_DET				BIT(1)
+#define   VBUS_WAKEUP_INT_EN			BIT(8)
+#define   VBUS_WAKEUP_CHG_DET			BIT(9)
 #define   VBUS_WAKEUP_STS			BIT(10)
 #define   VBUS_WAKEUP_WAKEUP_EN			BIT(30)
 
@@ -158,6 +162,10 @@
 #define   USB_USBMODE_HOST			(3 << 0)
 #define   USB_USBMODE_DEVICE			(2 << 0)
 
+#define PMC_USB_AO				0xf0
+#define   VBUS_WAKEUP_PD_P0			BIT(2)
+#define   ID_PD_P0				BIT(3)
+
 static DEFINE_SPINLOCK(utmip_pad_lock);
 static unsigned int utmip_pad_count;
 
@@ -533,13 +541,14 @@ static int utmi_phy_power_on(struct tegra_usb_phy *phy)
 	val &= ~USB_WAKE_ON_RESUME_EN;
 	writel_relaxed(val, base + USB_SUSP_CTRL);
 
-	if (phy->mode == USB_DR_MODE_PERIPHERAL) {
+	if (phy->mode != USB_DR_MODE_HOST) {
 		val = readl_relaxed(base + USB_SUSP_CTRL);
 		val &= ~(USB_WAKE_ON_CNNT_EN_DEV | USB_WAKE_ON_DISCON_EN_DEV);
 		writel_relaxed(val, base + USB_SUSP_CTRL);
 
 		val = readl_relaxed(base + USB_PHY_VBUS_WAKEUP_ID);
 		val &= ~VBUS_WAKEUP_WAKEUP_EN;
+		val &= ~(ID_CHG_DET | VBUS_WAKEUP_CHG_DET);
 		writel_relaxed(val, base + USB_PHY_VBUS_WAKEUP_ID);
 
 		val = readl_relaxed(base + USB_PHY_VBUS_SENSORS);
@@ -687,9 +696,10 @@ static int utmi_phy_power_off(struct tegra_usb_phy *phy)
 		 * Ask VBUS sensor to generate wake event once cable is
 		 * connected.
 		 */
-		if (phy->mode == USB_DR_MODE_PERIPHERAL) {
+		if (phy->mode != USB_DR_MODE_HOST) {
 			val = readl_relaxed(base + USB_PHY_VBUS_WAKEUP_ID);
 			val |= VBUS_WAKEUP_WAKEUP_EN;
+			val &= ~(ID_CHG_DET | VBUS_WAKEUP_CHG_DET);
 			writel_relaxed(val, base + USB_PHY_VBUS_WAKEUP_ID);
 
 			val = readl_relaxed(base + USB_PHY_VBUS_SENSORS);
@@ -893,6 +903,7 @@ static void tegra_usb_phy_shutdown(struct usb_phy *u_phy)
 	if (WARN_ON(!phy->freq))
 		return;
 
+	usb_phy_set_wakeup(u_phy, false);
 	tegra_usb_phy_power_off(phy);
 
 	if (!phy->is_ulpi_phy)
@@ -904,26 +915,146 @@ static void tegra_usb_phy_shutdown(struct usb_phy *u_phy)
 	phy->freq = NULL;
 }
 
+static irqreturn_t tegra_usb_phy_isr(int irq, void *data)
+{
+	u32 val, int_mask = ID_CHG_DET | VBUS_WAKEUP_CHG_DET;
+	struct tegra_usb_phy *phy = data;
+	void __iomem *base = phy->regs;
+
+	/*
+	 * The PHY interrupt also wakes the USB controller driver since
+	 * interrupt is shared. We don't do anything in the PHY driver,
+	 * so just clear the interrupt.
+	 */
+	val = readl_relaxed(base + USB_PHY_VBUS_WAKEUP_ID);
+	writel_relaxed(val, base + USB_PHY_VBUS_WAKEUP_ID);
+
+	return val & int_mask ? IRQ_HANDLED : IRQ_NONE;
+}
+
 static int tegra_usb_phy_set_wakeup(struct usb_phy *u_phy, bool enable)
 {
 	struct tegra_usb_phy *phy = to_tegra_usb_phy(u_phy);
+	void __iomem *base = phy->regs;
+	int ret = 0;
+	u32 val;
+
+	if (phy->wakeup_enabled && phy->mode != USB_DR_MODE_HOST &&
+	    phy->irq > 0) {
+		disable_irq(phy->irq);
+
+		val = readl_relaxed(base + USB_PHY_VBUS_WAKEUP_ID);
+		val &= ~(ID_INT_EN | VBUS_WAKEUP_INT_EN);
+		writel_relaxed(val, base + USB_PHY_VBUS_WAKEUP_ID);
+
+		enable_irq(phy->irq);
+
+		free_irq(phy->irq, phy);
+
+		phy->wakeup_enabled = false;
+	}
+
+	if (enable && phy->mode != USB_DR_MODE_HOST && phy->irq > 0) {
+		ret = request_irq(phy->irq, tegra_usb_phy_isr, IRQF_SHARED,
+				  dev_name(phy->u_phy.dev), phy);
+		if (!ret) {
+			disable_irq(phy->irq);
+
+			/*
+			 * USB clock will be resumed once wake event will be
+			 * generated.  The ID-change event requires to have
+			 * interrupts enabled, otherwise it won't be generated.
+			 */
+			val = readl_relaxed(base + USB_PHY_VBUS_WAKEUP_ID);
+			val |= ID_INT_EN | VBUS_WAKEUP_INT_EN;
+			writel_relaxed(val, base + USB_PHY_VBUS_WAKEUP_ID);
+
+			enable_irq(phy->irq);
+		} else {
+			dev_err(phy->u_phy.dev,
+				"Failed to request interrupt: %d", ret);
+			enable = false;
+		}
+	}
 
 	phy->wakeup_enabled = enable;
 
-	return 0;
+	return ret;
 }
 
 static int tegra_usb_phy_set_suspend(struct usb_phy *u_phy, int suspend)
 {
 	struct tegra_usb_phy *phy = to_tegra_usb_phy(u_phy);
+	int ret;
 
 	if (WARN_ON(!phy->freq))
 		return -EINVAL;
 
+	/*
+	 * PHY is sharing IRQ with the CI driver, hence here we either
+	 * disable interrupt for both PHY and CI or for CI only.  The
+	 * interrupt needs to be disabled while hardware is reprogrammed
+	 * because interrupt touches the programmed registers, and thus,
+	 * there could be a race condition.
+	 */
+	if (phy->irq > 0)
+		disable_irq(phy->irq);
+
 	if (suspend)
-		return tegra_usb_phy_power_off(phy);
+		ret = tegra_usb_phy_power_off(phy);
 	else
-		return tegra_usb_phy_power_on(phy);
+		ret = tegra_usb_phy_power_on(phy);
+
+	if (phy->irq > 0)
+		enable_irq(phy->irq);
+
+	return ret;
+}
+
+static int tegra_usb_phy_configure_pmc(struct tegra_usb_phy *phy)
+{
+	int err, val = 0;
+
+	/* older device-trees don't have PMC regmap */
+	if (!phy->pmc_regmap)
+		return 0;
+
+	/*
+	 * Tegra20 has a different layout of PMC USB register bits and AO is
+	 * enabled by default after system reset on Tegra20, so assume nothing
+	 * to do on Tegra20.
+	 */
+	if (!phy->soc_config->requires_pmc_ao_power_up)
+		return 0;
+
+	/* enable VBUS wake-up detector */
+	if (phy->mode != USB_DR_MODE_HOST)
+		val |= VBUS_WAKEUP_PD_P0 << phy->instance * 4;
+
+	/* enable ID-pin ACC detector for OTG mode switching */
+	if (phy->mode == USB_DR_MODE_OTG)
+		val |= ID_PD_P0 << phy->instance * 4;
+
+	/* disable detectors to reset them */
+	err = regmap_set_bits(phy->pmc_regmap, PMC_USB_AO, val);
+	if (err) {
+		dev_err(phy->u_phy.dev, "Failed to disable PMC AO: %d\n", err);
+		return err;
+	}
+
+	usleep_range(10, 100);
+
+	/* enable detectors */
+	err = regmap_clear_bits(phy->pmc_regmap, PMC_USB_AO, val);
+	if (err) {
+		dev_err(phy->u_phy.dev, "Failed to enable PMC AO: %d\n", err);
+		return err;
+	}
+
+	/* detectors starts to work after 10ms */
+	usleep_range(10000, 15000);
+
+	return 0;
 }
 
 static int tegra_usb_phy_init(struct usb_phy *u_phy)
@@ -967,6 +1098,10 @@ static int tegra_usb_phy_init(struct usb_phy *u_phy)
 			goto disable_vbus;
 	}
 
+	err = tegra_usb_phy_configure_pmc(phy);
+	if (err)
+		goto close_phy;
+
 	err = tegra_usb_phy_power_on(phy);
 	if (err)
 		goto close_phy;
@@ -1135,11 +1270,56 @@ static int utmi_phy_probe(struct tegra_usb_phy *tegra_phy,
 	return 0;
 }
 
+static void tegra_usb_phy_put_pmc_device(void *dev)
+{
+	put_device(dev);
+}
+
+static int tegra_usb_phy_parse_pmc(struct device *dev,
+				   struct tegra_usb_phy *phy)
+{
+	struct platform_device *pmc_pdev;
+	struct of_phandle_args args;
+	int err;
+
+	err = of_parse_phandle_with_fixed_args(dev->of_node, "nvidia,pmc",
+					       1, 0, &args);
+	if (err) {
+		if (err != -ENOENT)
+			return err;
+
+		dev_warn_once(dev, "nvidia,pmc is missing, please update your device-tree\n");
+		return 0;
+	}
+
+	pmc_pdev = of_find_device_by_node(args.np);
+	of_node_put(args.np);
+	if (!pmc_pdev)
+		return -ENODEV;
+
+	err = devm_add_action_or_reset(dev, tegra_usb_phy_put_pmc_device,
+				       &pmc_pdev->dev);
+	if (err)
+		return err;
+
+	if (!platform_get_drvdata(pmc_pdev))
+		return -EPROBE_DEFER;
+
+	phy->pmc_regmap = dev_get_regmap(&pmc_pdev->dev, "usb_sleepwalk");
+	if (!phy->pmc_regmap)
+		return -EINVAL;
+
+	phy->instance = args.args[0];
+
+	return 0;
+}
+
 static const struct tegra_phy_soc_config tegra20_soc_config = {
 	.utmi_pll_config_in_car_module = false,
 	.has_hostpc = false,
 	.requires_usbmode_setup = false,
 	.requires_extra_tuning_parameters = false,
+	.requires_pmc_ao_power_up = false,
 };
 
 static const struct tegra_phy_soc_config tegra30_soc_config = {
@@ -1147,6 +1327,7 @@ static const struct tegra_phy_soc_config tegra30_soc_config = {
 	.has_hostpc = true,
 	.requires_usbmode_setup = true,
 	.requires_extra_tuning_parameters = true,
+	.requires_pmc_ao_power_up = true,
 };
 
 static const struct of_device_id tegra_usb_phy_id_table[] = {
@@ -1172,6 +1353,7 @@ static int tegra_usb_phy_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	tegra_phy->soc_config = of_device_get_match_data(&pdev->dev);
+	tegra_phy->irq = platform_get_irq_optional(pdev, 0);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res) {
@@ -1215,6 +1397,12 @@ static int tegra_usb_phy_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	err = tegra_usb_phy_parse_pmc(&pdev->dev, tegra_phy);
+	if (err) {
+		dev_err_probe(&pdev->dev, err, "Failed to get PMC regmap\n");
+		return err;
+	}
+
 	phy_type = of_usb_get_phy_mode(np);
 	switch (phy_type) {
 	case USBPHY_INTERFACE_MODE_UTMI:
diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h
index fd1c9f6a4e37..d3e65eb9e16f 100644
--- a/include/linux/usb/tegra_usb_phy.h
+++ b/include/linux/usb/tegra_usb_phy.h
@@ -18,6 +18,7 @@
 
 #include <linux/clk.h>
 #include <linux/gpio.h>
+#include <linux/regmap.h>
 #include <linux/reset.h>
 #include <linux/usb/otg.h>
 
@@ -30,6 +31,7 @@
  *      enter host mode
  * requires_extra_tuning_parameters: true if xcvr_hsslew, hssquelch_level
  *      and hsdiscon_level should be set for adequate signal quality
+ * requires_pmc_ao_power_up: true if USB AO is powered down by default
  */
 
 struct tegra_phy_soc_config {
@@ -37,6 +39,7 @@ struct tegra_phy_soc_config {
 	bool has_hostpc;
 	bool requires_usbmode_setup;
 	bool requires_extra_tuning_parameters;
+	bool requires_pmc_ao_power_up;
 };
 
 struct tegra_utmip_config {
@@ -62,6 +65,7 @@ enum tegra_usb_phy_port_speed {
 struct tegra_xtal_freq;
 
 struct tegra_usb_phy {
+	int irq;
 	int instance;
 	const struct tegra_xtal_freq *freq;
 	void __iomem *regs;
@@ -70,6 +74,7 @@ struct tegra_usb_phy {
 	struct clk *pll_u;
 	struct clk *pad_clk;
 	struct regulator *vbus;
+	struct regmap *pmc_regmap;
 	enum usb_dr_mode mode;
 	void *config;
 	const struct tegra_phy_soc_config *soc_config;
-- 
cgit v1.2.3


From 3f6cffb8604b537e3d7ea040d7f4368689638eaf Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 4 Oct 2021 16:01:40 -0700
Subject: etherdevice: use __dev_addr_set()

Andrew points out that eth_hw_addr_set() replaces memcpy()
calls so we can't use ether_addr_copy() which assumes
both arguments are 2-bytes aligned.

Reported-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 928c411bd509..c58d50451485 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -308,7 +308,7 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src)
  */
 static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr)
 {
-	ether_addr_copy(dev->dev_addr, addr);
+	__dev_addr_set(dev, addr, ETH_ALEN);
 }
 
 /**
-- 
cgit v1.2.3


From 1e4071f6282b3323435b02b1719bcfbfe1b57150 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Fri, 24 Sep 2021 07:12:42 -0500
Subject: ipmi: Export ipmb_checksum()

It will be needed by the upcoming ipmb direct addressing.

Signed-off-by: Corey Minyard <minyard@acm.org>
Tested-by: Andrew Manley <andrew.manley@sealingtech.com>
Reviewed-by: Andrew Manley <andrew.manley@sealingtech.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 3 ++-
 include/linux/ipmi.h                | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 13988f88f1b0..ad1a8fc379b9 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -1710,7 +1710,7 @@ int ipmi_unregister_for_cmd(struct ipmi_user *user,
 }
 EXPORT_SYMBOL(ipmi_unregister_for_cmd);
 
-static unsigned char
+unsigned char
 ipmb_checksum(unsigned char *data, int size)
 {
 	unsigned char csum = 0;
@@ -1720,6 +1720,7 @@ ipmb_checksum(unsigned char *data, int size)
 
 	return -csum;
 }
+EXPORT_SYMBOL(ipmb_checksum);
 
 static inline void format_ipmb_msg(struct ipmi_smi_msg   *smi_msg,
 				   struct kernel_ipmi_msg *msg,
diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 52850a02a3d0..163831a087ef 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -335,4 +335,7 @@ extern int ipmi_get_smi_info(int if_num, struct ipmi_smi_info *data);
 
 #define GET_DEVICE_ID_MAX_RETRY		5
 
+/* Helper function for computing the IPMB checksum of some data. */
+unsigned char ipmb_checksum(unsigned char *data, int size);
+
 #endif /* __LINUX_IPMI_H */
-- 
cgit v1.2.3


From 059747c245f0e9af5e109eece7d3414dbe08d513 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Fri, 24 Sep 2021 11:42:56 -0500
Subject: ipmi: Add support for IPMB direct messages

An application has come up that has a device sitting right on the IPMB
that would like to communicate with the BMC on the IPMB using normal
IPMI commands.

Sending these commands and handling the responses is easy enough, no
modifications are needed to the IPMI infrastructure.  But if this is an
application that also needs to receive IPMB commands and respond, some
way is needed to handle these incoming commands and send the responses.

Currently, the IPMI message handler only sends commands to the interface
and only receives responses from interface.  This change extends the
interface to receive commands/responses and send commands/responses.
These are formatted differently in support of receiving/sending IPMB
messages directly.

Signed-off-by: Corey Minyard <minyard@acm.org>
Tested-by: Andrew Manley <andrew.manley@sealingtech.com>
Reviewed-by: Andrew Manley <andrew.manley@sealingtech.com>
---
 drivers/char/ipmi/ipmi_msghandler.c | 288 +++++++++++++++++++++++++++++++-----
 include/linux/ipmi_smi.h            |  59 ++++++++
 include/uapi/linux/ipmi.h           |  14 ++
 3 files changed, 328 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index ad1a8fc379b9..a60201d3f735 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -653,6 +653,11 @@ static int is_ipmb_bcast_addr(struct ipmi_addr *addr)
 	return addr->addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE;
 }
 
+static int is_ipmb_direct_addr(struct ipmi_addr *addr)
+{
+	return addr->addr_type == IPMI_IPMB_DIRECT_ADDR_TYPE;
+}
+
 static void free_recv_msg_list(struct list_head *q)
 {
 	struct ipmi_recv_msg *msg, *msg2;
@@ -805,6 +810,17 @@ ipmi_addr_equal(struct ipmi_addr *addr1, struct ipmi_addr *addr2)
 			&& (ipmb_addr1->lun == ipmb_addr2->lun));
 	}
 
+	if (is_ipmb_direct_addr(addr1)) {
+		struct ipmi_ipmb_direct_addr *daddr1
+			= (struct ipmi_ipmb_direct_addr *) addr1;
+		struct ipmi_ipmb_direct_addr *daddr2
+			= (struct ipmi_ipmb_direct_addr *) addr2;
+
+		return daddr1->slave_addr == daddr2->slave_addr &&
+			daddr1->rq_lun == daddr2->rq_lun &&
+			daddr1->rs_lun == daddr2->rs_lun;
+	}
+
 	if (is_lan_addr(addr1)) {
 		struct ipmi_lan_addr *lan_addr1
 			= (struct ipmi_lan_addr *) addr1;
@@ -843,6 +859,23 @@ int ipmi_validate_addr(struct ipmi_addr *addr, int len)
 		return 0;
 	}
 
+	if (is_ipmb_direct_addr(addr)) {
+		struct ipmi_ipmb_direct_addr *daddr = (void *) addr;
+
+		if (addr->channel != 0)
+			return -EINVAL;
+		if (len < sizeof(struct ipmi_ipmb_direct_addr))
+			return -EINVAL;
+
+		if (daddr->slave_addr & 0x01)
+			return -EINVAL;
+		if (daddr->rq_lun >= 4)
+			return -EINVAL;
+		if (daddr->rs_lun >= 4)
+			return -EINVAL;
+		return 0;
+	}
+
 	if (is_lan_addr(addr)) {
 		if (len < sizeof(struct ipmi_lan_addr))
 			return -EINVAL;
@@ -862,6 +895,9 @@ unsigned int ipmi_addr_length(int addr_type)
 			|| (addr_type == IPMI_IPMB_BROADCAST_ADDR_TYPE))
 		return sizeof(struct ipmi_ipmb_addr);
 
+	if (addr_type == IPMI_IPMB_DIRECT_ADDR_TYPE)
+		return sizeof(struct ipmi_ipmb_direct_addr);
+
 	if (addr_type == IPMI_LAN_ADDR_TYPE)
 		return sizeof(struct ipmi_lan_addr);
 
@@ -2052,6 +2088,58 @@ out_err:
 	return rv;
 }
 
+static int i_ipmi_req_ipmb_direct(struct ipmi_smi        *intf,
+				  struct ipmi_addr       *addr,
+				  long			 msgid,
+				  struct kernel_ipmi_msg *msg,
+				  struct ipmi_smi_msg    *smi_msg,
+				  struct ipmi_recv_msg   *recv_msg,
+				  unsigned char          source_lun)
+{
+	struct ipmi_ipmb_direct_addr *daddr;
+	bool is_cmd = !(recv_msg->msg.netfn & 0x1);
+
+	if (!(intf->handlers->flags & IPMI_SMI_CAN_HANDLE_IPMB_DIRECT))
+		return -EAFNOSUPPORT;
+
+	/* Responses must have a completion code. */
+	if (!is_cmd && msg->data_len < 1) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	if ((msg->data_len + 4) > IPMI_MAX_MSG_LENGTH) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EMSGSIZE;
+	}
+
+	daddr = (struct ipmi_ipmb_direct_addr *) addr;
+	if (daddr->rq_lun > 3 || daddr->rs_lun > 3) {
+		ipmi_inc_stat(intf, sent_invalid_commands);
+		return -EINVAL;
+	}
+
+	smi_msg->type = IPMI_SMI_MSG_TYPE_IPMB_DIRECT;
+	smi_msg->msgid = msgid;
+
+	if (is_cmd) {
+		smi_msg->data[0] = msg->netfn << 2 | daddr->rs_lun;
+		smi_msg->data[2] = recv_msg->msgid << 2 | daddr->rq_lun;
+	} else {
+		smi_msg->data[0] = msg->netfn << 2 | daddr->rq_lun;
+		smi_msg->data[2] = recv_msg->msgid << 2 | daddr->rs_lun;
+	}
+	smi_msg->data[1] = daddr->slave_addr;
+	smi_msg->data[3] = msg->cmd;
+
+	memcpy(smi_msg->data + 4, msg->data, msg->data_len);
+	smi_msg->data_size = msg->data_len + 4;
+
+	smi_msg->user_data = recv_msg;
+
+	return 0;
+}
+
 static int i_ipmi_req_lan(struct ipmi_smi        *intf,
 			  struct ipmi_addr       *addr,
 			  long                   msgid,
@@ -2241,6 +2329,9 @@ static int i_ipmi_request(struct ipmi_user     *user,
 		rv = i_ipmi_req_ipmb(intf, addr, msgid, msg, smi_msg, recv_msg,
 				     source_address, source_lun,
 				     retries, retry_time_ms);
+	} else if (is_ipmb_direct_addr(addr)) {
+		rv = i_ipmi_req_ipmb_direct(intf, addr, msgid, msg, smi_msg,
+					    recv_msg, source_lun);
 	} else if (is_lan_addr(addr)) {
 		rv = i_ipmi_req_lan(intf, addr, msgid, msg, smi_msg, recv_msg,
 				    source_lun, retries, retry_time_ms);
@@ -3802,6 +3893,123 @@ static int handle_ipmb_get_msg_cmd(struct ipmi_smi *intf,
 	return rv;
 }
 
+static int handle_ipmb_direct_rcv_cmd(struct ipmi_smi *intf,
+				      struct ipmi_smi_msg *msg)
+{
+	struct cmd_rcvr          *rcvr;
+	int                      rv = 0;
+	struct ipmi_user         *user = NULL;
+	struct ipmi_ipmb_direct_addr *daddr;
+	struct ipmi_recv_msg     *recv_msg;
+	unsigned char netfn = msg->rsp[0] >> 2;
+	unsigned char cmd = msg->rsp[3];
+
+	rcu_read_lock();
+	/* We always use channel 0 for direct messages. */
+	rcvr = find_cmd_rcvr(intf, netfn, cmd, 0);
+	if (rcvr) {
+		user = rcvr->user;
+		kref_get(&user->refcount);
+	} else
+		user = NULL;
+	rcu_read_unlock();
+
+	if (user == NULL) {
+		/* We didn't find a user, deliver an error response. */
+		ipmi_inc_stat(intf, unhandled_commands);
+
+		msg->data[0] = ((netfn + 1) << 2) | (msg->rsp[4] & 0x3);
+		msg->data[1] = msg->rsp[2];
+		msg->data[2] = msg->rsp[4] & ~0x3;
+		msg->data[3] = cmd;
+		msg->data[4] = IPMI_INVALID_CMD_COMPLETION_CODE;
+		msg->data_size = 5;
+
+		rcu_read_lock();
+		if (!intf->in_shutdown) {
+			smi_send(intf, intf->handlers, msg, 0);
+			/*
+			 * We used the message, so return the value
+			 * that causes it to not be freed or
+			 * queued.
+			 */
+			rv = -1;
+		}
+		rcu_read_unlock();
+	} else {
+		recv_msg = ipmi_alloc_recv_msg();
+		if (!recv_msg) {
+			/*
+			 * We couldn't allocate memory for the
+			 * message, so requeue it for handling
+			 * later.
+			 */
+			rv = 1;
+			kref_put(&user->refcount, free_user);
+		} else {
+			/* Extract the source address from the data. */
+			daddr = (struct ipmi_ipmb_direct_addr *)&recv_msg->addr;
+			daddr->addr_type = IPMI_IPMB_DIRECT_ADDR_TYPE;
+			daddr->channel = 0;
+			daddr->slave_addr = msg->rsp[1];
+			daddr->rs_lun = msg->rsp[0] & 3;
+			daddr->rq_lun = msg->rsp[2] & 3;
+
+			/*
+			 * Extract the rest of the message information
+			 * from the IPMB header.
+			 */
+			recv_msg->user = user;
+			recv_msg->recv_type = IPMI_CMD_RECV_TYPE;
+			recv_msg->msgid = (msg->rsp[2] >> 2);
+			recv_msg->msg.netfn = msg->rsp[0] >> 2;
+			recv_msg->msg.cmd = msg->rsp[3];
+			recv_msg->msg.data = recv_msg->msg_data;
+
+			recv_msg->msg.data_len = msg->rsp_size - 4;
+			memcpy(recv_msg->msg_data, msg->rsp + 4,
+			       msg->rsp_size - 4);
+			if (deliver_response(intf, recv_msg))
+				ipmi_inc_stat(intf, unhandled_commands);
+			else
+				ipmi_inc_stat(intf, handled_commands);
+		}
+	}
+
+	return rv;
+}
+
+static int handle_ipmb_direct_rcv_rsp(struct ipmi_smi *intf,
+				      struct ipmi_smi_msg *msg)
+{
+	struct ipmi_recv_msg *recv_msg;
+	struct ipmi_ipmb_direct_addr *daddr;
+
+	recv_msg = (struct ipmi_recv_msg *) msg->user_data;
+	if (recv_msg == NULL) {
+		dev_warn(intf->si_dev,
+			 "IPMI message received with no owner. This could be because of a malformed message, or because of a hardware error.  Contact your hardware vendor for assistance.\n");
+		return 0;
+	}
+
+	recv_msg->recv_type = IPMI_RESPONSE_RECV_TYPE;
+	recv_msg->msgid = msg->msgid;
+	daddr = (struct ipmi_ipmb_direct_addr *) &recv_msg->addr;
+	daddr->addr_type = IPMI_IPMB_DIRECT_ADDR_TYPE;
+	daddr->channel = 0;
+	daddr->slave_addr = msg->rsp[1];
+	daddr->rq_lun = msg->rsp[0] & 3;
+	daddr->rs_lun = msg->rsp[2] & 3;
+	recv_msg->msg.netfn = msg->rsp[0] >> 2;
+	recv_msg->msg.cmd = msg->rsp[3];
+	memcpy(recv_msg->msg_data, &msg->rsp[4], msg->rsp_size - 4);
+	recv_msg->msg.data = recv_msg->msg_data;
+	recv_msg->msg.data_len = msg->rsp_size - 4;
+	deliver_local_response(intf, recv_msg);
+
+	return 0;
+}
+
 static int handle_lan_get_msg_rsp(struct ipmi_smi *intf,
 				  struct ipmi_smi_msg *msg)
 {
@@ -4227,18 +4435,40 @@ static int handle_bmc_rsp(struct ipmi_smi *intf,
 static int handle_one_recv_msg(struct ipmi_smi *intf,
 			       struct ipmi_smi_msg *msg)
 {
-	int requeue;
+	int requeue = 0;
 	int chan;
+	unsigned char cc;
+	bool is_cmd = !((msg->rsp[0] >> 2) & 1);
 
 	pr_debug("Recv: %*ph\n", msg->rsp_size, msg->rsp);
 
-	if ((msg->data_size >= 2)
+	if (msg->rsp_size < 2) {
+		/* Message is too small to be correct. */
+		dev_warn(intf->si_dev,
+			 "BMC returned too small a message for netfn %x cmd %x, got %d bytes\n",
+			 (msg->data[0] >> 2) | 1, msg->data[1], msg->rsp_size);
+
+return_unspecified:
+		/* Generate an error response for the message. */
+		msg->rsp[0] = msg->data[0] | (1 << 2);
+		msg->rsp[1] = msg->data[1];
+		msg->rsp[2] = IPMI_ERR_UNSPECIFIED;
+		msg->rsp_size = 3;
+	} else if (msg->type == IPMI_SMI_MSG_TYPE_IPMB_DIRECT) {
+		/* commands must have at least 3 bytes, responses 4. */
+		if (is_cmd && (msg->rsp_size < 3)) {
+			ipmi_inc_stat(intf, invalid_commands);
+			goto out;
+		}
+		if (!is_cmd && (msg->rsp_size < 4))
+			goto return_unspecified;
+	} else if ((msg->data_size >= 2)
 	    && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
 	    && (msg->data[1] == IPMI_SEND_MSG_CMD)
 	    && (msg->user_data == NULL)) {
 
 		if (intf->in_shutdown)
-			goto free_msg;
+			goto out;
 
 		/*
 		 * This is the local response to a command send, start
@@ -4273,21 +4503,6 @@ static int handle_one_recv_msg(struct ipmi_smi *intf,
 		} else
 			/* The message was sent, start the timer. */
 			intf_start_seq_timer(intf, msg->msgid);
-free_msg:
-		requeue = 0;
-		goto out;
-
-	} else if (msg->rsp_size < 2) {
-		/* Message is too small to be correct. */
-		dev_warn(intf->si_dev,
-			 "BMC returned too small a message for netfn %x cmd %x, got %d bytes\n",
-			 (msg->data[0] >> 2) | 1, msg->data[1], msg->rsp_size);
-
-		/* Generate an error response for the message. */
-		msg->rsp[0] = msg->data[0] | (1 << 2);
-		msg->rsp[1] = msg->data[1];
-		msg->rsp[2] = IPMI_ERR_UNSPECIFIED;
-		msg->rsp_size = 3;
 	} else if (((msg->rsp[0] >> 2) != ((msg->data[0] >> 2) | 1))
 		   || (msg->rsp[1] != msg->data[1])) {
 		/*
@@ -4299,39 +4514,46 @@ free_msg:
 			 (msg->data[0] >> 2) | 1, msg->data[1],
 			 msg->rsp[0] >> 2, msg->rsp[1]);
 
-		/* Generate an error response for the message. */
-		msg->rsp[0] = msg->data[0] | (1 << 2);
-		msg->rsp[1] = msg->data[1];
-		msg->rsp[2] = IPMI_ERR_UNSPECIFIED;
-		msg->rsp_size = 3;
+		goto return_unspecified;
 	}
 
-	if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2))
-	    && (msg->rsp[1] == IPMI_SEND_MSG_CMD)
-	    && (msg->user_data != NULL)) {
+	if (msg->type == IPMI_SMI_MSG_TYPE_IPMB_DIRECT) {
+		if ((msg->data[0] >> 2) & 1) {
+			/* It's a response to a sent response. */
+			chan = 0;
+			cc = msg->rsp[4];
+			goto process_response_response;
+		}
+		if (is_cmd)
+			requeue = handle_ipmb_direct_rcv_cmd(intf, msg);
+		else
+			requeue = handle_ipmb_direct_rcv_rsp(intf, msg);
+	} else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2))
+		   && (msg->rsp[1] == IPMI_SEND_MSG_CMD)
+		   && (msg->user_data != NULL)) {
 		/*
 		 * It's a response to a response we sent.  For this we
 		 * deliver a send message response to the user.
 		 */
-		struct ipmi_recv_msg *recv_msg = msg->user_data;
-
-		requeue = 0;
-		if (msg->rsp_size < 2)
-			/* Message is too small to be correct. */
-			goto out;
+		struct ipmi_recv_msg *recv_msg;
 
 		chan = msg->data[2] & 0x0f;
 		if (chan >= IPMI_MAX_CHANNELS)
 			/* Invalid channel number */
 			goto out;
+		cc = msg->rsp[2];
 
+process_response_response:
+		recv_msg = msg->user_data;
+
+		requeue = 0;
 		if (!recv_msg)
 			goto out;
 
 		recv_msg->recv_type = IPMI_RESPONSE_RESPONSE_TYPE;
 		recv_msg->msg.data = recv_msg->msg_data;
+		recv_msg->msg_data[0] = cc;
 		recv_msg->msg.data_len = 1;
-		recv_msg->msg_data[0] = msg->rsp[2];
 		deliver_local_response(intf, recv_msg);
 	} else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2))
 		   && (msg->rsp[1] == IPMI_GET_MSG_CMD)) {
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index deec18b8944a..9277d21c2690 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -38,6 +38,59 @@ struct ipmi_smi;
 #define IPMI_WATCH_MASK_CHECK_WATCHDOG	(1 << 1)
 #define IPMI_WATCH_MASK_CHECK_COMMANDS	(1 << 2)
 
+/*
+ * SMI messages
+ *
+ * When communicating with an SMI, messages come in two formats:
+ *
+ * * Normal (to a BMC over a BMC interface)
+ *
+ * * IPMB (over a IPMB to another MC)
+ *
+ * When normal, commands are sent using the format defined by a
+ * standard message over KCS (NetFn must be even):
+ *
+ *   +-----------+-----+------+
+ *   | NetFn/LUN | Cmd | Data |
+ *   +-----------+-----+------+
+ *
+ * And responses, similarly, with an completion code added (NetFn must
+ * be odd):
+ *
+ *   +-----------+-----+------+------+
+ *   | NetFn/LUN | Cmd | CC   | Data |
+ *   +-----------+-----+------+------+
+ *
+ * With normal messages, only commands are sent and only responses are
+ * received.
+ *
+ * In IPMB mode, we are acting as an IPMB device. Commands will be in
+ * the following format (NetFn must be even):
+ *
+ *   +-------------+------+-------------+-----+------+
+ *   | NetFn/rsLUN | Addr | rqSeq/rqLUN | Cmd | Data |
+ *   +-------------+------+-------------+-----+------+
+ *
+ * Responses will using the following format:
+ *
+ *   +-------------+------+-------------+-----+------+------+
+ *   | NetFn/rqLUN | Addr | rqSeq/rsLUN | Cmd | CC   | Data |
+ *   +-------------+------+-------------+-----+------+------+
+ *
+ * This is similar to the format defined in the IPMB manual section
+ * 2.11.1 with the checksums and the first address removed.  Also, the
+ * address is always the remote address.
+ *
+ * IPMB messages can be commands and responses in both directions.
+ * Received commands are handled as received commands from the message
+ * queue.
+ */
+
+enum ipmi_smi_msg_type {
+	IPMI_SMI_MSG_TYPE_NORMAL = 0,
+	IPMI_SMI_MSG_TYPE_IPMB_DIRECT
+};
+
 /*
  * Messages to/from the lower layer.  The smi interface will take one
  * of these to send. After the send has occurred and a response has
@@ -54,6 +107,8 @@ struct ipmi_smi;
 struct ipmi_smi_msg {
 	struct list_head link;
 
+	enum ipmi_smi_msg_type type;
+
 	long    msgid;
 	void    *user_data;
 
@@ -73,6 +128,10 @@ struct ipmi_smi_msg {
 struct ipmi_smi_handlers {
 	struct module *owner;
 
+	/* Capabilities of the SMI. */
+#define IPMI_SMI_CAN_HANDLE_IPMB_DIRECT		(1 << 0)
+	unsigned int flags;
+
 	/*
 	 * The low-level interface cannot start sending messages to
 	 * the upper layer until this function is called.  This may
diff --git a/include/uapi/linux/ipmi.h b/include/uapi/linux/ipmi.h
index 007e65f9243b..966c3070959b 100644
--- a/include/uapi/linux/ipmi.h
+++ b/include/uapi/linux/ipmi.h
@@ -80,6 +80,20 @@ struct ipmi_ipmb_addr {
 	unsigned char lun;
 };
 
+/*
+ * Used for messages received directly from an IPMB that have not gone
+ * through a MC.  This is for systems that sit right on an IPMB so
+ * they can receive commands and respond to them.
+ */
+#define IPMI_IPMB_DIRECT_ADDR_TYPE	0x81
+struct ipmi_ipmb_direct_addr {
+	int           addr_type;
+	short         channel;
+	unsigned char slave_addr;
+	unsigned char rs_lun;
+	unsigned char rq_lun;
+};
+
 /*
  * A LAN Address.  This is an address to/from a LAN interface bridged
  * by the BMC, not an address actually out on the LAN.
-- 
cgit v1.2.3


From 549017aa1bb7ec19a1e24e7f65480a1c2e76b90e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 5 Oct 2021 13:52:42 +0200
Subject: netlink: remove netlink_broadcast_filtered

No users in tree since commit a3498436b3a0 ("netns: restrict uevents"),
so remove this functionality.

Cc: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  4 ----
 net/netlink/af_netlink.c | 23 ++---------------------
 2 files changed, 2 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 61b1c7fcc401..1ec631838af9 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -156,10 +156,6 @@ bool netlink_strict_get_check(struct sk_buff *skb);
 int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock);
 int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid,
 		      __u32 group, gfp_t allocation);
-int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
-			       __u32 portid, __u32 group, gfp_t allocation,
-			       int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
-			       void *filter_data);
 int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code);
 int netlink_register_notifier(struct notifier_block *nb);
 int netlink_unregister_notifier(struct notifier_block *nb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 24b7cf447bc5..4f5f93fd6802 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1407,8 +1407,6 @@ struct netlink_broadcast_data {
 	int delivered;
 	gfp_t allocation;
 	struct sk_buff *skb, *skb2;
-	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
-	void *tx_data;
 };
 
 static void do_one_broadcast(struct sock *sk,
@@ -1462,11 +1460,6 @@ static void do_one_broadcast(struct sock *sk,
 			p->delivery_failure = 1;
 		goto out;
 	}
-	if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
-		kfree_skb(p->skb2);
-		p->skb2 = NULL;
-		goto out;
-	}
 	if (sk_filter(sk, p->skb2)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
@@ -1489,10 +1482,8 @@ out:
 	sock_put(sk);
 }
 
-int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
-	u32 group, gfp_t allocation,
-	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
-	void *filter_data)
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
+		      u32 group, gfp_t allocation)
 {
 	struct net *net = sock_net(ssk);
 	struct netlink_broadcast_data info;
@@ -1511,8 +1502,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
 	info.allocation = allocation;
 	info.skb = skb;
 	info.skb2 = NULL;
-	info.tx_filter = filter;
-	info.tx_data = filter_data;
 
 	/* While we sleep in clone, do not allow to change socket list */
 
@@ -1538,14 +1527,6 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid
 	}
 	return -ESRCH;
 }
-EXPORT_SYMBOL(netlink_broadcast_filtered);
-
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
-		      u32 group, gfp_t allocation)
-{
-	return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
-		NULL, NULL);
-}
 EXPORT_SYMBOL(netlink_broadcast);
 
 struct netlink_set_err_data {
-- 
cgit v1.2.3


From ded6e16b37e4c8c86cda98604ecd78818d6ca36a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 4 Oct 2021 12:14:43 -0700
Subject: mlx4: replace mlx4_mac_to_u64() with ether_addr_to_u64()

mlx4_mac_to_u64() predates and opencodes ether_addr_to_u64().
It doesn't make the argument constant so it'll be problematic
when dev->dev_addr becomes a const. Convert to the generic helper.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c              |  2 +-
 drivers/infiniband/hw/mlx4/qp.c                |  2 +-
 drivers/net/ethernet/mellanox/mlx4/cmd.c       |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 16 ++++++++--------
 drivers/net/ethernet/mellanox/mlx4/fw.c        |  2 +-
 include/linux/mlx4/driver.h                    | 12 ------------
 6 files changed, 12 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index f367f4a4abff..f3fa2fe6a88a 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2275,7 +2275,7 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
 	u64 release_mac = MLX4_IB_INVALID_MAC;
 	struct mlx4_ib_qp *qp;
 
-	new_smac = mlx4_mac_to_u64(dev->dev_addr);
+	new_smac = ether_addr_to_u64(dev->dev_addr);
 	atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
 
 	/* no need for update QP1 and mac registration in non-SRIOV */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8662f462e2a5..aea4182f33a4 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1853,7 +1853,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
 			 u16 vlan_id, u8 *smac)
 {
 	return _mlx4_set_path(dev, &qp->ah_attr,
-			      mlx4_mac_to_u64(smac),
+			      ether_addr_to_u64(smac),
 			      vlan_id,
 			      path, &mqp->pri, port);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 8d751383530b..9fadedfca41c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3009,7 +3009,7 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u8 *mac)
 		return -EPERM;
 	}
 
-	s_info->mac = mlx4_mac_to_u64(mac);
+	s_info->mac = ether_addr_to_u64(mac);
 	mlx4_info(dev, "default mac on vf %d port %d to %llX will take effect only after vf restart\n",
 		  vf, port, s_info->mac);
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index ac480dc79bc1..76c8fe8e0125 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -644,7 +644,7 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
 	int index = 0;
 	int err = 0;
 	int *qpn = &priv->base_qpn;
-	u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr);
+	u64 mac = ether_addr_to_u64(priv->dev->dev_addr);
 
 	en_dbg(DRV, priv, "Registering MAC: %pM for adding\n",
 	       priv->dev->dev_addr);
@@ -683,7 +683,7 @@ static void mlx4_en_put_qp(struct mlx4_en_priv *priv)
 	int qpn = priv->base_qpn;
 
 	if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
-		u64 mac = mlx4_mac_to_u64(priv->dev->dev_addr);
+		u64 mac = ether_addr_to_u64(priv->dev->dev_addr);
 		en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n",
 		       priv->dev->dev_addr);
 		mlx4_unregister_mac(dev, priv->port, mac);
@@ -701,14 +701,14 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn,
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_dev *dev = mdev->dev;
 	int err = 0;
-	u64 new_mac_u64 = mlx4_mac_to_u64(new_mac);
+	u64 new_mac_u64 = ether_addr_to_u64(new_mac);
 
 	if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
 		struct hlist_head *bucket;
 		unsigned int mac_hash;
 		struct mlx4_mac_entry *entry;
 		struct hlist_node *tmp;
-		u64 prev_mac_u64 = mlx4_mac_to_u64(prev_mac);
+		u64 prev_mac_u64 = ether_addr_to_u64(prev_mac);
 
 		bucket = &priv->mac_hash[prev_mac[MLX4_EN_MAC_HASH_IDX]];
 		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
@@ -1076,7 +1076,7 @@ static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
 		mlx4_en_cache_mclist(dev);
 		netif_addr_unlock_bh(dev);
 		list_for_each_entry(mclist, &priv->mc_list, list) {
-			mcast_addr = mlx4_mac_to_u64(mclist->addr);
+			mcast_addr = ether_addr_to_u64(mclist->addr);
 			mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
 					    mcast_addr, 0, MLX4_MCAST_CONFIG);
 		}
@@ -1169,7 +1169,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 				found = true;
 
 			if (!found) {
-				mac = mlx4_mac_to_u64(entry->mac);
+				mac = ether_addr_to_u64(entry->mac);
 				mlx4_en_uc_steer_release(priv, entry->mac,
 							 priv->base_qpn,
 							 entry->reg_id);
@@ -1212,7 +1212,7 @@ static void mlx4_en_do_uc_filter(struct mlx4_en_priv *priv,
 				priv->flags |= MLX4_EN_FLAG_FORCE_PROMISC;
 				break;
 			}
-			mac = mlx4_mac_to_u64(ha->addr);
+			mac = ether_addr_to_u64(ha->addr);
 			memcpy(entry->mac, ha->addr, ETH_ALEN);
 			err = mlx4_register_mac(mdev->dev, priv->port, mac);
 			if (err < 0) {
@@ -1348,7 +1348,7 @@ static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv)
 	for (i = 0; i < MLX4_EN_MAC_HASH_SIZE; ++i) {
 		bucket = &priv->mac_hash[i];
 		hlist_for_each_entry_safe(entry, tmp, bucket, hlist) {
-			mac = mlx4_mac_to_u64(entry->mac);
+			mac = ether_addr_to_u64(entry->mac);
 			en_dbg(DRV, priv, "Registering MAC:%pM for deleting\n",
 			       entry->mac);
 			mlx4_en_uc_steer_release(priv, entry->mac,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index dc4ac1a2b6b6..42c96c9d7fb1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -3105,7 +3105,7 @@ void mlx4_replace_zero_macs(struct mlx4_dev *dev)
 		    dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) {
 			eth_random_addr(mac_addr);
 			dev->port_random_macs |= 1 << i;
-			dev->caps.def_mac[i] = mlx4_mac_to_u64(mac_addr);
+			dev->caps.def_mac[i] = ether_addr_to_u64(mac_addr);
 		}
 }
 EXPORT_SYMBOL_GPL(mlx4_replace_zero_macs);
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index a858bcb6220b..b26b71f62fb4 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -92,18 +92,6 @@ void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int
 
 struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port);
 
-static inline u64 mlx4_mac_to_u64(u8 *addr)
-{
-	u64 mac = 0;
-	int i;
-
-	for (i = 0; i < ETH_ALEN; i++) {
-		mac <<= 8;
-		mac |= addr[i];
-	}
-	return mac;
-}
-
 static inline void mlx4_u64_to_mac(u8 *addr, u64 mac)
 {
 	int i;
-- 
cgit v1.2.3


From 1bb96a07f9a8f2fe9725f6689605e32b75d20508 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 4 Oct 2021 12:14:44 -0700
Subject: mlx4: replace mlx4_u64_to_mac() with u64_to_ether_addr()

mlx4_u64_to_mac() predates the common helper but doesn't
make the argument constant.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c |  2 +-
 include/linux/mlx4/driver.h              | 10 ----------
 2 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 9fadedfca41c..94ead263081f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -3195,7 +3195,7 @@ int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting)
 	port = mlx4_slaves_closest_port(dev, slave, port);
 	s_info = &priv->mfunc.master.vf_admin[slave].vport[port];
 
-	mlx4_u64_to_mac(mac, s_info->mac);
+	u64_to_ether_addr(s_info->mac, mac);
 	if (setting && !is_valid_ether_addr(mac)) {
 		mlx4_info(dev, "Illegal MAC with spoofchk\n");
 		return -EPERM;
diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h
index b26b71f62fb4..1834c8fad12e 100644
--- a/include/linux/mlx4/driver.h
+++ b/include/linux/mlx4/driver.h
@@ -92,14 +92,4 @@ void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int
 
 struct devlink_port *mlx4_get_devlink_port(struct mlx4_dev *dev, int port);
 
-static inline void mlx4_u64_to_mac(u8 *addr, u64 mac)
-{
-	int i;
-
-	for (i = ETH_ALEN; i > 0; i--) {
-		addr[i - 1] = mac & 0xFF;
-		mac >>= 8;
-	}
-}
-
 #endif /* MLX4_DRIVER_H */
-- 
cgit v1.2.3


From ebb1fdb589bd6d0ee647d4fa285bc934ba369cde Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 4 Oct 2021 12:14:46 -0700
Subject: mlx4: constify args for const dev_addr

netdev->dev_addr will become const soon. Make sure all
functions which pass it around mark appropriate args
as const.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 +++++---
 drivers/net/ethernet/mellanox/mlx4/mcg.c       | 2 +-
 include/linux/mlx4/device.h                    | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index dce228170b14..3f6d5c384637 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -536,7 +536,8 @@ static void mlx4_en_u64_to_mac(struct net_device *dev, u64 src_mac)
 }
 
 
-static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *addr,
+static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv,
+				    const unsigned char *addr,
 				    int qpn, u64 *reg_id)
 {
 	int err;
@@ -557,7 +558,7 @@ static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *ad
 
 
 static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
-				unsigned char *mac, int *qpn, u64 *reg_id)
+				const unsigned char *mac, int *qpn, u64 *reg_id)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_dev *dev = mdev->dev;
@@ -609,7 +610,8 @@ static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv,
 }
 
 static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv,
-				     unsigned char *mac, int qpn, u64 reg_id)
+				     const unsigned char *mac,
+				     int qpn, u64 reg_id)
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_dev *dev = mdev->dev;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index f1b4ad9c66d2..f1716a83a4d3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -1046,7 +1046,7 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
 }
 EXPORT_SYMBOL_GPL(mlx4_flow_detach);
 
-int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr,
+int mlx4_tunnel_steer_add(struct mlx4_dev *dev, const unsigned char *addr,
 			  int port, int qpn, u16 prio, u64 *reg_id)
 {
 	int err;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 30bb59fe970c..6646634a0b9d 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -1436,7 +1436,7 @@ int mlx4_map_sw_to_hw_steering_id(struct mlx4_dev *dev,
 				  enum mlx4_net_trans_rule_id id);
 int mlx4_hw_rule_sz(struct mlx4_dev *dev, enum mlx4_net_trans_rule_id id);
 
-int mlx4_tunnel_steer_add(struct mlx4_dev *dev, unsigned char *addr,
+int mlx4_tunnel_steer_add(struct mlx4_dev *dev, const unsigned char *addr,
 			  int port, int qpn, u16 prio, u64 *reg_id);
 
 void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port,
-- 
cgit v1.2.3


From 027b57170bf8bb6999a28e4a5f3d78bf1db0f90c Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali@kernel.org>
Date: Sat, 2 Oct 2021 15:09:00 +0200
Subject: serial: core: Fix initializing and restoring termios speed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit edc6afc54968 ("tty: switch to ktermios and new framework")
termios speed is no longer stored only in c_cflag member but also in new
additional c_ispeed and c_ospeed members. If BOTHER flag is set in c_cflag
then termios speed is stored only in these new members.

Therefore to correctly restore termios speed it is required to store also
ispeed and ospeed members, not only cflag member.

In case only cflag member with BOTHER flag is restored then functions
tty_termios_baud_rate() and tty_termios_input_baud_rate() returns baudrate
stored in c_ospeed / c_ispeed member, which is zero as it was not restored
too. If reported baudrate is invalid (e.g. zero) then serial core functions
report fallback baudrate value 9600. So it means that in this case original
baudrate is lost and kernel changes it to value 9600.

Simple reproducer of this issue is to boot kernel with following command
line argument: "console=ttyXXX,86400" (where ttyXXX is the device name).
For speed 86400 there is no Bnnn constant and therefore kernel has to
represent this speed via BOTHER c_cflag. Which means that speed is stored
only in c_ospeed and c_ispeed members, not in c_cflag anymore.

If bootloader correctly configures serial device to speed 86400 then kernel
prints boot log to early console at speed speed 86400 without any issue.
But after kernel starts initializing real console device ttyXXX then speed
is changed to fallback value 9600 because information about speed was lost.

This patch fixes above issue by storing and restoring also ispeed and
ospeed members, which are required for BOTHER flag.

Fixes: edc6afc54968 ("[PATCH] tty: switch to ktermios and new framework")
Cc: stable@vger.kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Link: https://lore.kernel.org/r/20211002130900.9518-1-pali@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial_core.c | 16 ++++++++++++++--
 include/linux/console.h          |  2 ++
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0e2e35ab64c7..1e738f265eea 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -222,7 +222,11 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state,
 	if (retval == 0) {
 		if (uart_console(uport) && uport->cons->cflag) {
 			tty->termios.c_cflag = uport->cons->cflag;
+			tty->termios.c_ispeed = uport->cons->ispeed;
+			tty->termios.c_ospeed = uport->cons->ospeed;
 			uport->cons->cflag = 0;
+			uport->cons->ispeed = 0;
+			uport->cons->ospeed = 0;
 		}
 		/*
 		 * Initialise the hardware port settings.
@@ -290,8 +294,11 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state)
 		/*
 		 * Turn off DTR and RTS early.
 		 */
-		if (uport && uart_console(uport) && tty)
+		if (uport && uart_console(uport) && tty) {
 			uport->cons->cflag = tty->termios.c_cflag;
+			uport->cons->ispeed = tty->termios.c_ispeed;
+			uport->cons->ospeed = tty->termios.c_ospeed;
+		}
 
 		if (!tty || C_HUPCL(tty))
 			uart_port_dtr_rts(uport, 0);
@@ -2094,8 +2101,11 @@ uart_set_options(struct uart_port *port, struct console *co,
 	 * Allow the setting of the UART parameters with a NULL console
 	 * too:
 	 */
-	if (co)
+	if (co) {
 		co->cflag = termios.c_cflag;
+		co->ispeed = termios.c_ispeed;
+		co->ospeed = termios.c_ospeed;
+	}
 
 	return 0;
 }
@@ -2229,6 +2239,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport)
 		 */
 		memset(&termios, 0, sizeof(struct ktermios));
 		termios.c_cflag = uport->cons->cflag;
+		termios.c_ispeed = uport->cons->ispeed;
+		termios.c_ospeed = uport->cons->ospeed;
 
 		/*
 		 * If that's unset, use the tty termios setting.
diff --git a/include/linux/console.h b/include/linux/console.h
index 20874db50bc8..a97f277cfdfa 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -149,6 +149,8 @@ struct console {
 	short	flags;
 	short	index;
 	int	cflag;
+	uint	ispeed;
+	uint	ospeed;
 	void	*data;
 	struct	 console *next;
 };
-- 
cgit v1.2.3


From 94ad8aacbc2d4908b052c8bdb5ae13bc702f77ea Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 27 Sep 2021 16:40:50 +0200
Subject: ARM: omap1: move omap15xx local bus handling to usb.c

Commit 38225f2ef2f4 ("ARM/omap1: switch to use dma_direct_set_offset for
lbus DMA offsets") removed a lot of mach/memory.h, but left the USB
offset handling split into arch/arm/mach-omap1/usb.c and
drivers/usb/host/ohci-omap.c.

This can cause a randconfig build warning that now fails the build
with -Werror:

arch/arm/mach-omap1/usb.c:561:30: error: 'omap_1510_usb_ohci_nb' defined but not used [-Werror=unused-variable]
  561 | static struct notifier_block omap_1510_usb_ohci_nb = {
      |                              ^~~~~~~~~~~~~~~~~~~~~

Move it all into the platform file to get rid of the final
location that relies on mach/memory.h.

Acked-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20210927144118.2464881-1-arnd@kernel.org'
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-omap1/include/mach/memory.h |  12 ----
 arch/arm/mach-omap1/usb.c                 | 116 +++++++++++++++++++++---------
 drivers/usb/host/ohci-omap.c              |  72 +------------------
 include/linux/platform_data/usb-omap1.h   |   2 +
 4 files changed, 86 insertions(+), 116 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h
index 36bc0000cb6a..ba3a350479c8 100644
--- a/arch/arm/mach-omap1/include/mach/memory.h
+++ b/arch/arm/mach-omap1/include/mach/memory.h
@@ -9,16 +9,4 @@
 /* REVISIT: omap1 legacy drivers still rely on this */
 #include <mach/soc.h>
 
-/*
- * Bus address is physical address, except for OMAP-1510 Local Bus.
- * OMAP-1510 bus address is translated into a Local Bus address if the
- * OMAP bus type is lbus. We do the address translation based on the
- * device overriding the defaults used in the dma-mapping API.
- */
-
-/*
- * OMAP-1510 Local Bus address offset
- */
-#define OMAP1510_LB_OFFSET	UL(0x30000000)
-
 #endif
diff --git a/arch/arm/mach-omap1/usb.c b/arch/arm/mach-omap1/usb.c
index 86d3b3c157af..e60831c82b78 100644
--- a/arch/arm/mach-omap1/usb.c
+++ b/arch/arm/mach-omap1/usb.c
@@ -11,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/dma-map-ops.h>
 #include <linux/io.h>
+#include <linux/delay.h>
 
 #include <asm/irq.h>
 
@@ -206,8 +207,6 @@ static inline void udc_device_init(struct omap_usb_config *pdata)
 
 #endif
 
-#if	IS_ENABLED(CONFIG_USB_OHCI_HCD)
-
 /* The dmamask must be set for OHCI to work */
 static u64 ohci_dmamask = ~(u32)0;
 
@@ -236,20 +235,15 @@ static struct platform_device ohci_device = {
 
 static inline void ohci_device_init(struct omap_usb_config *pdata)
 {
+	if (!IS_ENABLED(CONFIG_USB_OHCI_HCD))
+		return;
+
 	if (cpu_is_omap7xx())
 		ohci_resources[1].start = INT_7XX_USB_HHC_1;
 	pdata->ohci_device = &ohci_device;
 	pdata->ocpi_enable = &ocpi_enable;
 }
 
-#else
-
-static inline void ohci_device_init(struct omap_usb_config *pdata)
-{
-}
-
-#endif
-
 #if	defined(CONFIG_USB_OTG) && defined(CONFIG_ARCH_OMAP_OTG)
 
 static struct resource otg_resources[] = {
@@ -534,33 +528,87 @@ bad:
 }
 
 #ifdef	CONFIG_ARCH_OMAP15XX
+/* OMAP-1510 OHCI has its own MMU for DMA */
+#define OMAP1510_LB_MEMSIZE	32	/* Should be same as SDRAM size */
+#define OMAP1510_LB_CLOCK_DIV	0xfffec10c
+#define OMAP1510_LB_MMU_CTL	0xfffec208
+#define OMAP1510_LB_MMU_LCK	0xfffec224
+#define OMAP1510_LB_MMU_LD_TLB	0xfffec228
+#define OMAP1510_LB_MMU_CAM_H	0xfffec22c
+#define OMAP1510_LB_MMU_CAM_L	0xfffec230
+#define OMAP1510_LB_MMU_RAM_H	0xfffec234
+#define OMAP1510_LB_MMU_RAM_L	0xfffec238
 
-/* ULPD_DPLL_CTRL */
-#define DPLL_IOB		(1 << 13)
-#define DPLL_PLL_ENABLE		(1 << 4)
-#define DPLL_LOCK		(1 << 0)
+/*
+ * Bus address is physical address, except for OMAP-1510 Local Bus.
+ * OMAP-1510 bus address is translated into a Local Bus address if the
+ * OMAP bus type is lbus.
+ */
+#define OMAP1510_LB_OFFSET	   UL(0x30000000)
 
-/* ULPD_APLL_CTRL */
-#define APLL_NDPLL_SWITCH	(1 << 0)
+/*
+ * OMAP-1510 specific Local Bus clock on/off
+ */
+static int omap_1510_local_bus_power(int on)
+{
+	if (on) {
+		omap_writel((1 << 1) | (1 << 0), OMAP1510_LB_MMU_CTL);
+		udelay(200);
+	} else {
+		omap_writel(0, OMAP1510_LB_MMU_CTL);
+	}
 
-static int omap_1510_usb_ohci_notifier(struct notifier_block *nb,
-		unsigned long event, void *data)
+	return 0;
+}
+
+/*
+ * OMAP-1510 specific Local Bus initialization
+ * NOTE: This assumes 32MB memory size in OMAP1510LB_MEMSIZE.
+ *       See also arch/mach-omap/memory.h for __virt_to_dma() and
+ *       __dma_to_virt() which need to match with the physical
+ *       Local Bus address below.
+ */
+static int omap_1510_local_bus_init(void)
 {
-	struct device *dev = data;
+	unsigned int tlb;
+	unsigned long lbaddr, physaddr;
+
+	omap_writel((omap_readl(OMAP1510_LB_CLOCK_DIV) & 0xfffffff8) | 0x4,
+	       OMAP1510_LB_CLOCK_DIV);
+
+	/* Configure the Local Bus MMU table */
+	for (tlb = 0; tlb < OMAP1510_LB_MEMSIZE; tlb++) {
+		lbaddr = tlb * 0x00100000 + OMAP1510_LB_OFFSET;
+		physaddr = tlb * 0x00100000 + PHYS_OFFSET;
+		omap_writel((lbaddr & 0x0fffffff) >> 22, OMAP1510_LB_MMU_CAM_H);
+		omap_writel(((lbaddr & 0x003ffc00) >> 6) | 0xc,
+		       OMAP1510_LB_MMU_CAM_L);
+		omap_writel(physaddr >> 16, OMAP1510_LB_MMU_RAM_H);
+		omap_writel((physaddr & 0x0000fc00) | 0x300, OMAP1510_LB_MMU_RAM_L);
+		omap_writel(tlb << 4, OMAP1510_LB_MMU_LCK);
+		omap_writel(0x1, OMAP1510_LB_MMU_LD_TLB);
+	}
 
-	if (event != BUS_NOTIFY_ADD_DEVICE)
-		return NOTIFY_DONE;
+	/* Enable the walking table */
+	omap_writel(omap_readl(OMAP1510_LB_MMU_CTL) | (1 << 3), OMAP1510_LB_MMU_CTL);
+	udelay(200);
 
-	if (strncmp(dev_name(dev), "ohci", 4) == 0 &&
-	    dma_direct_set_offset(dev, PHYS_OFFSET, OMAP1510_LB_OFFSET,
-			(u64)-1))
-		WARN_ONCE(1, "failed to set DMA offset\n");
-	return NOTIFY_OK;
+	return 0;
 }
 
-static struct notifier_block omap_1510_usb_ohci_nb = {
-	.notifier_call		= omap_1510_usb_ohci_notifier,
-};
+static void omap_1510_local_bus_reset(void)
+{
+	omap_1510_local_bus_power(1);
+	omap_1510_local_bus_init();
+}
+
+/* ULPD_DPLL_CTRL */
+#define DPLL_IOB		(1 << 13)
+#define DPLL_PLL_ENABLE		(1 << 4)
+#define DPLL_LOCK		(1 << 0)
+
+/* ULPD_APLL_CTRL */
+#define APLL_NDPLL_SWITCH	(1 << 0)
 
 static void __init omap_1510_usb_init(struct omap_usb_config *config)
 {
@@ -616,19 +664,19 @@ static void __init omap_1510_usb_init(struct omap_usb_config *config)
 	}
 #endif
 
-#if	IS_ENABLED(CONFIG_USB_OHCI_HCD)
-	if (config->register_host) {
+	if (IS_ENABLED(CONFIG_USB_OHCI_HCD) && config->register_host) {
 		int status;
 
-		bus_register_notifier(&platform_bus_type,
-				      &omap_1510_usb_ohci_nb);
 		ohci_device.dev.platform_data = config;
+		dma_direct_set_offset(&ohci_device.dev, PHYS_OFFSET,
+				      OMAP1510_LB_OFFSET, (u64)-1);
 		status = platform_device_register(&ohci_device);
 		if (status)
 			pr_debug("can't register OHCI device, %d\n", status);
 		/* hcd explicitly gates 48MHz */
+
+		config->lb_reset = omap_1510_local_bus_reset;
 	}
-#endif
 }
 
 #else
diff --git a/drivers/usb/host/ohci-omap.c b/drivers/usb/host/ohci-omap.c
index 0b3722770760..ded9738392e4 100644
--- a/drivers/usb/host/ohci-omap.c
+++ b/drivers/usb/host/ohci-omap.c
@@ -40,17 +40,6 @@
 #include <mach/usb.h>
 
 
-/* OMAP-1510 OHCI has its own MMU for DMA */
-#define OMAP1510_LB_MEMSIZE	32	/* Should be same as SDRAM size */
-#define OMAP1510_LB_CLOCK_DIV	0xfffec10c
-#define OMAP1510_LB_MMU_CTL	0xfffec208
-#define OMAP1510_LB_MMU_LCK	0xfffec224
-#define OMAP1510_LB_MMU_LD_TLB	0xfffec228
-#define OMAP1510_LB_MMU_CAM_H	0xfffec22c
-#define OMAP1510_LB_MMU_CAM_L	0xfffec230
-#define OMAP1510_LB_MMU_RAM_H	0xfffec234
-#define OMAP1510_LB_MMU_RAM_L	0xfffec238
-
 #define DRIVER_DESC "OHCI OMAP driver"
 
 struct ohci_omap_priv {
@@ -104,61 +93,6 @@ static int omap_ohci_transceiver_power(struct ohci_omap_priv *priv, int on)
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_OMAP15XX
-/*
- * OMAP-1510 specific Local Bus clock on/off
- */
-static int omap_1510_local_bus_power(int on)
-{
-	if (on) {
-		omap_writel((1 << 1) | (1 << 0), OMAP1510_LB_MMU_CTL);
-		udelay(200);
-	} else {
-		omap_writel(0, OMAP1510_LB_MMU_CTL);
-	}
-
-	return 0;
-}
-
-/*
- * OMAP-1510 specific Local Bus initialization
- * NOTE: This assumes 32MB memory size in OMAP1510LB_MEMSIZE.
- *       See also arch/mach-omap/memory.h for __virt_to_dma() and
- *       __dma_to_virt() which need to match with the physical
- *       Local Bus address below.
- */
-static int omap_1510_local_bus_init(void)
-{
-	unsigned int tlb;
-	unsigned long lbaddr, physaddr;
-
-	omap_writel((omap_readl(OMAP1510_LB_CLOCK_DIV) & 0xfffffff8) | 0x4,
-	       OMAP1510_LB_CLOCK_DIV);
-
-	/* Configure the Local Bus MMU table */
-	for (tlb = 0; tlb < OMAP1510_LB_MEMSIZE; tlb++) {
-		lbaddr = tlb * 0x00100000 + OMAP1510_LB_OFFSET;
-		physaddr = tlb * 0x00100000 + PHYS_OFFSET;
-		omap_writel((lbaddr & 0x0fffffff) >> 22, OMAP1510_LB_MMU_CAM_H);
-		omap_writel(((lbaddr & 0x003ffc00) >> 6) | 0xc,
-		       OMAP1510_LB_MMU_CAM_L);
-		omap_writel(physaddr >> 16, OMAP1510_LB_MMU_RAM_H);
-		omap_writel((physaddr & 0x0000fc00) | 0x300, OMAP1510_LB_MMU_RAM_L);
-		omap_writel(tlb << 4, OMAP1510_LB_MMU_LCK);
-		omap_writel(0x1, OMAP1510_LB_MMU_LD_TLB);
-	}
-
-	/* Enable the walking table */
-	omap_writel(omap_readl(OMAP1510_LB_MMU_CTL) | (1 << 3), OMAP1510_LB_MMU_CTL);
-	udelay(200);
-
-	return 0;
-}
-#else
-#define omap_1510_local_bus_power(x)	{}
-#define omap_1510_local_bus_init()	{}
-#endif
-
 #ifdef	CONFIG_USB_OTG
 
 static void start_hnp(struct ohci_hcd *ohci)
@@ -229,10 +163,8 @@ static int ohci_omap_reset(struct usb_hcd *hcd)
 
 	omap_ohci_clock_power(priv, 1);
 
-	if (cpu_is_omap15xx()) {
-		omap_1510_local_bus_power(1);
-		omap_1510_local_bus_init();
-	}
+	if (config->lb_reset)
+		config->lb_reset();
 
 	ret = ohci_setup(hcd);
 	if (ret < 0)
diff --git a/include/linux/platform_data/usb-omap1.h b/include/linux/platform_data/usb-omap1.h
index 43b5ce139c37..878e572a78bf 100644
--- a/include/linux/platform_data/usb-omap1.h
+++ b/include/linux/platform_data/usb-omap1.h
@@ -48,6 +48,8 @@ struct omap_usb_config {
 	u32 (*usb2_init)(unsigned nwires, unsigned alt_pingroup);
 
 	int (*ocpi_enable)(void);
+
+	void (*lb_reset)(void);
 };
 
 #endif /* __LINUX_USB_OMAP1_H */
-- 
cgit v1.2.3


From a130e8fbc7de796eb6e680724d87f4737a26d0ac Mon Sep 17 00:00:00 2001
From: Josh Don <joshdon@google.com>
Date: Fri, 27 Aug 2021 09:54:38 -0700
Subject: fs/proc/uptime.c: Fix idle time reporting in /proc/uptime

/proc/uptime reports idle time by reading the CPUTIME_IDLE field from
the per-cpu kcpustats. However, on NO_HZ systems, idle time is not
continually updated on idle cpus, leading this value to appear
incorrectly small.

/proc/stat performs an accounting update when reading idle time; we
can use the same approach for uptime.

With this patch, /proc/stat and /proc/uptime now agree on idle time.
Additionally, the following shows idle time tick up consistently on an
idle machine:

  (while true; do cat /proc/uptime; sleep 1; done) | awk '{print $2-prev; prev=$2}'

Reported-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: Josh Don <joshdon@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lkml.kernel.org/r/20210827165438.3280779-1-joshdon@google.com
---
 fs/proc/stat.c              |  4 ++--
 fs/proc/uptime.c            | 14 +++++++++-----
 include/linux/kernel_stat.h |  1 +
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 6561a06ef905..4fb8729a68d4 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -24,7 +24,7 @@
 
 #ifdef arch_idle_time
 
-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
 {
 	u64 idle;
 
@@ -46,7 +46,7 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu)
 
 #else
 
-static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
+u64 get_idle_time(struct kernel_cpustat *kcs, int cpu)
 {
 	u64 idle, idle_usecs = -1ULL;
 
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 5a1b228964fb..deb99bc9b7e6 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -12,18 +12,22 @@ static int uptime_proc_show(struct seq_file *m, void *v)
 {
 	struct timespec64 uptime;
 	struct timespec64 idle;
-	u64 nsec;
+	u64 idle_nsec;
 	u32 rem;
 	int i;
 
-	nsec = 0;
-	for_each_possible_cpu(i)
-		nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
+	idle_nsec = 0;
+	for_each_possible_cpu(i) {
+		struct kernel_cpustat kcs;
+
+		kcpustat_cpu_fetch(&kcs, i);
+		idle_nsec += get_idle_time(&kcs, i);
+	}
 
 	ktime_get_boottime_ts64(&uptime);
 	timens_add_boottime(&uptime);
 
-	idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+	idle.tv_sec = div_u64_rem(idle_nsec, NSEC_PER_SEC, &rem);
 	idle.tv_nsec = rem;
 	seq_printf(m, "%lu.%02lu %lu.%02lu\n",
 			(unsigned long) uptime.tv_sec,
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 44ae1a7eb9e3..69ae6b278464 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -102,6 +102,7 @@ extern void account_system_index_time(struct task_struct *, u64,
 				      enum cpu_usage_stat);
 extern void account_steal_time(u64);
 extern void account_idle_time(u64);
+extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu);
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 static inline void account_process_tick(struct task_struct *tsk, int user)
-- 
cgit v1.2.3


From ceeadb83aea28372e54857bf88ab7e17af48ab7b Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Sun, 5 Sep 2021 14:35:41 +0000
Subject: sched: Make struct sched_statistics independent of fair sched class

If we want to use the schedstats facility to trace other sched classes, we
should make it independent of fair sched class. The struct sched_statistics
is the schedular statistics of a task_struct or a task_group. So we can
move it into struct task_struct and struct task_group to achieve the goal.

After the patch, schestats are orgnized as follows,

    struct task_struct {
       ...
       struct sched_entity se;
       struct sched_rt_entity rt;
       struct sched_dl_entity dl;
       ...
       struct sched_statistics stats;
       ...
   };

Regarding the task group, schedstats is only supported for fair group
sched, and a new struct sched_entity_stats is introduced, suggested by
Peter -

    struct sched_entity_stats {
        struct sched_entity     se;
        struct sched_statistics stats;
    } __no_randomize_layout;

Then with the se in a task_group, we can easily get the stats.

The sched_statistics members may be frequently modified when schedstats is
enabled, in order to avoid impacting on random data which may in the same
cacheline with them, the struct sched_statistics is defined as cacheline
aligned.

As this patch changes the core struct of scheduler, so I verified the
performance it may impact on the scheduler with 'perf bench sched
pipe', suggested by Mel. Below is the result, in which all the values
are in usecs/op.
                                  Before               After
      kernel.sched_schedstats=0  5.2~5.4               5.2~5.4
      kernel.sched_schedstats=1  5.3~5.5               5.3~5.5
[These data is a little difference with the earlier version, that is
 because my old test machine is destroyed so I have to use a new
 different test machine.]

Almost no impact on the sched performance.

No functional change.

[lkp@intel.com: reported build failure in earlier version]

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lore.kernel.org/r/20210905143547.4668-3-laoar.shao@gmail.com
---
 include/linux/sched.h    |  6 ++--
 kernel/sched/core.c      | 25 +++++++------
 kernel/sched/deadline.c  |  4 +--
 kernel/sched/debug.c     | 92 +++++++++++++++++++++++++-----------------------
 kernel/sched/fair.c      | 89 +++++++++++++++++++++++++++-------------------
 kernel/sched/rt.c        |  4 +--
 kernel/sched/stats.h     | 19 ++++++++++
 kernel/sched/stop_task.c |  4 +--
 8 files changed, 143 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1a927ddec64..2bc4c72fec2d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -522,7 +522,7 @@ struct sched_statistics {
 	u64				nr_wakeups_passive;
 	u64				nr_wakeups_idle;
 #endif
-};
+} ____cacheline_aligned;
 
 struct sched_entity {
 	/* For load-balancing: */
@@ -538,8 +538,6 @@ struct sched_entity {
 
 	u64				nr_migrations;
 
-	struct sched_statistics		statistics;
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	int				depth;
 	struct sched_entity		*parent;
@@ -803,6 +801,8 @@ struct task_struct {
 	struct uclamp_se		uclamp[UCLAMP_CNT];
 #endif
 
+	struct sched_statistics         stats;
+
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	/* List of struct preempt_notifier: */
 	struct hlist_head		preempt_notifiers;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ccb604a1bd22..a1741e004eaa 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3489,11 +3489,11 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 #ifdef CONFIG_SMP
 	if (cpu == rq->cpu) {
 		__schedstat_inc(rq->ttwu_local);
-		__schedstat_inc(p->se.statistics.nr_wakeups_local);
+		__schedstat_inc(p->stats.nr_wakeups_local);
 	} else {
 		struct sched_domain *sd;
 
-		__schedstat_inc(p->se.statistics.nr_wakeups_remote);
+		__schedstat_inc(p->stats.nr_wakeups_remote);
 		rcu_read_lock();
 		for_each_domain(rq->cpu, sd) {
 			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
@@ -3505,14 +3505,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 	}
 
 	if (wake_flags & WF_MIGRATED)
-		__schedstat_inc(p->se.statistics.nr_wakeups_migrate);
+		__schedstat_inc(p->stats.nr_wakeups_migrate);
 #endif /* CONFIG_SMP */
 
 	__schedstat_inc(rq->ttwu_count);
-	__schedstat_inc(p->se.statistics.nr_wakeups);
+	__schedstat_inc(p->stats.nr_wakeups);
 
 	if (wake_flags & WF_SYNC)
-		__schedstat_inc(p->se.statistics.nr_wakeups_sync);
+		__schedstat_inc(p->stats.nr_wakeups_sync);
 }
 
 /*
@@ -4196,7 +4196,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 #ifdef CONFIG_SCHEDSTATS
 	/* Even if schedstat is disabled, there should not be garbage */
-	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
+	memset(&p->stats, 0, sizeof(p->stats));
 #endif
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
@@ -9553,9 +9553,9 @@ void normalize_rt_tasks(void)
 			continue;
 
 		p->se.exec_start = 0;
-		schedstat_set(p->se.statistics.wait_start,  0);
-		schedstat_set(p->se.statistics.sleep_start, 0);
-		schedstat_set(p->se.statistics.block_start, 0);
+		schedstat_set(p->stats.wait_start,  0);
+		schedstat_set(p->stats.sleep_start, 0);
+		schedstat_set(p->stats.block_start, 0);
 
 		if (!dl_task(p) && !rt_task(p)) {
 			/*
@@ -10397,11 +10397,14 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 	seq_printf(sf, "throttled_time %llu\n", cfs_b->throttled_time);
 
 	if (schedstat_enabled() && tg != &root_task_group) {
+		struct sched_statistics *stats;
 		u64 ws = 0;
 		int i;
 
-		for_each_possible_cpu(i)
-			ws += schedstat_val(tg->se[i]->statistics.wait_sum);
+		for_each_possible_cpu(i) {
+			stats = __schedstats_from_se(tg->se[i]);
+			ws += schedstat_val(stats->wait_sum);
+		}
 
 		seq_printf(sf, "wait_sum %llu\n", ws);
 	}
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e94314633b39..51dd30990042 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1265,8 +1265,8 @@ static void update_curr_dl(struct rq *rq)
 		return;
 	}
 
-	schedstat_set(curr->se.statistics.exec_max,
-		      max(curr->se.statistics.exec_max, delta_exec));
+	schedstat_set(curr->stats.exec_max,
+		      max(curr->stats.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 34913a7f775a..76fd38ea844c 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -449,9 +449,11 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	struct sched_entity *se = tg->se[cpu];
 
 #define P(F)		SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)F)
-#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n",	#F, (long long)schedstat_val(F))
+#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n",	\
+		#F, (long long)schedstat_val(stats->F))
 #define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
+#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", \
+		#F, SPLIT_NS((long long)schedstat_val(stats->F)))
 
 	if (!se)
 		return;
@@ -461,16 +463,18 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	PN(se->sum_exec_runtime);
 
 	if (schedstat_enabled()) {
-		PN_SCHEDSTAT(se->statistics.wait_start);
-		PN_SCHEDSTAT(se->statistics.sleep_start);
-		PN_SCHEDSTAT(se->statistics.block_start);
-		PN_SCHEDSTAT(se->statistics.sleep_max);
-		PN_SCHEDSTAT(se->statistics.block_max);
-		PN_SCHEDSTAT(se->statistics.exec_max);
-		PN_SCHEDSTAT(se->statistics.slice_max);
-		PN_SCHEDSTAT(se->statistics.wait_max);
-		PN_SCHEDSTAT(se->statistics.wait_sum);
-		P_SCHEDSTAT(se->statistics.wait_count);
+               struct sched_statistics *stats =  __schedstats_from_se(se);
+
+		PN_SCHEDSTAT(wait_start);
+		PN_SCHEDSTAT(sleep_start);
+		PN_SCHEDSTAT(block_start);
+		PN_SCHEDSTAT(sleep_max);
+		PN_SCHEDSTAT(block_max);
+		PN_SCHEDSTAT(exec_max);
+		PN_SCHEDSTAT(slice_max);
+		PN_SCHEDSTAT(wait_max);
+		PN_SCHEDSTAT(wait_sum);
+		P_SCHEDSTAT(wait_count);
 	}
 
 	P(se->load.weight);
@@ -537,9 +541,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		p->prio);
 
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
-		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
+		SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -958,8 +962,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 		"---------------------------------------------------------"
 		"----------\n");
 
-#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->F))
-#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
+#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->stats.F))
+#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F))
 
 	PN(se.exec_start);
 	PN(se.vruntime);
@@ -972,33 +976,33 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 	if (schedstat_enabled()) {
 		u64 avg_atom, avg_per_cpu;
 
-		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
-		PN_SCHEDSTAT(se.statistics.wait_start);
-		PN_SCHEDSTAT(se.statistics.sleep_start);
-		PN_SCHEDSTAT(se.statistics.block_start);
-		PN_SCHEDSTAT(se.statistics.sleep_max);
-		PN_SCHEDSTAT(se.statistics.block_max);
-		PN_SCHEDSTAT(se.statistics.exec_max);
-		PN_SCHEDSTAT(se.statistics.slice_max);
-		PN_SCHEDSTAT(se.statistics.wait_max);
-		PN_SCHEDSTAT(se.statistics.wait_sum);
-		P_SCHEDSTAT(se.statistics.wait_count);
-		PN_SCHEDSTAT(se.statistics.iowait_sum);
-		P_SCHEDSTAT(se.statistics.iowait_count);
-		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
-		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
-		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
-		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
-		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
-		P_SCHEDSTAT(se.statistics.nr_wakeups);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
-		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
+		PN_SCHEDSTAT(sum_sleep_runtime);
+		PN_SCHEDSTAT(wait_start);
+		PN_SCHEDSTAT(sleep_start);
+		PN_SCHEDSTAT(block_start);
+		PN_SCHEDSTAT(sleep_max);
+		PN_SCHEDSTAT(block_max);
+		PN_SCHEDSTAT(exec_max);
+		PN_SCHEDSTAT(slice_max);
+		PN_SCHEDSTAT(wait_max);
+		PN_SCHEDSTAT(wait_sum);
+		P_SCHEDSTAT(wait_count);
+		PN_SCHEDSTAT(iowait_sum);
+		P_SCHEDSTAT(iowait_count);
+		P_SCHEDSTAT(nr_migrations_cold);
+		P_SCHEDSTAT(nr_failed_migrations_affine);
+		P_SCHEDSTAT(nr_failed_migrations_running);
+		P_SCHEDSTAT(nr_failed_migrations_hot);
+		P_SCHEDSTAT(nr_forced_migrations);
+		P_SCHEDSTAT(nr_wakeups);
+		P_SCHEDSTAT(nr_wakeups_sync);
+		P_SCHEDSTAT(nr_wakeups_migrate);
+		P_SCHEDSTAT(nr_wakeups_local);
+		P_SCHEDSTAT(nr_wakeups_remote);
+		P_SCHEDSTAT(nr_wakeups_affine);
+		P_SCHEDSTAT(nr_wakeups_affine_attempts);
+		P_SCHEDSTAT(nr_wakeups_passive);
+		P_SCHEDSTAT(nr_wakeups_idle);
 
 		avg_atom = p->se.sum_exec_runtime;
 		if (nr_switches)
@@ -1064,7 +1068,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 void proc_sched_set_task(struct task_struct *p)
 {
 #ifdef CONFIG_SCHEDSTATS
-	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
+	memset(&p->stats, 0, sizeof(p->stats));
 #endif
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 61d3e3b97fe5..97c6ecb03bb2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -856,8 +856,13 @@ static void update_curr(struct cfs_rq *cfs_rq)
 
 	curr->exec_start = now;
 
-	schedstat_set(curr->statistics.exec_max,
-		      max(delta_exec, curr->statistics.exec_max));
+	if (schedstat_enabled()) {
+		struct sched_statistics *stats;
+
+		stats = __schedstats_from_se(curr);
+		__schedstat_set(stats->exec_max,
+				max(delta_exec, stats->exec_max));
+	}
 
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
@@ -885,39 +890,45 @@ static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	u64 wait_start, prev_wait_start;
+	struct sched_statistics *stats;
 
 	if (!schedstat_enabled())
 		return;
 
+	stats = __schedstats_from_se(se);
+
 	wait_start = rq_clock(rq_of(cfs_rq));
-	prev_wait_start = schedstat_val(se->statistics.wait_start);
+	prev_wait_start = schedstat_val(stats->wait_start);
 
 	if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
 	    likely(wait_start > prev_wait_start))
 		wait_start -= prev_wait_start;
 
-	__schedstat_set(se->statistics.wait_start, wait_start);
+	__schedstat_set(stats->wait_start, wait_start);
 }
 
 static inline void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct task_struct *p;
+	struct sched_statistics *stats;
+	struct task_struct *p = NULL;
 	u64 delta;
 
 	if (!schedstat_enabled())
 		return;
 
+	stats = __schedstats_from_se(se);
+
 	/*
 	 * When the sched_schedstat changes from 0 to 1, some sched se
 	 * maybe already in the runqueue, the se->statistics.wait_start
 	 * will be 0.So it will let the delta wrong. We need to avoid this
 	 * scenario.
 	 */
-	if (unlikely(!schedstat_val(se->statistics.wait_start)))
+	if (unlikely(!schedstat_val(stats->wait_start)))
 		return;
 
-	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
+	delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(stats->wait_start);
 
 	if (entity_is_task(se)) {
 		p = task_of(se);
@@ -927,30 +938,33 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 			 * time stamp can be adjusted to accumulate wait time
 			 * prior to migration.
 			 */
-			__schedstat_set(se->statistics.wait_start, delta);
+			__schedstat_set(stats->wait_start, delta);
 			return;
 		}
 		trace_sched_stat_wait(p, delta);
 	}
 
-	__schedstat_set(se->statistics.wait_max,
-		      max(schedstat_val(se->statistics.wait_max), delta));
-	__schedstat_inc(se->statistics.wait_count);
-	__schedstat_add(se->statistics.wait_sum, delta);
-	__schedstat_set(se->statistics.wait_start, 0);
+	__schedstat_set(stats->wait_max,
+		      max(schedstat_val(stats->wait_max), delta));
+	__schedstat_inc(stats->wait_count);
+	__schedstat_add(stats->wait_sum, delta);
+	__schedstat_set(stats->wait_start, 0);
 }
 
 static inline void
 update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	struct sched_statistics *stats;
 	struct task_struct *tsk = NULL;
 	u64 sleep_start, block_start;
 
 	if (!schedstat_enabled())
 		return;
 
-	sleep_start = schedstat_val(se->statistics.sleep_start);
-	block_start = schedstat_val(se->statistics.block_start);
+	stats = __schedstats_from_se(se);
+
+	sleep_start = schedstat_val(stats->sleep_start);
+	block_start = schedstat_val(stats->block_start);
 
 	if (entity_is_task(se))
 		tsk = task_of(se);
@@ -961,11 +975,11 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
-			__schedstat_set(se->statistics.sleep_max, delta);
+		if (unlikely(delta > schedstat_val(stats->sleep_max)))
+			__schedstat_set(stats->sleep_max, delta);
 
-		__schedstat_set(se->statistics.sleep_start, 0);
-		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(stats->sleep_start, 0);
+		__schedstat_add(stats->sum_sleep_runtime, delta);
 
 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
@@ -978,16 +992,16 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > schedstat_val(se->statistics.block_max)))
-			__schedstat_set(se->statistics.block_max, delta);
+		if (unlikely(delta > schedstat_val(stats->block_max)))
+			__schedstat_set(stats->block_max, delta);
 
-		__schedstat_set(se->statistics.block_start, 0);
-		__schedstat_add(se->statistics.sum_sleep_runtime, delta);
+		__schedstat_set(stats->block_start, 0);
+		__schedstat_add(stats->sum_sleep_runtime, delta);
 
 		if (tsk) {
 			if (tsk->in_iowait) {
-				__schedstat_add(se->statistics.iowait_sum, delta);
-				__schedstat_inc(se->statistics.iowait_count);
+				__schedstat_add(stats->iowait_sum, delta);
+				__schedstat_inc(stats->iowait_count);
 				trace_sched_stat_iowait(tsk, delta);
 			}
 
@@ -1049,10 +1063,10 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		/* XXX racy against TTWU */
 		state = READ_ONCE(tsk->__state);
 		if (state & TASK_INTERRUPTIBLE)
-			__schedstat_set(se->statistics.sleep_start,
+			__schedstat_set(tsk->stats.sleep_start,
 				      rq_clock(rq_of(cfs_rq)));
 		if (state & TASK_UNINTERRUPTIBLE)
-			__schedstat_set(se->statistics.block_start,
+			__schedstat_set(tsk->stats.block_start,
 				      rq_clock(rq_of(cfs_rq)));
 	}
 }
@@ -4530,8 +4544,11 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	if (schedstat_enabled() &&
 	    rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
-		__schedstat_set(se->statistics.slice_max,
-				max((u64)se->statistics.slice_max,
+		struct sched_statistics *stats;
+
+		stats = __schedstats_from_se(se);
+		__schedstat_set(stats->slice_max,
+				max((u64)stats->slice_max,
 				    se->sum_exec_runtime - se->prev_sum_exec_runtime));
 	}
 
@@ -6046,12 +6063,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
 	if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
 		target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
 
-	schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
+	schedstat_inc(p->stats.nr_wakeups_affine_attempts);
 	if (target == nr_cpumask_bits)
 		return prev_cpu;
 
 	schedstat_inc(sd->ttwu_move_affine);
-	schedstat_inc(p->se.statistics.nr_wakeups_affine);
+	schedstat_inc(p->stats.nr_wakeups_affine);
 	return target;
 }
 
@@ -7855,7 +7872,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
 		int cpu;
 
-		schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
+		schedstat_inc(p->stats.nr_failed_migrations_affine);
 
 		env->flags |= LBF_SOME_PINNED;
 
@@ -7889,7 +7906,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	env->flags &= ~LBF_ALL_PINNED;
 
 	if (task_running(env->src_rq, p)) {
-		schedstat_inc(p->se.statistics.nr_failed_migrations_running);
+		schedstat_inc(p->stats.nr_failed_migrations_running);
 		return 0;
 	}
 
@@ -7911,12 +7928,12 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	    env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
 		if (tsk_cache_hot == 1) {
 			schedstat_inc(env->sd->lb_hot_gained[env->idle]);
-			schedstat_inc(p->se.statistics.nr_forced_migrations);
+			schedstat_inc(p->stats.nr_forced_migrations);
 		}
 		return 1;
 	}
 
-	schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
+	schedstat_inc(p->stats.nr_failed_migrations_hot);
 	return 0;
 }
 
@@ -11457,7 +11474,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 		if (!cfs_rq)
 			goto err;
 
-		se = kzalloc_node(sizeof(struct sched_entity),
+		se = kzalloc_node(sizeof(struct sched_entity_stats),
 				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
 			goto err_free_rq;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 3daf42a0f462..95a7c3ad2dc3 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1009,8 +1009,8 @@ static void update_curr_rt(struct rq *rq)
 	if (unlikely((s64)delta_exec <= 0))
 		return;
 
-	schedstat_set(curr->se.statistics.exec_max,
-		      max(curr->se.statistics.exec_max, delta_exec));
+	schedstat_set(curr->stats.exec_max,
+		      max(curr->stats.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index d8f8eb0c655b..fb6022e860af 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -41,6 +41,7 @@ rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
 #define   schedstat_val_or_zero(var)	((schedstat_enabled()) ? (var) : 0)
 
 #else /* !CONFIG_SCHEDSTATS: */
+
 static inline void rq_sched_info_arrive  (struct rq *rq, unsigned long long delta) { }
 static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { }
 static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delta) { }
@@ -53,8 +54,26 @@ static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delt
 # define   schedstat_set(var, val)	do { } while (0)
 # define   schedstat_val(var)		0
 # define   schedstat_val_or_zero(var)	0
+
 #endif /* CONFIG_SCHEDSTATS */
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+struct sched_entity_stats {
+	struct sched_entity     se;
+	struct sched_statistics stats;
+} __no_randomize_layout;
+#endif
+
+static inline struct sched_statistics *
+__schedstats_from_se(struct sched_entity *se)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (!entity_is_task(se))
+		return &container_of(se, struct sched_entity_stats, se)->stats;
+#endif
+	return &task_of(se)->stats;
+}
+
 #ifdef CONFIG_PSI
 /*
  * PSI tracks state that persists across sleeps, such as iowaits and
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index f988ebe3febb..0b165a25f22f 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -78,8 +78,8 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
-	schedstat_set(curr->se.statistics.exec_max,
-			max(curr->se.statistics.exec_max, delta_exec));
+	schedstat_set(curr->stats.exec_max,
+		      max(curr->stats.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
-- 
cgit v1.2.3


From 847fc0cd0664fcb2a08ac66df6b85935361ec454 Mon Sep 17 00:00:00 2001
From: Yafang Shao <laoar.shao@gmail.com>
Date: Sun, 5 Sep 2021 14:35:43 +0000
Subject: sched: Introduce task block time in schedstats

Currently in schedstats we have sum_sleep_runtime and iowait_sum, but
there's no metric to show how long the task is in D state.  Once a task in
D state, it means the task is blocked in the kernel, for example the
task may be waiting for a mutex. The D state is more frequent than
iowait, and it is more critital than S state. So it is worth to add a
metric to measure it.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210905143547.4668-5-laoar.shao@gmail.com
---
 include/linux/sched.h | 2 ++
 kernel/sched/debug.c  | 6 ++++--
 kernel/sched/stats.c  | 1 +
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2bc4c72fec2d..193e16e2d0e4 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -503,6 +503,8 @@ struct sched_statistics {
 
 	u64				block_start;
 	u64				block_max;
+	s64				sum_block_runtime;
+
 	u64				exec_max;
 	u64				slice_max;
 
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 76fd38ea844c..26fac5e28bc0 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -540,10 +540,11 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
 
-	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
+	SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld %9lld.%06ld",
 		SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)));
+		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)),
+		SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime)));
 
 #ifdef CONFIG_NUMA_BALANCING
 	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
@@ -977,6 +978,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 		u64 avg_atom, avg_per_cpu;
 
 		PN_SCHEDSTAT(sum_sleep_runtime);
+		PN_SCHEDSTAT(sum_block_runtime);
 		PN_SCHEDSTAT(wait_start);
 		PN_SCHEDSTAT(sleep_start);
 		PN_SCHEDSTAT(block_start);
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index fad781ca7791..07dde2928c79 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -82,6 +82,7 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
 
 		__schedstat_set(stats->block_start, 0);
 		__schedstat_add(stats->sum_sleep_runtime, delta);
+		__schedstat_add(stats->sum_block_runtime, delta);
 
 		if (p) {
 			if (p->in_iowait) {
-- 
cgit v1.2.3


From 8d491de6edc27138806cae6e8eca455beb325b62 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 28 Sep 2021 14:24:32 +0200
Subject: sched: Move mmdrop to RCU on RT

mmdrop() is invoked from finish_task_switch() by the incoming task to drop
the mm which was handed over by the previous task. mmdrop() can be quite
expensive which prevents an incoming real-time task from getting useful
work done.

Provide mmdrop_sched() which maps to mmdrop() on !RT kernels. On RT kernels
it delagates the eventually required invocation of __mmdrop() to RCU.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210928122411.648582026@linutronix.de
---
 include/linux/mm_types.h |  4 ++++
 include/linux/sched/mm.h | 29 +++++++++++++++++++++++++++++
 kernel/sched/core.c      |  2 +-
 3 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..e9672de22cf2 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
 #include <linux/completion.h>
 #include <linux/cpumask.h>
 #include <linux/uprobes.h>
+#include <linux/rcupdate.h>
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
 #include <linux/seqlock.h>
@@ -572,6 +573,9 @@ struct mm_struct {
 		bool tlb_flush_batched;
 #endif
 		struct uprobes_state uprobes_state;
+#ifdef CONFIG_PREEMPT_RT
+		struct rcu_head delayed_drop;
+#endif
 #ifdef CONFIG_HUGETLB_PAGE
 		atomic_long_t hugetlb_usage;
 #endif
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 5561486fddef..aca874d33fe6 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,6 +49,35 @@ static inline void mmdrop(struct mm_struct *mm)
 		__mmdrop(mm);
 }
 
+#ifdef CONFIG_PREEMPT_RT
+/*
+ * RCU callback for delayed mm drop. Not strictly RCU, but call_rcu() is
+ * by far the least expensive way to do that.
+ */
+static inline void __mmdrop_delayed(struct rcu_head *rhp)
+{
+	struct mm_struct *mm = container_of(rhp, struct mm_struct, delayed_drop);
+
+	__mmdrop(mm);
+}
+
+/*
+ * Invoked from finish_task_switch(). Delegates the heavy lifting on RT
+ * kernels via RCU.
+ */
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+	/* Provides a full memory barrier. See mmdrop() */
+	if (atomic_dec_and_test(&mm->mm_count))
+		call_rcu(&mm->delayed_drop, __mmdrop_delayed);
+}
+#else
+static inline void mmdrop_sched(struct mm_struct *mm)
+{
+	mmdrop(mm);
+}
+#endif
+
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 95f4e16f98a2..9eaeba671001 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4836,7 +4836,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	 */
 	if (mm) {
 		membarrier_mm_sync_core_before_usermode(mm);
-		mmdrop(mm);
+		mmdrop_sched(mm);
 	}
 	if (unlikely(prev_state == TASK_DEAD)) {
 		if (prev->sched_class->task_dead)
-- 
cgit v1.2.3


From c8ed99533dbc0fcc1142671ec80acb33045d2999 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Wed, 8 Sep 2021 15:05:23 +0100
Subject: PM: EM: Mark inefficient states

Some SoCs, such as the sd855 have OPPs within the same performance domain,
whose cost is higher than others with a higher frequency. Even though
those OPPs are interesting from a cooling perspective, it makes no sense
to use them when the device can run at full capacity. Those OPPs handicap
the performance domain, when choosing the most energy-efficient CPU and
are wasting energy. They are inefficient.

Hence, add support for such OPPs to the Energy Model. The table can now
be read skipping inefficient performance states (and by extension,
inefficient OPPs).

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/energy_model.h | 12 ++++++++++++
 kernel/power/energy_model.c  |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 39dcadd492b5..3641ca4acf04 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -17,13 +17,25 @@
  *		device). It can be a total power: static and dynamic.
  * @cost:	The cost coefficient associated with this level, used during
  *		energy calculation. Equal to: power * max_frequency / frequency
+ * @flags:	see "em_perf_state flags" description below.
  */
 struct em_perf_state {
 	unsigned long frequency;
 	unsigned long power;
 	unsigned long cost;
+	unsigned long flags;
 };
 
+/*
+ * em_perf_state flags:
+ *
+ * EM_PERF_STATE_INEFFICIENT: The performance state is inefficient. There is
+ * in this em_perf_domain, another performance state with a higher frequency
+ * but a lower or equal power cost. Such inefficient states are ignored when
+ * using em_pd_get_efficient_*() functions.
+ */
+#define EM_PERF_STATE_INEFFICIENT BIT(0)
+
 /**
  * struct em_perf_domain - Performance domain
  * @table:		List of performance states, in ascending order
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 97e62469a6b3..6d8438347535 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -2,7 +2,7 @@
 /*
  * Energy Model of devices
  *
- * Copyright (c) 2018-2020, Arm ltd.
+ * Copyright (c) 2018-2021, Arm ltd.
  * Written by: Quentin Perret, Arm ltd.
  * Improvements provided by: Lukasz Luba, Arm ltd.
  */
@@ -42,6 +42,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
 	debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
 	debugfs_create_ulong("power", 0444, d, &ps->power);
 	debugfs_create_ulong("cost", 0444, d, &ps->cost);
+	debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
 }
 
 static int em_debug_cpus_show(struct seq_file *s, void *unused)
@@ -162,6 +163,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
 		table[i].cost = div64_u64(fmax * power_res,
 					  table[i].frequency);
 		if (table[i].cost >= prev_cost) {
+			table[i].flags = EM_PERF_STATE_INEFFICIENT;
 			dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
 				table[i].frequency);
 		} else {
-- 
cgit v1.2.3


From 88f7a89560f6d0fc7803a8933637488f14e0a098 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Wed, 8 Sep 2021 15:05:24 +0100
Subject: PM: EM: Extend em_perf_domain with a flag field

Merge the current "milliwatts" option into a "flag" field. This intends to
prepare the extension of this structure for inefficient states support in
the Energy Model.

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/energy_model.h | 13 ++++++++++---
 kernel/power/energy_model.c  |  6 ++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 3641ca4acf04..671440371a95 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -40,8 +40,7 @@ struct em_perf_state {
  * struct em_perf_domain - Performance domain
  * @table:		List of performance states, in ascending order
  * @nr_perf_states:	Number of performance states
- * @milliwatts:		Flag indicating the power values are in milli-Watts
- *			or some other scale.
+ * @flags:		See "em_perf_domain flags"
  * @cpus:		Cpumask covering the CPUs of the domain. It's here
  *			for performance reasons to avoid potential cache
  *			misses during energy calculations in the scheduler
@@ -56,10 +55,18 @@ struct em_perf_state {
 struct em_perf_domain {
 	struct em_perf_state *table;
 	int nr_perf_states;
-	int milliwatts;
+	unsigned long flags;
 	unsigned long cpus[];
 };
 
+/*
+ *  em_perf_domain flags:
+ *
+ *  EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some
+ *  other scale.
+ */
+#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
+
 #define em_span_cpus(em) (to_cpumask((em)->cpus))
 
 #ifdef CONFIG_ENERGY_MODEL
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 6d8438347535..3a7d1573b214 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -56,7 +56,8 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
 static int em_debug_units_show(struct seq_file *s, void *unused)
 {
 	struct em_perf_domain *pd = s->private;
-	char *units = pd->milliwatts ? "milliWatts" : "bogoWatts";
+	char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
+		"milliWatts" : "bogoWatts";
 
 	seq_printf(s, "%s\n", units);
 
@@ -330,7 +331,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 	if (ret)
 		goto unlock;
 
-	dev->em_pd->milliwatts = milliwatts;
+	if (milliwatts)
+		dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
 
 	em_debug_create_pd(dev);
 	dev_info(dev, "EM: created perf domain\n");
-- 
cgit v1.2.3


From 8354eb9eb3ddb4a8d0857648a470beffcc9d8639 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Wed, 8 Sep 2021 15:05:25 +0100
Subject: PM: EM: Allow skipping inefficient states

The new performance domain flag EM_PERF_DOMAIN_SKIP_INEFFICIENCIES allows
to not take into account inefficient states when estimating energy
consumption. This intends to let the Energy Model know that CPUFreq itself
will skip inefficiencies and such states don't need to be part of the
estimation anymore.

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/energy_model.h | 43 +++++++++++++++++++++++++++++++++++++------
 kernel/power/energy_model.c  | 13 +++++++++++++
 2 files changed, 50 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 671440371a95..6377adc3b78d 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -64,8 +64,12 @@ struct em_perf_domain {
  *
  *  EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some
  *  other scale.
+ *
+ *  EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
+ *  energy consumption.
  */
 #define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
+#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
 
 #define em_span_cpus(em) (to_cpumask((em)->cpus))
 
@@ -120,6 +124,37 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 				bool milliwatts);
 void em_dev_unregister_perf_domain(struct device *dev);
 
+/**
+ * em_pd_get_efficient_state() - Get an efficient performance state from the EM
+ * @pd   : Performance domain for which we want an efficient frequency
+ * @freq : Frequency to map with the EM
+ *
+ * It is called from the scheduler code quite frequently and as a consequence
+ * doesn't implement any check.
+ *
+ * Return: An efficient performance state, high enough to meet @freq
+ * requirement.
+ */
+static inline
+struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd,
+						unsigned long freq)
+{
+	struct em_perf_state *ps;
+	int i;
+
+	for (i = 0; i < pd->nr_perf_states; i++) {
+		ps = &pd->table[i];
+		if (ps->frequency >= freq) {
+			if (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES &&
+			    ps->flags & EM_PERF_STATE_INEFFICIENT)
+				continue;
+			break;
+		}
+	}
+
+	return ps;
+}
+
 /**
  * em_cpu_energy() - Estimates the energy consumed by the CPUs of a
  *		performance domain
@@ -142,7 +177,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 {
 	unsigned long freq, scale_cpu;
 	struct em_perf_state *ps;
-	int i, cpu;
+	int cpu;
 
 	if (!sum_util)
 		return 0;
@@ -167,11 +202,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	 * Find the lowest performance state of the Energy Model above the
 	 * requested frequency.
 	 */
-	for (i = 0; i < pd->nr_perf_states; i++) {
-		ps = &pd->table[i];
-		if (ps->frequency >= freq)
-			break;
-	}
+	ps = em_pd_get_efficient_state(pd, freq);
 
 	/*
 	 * The capacity of a CPU in the domain at the performance state (ps)
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 3a7d1573b214..d353ef29e37f 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -65,6 +65,17 @@ static int em_debug_units_show(struct seq_file *s, void *unused)
 }
 DEFINE_SHOW_ATTRIBUTE(em_debug_units);
 
+static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
+{
+	struct em_perf_domain *pd = s->private;
+	int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
+
+	seq_printf(s, "%d\n", enabled);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
+
 static void em_debug_create_pd(struct device *dev)
 {
 	struct dentry *d;
@@ -78,6 +89,8 @@ static void em_debug_create_pd(struct device *dev)
 				    &em_debug_cpus_fops);
 
 	debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
+	debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
+			    &em_debug_skip_inefficiencies_fops);
 
 	/* Create a sub-directory for each performance state */
 	for (i = 0; i < dev->em_pd->nr_perf_states; i++)
-- 
cgit v1.2.3


From 442d24a5c49a8ed1008dcb9c06dc463d12421e7f Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Wed, 8 Sep 2021 15:05:27 +0100
Subject: cpufreq: Add an interface to mark inefficient frequencies

Some SoCs such as the sd855 have OPPs within the same policy whose cost is
higher than others with a higher frequency. Those OPPs are inefficients
and it might be interesting for a governor to not use them.

cpufreq_table_set_inefficient() allows the caller to identify a specified
frequency as being inefficient. Inefficient frequencies are only supported
on sorted tables.

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/cpufreq.h | 44 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ff88bb3e44fc..16997e1ff3fe 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -660,10 +660,11 @@ struct governor_attr {
  *********************************************************************/
 
 /* Special Values of .frequency field */
-#define CPUFREQ_ENTRY_INVALID	~0u
-#define CPUFREQ_TABLE_END	~1u
+#define CPUFREQ_ENTRY_INVALID		~0u
+#define CPUFREQ_TABLE_END		~1u
 /* Special Values of .flags field */
-#define CPUFREQ_BOOST_FREQ	(1 << 0)
+#define CPUFREQ_BOOST_FREQ		(1 << 0)
+#define CPUFREQ_INEFFICIENT_FREQ	(1 << 1)
 
 struct cpufreq_frequency_table {
 	unsigned int	flags;
@@ -1003,6 +1004,36 @@ static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy
 	return count;
 }
 
+/**
+ * cpufreq_table_set_inefficient() - Mark a frequency as inefficient
+ * @policy:	the &struct cpufreq_policy containing the inefficient frequency
+ * @frequency:	the inefficient frequency
+ *
+ * The &struct cpufreq_policy must use a sorted frequency table
+ *
+ * Return:	%0 on success or a negative errno code
+ */
+
+static inline int
+cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
+			      unsigned int frequency)
+{
+	struct cpufreq_frequency_table *pos;
+
+	/* Not supported */
+	if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)
+		return -EINVAL;
+
+	cpufreq_for_each_valid_entry(pos, policy->freq_table) {
+		if (pos->frequency == frequency) {
+			pos->flags |= CPUFREQ_INEFFICIENT_FREQ;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
 static inline int parse_perf_domain(int cpu, const char *list_name,
 				    const char *cell_name)
 {
@@ -1071,6 +1102,13 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
 	return false;
 }
 
+static inline int
+cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
+			      unsigned int frequency)
+{
+	return -EINVAL;
+}
+
 static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
 						     const char *cell_name, struct cpumask *cpumask)
 {
-- 
cgit v1.2.3


From 1f39fa0dccff71d4788089b5e617229b19166867 Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Wed, 8 Sep 2021 15:05:28 +0100
Subject: cpufreq: Introducing CPUFREQ_RELATION_E

This newly introduced flag can be applied by a governor to a CPUFreq
relation, when looking for a frequency within the policy table. The
resolution would then only walk through efficient frequencies.

Even with the flag set, the policy max limit will still be honoured. If no
efficient frequencies can be found within the limits of the policy, an
inefficient one would be returned.

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/acpi-cpufreq.c         |   3 +-
 drivers/cpufreq/amd_freq_sensitivity.c |   3 +-
 drivers/cpufreq/cpufreq.c              |  10 ++-
 drivers/cpufreq/cpufreq_ondemand.c     |   6 +-
 drivers/cpufreq/powernv-cpufreq.c      |   4 +-
 drivers/cpufreq/s5pv210-cpufreq.c      |   2 +-
 include/linux/cpufreq.h                | 111 +++++++++++++++++++++++++--------
 7 files changed, 106 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 28467d83c745..3d514b82d055 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -470,7 +470,8 @@ static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
 	if (policy->cached_target_freq == target_freq)
 		index = policy->cached_resolved_idx;
 	else
-		index = cpufreq_table_find_index_dl(policy, target_freq);
+		index = cpufreq_table_find_index_dl(policy, target_freq,
+						    false);
 
 	entry = &policy->freq_table[index];
 	next_freq = entry->frequency;
diff --git a/drivers/cpufreq/amd_freq_sensitivity.c b/drivers/cpufreq/amd_freq_sensitivity.c
index d0b10baf039a..6448e03bcf48 100644
--- a/drivers/cpufreq/amd_freq_sensitivity.c
+++ b/drivers/cpufreq/amd_freq_sensitivity.c
@@ -91,7 +91,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
 			unsigned int index;
 
 			index = cpufreq_table_find_index_h(policy,
-							   policy->cur - 1);
+							   policy->cur - 1,
+							   relation & CPUFREQ_RELATION_E);
 			freq_next = policy->freq_table[index].frequency;
 		}
 
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 284e940084c6..8069a7bac9ef 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2260,8 +2260,16 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 	    !(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS))
 		return 0;
 
-	if (cpufreq_driver->target)
+	if (cpufreq_driver->target) {
+		/*
+		 * If the driver hasn't setup a single inefficient frequency,
+		 * it's unlikely it knows how to decode CPUFREQ_RELATION_E.
+		 */
+		if (!policy->efficiencies_available)
+			relation &= ~CPUFREQ_RELATION_E;
+
 		return cpufreq_driver->target(policy, target_freq, relation);
+	}
 
 	if (!cpufreq_driver->target_index)
 		return -EINVAL;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index eb4320b619c9..f68cad9abd11 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -83,9 +83,11 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
 	freq_avg = freq_req - freq_reduc;
 
 	/* Find freq bounds for freq_avg in freq_table */
-	index = cpufreq_table_find_index_h(policy, freq_avg);
+	index = cpufreq_table_find_index_h(policy, freq_avg,
+					   relation & CPUFREQ_RELATION_E);
 	freq_lo = freq_table[index].frequency;
-	index = cpufreq_table_find_index_l(policy, freq_avg);
+	index = cpufreq_table_find_index_l(policy, freq_avg,
+					   relation & CPUFREQ_RELATION_E);
 	freq_hi = freq_table[index].frequency;
 
 	/* Find out how long we have to be in hi and lo freqs */
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
index 5a2cf5f91ccb..fddbd1ea1635 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -934,7 +934,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
 		policy = cpufreq_cpu_get(cpu);
 		if (!policy)
 			continue;
-		index = cpufreq_table_find_index_c(policy, policy->cur);
+		index = cpufreq_table_find_index_c(policy, policy->cur, false);
 		powernv_cpufreq_target_index(policy, index);
 		cpumask_andnot(&mask, &mask, policy->cpus);
 		cpufreq_cpu_put(policy);
@@ -1022,7 +1022,7 @@ static unsigned int powernv_fast_switch(struct cpufreq_policy *policy,
 	int index;
 	struct powernv_smp_call_data freq_data;
 
-	index = cpufreq_table_find_index_dl(policy, target_freq);
+	index = cpufreq_table_find_index_dl(policy, target_freq, false);
 	freq_data.pstate_id = powernv_freqs[index].driver_data;
 	freq_data.gpstate_id = powernv_freqs[index].driver_data;
 	set_pstate(&freq_data);
diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index ad7d4f272ddc..76c888ed8d16 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -243,7 +243,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
 	new_freq = s5pv210_freq_table[index].frequency;
 
 	/* Finding current running level index */
-	priv_index = cpufreq_table_find_index_h(policy, old_freq);
+	priv_index = cpufreq_table_find_index_h(policy, old_freq, false);
 
 	arm_volt = dvs_conf[index].arm_volt;
 	int_volt = dvs_conf[index].int_volt;
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 16997e1ff3fe..6d96bd452d55 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -118,6 +118,13 @@ struct cpufreq_policy {
 	 */
 	bool			strict_target;
 
+	/*
+	 * Set if inefficient frequencies were found in the frequency table.
+	 * This indicates if the relation flag CPUFREQ_RELATION_E can be
+	 * honored.
+	 */
+	bool			efficiencies_available;
+
 	/*
 	 * Preferred average time interval between consecutive invocations of
 	 * the driver to set the frequency for this policy.  To be set by the
@@ -273,6 +280,8 @@ static inline void cpufreq_stats_record_transition(struct cpufreq_policy *policy
 #define CPUFREQ_RELATION_L 0  /* lowest frequency at or above target */
 #define CPUFREQ_RELATION_H 1  /* highest frequency below or at target */
 #define CPUFREQ_RELATION_C 2  /* closest frequency to target */
+/* relation flags */
+#define CPUFREQ_RELATION_E BIT(2) /* Get if possible an efficient frequency */
 
 struct freq_attr {
 	struct attribute attr;
@@ -741,6 +750,22 @@ static inline void dev_pm_opp_free_cpufreq_table(struct device *dev,
 			continue;					\
 		else
 
+/**
+ * cpufreq_for_each_efficient_entry_idx - iterate with index over a cpufreq
+ *	frequency_table excluding CPUFREQ_ENTRY_INVALID and
+ *	CPUFREQ_INEFFICIENT_FREQ frequencies.
+ * @pos: the &struct cpufreq_frequency_table to use as a loop cursor.
+ * @table: the &struct cpufreq_frequency_table to iterate over.
+ * @idx: the table entry currently being processed.
+ * @efficiencies: set to true to only iterate over efficient frequencies.
+ */
+
+#define cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies)	\
+	cpufreq_for_each_valid_entry_idx(pos, table, idx)			\
+		if (efficiencies && (pos->flags & CPUFREQ_INEFFICIENT_FREQ))	\
+			continue;						\
+		else
+
 
 int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
 				    struct cpufreq_frequency_table *table);
@@ -765,14 +790,15 @@ bool policy_has_boost_freq(struct cpufreq_policy *policy);
 
 /* Find lowest freq at or above target in a table in ascending order */
 static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy,
-					      unsigned int target_freq)
+					      unsigned int target_freq,
+					      bool efficiencies)
 {
 	struct cpufreq_frequency_table *table = policy->freq_table;
 	struct cpufreq_frequency_table *pos;
 	unsigned int freq;
 	int idx, best = -1;
 
-	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+	cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
 		freq = pos->frequency;
 
 		if (freq >= target_freq)
@@ -786,14 +812,15 @@ static inline int cpufreq_table_find_index_al(struct cpufreq_policy *policy,
 
 /* Find lowest freq at or above target in a table in descending order */
 static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy,
-					      unsigned int target_freq)
+					      unsigned int target_freq,
+					      bool efficiencies)
 {
 	struct cpufreq_frequency_table *table = policy->freq_table;
 	struct cpufreq_frequency_table *pos;
 	unsigned int freq;
 	int idx, best = -1;
 
-	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+	cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
 		freq = pos->frequency;
 
 		if (freq == target_freq)
@@ -816,26 +843,30 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy,
 
 /* Works only on sorted freq-tables */
 static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy,
-					     unsigned int target_freq)
+					     unsigned int target_freq,
+					     bool efficiencies)
 {
 	target_freq = clamp_val(target_freq, policy->min, policy->max);
 
 	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
-		return cpufreq_table_find_index_al(policy, target_freq);
+		return cpufreq_table_find_index_al(policy, target_freq,
+						   efficiencies);
 	else
-		return cpufreq_table_find_index_dl(policy, target_freq);
+		return cpufreq_table_find_index_dl(policy, target_freq,
+						   efficiencies);
 }
 
 /* Find highest freq at or below target in a table in ascending order */
 static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy,
-					      unsigned int target_freq)
+					      unsigned int target_freq,
+					      bool efficiencies)
 {
 	struct cpufreq_frequency_table *table = policy->freq_table;
 	struct cpufreq_frequency_table *pos;
 	unsigned int freq;
 	int idx, best = -1;
 
-	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+	cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
 		freq = pos->frequency;
 
 		if (freq == target_freq)
@@ -858,14 +889,15 @@ static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy,
 
 /* Find highest freq at or below target in a table in descending order */
 static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy,
-					      unsigned int target_freq)
+					      unsigned int target_freq,
+					      bool efficiencies)
 {
 	struct cpufreq_frequency_table *table = policy->freq_table;
 	struct cpufreq_frequency_table *pos;
 	unsigned int freq;
 	int idx, best = -1;
 
-	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+	cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
 		freq = pos->frequency;
 
 		if (freq <= target_freq)
@@ -879,26 +911,30 @@ static inline int cpufreq_table_find_index_dh(struct cpufreq_policy *policy,
 
 /* Works only on sorted freq-tables */
 static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy,
-					     unsigned int target_freq)
+					     unsigned int target_freq,
+					     bool efficiencies)
 {
 	target_freq = clamp_val(target_freq, policy->min, policy->max);
 
 	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
-		return cpufreq_table_find_index_ah(policy, target_freq);
+		return cpufreq_table_find_index_ah(policy, target_freq,
+						   efficiencies);
 	else
-		return cpufreq_table_find_index_dh(policy, target_freq);
+		return cpufreq_table_find_index_dh(policy, target_freq,
+						   efficiencies);
 }
 
 /* Find closest freq to target in a table in ascending order */
 static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy,
-					      unsigned int target_freq)
+					      unsigned int target_freq,
+					      bool efficiencies)
 {
 	struct cpufreq_frequency_table *table = policy->freq_table;
 	struct cpufreq_frequency_table *pos;
 	unsigned int freq;
 	int idx, best = -1;
 
-	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+	cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
 		freq = pos->frequency;
 
 		if (freq == target_freq)
@@ -925,14 +961,15 @@ static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy,
 
 /* Find closest freq to target in a table in descending order */
 static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy,
-					      unsigned int target_freq)
+					      unsigned int target_freq,
+					      bool efficiencies)
 {
 	struct cpufreq_frequency_table *table = policy->freq_table;
 	struct cpufreq_frequency_table *pos;
 	unsigned int freq;
 	int idx, best = -1;
 
-	cpufreq_for_each_valid_entry_idx(pos, table, idx) {
+	cpufreq_for_each_efficient_entry_idx(pos, table, idx, efficiencies) {
 		freq = pos->frequency;
 
 		if (freq == target_freq)
@@ -959,35 +996,58 @@ static inline int cpufreq_table_find_index_dc(struct cpufreq_policy *policy,
 
 /* Works only on sorted freq-tables */
 static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
-					     unsigned int target_freq)
+					     unsigned int target_freq,
+					     bool efficiencies)
 {
 	target_freq = clamp_val(target_freq, policy->min, policy->max);
 
 	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
-		return cpufreq_table_find_index_ac(policy, target_freq);
+		return cpufreq_table_find_index_ac(policy, target_freq,
+						   efficiencies);
 	else
-		return cpufreq_table_find_index_dc(policy, target_freq);
+		return cpufreq_table_find_index_dc(policy, target_freq,
+						   efficiencies);
 }
 
 static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 						 unsigned int target_freq,
 						 unsigned int relation)
 {
+	bool efficiencies = policy->efficiencies_available &&
+			    (relation & CPUFREQ_RELATION_E);
+	int idx;
+
+	/* cpufreq_table_index_unsorted() has no use for this flag anyway */
+	relation &= ~CPUFREQ_RELATION_E;
+
 	if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))
 		return cpufreq_table_index_unsorted(policy, target_freq,
 						    relation);
-
+retry:
 	switch (relation) {
 	case CPUFREQ_RELATION_L:
-		return cpufreq_table_find_index_l(policy, target_freq);
+		idx = cpufreq_table_find_index_l(policy, target_freq,
+						 efficiencies);
+		break;
 	case CPUFREQ_RELATION_H:
-		return cpufreq_table_find_index_h(policy, target_freq);
+		idx = cpufreq_table_find_index_h(policy, target_freq,
+						 efficiencies);
+		break;
 	case CPUFREQ_RELATION_C:
-		return cpufreq_table_find_index_c(policy, target_freq);
+		idx = cpufreq_table_find_index_c(policy, target_freq,
+						 efficiencies);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
 	}
+
+	if (idx < 0 && efficiencies) {
+		efficiencies = false;
+		goto retry;
+	}
+
+	return idx;
 }
 
 static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)
@@ -1027,6 +1087,7 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
 	cpufreq_for_each_valid_entry(pos, policy->freq_table) {
 		if (pos->frequency == frequency) {
 			pos->flags |= CPUFREQ_INEFFICIENT_FREQ;
+			policy->efficiencies_available = true;
 			return 0;
 		}
 	}
-- 
cgit v1.2.3


From b894d20e6867f04827c7817fbc155460ff108f6f Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Wed, 8 Sep 2021 15:05:29 +0100
Subject: cpufreq: Use CPUFREQ_RELATION_E in DVFS governors

Let the governors schedutil, conservative and ondemand to work, if possible
on efficient frequencies only.

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c              |  2 +-
 drivers/cpufreq/cpufreq_conservative.c |  6 ++++--
 drivers/cpufreq/cpufreq_ondemand.c     | 10 +++++-----
 include/linux/cpufreq.h                | 10 ++++++++--
 4 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8069a7bac9ef..e338d2f010fe 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -554,7 +554,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
 unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
 					 unsigned int target_freq)
 {
-	return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_L);
+	return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
 }
 EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
 
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index aa39ff31ec9f..0879ec3c170c 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -111,7 +111,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
 		if (requested_freq > policy->max)
 			requested_freq = policy->max;
 
-		__cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H);
+		__cpufreq_driver_target(policy, requested_freq,
+					CPUFREQ_RELATION_HE);
 		dbs_info->requested_freq = requested_freq;
 		goto out;
 	}
@@ -134,7 +135,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
 		else
 			requested_freq = policy->min;
 
-		__cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L);
+		__cpufreq_driver_target(policy, requested_freq,
+					CPUFREQ_RELATION_LE);
 		dbs_info->requested_freq = requested_freq;
 	}
 
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index f68cad9abd11..3b8f924771b4 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -120,12 +120,12 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
 
 	if (od_tuners->powersave_bias)
 		freq = od_ops.powersave_bias_target(policy, freq,
-				CPUFREQ_RELATION_H);
+						    CPUFREQ_RELATION_HE);
 	else if (policy->cur == policy->max)
 		return;
 
 	__cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ?
-			CPUFREQ_RELATION_L : CPUFREQ_RELATION_H);
+			CPUFREQ_RELATION_LE : CPUFREQ_RELATION_HE);
 }
 
 /*
@@ -163,9 +163,9 @@ static void od_update(struct cpufreq_policy *policy)
 		if (od_tuners->powersave_bias)
 			freq_next = od_ops.powersave_bias_target(policy,
 								 freq_next,
-								 CPUFREQ_RELATION_L);
+								 CPUFREQ_RELATION_LE);
 
-		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C);
+		__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_CE);
 	}
 }
 
@@ -184,7 +184,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy)
 	 */
 	if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) {
 		__cpufreq_driver_target(policy, dbs_info->freq_lo,
-					CPUFREQ_RELATION_H);
+					CPUFREQ_RELATION_HE);
 		return dbs_info->freq_lo_delay_us;
 	}
 
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 6d96bd452d55..7ce71c7371fb 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -283,6 +283,10 @@ static inline void cpufreq_stats_record_transition(struct cpufreq_policy *policy
 /* relation flags */
 #define CPUFREQ_RELATION_E BIT(2) /* Get if possible an efficient frequency */
 
+#define CPUFREQ_RELATION_LE (CPUFREQ_RELATION_L | CPUFREQ_RELATION_E)
+#define CPUFREQ_RELATION_HE (CPUFREQ_RELATION_H | CPUFREQ_RELATION_E)
+#define CPUFREQ_RELATION_CE (CPUFREQ_RELATION_C | CPUFREQ_RELATION_E)
+
 struct freq_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct cpufreq_policy *, char *);
@@ -636,9 +640,11 @@ struct cpufreq_governor *cpufreq_fallback_governor(void);
 static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy)
 {
 	if (policy->max < policy->cur)
-		__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+		__cpufreq_driver_target(policy, policy->max,
+					CPUFREQ_RELATION_HE);
 	else if (policy->min > policy->cur)
-		__cpufreq_driver_target(policy, policy->min, CPUFREQ_RELATION_L);
+		__cpufreq_driver_target(policy, policy->min,
+					CPUFREQ_RELATION_LE);
 }
 
 /* Governor attribute set */
-- 
cgit v1.2.3


From 4ad81f6ef89b62434f1d2ed26e9bec9d0e3d9dfe Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 4 Oct 2021 22:06:41 +0200
Subject: clk: tegra: Add stubs needed for compile testing

These stubs are needed to allow the tegra-cpuidle driver to be
compile-tested.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 include/linux/clk/tegra.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h
index d128ad1570aa..3650e926e93f 100644
--- a/include/linux/clk/tegra.h
+++ b/include/linux/clk/tegra.h
@@ -42,6 +42,7 @@ struct tegra_cpu_car_ops {
 #endif
 };
 
+#ifdef CONFIG_ARCH_TEGRA
 extern struct tegra_cpu_car_ops *tegra_cpu_car_ops;
 
 static inline void tegra_wait_cpu_in_reset(u32 cpu)
@@ -83,8 +84,29 @@ static inline void tegra_disable_cpu_clock(u32 cpu)
 
 	tegra_cpu_car_ops->disable_clock(cpu);
 }
+#else
+static inline void tegra_wait_cpu_in_reset(u32 cpu)
+{
+}
 
-#ifdef CONFIG_PM_SLEEP
+static inline void tegra_put_cpu_in_reset(u32 cpu)
+{
+}
+
+static inline void tegra_cpu_out_of_reset(u32 cpu)
+{
+}
+
+static inline void tegra_enable_cpu_clock(u32 cpu)
+{
+}
+
+static inline void tegra_disable_cpu_clock(u32 cpu)
+{
+}
+#endif
+
+#if defined(CONFIG_ARCH_TEGRA) && defined(CONFIG_PM_SLEEP)
 static inline bool tegra_cpu_rail_off_ready(void)
 {
 	if (WARN_ON(!tegra_cpu_car_ops->rail_off_ready))
-- 
cgit v1.2.3


From 2357672c54c3f748f675446f8eba8b0432b1e7e2 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 2 Oct 2021 06:47:49 +0530
Subject: bpf: Introduce BPF support for kernel module function calls

This change adds support on the kernel side to allow for BPF programs to
call kernel module functions. Userspace will prepare an array of module
BTF fds that is passed in during BPF_PROG_LOAD using fd_array parameter.
In the kernel, the module BTFs are placed in the auxilliary struct for
bpf_prog, and loaded as needed.

The verifier then uses insn->off to index into the fd_array. insn->off
0 is reserved for vmlinux BTF (for backwards compat), so userspace must
use an fd_array index > 0 for module kfunc support. kfunc_btf_tab is
sorted based on offset in an array, and each offset corresponds to one
descriptor, with a max limit up to 256 such module BTFs.

We also change existing kfunc_tab to distinguish each element based on
imm, off pair as each such call will now be distinct.

Another change is to check_kfunc_call callback, which now include a
struct module * pointer, this is to be used in later patch such that the
kfunc_id and module pointer are matched for dynamically registered BTF
sets from loadable modules, so that same kfunc_id in two modules doesn't
lead to check_kfunc_call succeeding. For the duration of the
check_kfunc_call, the reference to struct module exists, as it returns
the pointer stored in kfunc_btf_tab.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211002011757.311265-2-memxor@gmail.com
---
 include/linux/bpf.h          |   8 +-
 include/linux/bpf_verifier.h |   2 +
 kernel/bpf/core.c            |   4 +
 kernel/bpf/verifier.c        | 202 +++++++++++++++++++++++++++++++++++++------
 net/bpf/test_run.c           |   2 +-
 net/ipv4/bpf_tcp_ca.c        |   2 +-
 6 files changed, 188 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1c7fd7c4c6d3..d604c8251d88 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -513,7 +513,7 @@ struct bpf_verifier_ops {
 				 const struct btf_type *t, int off, int size,
 				 enum bpf_access_type atype,
 				 u32 *next_btf_id);
-	bool (*check_kfunc_call)(u32 kfunc_btf_id);
+	bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner);
 };
 
 struct bpf_prog_offload_ops {
@@ -877,6 +877,7 @@ struct bpf_prog_aux {
 	void *jit_data; /* JIT specific data. arch dependent */
 	struct bpf_jit_poke_descriptor *poke_tab;
 	struct bpf_kfunc_desc_tab *kfunc_tab;
+	struct bpf_kfunc_btf_tab *kfunc_btf_tab;
 	u32 size_poke_tab;
 	struct bpf_ksym ksym;
 	const struct bpf_prog_ops *ops;
@@ -1639,7 +1640,7 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 				const union bpf_attr *kattr,
 				union bpf_attr __user *uattr);
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id);
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
@@ -1860,7 +1861,8 @@ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 	return -ENOTSUPP;
 }
 
-static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id,
+						  struct module *owner)
 {
 	return false;
 }
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 5424124dbe36..c8a78e830fca 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -527,5 +527,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 			    const struct bpf_prog *tgt_prog,
 			    u32 btf_id,
 			    struct bpf_attach_target_info *tgt_info);
+void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab);
+
 
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index ea8a468dbded..b6c72af64d5d 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -32,6 +32,7 @@
 #include <linux/perf_event.h>
 #include <linux/extable.h>
 #include <linux/log2.h>
+#include <linux/bpf_verifier.h>
 
 #include <asm/barrier.h>
 #include <asm/unaligned.h>
@@ -2255,6 +2256,9 @@ static void bpf_prog_free_deferred(struct work_struct *work)
 	int i;
 
 	aux = container_of(work, struct bpf_prog_aux, work);
+#ifdef CONFIG_BPF_SYSCALL
+	bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab);
+#endif
 	bpf_free_used_maps(aux);
 	bpf_free_used_btfs(aux);
 	if (bpf_prog_is_dev_bound(aux))
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1433752db740..1d6d10265cab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1640,52 +1640,173 @@ static int add_subprog(struct bpf_verifier_env *env, int off)
 	return env->subprog_cnt - 1;
 }
 
+#define MAX_KFUNC_DESCS 256
+#define MAX_KFUNC_BTFS	256
+
 struct bpf_kfunc_desc {
 	struct btf_func_model func_model;
 	u32 func_id;
 	s32 imm;
+	u16 offset;
+};
+
+struct bpf_kfunc_btf {
+	struct btf *btf;
+	struct module *module;
+	u16 offset;
 };
 
-#define MAX_KFUNC_DESCS 256
 struct bpf_kfunc_desc_tab {
 	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
 	u32 nr_descs;
 };
 
-static int kfunc_desc_cmp_by_id(const void *a, const void *b)
+struct bpf_kfunc_btf_tab {
+	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
+	u32 nr_descs;
+};
+
+static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
 {
 	const struct bpf_kfunc_desc *d0 = a;
 	const struct bpf_kfunc_desc *d1 = b;
 
 	/* func_id is not greater than BTF_MAX_TYPE */
-	return d0->func_id - d1->func_id;
+	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
+}
+
+static int kfunc_btf_cmp_by_off(const void *a, const void *b)
+{
+	const struct bpf_kfunc_btf *d0 = a;
+	const struct bpf_kfunc_btf *d1 = b;
+
+	return d0->offset - d1->offset;
 }
 
 static const struct bpf_kfunc_desc *
-find_kfunc_desc(const struct bpf_prog *prog, u32 func_id)
+find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 {
 	struct bpf_kfunc_desc desc = {
 		.func_id = func_id,
+		.offset = offset,
 	};
 	struct bpf_kfunc_desc_tab *tab;
 
 	tab = prog->aux->kfunc_tab;
 	return bsearch(&desc, tab->descs, tab->nr_descs,
-		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id);
+		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
+}
+
+static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
+					 s16 offset, struct module **btf_modp)
+{
+	struct bpf_kfunc_btf kf_btf = { .offset = offset };
+	struct bpf_kfunc_btf_tab *tab;
+	struct bpf_kfunc_btf *b;
+	struct module *mod;
+	struct btf *btf;
+	int btf_fd;
+
+	tab = env->prog->aux->kfunc_btf_tab;
+	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
+		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
+	if (!b) {
+		if (tab->nr_descs == MAX_KFUNC_BTFS) {
+			verbose(env, "too many different module BTFs\n");
+			return ERR_PTR(-E2BIG);
+		}
+
+		if (bpfptr_is_null(env->fd_array)) {
+			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
+			return ERR_PTR(-EPROTO);
+		}
+
+		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
+					    offset * sizeof(btf_fd),
+					    sizeof(btf_fd)))
+			return ERR_PTR(-EFAULT);
+
+		btf = btf_get_by_fd(btf_fd);
+		if (IS_ERR(btf))
+			return btf;
+
+		if (!btf_is_module(btf)) {
+			verbose(env, "BTF fd for kfunc is not a module BTF\n");
+			btf_put(btf);
+			return ERR_PTR(-EINVAL);
+		}
+
+		mod = btf_try_get_module(btf);
+		if (!mod) {
+			btf_put(btf);
+			return ERR_PTR(-ENXIO);
+		}
+
+		b = &tab->descs[tab->nr_descs++];
+		b->btf = btf;
+		b->module = mod;
+		b->offset = offset;
+
+		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+		     kfunc_btf_cmp_by_off, NULL);
+	}
+	if (btf_modp)
+		*btf_modp = b->module;
+	return b->btf;
 }
 
-static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
+void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
+{
+	if (!tab)
+		return;
+
+	while (tab->nr_descs--) {
+		module_put(tab->descs[tab->nr_descs].module);
+		btf_put(tab->descs[tab->nr_descs].btf);
+	}
+	kfree(tab);
+}
+
+static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env,
+				       u32 func_id, s16 offset,
+				       struct module **btf_modp)
+{
+	struct btf *kfunc_btf;
+
+	if (offset) {
+		if (offset < 0) {
+			/* In the future, this can be allowed to increase limit
+			 * of fd index into fd_array, interpreted as u16.
+			 */
+			verbose(env, "negative offset disallowed for kernel module function call\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		kfunc_btf = __find_kfunc_desc_btf(env, offset, btf_modp);
+		if (IS_ERR_OR_NULL(kfunc_btf)) {
+			verbose(env, "cannot find module BTF for func_id %u\n", func_id);
+			return kfunc_btf ?: ERR_PTR(-ENOENT);
+		}
+		return kfunc_btf;
+	}
+	return btf_vmlinux ?: ERR_PTR(-ENOENT);
+}
+
+static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 {
 	const struct btf_type *func, *func_proto;
+	struct bpf_kfunc_btf_tab *btf_tab;
 	struct bpf_kfunc_desc_tab *tab;
 	struct bpf_prog_aux *prog_aux;
 	struct bpf_kfunc_desc *desc;
 	const char *func_name;
+	struct btf *desc_btf;
 	unsigned long addr;
 	int err;
 
 	prog_aux = env->prog->aux;
 	tab = prog_aux->kfunc_tab;
+	btf_tab = prog_aux->kfunc_btf_tab;
 	if (!tab) {
 		if (!btf_vmlinux) {
 			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
@@ -1713,7 +1834,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
 		prog_aux->kfunc_tab = tab;
 	}
 
-	if (find_kfunc_desc(env->prog, func_id))
+	if (!btf_tab && offset) {
+		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
+		if (!btf_tab)
+			return -ENOMEM;
+		prog_aux->kfunc_btf_tab = btf_tab;
+	}
+
+	desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL);
+	if (IS_ERR(desc_btf)) {
+		verbose(env, "failed to find BTF for kernel function\n");
+		return PTR_ERR(desc_btf);
+	}
+
+	if (find_kfunc_desc(env->prog, func_id, offset))
 		return 0;
 
 	if (tab->nr_descs == MAX_KFUNC_DESCS) {
@@ -1721,20 +1855,20 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
 		return -E2BIG;
 	}
 
-	func = btf_type_by_id(btf_vmlinux, func_id);
+	func = btf_type_by_id(desc_btf, func_id);
 	if (!func || !btf_type_is_func(func)) {
 		verbose(env, "kernel btf_id %u is not a function\n",
 			func_id);
 		return -EINVAL;
 	}
-	func_proto = btf_type_by_id(btf_vmlinux, func->type);
+	func_proto = btf_type_by_id(desc_btf, func->type);
 	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
 		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
 			func_id);
 		return -EINVAL;
 	}
 
-	func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+	func_name = btf_name_by_offset(desc_btf, func->name_off);
 	addr = kallsyms_lookup_name(func_name);
 	if (!addr) {
 		verbose(env, "cannot find address for kernel function %s\n",
@@ -1745,12 +1879,13 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
 	desc = &tab->descs[tab->nr_descs++];
 	desc->func_id = func_id;
 	desc->imm = BPF_CALL_IMM(addr);
-	err = btf_distill_func_proto(&env->log, btf_vmlinux,
+	desc->offset = offset;
+	err = btf_distill_func_proto(&env->log, desc_btf,
 				     func_proto, func_name,
 				     &desc->func_model);
 	if (!err)
 		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
-		     kfunc_desc_cmp_by_id, NULL);
+		     kfunc_desc_cmp_by_id_off, NULL);
 	return err;
 }
 
@@ -1829,7 +1964,7 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 		} else if (bpf_pseudo_call(insn)) {
 			ret = add_subprog(env, i + insn->imm + 1);
 		} else {
-			ret = add_kfunc_call(env, insn->imm);
+			ret = add_kfunc_call(env, insn->imm, insn->off);
 		}
 
 		if (ret < 0)
@@ -2166,12 +2301,17 @@ static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
 {
 	const struct btf_type *func;
+	struct btf *desc_btf;
 
 	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
 		return NULL;
 
-	func = btf_type_by_id(btf_vmlinux, insn->imm);
-	return btf_name_by_offset(btf_vmlinux, func->name_off);
+	desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL);
+	if (IS_ERR(desc_btf))
+		return "<error>";
+
+	func = btf_type_by_id(desc_btf, insn->imm);
+	return btf_name_by_offset(desc_btf, func->name_off);
 }
 
 /* For given verifier state backtrack_insn() is called from the last insn to
@@ -6530,23 +6670,29 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	struct bpf_reg_state *regs = cur_regs(env);
 	const char *func_name, *ptr_type_name;
 	u32 i, nargs, func_id, ptr_type_id;
+	struct module *btf_mod = NULL;
 	const struct btf_param *args;
+	struct btf *desc_btf;
 	int err;
 
+	desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod);
+	if (IS_ERR(desc_btf))
+		return PTR_ERR(desc_btf);
+
 	func_id = insn->imm;
-	func = btf_type_by_id(btf_vmlinux, func_id);
-	func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
-	func_proto = btf_type_by_id(btf_vmlinux, func->type);
+	func = btf_type_by_id(desc_btf, func_id);
+	func_name = btf_name_by_offset(desc_btf, func->name_off);
+	func_proto = btf_type_by_id(desc_btf, func->type);
 
 	if (!env->ops->check_kfunc_call ||
-	    !env->ops->check_kfunc_call(func_id)) {
+	    !env->ops->check_kfunc_call(func_id, btf_mod)) {
 		verbose(env, "calling kernel function %s is not allowed\n",
 			func_name);
 		return -EACCES;
 	}
 
 	/* Check the arguments */
-	err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs);
+	err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
 	if (err)
 		return err;
 
@@ -6554,15 +6700,15 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		mark_reg_not_init(env, regs, caller_saved[i]);
 
 	/* Check return type */
-	t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL);
+	t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
 	if (btf_type_is_scalar(t)) {
 		mark_reg_unknown(env, regs, BPF_REG_0);
 		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
 	} else if (btf_type_is_ptr(t)) {
-		ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type,
+		ptr_type = btf_type_skip_modifiers(desc_btf, t->type,
 						   &ptr_type_id);
 		if (!btf_type_is_struct(ptr_type)) {
-			ptr_type_name = btf_name_by_offset(btf_vmlinux,
+			ptr_type_name = btf_name_by_offset(desc_btf,
 							   ptr_type->name_off);
 			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
 				func_name, btf_type_str(ptr_type),
@@ -6570,7 +6716,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			return -EINVAL;
 		}
 		mark_reg_known_zero(env, regs, BPF_REG_0);
-		regs[BPF_REG_0].btf = btf_vmlinux;
+		regs[BPF_REG_0].btf = desc_btf;
 		regs[BPF_REG_0].type = PTR_TO_BTF_ID;
 		regs[BPF_REG_0].btf_id = ptr_type_id;
 		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
@@ -6581,7 +6727,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	for (i = 0; i < nargs; i++) {
 		u32 regno = i + 1;
 
-		t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL);
+		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
 		if (btf_type_is_ptr(t))
 			mark_btf_func_reg_size(env, regno, sizeof(void *));
 		else
@@ -11121,7 +11267,8 @@ static int do_check(struct bpf_verifier_env *env)
 			env->jmps_processed++;
 			if (opcode == BPF_CALL) {
 				if (BPF_SRC(insn->code) != BPF_K ||
-				    insn->off != 0 ||
+				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
+				     && insn->off != 0) ||
 				    (insn->src_reg != BPF_REG_0 &&
 				     insn->src_reg != BPF_PSEUDO_CALL &&
 				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
@@ -12477,6 +12624,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
 		func[i]->jit_requested = 1;
 		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
+		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
 		func[i]->aux->linfo = prog->aux->linfo;
 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
@@ -12665,7 +12813,7 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env,
 	/* insn->imm has the btf func_id. Replace it with
 	 * an address (relative to __bpf_base_call).
 	 */
-	desc = find_kfunc_desc(env->prog, insn->imm);
+	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
 	if (!desc) {
 		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
 			insn->imm);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 072f0c16c779..b1f6f5237de6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -241,7 +241,7 @@ BTF_ID(func, bpf_kfunc_call_test2)
 BTF_ID(func, bpf_kfunc_call_test3)
 BTF_SET_END(test_sk_kfunc_ids)
 
-bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
+bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
 {
 	return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
 }
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 0dcee9df1326..b3afd3361f34 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -255,7 +255,7 @@ BTF_ID(func, bbr_set_state)
 #endif	/* CONFIG_X86 */
 BTF_SET_END(bpf_tcp_ca_kfunc_ids)
 
-static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id)
+static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
 {
 	return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id);
 }
-- 
cgit v1.2.3


From 14f267d95fe4b08831a022c8e15a2eb8991edbf6 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 2 Oct 2021 06:47:51 +0530
Subject: bpf: btf: Introduce helpers for dynamic BTF set registration

This adds helpers for registering btf_id_set from modules and the
bpf_check_mod_kfunc_call callback that can be used to look them up.

With in kernel sets, the way this is supposed to work is, in kernel
callback looks up within the in-kernel kfunc whitelist, and then defers
to the dynamic BTF set lookup if it doesn't find the BTF id. If there is
no in-kernel BTF id set, this callback can be used directly.

Also fix includes for btf.h and bpfptr.h so that they can included in
isolation. This is in preparation for their usage in tcp_bbr, tcp_cubic
and tcp_dctcp modules in the next patch.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211002011757.311265-4-memxor@gmail.com
---
 include/linux/bpfptr.h |  1 +
 include/linux/btf.h    | 36 ++++++++++++++++++++++++++++++++++
 kernel/bpf/btf.c       | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpfptr.h b/include/linux/bpfptr.h
index 546e27fc6d46..46e1757d06a3 100644
--- a/include/linux/bpfptr.h
+++ b/include/linux/bpfptr.h
@@ -3,6 +3,7 @@
 #ifndef _LINUX_BPFPTR_H
 #define _LINUX_BPFPTR_H
 
+#include <linux/mm.h>
 #include <linux/sockptr.h>
 
 typedef sockptr_t bpfptr_t;
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 214fde93214b..6c4c61d821d7 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -5,6 +5,7 @@
 #define _LINUX_BTF_H 1
 
 #include <linux/types.h>
+#include <linux/bpfptr.h>
 #include <uapi/linux/btf.h>
 #include <uapi/linux/bpf.h>
 
@@ -238,4 +239,39 @@ static inline const char *btf_name_by_offset(const struct btf *btf,
 }
 #endif
 
+struct kfunc_btf_id_set {
+	struct list_head list;
+	struct btf_id_set *set;
+	struct module *owner;
+};
+
+struct kfunc_btf_id_list;
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+			       struct kfunc_btf_id_set *s);
+void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+				 struct kfunc_btf_id_set *s);
+bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
+			      struct module *owner);
+#else
+static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+					     struct kfunc_btf_id_set *s)
+{
+}
+static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+					       struct kfunc_btf_id_set *s)
+{
+}
+static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
+					    u32 kfunc_id, struct module *owner)
+{
+	return false;
+}
+#endif
+
+#define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
+	struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
+					 THIS_MODULE }
+
 #endif
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c3d605b22473..62cbeb4951eb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6343,3 +6343,55 @@ const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
 };
 
 BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct)
+
+/* BTF ID set registration API for modules */
+
+struct kfunc_btf_id_list {
+	struct list_head list;
+	struct mutex mutex;
+};
+
+#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
+
+void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+			       struct kfunc_btf_id_set *s)
+{
+	mutex_lock(&l->mutex);
+	list_add(&s->list, &l->list);
+	mutex_unlock(&l->mutex);
+}
+EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set);
+
+void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
+				 struct kfunc_btf_id_set *s)
+{
+	mutex_lock(&l->mutex);
+	list_del_init(&s->list);
+	mutex_unlock(&l->mutex);
+}
+EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set);
+
+bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
+			      struct module *owner)
+{
+	struct kfunc_btf_id_set *s;
+
+	if (!owner)
+		return false;
+	mutex_lock(&klist->mutex);
+	list_for_each_entry(s, &klist->list, list) {
+		if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) {
+			mutex_unlock(&klist->mutex);
+			return true;
+		}
+	}
+	mutex_unlock(&klist->mutex);
+	return false;
+}
+
+#endif
+
+#define DEFINE_KFUNC_BTF_ID_LIST(name)                                         \
+	struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
+					  __MUTEX_INITIALIZER(name.mutex) };   \
+	EXPORT_SYMBOL_GPL(name)
-- 
cgit v1.2.3


From 0e32dfc80bae53b05e9eda7eaf259f30ab9ba43a Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 2 Oct 2021 06:47:53 +0530
Subject: bpf: Enable TCP congestion control kfunc from modules

This commit moves BTF ID lookup into the newly added registration
helper, in a way that the bbr, cubic, and dctcp implementation set up
their sets in the bpf_tcp_ca kfunc_btf_set list, while the ones not
dependent on modules are looked up from the wrapper function.

This lifts the restriction for them to be compiled as built in objects,
and can be loaded as modules if required. Also modify Makefile.modfinal
to call resolve_btfids for each module.

Note that since kernel kfunc_ids never overlap with module kfunc_ids, we
only match the owner for module btf id sets.

See following commits for background on use of:

 CONFIG_X86 ifdef:
 569c484f9995 (bpf: Limit static tcp-cc functions in the .BTF_ids list to x86)

 CONFIG_DYNAMIC_FTRACE ifdef:
 7aae231ac93b (bpf: tcp: Limit calling some tcp cc functions to CONFIG_DYNAMIC_FTRACE)

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211002011757.311265-6-memxor@gmail.com
---
 include/linux/btf.h       |  2 ++
 kernel/bpf/btf.c          |  2 ++
 net/ipv4/bpf_tcp_ca.c     | 34 +++-------------------------------
 net/ipv4/tcp_bbr.c        | 28 +++++++++++++++++++++++++++-
 net/ipv4/tcp_cubic.c      | 26 +++++++++++++++++++++++++-
 net/ipv4/tcp_dctcp.c      | 26 +++++++++++++++++++++++++-
 scripts/Makefile.modfinal |  1 +
 7 files changed, 85 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 6c4c61d821d7..1d56cd2bb362 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -274,4 +274,6 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
 	struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
 					 THIS_MODULE }
 
+extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
+
 #endif
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 62cbeb4951eb..1460dff3c154 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6395,3 +6395,5 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 	struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
 					  __MUTEX_INITIALIZER(name.mutex) };   \
 	EXPORT_SYMBOL_GPL(name)
+
+DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index b3afd3361f34..57709ac09fb2 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -223,41 +223,13 @@ BTF_ID(func, tcp_reno_cong_avoid)
 BTF_ID(func, tcp_reno_undo_cwnd)
 BTF_ID(func, tcp_slow_start)
 BTF_ID(func, tcp_cong_avoid_ai)
-#ifdef CONFIG_X86
-#ifdef CONFIG_DYNAMIC_FTRACE
-#if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC)
-BTF_ID(func, cubictcp_init)
-BTF_ID(func, cubictcp_recalc_ssthresh)
-BTF_ID(func, cubictcp_cong_avoid)
-BTF_ID(func, cubictcp_state)
-BTF_ID(func, cubictcp_cwnd_event)
-BTF_ID(func, cubictcp_acked)
-#endif
-#if IS_BUILTIN(CONFIG_TCP_CONG_DCTCP)
-BTF_ID(func, dctcp_init)
-BTF_ID(func, dctcp_update_alpha)
-BTF_ID(func, dctcp_cwnd_event)
-BTF_ID(func, dctcp_ssthresh)
-BTF_ID(func, dctcp_cwnd_undo)
-BTF_ID(func, dctcp_state)
-#endif
-#if IS_BUILTIN(CONFIG_TCP_CONG_BBR)
-BTF_ID(func, bbr_init)
-BTF_ID(func, bbr_main)
-BTF_ID(func, bbr_sndbuf_expand)
-BTF_ID(func, bbr_undo_cwnd)
-BTF_ID(func, bbr_cwnd_event)
-BTF_ID(func, bbr_ssthresh)
-BTF_ID(func, bbr_min_tso_segs)
-BTF_ID(func, bbr_set_state)
-#endif
-#endif  /* CONFIG_DYNAMIC_FTRACE */
-#endif	/* CONFIG_X86 */
 BTF_SET_END(bpf_tcp_ca_kfunc_ids)
 
 static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner)
 {
-	return btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id);
+	if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id))
+		return true;
+	return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner);
 }
 
 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = {
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 6274462b86b4..ec5550089b4d 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -56,6 +56,8 @@
  * otherwise TCP stack falls back to an internal pacing using one high
  * resolution timer per TCP socket and may use more resources.
  */
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/module.h>
 #include <net/tcp.h>
 #include <linux/inet_diag.h>
@@ -1152,14 +1154,38 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = {
 	.set_state	= bbr_set_state,
 };
 
+BTF_SET_START(tcp_bbr_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, bbr_init)
+BTF_ID(func, bbr_main)
+BTF_ID(func, bbr_sndbuf_expand)
+BTF_ID(func, bbr_undo_cwnd)
+BTF_ID(func, bbr_cwnd_event)
+BTF_ID(func, bbr_ssthresh)
+BTF_ID(func, bbr_min_tso_segs)
+BTF_ID(func, bbr_set_state)
+#endif
+#endif
+BTF_SET_END(tcp_bbr_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set);
+
 static int __init bbr_register(void)
 {
+	int ret;
+
 	BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE);
-	return tcp_register_congestion_control(&tcp_bbr_cong_ops);
+	ret = tcp_register_congestion_control(&tcp_bbr_cong_ops);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
+	return 0;
 }
 
 static void __exit bbr_unregister(void)
 {
+	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set);
 	tcp_unregister_congestion_control(&tcp_bbr_cong_ops);
 }
 
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4a30deaa9a37..5e9d9c51164c 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -25,6 +25,8 @@
  */
 
 #include <linux/mm.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/module.h>
 #include <linux/math64.h>
 #include <net/tcp.h>
@@ -482,8 +484,25 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
 	.name		= "cubic",
 };
 
+BTF_SET_START(tcp_cubic_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, cubictcp_init)
+BTF_ID(func, cubictcp_recalc_ssthresh)
+BTF_ID(func, cubictcp_cong_avoid)
+BTF_ID(func, cubictcp_state)
+BTF_ID(func, cubictcp_cwnd_event)
+BTF_ID(func, cubictcp_acked)
+#endif
+#endif
+BTF_SET_END(tcp_cubic_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set);
+
 static int __init cubictcp_register(void)
 {
+	int ret;
+
 	BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
 
 	/* Precompute a bunch of the scaling factors that are used per-packet
@@ -514,11 +533,16 @@ static int __init cubictcp_register(void)
 	/* divide by bic_scale and by constant Srtt (100ms) */
 	do_div(cube_factor, bic_scale * 10);
 
-	return tcp_register_congestion_control(&cubictcp);
+	ret = tcp_register_congestion_control(&cubictcp);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
+	return 0;
 }
 
 static void __exit cubictcp_unregister(void)
 {
+	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set);
 	tcp_unregister_congestion_control(&cubictcp);
 }
 
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 79f705450c16..0d7ab3cc7b61 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -36,6 +36,8 @@
  *	Glenn Judd <glenn.judd@morganstanley.com>
  */
 
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <net/tcp.h>
@@ -236,14 +238,36 @@ static struct tcp_congestion_ops dctcp_reno __read_mostly = {
 	.name		= "dctcp-reno",
 };
 
+BTF_SET_START(tcp_dctcp_kfunc_ids)
+#ifdef CONFIG_X86
+#ifdef CONFIG_DYNAMIC_FTRACE
+BTF_ID(func, dctcp_init)
+BTF_ID(func, dctcp_update_alpha)
+BTF_ID(func, dctcp_cwnd_event)
+BTF_ID(func, dctcp_ssthresh)
+BTF_ID(func, dctcp_cwnd_undo)
+BTF_ID(func, dctcp_state)
+#endif
+#endif
+BTF_SET_END(tcp_dctcp_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set);
+
 static int __init dctcp_register(void)
 {
+	int ret;
+
 	BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE);
-	return tcp_register_congestion_control(&dctcp);
+	ret = tcp_register_congestion_control(&dctcp);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
+	return 0;
 }
 
 static void __exit dctcp_unregister(void)
 {
+	unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set);
 	tcp_unregister_congestion_control(&dctcp);
 }
 
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index ff805777431c..1fb45b011e4b 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -41,6 +41,7 @@ quiet_cmd_btf_ko = BTF [M] $@
       cmd_btf_ko = 							\
 	if [ -f vmlinux ]; then						\
 		LLVM_OBJCOPY="$(OBJCOPY)" $(PAHOLE) -J --btf_base vmlinux $@; \
+		$(RESOLVE_BTFIDS) -b vmlinux $@; 			\
 	else								\
 		printf "Skipping BTF generation for %s due to unavailability of vmlinux\n" $@ 1>&2; \
 	fi;
-- 
cgit v1.2.3


From c48e51c8b07aba8a18125221cb67a40cb1256bf2 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Sat, 2 Oct 2021 06:47:57 +0530
Subject: bpf: selftests: Add selftests for module kfunc support

This adds selftests that tests the success and failure path for modules
kfuncs (in presence of invalid kfunc calls) for both libbpf and
gen_loader. It also adds a prog_test kfunc_btf_id_list so that we can
add module BTF ID set from bpf_testmod.

This also introduces  a couple of test cases to verifier selftests for
validating whether we get an error or not depending on if invalid kfunc
call remains after elimination of unreachable instructions.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211002011757.311265-10-memxor@gmail.com
---
 include/linux/btf.h                                |  1 +
 kernel/bpf/btf.c                                   |  1 +
 net/bpf/test_run.c                                 |  5 ++-
 tools/testing/selftests/bpf/Makefile               |  7 ++--
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        | 23 ++++++++++-
 .../selftests/bpf/prog_tests/ksyms_module.c        | 29 +++++++-------
 .../selftests/bpf/prog_tests/ksyms_module_libbpf.c | 28 +++++++++++++
 .../selftests/bpf/progs/test_ksyms_module.c        | 46 ++++++++++++++++------
 tools/testing/selftests/bpf/verifier/calls.c       | 23 +++++++++++
 9 files changed, 132 insertions(+), 31 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c

(limited to 'include/linux')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 1d56cd2bb362..203eef993d76 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -275,5 +275,6 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
 					 THIS_MODULE }
 
 extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
+extern struct kfunc_btf_id_list prog_test_kfunc_list;
 
 #endif
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 1460dff3c154..2ebffb9f57eb 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6397,3 +6397,4 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 	EXPORT_SYMBOL_GPL(name)
 
 DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
+DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index b1f6f5237de6..529608784aa8 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2017 Facebook
  */
 #include <linux/bpf.h>
+#include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
@@ -243,7 +244,9 @@ BTF_SET_END(test_sk_kfunc_ids)
 
 bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner)
 {
-	return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
+	if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id))
+		return true;
+	return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner);
 }
 
 static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5a94d0900d1b..c5c9a9f50d8d 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -315,8 +315,9 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		linked_vars.skel.h linked_maps.skel.h
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
-	test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c \
-	trace_vprintk.c
+	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c
+# Generate both light skeleton and libbpf skeleton for these
+LSKELS_EXTRA := test_ksyms_module.c
 SKEL_BLACKLIST += $$(LSKELS)
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
@@ -346,7 +347,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
 				 $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
 					       $$(TRUNNER_BPF_SRCS)))
-TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
 TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
 TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
 
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 50fc5561110a..b892948dc134 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/error-injection.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -13,6 +15,12 @@
 
 DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
 
+noinline void
+bpf_testmod_test_mod_kfunc(int i)
+{
+	*(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
+}
+
 noinline int bpf_testmod_loop_test(int n)
 {
 	int i, sum = 0;
@@ -71,13 +79,26 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
 	.write = bpf_testmod_test_write,
 };
 
+BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_ID(func, bpf_testmod_test_mod_kfunc)
+BTF_SET_END(bpf_testmod_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+
 static int bpf_testmod_init(void)
 {
-	return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+	int ret;
+
+	ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
+	return 0;
 }
 
 static void bpf_testmod_exit(void)
 {
+	unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
 	return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index 2cd5cded543f..831447878d7b 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -2,30 +2,29 @@
 /* Copyright (c) 2021 Facebook */
 
 #include <test_progs.h>
-#include <bpf/libbpf.h>
-#include <bpf/btf.h>
+#include <network_helpers.h>
 #include "test_ksyms_module.lskel.h"
 
-static int duration;
-
 void test_ksyms_module(void)
 {
-	struct test_ksyms_module* skel;
+	struct test_ksyms_module *skel;
+	int retval;
 	int err;
 
-	skel = test_ksyms_module__open_and_load();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	if (!env.has_testmod) {
+		test__skip();
 		return;
+	}
 
-	err = test_ksyms_module__attach(skel);
-	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+	skel = test_ksyms_module__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open_and_load"))
+		return;
+	err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, (__u32 *)&retval, NULL);
+	if (!ASSERT_OK(err, "bpf_prog_test_run"))
 		goto cleanup;
-
-	usleep(1);
-
-	ASSERT_EQ(skel->bss->triggered, true, "triggered");
-	ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val");
-
+	ASSERT_EQ(retval, 0, "retval");
+	ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
 cleanup:
 	test_ksyms_module__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c
new file mode 100644
index 000000000000..e6343ef63af9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module_libbpf.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_ksyms_module.skel.h"
+
+void test_ksyms_module_libbpf(void)
+{
+	struct test_ksyms_module *skel;
+	int retval, err;
+
+	if (!env.has_testmod) {
+		test__skip();
+		return;
+	}
+
+	skel = test_ksyms_module__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open"))
+		return;
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4,
+				sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL);
+	if (!ASSERT_OK(err, "bpf_prog_test_run"))
+		goto cleanup;
+	ASSERT_EQ(retval, 0, "retval");
+	ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
+cleanup:
+	test_ksyms_module__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
index d6a0b3086b90..0650d918c096 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_module.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
@@ -2,24 +2,48 @@
 /* Copyright (c) 2021 Facebook */
 
 #include "vmlinux.h"
-
 #include <bpf/bpf_helpers.h>
 
+#define X_0(x)
+#define X_1(x) x X_0(x)
+#define X_2(x) x X_1(x)
+#define X_3(x) x X_2(x)
+#define X_4(x) x X_3(x)
+#define X_5(x) x X_4(x)
+#define X_6(x) x X_5(x)
+#define X_7(x) x X_6(x)
+#define X_8(x) x X_7(x)
+#define X_9(x) x X_8(x)
+#define X_10(x) x X_9(x)
+#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y)
+
 extern const int bpf_testmod_ksym_percpu __ksym;
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak;
 
-int out_mod_ksym_global = 0;
-bool triggered = false;
+int out_bpf_testmod_ksym = 0;
+const volatile int x = 0;
 
-SEC("raw_tp/sys_enter")
-int handler(const void *ctx)
+SEC("tc")
+int load(struct __sk_buff *skb)
 {
-	int *val;
-	__u32 cpu;
-
-	val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
-	out_mod_ksym_global = *val;
-	triggered = true;
+	/* This will be kept by clang, but removed by verifier. Since it is
+	 * marked as __weak, libbpf and gen_loader don't error out if BTF ID
+	 * is not found for it, instead imm and off is set to 0 for it.
+	 */
+	if (x)
+		bpf_testmod_invalid_mod_kfunc();
+	bpf_testmod_test_mod_kfunc(42);
+	out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+	return 0;
+}
 
+SEC("tc")
+int load_256(struct __sk_buff *skb)
+{
+	/* this will fail if kfunc doesn't reuse its own btf fd index */
+	REPEAT_256(bpf_testmod_test_mod_kfunc(42););
+	bpf_testmod_test_mod_kfunc(42);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 336a749673d1..d7b74eb28333 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1,3 +1,26 @@
+{
+	"calls: invalid kfunc call not eliminated",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result  = REJECT,
+	.errstr = "invalid kernel function call not eliminated in verifier pass",
+},
+{
+	"calls: invalid kfunc call unreachable",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result  = ACCEPT,
+},
 {
 	"calls: basic sanity",
 	.insns = {
-- 
cgit v1.2.3


From 4f627af8e6068892cafe031df6c14e8a0aaaa426 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 2 Sep 2021 16:10:11 -0500
Subject: ptrace: Remove the unnecessary arguments from arch_ptrace_stop

Both arch_ptrace_stop_needed and arch_ptrace_stop are called with an
exit_code and a siginfo structure.  Neither argument is used by any of
the implementations so just remove the unneeded arguments.

The two arechitectures that implement arch_ptrace_stop are ia64 and
sparc.  Both architectures flush their register stacks before a
ptrace_stack so that all of the register information can be accessed
by debuggers.

As the question of if a register stack needs to be flushed is
independent of why ptrace is stopping not needing arguments make sense.

Cc: David Miller <davem@davemloft.net>
Cc: sparclinux@vger.kernel.org
Link: https://lkml.kernel.org/r/87lf3mx290.fsf@disp2133
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/ia64/include/asm/ptrace.h  |  4 ++--
 arch/sparc/include/asm/ptrace.h |  8 ++++----
 include/linux/ptrace.h          | 22 +++++++++-------------
 kernel/signal.c                 |  4 ++--
 4 files changed, 17 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h
index 08179135905c..f15504f75f10 100644
--- a/arch/ia64/include/asm/ptrace.h
+++ b/arch/ia64/include/asm/ptrace.h
@@ -129,9 +129,9 @@ static inline long regs_return_value(struct pt_regs *regs)
   extern void ia64_decrement_ip (struct pt_regs *pt);
 
   extern void ia64_ptrace_stop(void);
-  #define arch_ptrace_stop(code, info) \
+  #define arch_ptrace_stop() \
 	ia64_ptrace_stop()
-  #define arch_ptrace_stop_needed(code, info) \
+  #define arch_ptrace_stop_needed() \
 	(!test_thread_flag(TIF_RESTORE_RSE))
 
   extern void ptrace_attach_sync_user_rbs (struct task_struct *);
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index 71dd82b43cc5..d1419e669027 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -26,12 +26,12 @@ static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
 	return (regs->tstate &= ~TSTATE_SYSCALL);
 }
 
-#define arch_ptrace_stop_needed(exit_code, info) \
+#define arch_ptrace_stop_needed() \
 ({	flush_user_windows(); \
 	get_thread_wsaved() != 0; \
 })
 
-#define arch_ptrace_stop(exit_code, info) \
+#define arch_ptrace_stop() \
 	synchronize_user_stack()
 
 #define current_pt_regs() \
@@ -129,12 +129,12 @@ static inline bool pt_regs_clear_syscall(struct pt_regs *regs)
 	return (regs->psr &= ~PSR_SYSCALL);
 }
 
-#define arch_ptrace_stop_needed(exit_code, info) \
+#define arch_ptrace_stop_needed() \
 ({	flush_user_windows(); \
 	current_thread_info()->w_saved != 0;	\
 })
 
-#define arch_ptrace_stop(exit_code, info) \
+#define arch_ptrace_stop() \
 	synchronize_user_stack()
 
 #define current_pt_regs() \
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index b5ebf6c01292..8aee2945ff08 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -362,29 +362,25 @@ static inline void user_single_step_report(struct pt_regs *regs)
 #ifndef arch_ptrace_stop_needed
 /**
  * arch_ptrace_stop_needed - Decide whether arch_ptrace_stop() should be called
- * @code:	current->exit_code value ptrace will stop with
- * @info:	siginfo_t pointer (or %NULL) for signal ptrace will stop with
  *
  * This is called with the siglock held, to decide whether or not it's
- * necessary to release the siglock and call arch_ptrace_stop() with the
- * same @code and @info arguments.  It can be defined to a constant if
- * arch_ptrace_stop() is never required, or always is.  On machines where
- * this makes sense, it should be defined to a quick test to optimize out
- * calling arch_ptrace_stop() when it would be superfluous.  For example,
- * if the thread has not been back to user mode since the last stop, the
- * thread state might indicate that nothing needs to be done.
+ * necessary to release the siglock and call arch_ptrace_stop().  It can be
+ * defined to a constant if arch_ptrace_stop() is never required, or always
+ * is.  On machines where this makes sense, it should be defined to a quick
+ * test to optimize out calling arch_ptrace_stop() when it would be
+ * superfluous.  For example, if the thread has not been back to user mode
+ * since the last stop, the thread state might indicate that nothing needs
+ * to be done.
  *
  * This is guaranteed to be invoked once before a task stops for ptrace and
  * may include arch-specific operations necessary prior to a ptrace stop.
  */
-#define arch_ptrace_stop_needed(code, info)	(0)
+#define arch_ptrace_stop_needed()	(0)
 #endif
 
 #ifndef arch_ptrace_stop
 /**
  * arch_ptrace_stop - Do machine-specific work before stopping for ptrace
- * @code:	current->exit_code value ptrace will stop with
- * @info:	siginfo_t pointer (or %NULL) for signal ptrace will stop with
  *
  * This is called with no locks held when arch_ptrace_stop_needed() has
  * just returned nonzero.  It is allowed to block, e.g. for user memory
@@ -394,7 +390,7 @@ static inline void user_single_step_report(struct pt_regs *regs)
  * we only do it when the arch requires it for this particular stop, as
  * indicated by arch_ptrace_stop_needed().
  */
-#define arch_ptrace_stop(code, info)		do { } while (0)
+#define arch_ptrace_stop()		do { } while (0)
 #endif
 
 #ifndef current_pt_regs
diff --git a/kernel/signal.c b/kernel/signal.c
index 9f2dc9cf3208..c9759ff2cb43 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2200,7 +2200,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
 {
 	bool gstop_done = false;
 
-	if (arch_ptrace_stop_needed(exit_code, info)) {
+	if (arch_ptrace_stop_needed()) {
 		/*
 		 * The arch code has something special to do before a
 		 * ptrace stop.  This is allowed to block, e.g. for faults
@@ -2210,7 +2210,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
 		 * any signal bookkeeping like checking group_stop_count.
 		 */
 		spin_unlock_irq(&current->sighand->siglock);
-		arch_ptrace_stop(exit_code, info);
+		arch_ptrace_stop();
 		spin_lock_irq(&current->sighand->siglock);
 	}
 
-- 
cgit v1.2.3


From 92307383082daff5df884a25df9e283efb7ef261 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 1 Sep 2021 11:33:50 -0500
Subject: coredump:  Don't perform any cleanups before dumping core

Rename coredump_exit_mm to coredump_task_exit and call it from do_exit
before PTRACE_EVENT_EXIT, and before any cleanup work for a task
happens.  This ensures that an accurate copy of the process can be
captured in the coredump as no cleanup for the process happens before
the coredump completes.  This also ensures that PTRACE_EVENT_EXIT
will not be visited by any thread until the coredump is complete.

Add a new flag PF_POSTCOREDUMP so that tasks that have passed through
coredump_task_exit can be recognized and ignored in zap_process.

Now that all of the coredumping happens before exit_mm remove code to
test for a coredump in progress from mm_release.

Replace "may_ptrace_stop()" with a simple test of "current->ptrace".
The other tests in may_ptrace_stop all concern avoiding stopping
during a coredump.  These tests are no longer necessary as it is now
guaranteed that fatal_signal_pending will be set if the code enters
ptrace_stop during a coredump.  The code in ptrace_stop is guaranteed
not to stop if fatal_signal_pending returns true.

Until this change "ptrace_event(PTRACE_EVENT_EXIT)" could call
ptrace_stop without fatal_signal_pending being true, as signals are
dequeued in get_signal before calling do_exit.  This is no longer
an issue as "ptrace_event(PTRACE_EVENT_EXIT)" is no longer reached
until after the coredump completes.

Link: https://lkml.kernel.org/r/874kaax26c.fsf@disp2133
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/coredump.c         |  8 ++++----
 include/linux/sched.h |  1 +
 kernel/exit.c         | 19 ++++++++++++-------
 kernel/fork.c         |  3 +--
 kernel/signal.c       | 27 +--------------------------
 mm/oom_kill.c         |  2 +-
 6 files changed, 20 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/coredump.c b/fs/coredump.c
index 5e0e08a7fb9b..d576287fb88b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -359,7 +359,7 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
 
 	for_each_thread(start, t) {
 		task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
-		if (t != current && t->mm) {
+		if (t != current && !(t->flags & PF_POSTCOREDUMP)) {
 			sigaddset(&t->pending.signal, SIGKILL);
 			signal_wake_up(t, 1);
 			nr++;
@@ -404,8 +404,8 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
 	 *
 	 * do_exit:
 	 *	The caller holds mm->mmap_lock. This means that the task which
-	 *	uses this mm can't pass coredump_exit_mm(), so it can't exit or
-	 *	clear its ->mm.
+	 *	uses this mm can't pass coredump_task_exit(), so it can't exit
+	 *	or clear its ->mm.
 	 *
 	 * de_thread:
 	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
@@ -500,7 +500,7 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
 		next = curr->next;
 		task = curr->task;
 		/*
-		 * see coredump_exit_mm(), curr->task must not see
+		 * see coredump_task_exit(), curr->task must not see
 		 * ->task == NULL before we read ->next.
 		 */
 		smp_mb();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e12b524426b0..f3741f23935e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1664,6 +1664,7 @@ extern struct pid *cad_pid;
 #define PF_VCPU			0x00000001	/* I'm a virtual CPU */
 #define PF_IDLE			0x00000002	/* I am an IDLE thread */
 #define PF_EXITING		0x00000004	/* Getting shut down */
+#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
 #define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
 #define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
 #define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
diff --git a/kernel/exit.c b/kernel/exit.c
index cb1619d8fd64..774e6b5061b8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -339,23 +339,29 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 	}
 }
 
-static void coredump_exit_mm(struct mm_struct *mm)
+static void coredump_task_exit(struct task_struct *tsk)
 {
 	struct core_state *core_state;
+	struct mm_struct *mm;
+
+	mm = tsk->mm;
+	if (!mm)
+		return;
 
 	/*
 	 * Serialize with any possible pending coredump.
 	 * We must hold mmap_lock around checking core_state
-	 * and clearing tsk->mm.  The core-inducing thread
+	 * and setting PF_POSTCOREDUMP.  The core-inducing thread
 	 * will increment ->nr_threads for each thread in the
-	 * group with ->mm != NULL.
+	 * group without PF_POSTCOREDUMP set.
 	 */
+	mmap_read_lock(mm);
+	tsk->flags |= PF_POSTCOREDUMP;
 	core_state = mm->core_state;
+	mmap_read_unlock(mm);
 	if (core_state) {
 		struct core_thread self;
 
-		mmap_read_unlock(mm);
-
 		self.task = current;
 		if (self.task->flags & PF_SIGNALED)
 			self.next = xchg(&core_state->dumper.next, &self);
@@ -375,7 +381,6 @@ static void coredump_exit_mm(struct mm_struct *mm)
 			freezable_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
-		mmap_read_lock(mm);
 	}
 }
 
@@ -480,7 +485,6 @@ static void exit_mm(void)
 		return;
 	sync_mm_rss(mm);
 	mmap_read_lock(mm);
-	coredump_exit_mm(mm);
 	mmgrab(mm);
 	BUG_ON(mm != current->active_mm);
 	/* more a memory barrier than a real lock */
@@ -768,6 +772,7 @@ void __noreturn do_exit(long code)
 	profile_task_exit(tsk);
 	kcov_task_exit(tsk);
 
+	coredump_task_exit(tsk);
 	ptrace_event(PTRACE_EVENT_EXIT, code);
 
 	validate_creds_for_do_exit(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..9bd9f2da9e41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1392,8 +1392,7 @@ static void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 	 * purposes.
 	 */
 	if (tsk->clear_child_tid) {
-		if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
-		    atomic_read(&mm->mm_users) > 1) {
+		if (atomic_read(&mm->mm_users) > 1) {
 			/*
 			 * We don't check the error code - if userspace has
 			 * not set up a proper pointer then tough luck.
diff --git a/kernel/signal.c b/kernel/signal.c
index c9759ff2cb43..b0db80acc6ef 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2158,31 +2158,6 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
 	spin_unlock_irqrestore(&sighand->siglock, flags);
 }
 
-static inline bool may_ptrace_stop(void)
-{
-	if (!likely(current->ptrace))
-		return false;
-	/*
-	 * Are we in the middle of do_coredump?
-	 * If so and our tracer is also part of the coredump stopping
-	 * is a deadlock situation, and pointless because our tracer
-	 * is dead so don't allow us to stop.
-	 * If SIGKILL was already sent before the caller unlocked
-	 * ->siglock we must see ->core_state != NULL. Otherwise it
-	 * is safe to enter schedule().
-	 *
-	 * This is almost outdated, a task with the pending SIGKILL can't
-	 * block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
-	 * after SIGKILL was already dequeued.
-	 */
-	if (unlikely(current->mm->core_state) &&
-	    unlikely(current->mm == current->parent->mm))
-		return false;
-
-	return true;
-}
-
-
 /*
  * This must be called with current->sighand->siglock held.
  *
@@ -2263,7 +2238,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
 
 	spin_unlock_irq(&current->sighand->siglock);
 	read_lock(&tasklist_lock);
-	if (may_ptrace_stop()) {
+	if (likely(current->ptrace)) {
 		/*
 		 * Notify parents of the stop.
 		 *
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 295c8bdfd6c8..7877c755ab37 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -788,7 +788,7 @@ static inline bool __task_will_free_mem(struct task_struct *task)
 
 	/*
 	 * A coredumping process may sleep for an extended period in
-	 * coredump_exit_mm(), so the oom killer cannot assume that
+	 * coredump_task_exit(), so the oom killer cannot assume that
 	 * the process will promptly exit and release memory.
 	 */
 	if (sig->flags & SIGNAL_GROUP_COREDUMP)
-- 
cgit v1.2.3


From 353407d917b2d87cd8104a0453d012439c6ca4be Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 6 Oct 2021 13:46:42 +0300
Subject: ethtool: Add ability to control transceiver modules' power mode

Add a pair of new ethtool messages, 'ETHTOOL_MSG_MODULE_SET' and
'ETHTOOL_MSG_MODULE_GET', that can be used to control transceiver
modules parameters and retrieve their status.

The first parameter to control is the power mode of the module. It is
only relevant for paged memory modules, as flat memory modules always
operate in low power mode.

When a paged memory module is in low power mode, its power consumption
is reduced to the minimum, the management interface towards the host is
available and the data path is deactivated.

User space can choose to put modules that are not currently in use in
low power mode and transition them to high power mode before putting the
associated ports administratively up. This is useful for user space that
favors reduced power consumption and lower temperatures over reduced
link up times. In QSFP-DD modules the transition from low power mode to
high power mode can take a few seconds and this transition is only
expected to get longer with future / more complex modules.

User space can control the power mode of the module via the power mode
policy attribute ('ETHTOOL_A_MODULE_POWER_MODE_POLICY'). Possible
values:

* high: Module is always in high power mode.

* auto: Module is transitioned by the host to high power mode when the
  first port using it is put administratively up and to low power mode
  when the last port using it is put administratively down.

The operational power mode of the module is available to user space via
the 'ETHTOOL_A_MODULE_POWER_MODE' attribute. The attribute is not
reported to user space when a module is not plugged-in.

The user API is designed to be generic enough so that it could be used
for modules with different memory maps (e.g., SFF-8636, CMIS).

The only implementation of the device driver API in this series is for a
MAC driver (mlxsw) where the module is controlled by the device's
firmware, but it is designed to be generic enough so that it could also
be used by implementations where the module is controlled by the CPU.

CMIS testing
============

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x03 (ModuleReady)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : Off

The module is not in low power mode, as it is not forced by hardware
(LowPwrAllowRequestHW is off) or by software (LowPwrRequestSW is off).

The power mode can be queried from the kernel. In case
LowPwrAllowRequestHW was on, the kernel would need to take into account
the state of the LowPwrRequestHW signal, which is not visible to user
space.

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy high
 power-mode high

Change the power mode policy to 'auto':

 # ethtool --set-module swp11 power-mode-policy auto

Query the power mode again:

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x01 (ModuleLowPwr)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : On

Put the associated port administratively up which will instruct the host
to transition the module to high power mode:

 # ip link set dev swp11 up

Query the power mode again:

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy auto
 power-mode high

Verify with the data read from the EEPROM:

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x03 (ModuleReady)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : Off

Put the associated port administratively down which will instruct the
host to transition the module to low power mode:

 # ip link set dev swp11 down

Query the power mode again:

 $ ethtool --show-module swp11
 Module parameters for swp11:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp11
 Identifier                                : 0x18 (QSFP-DD Double Density 8X Pluggable Transceiver (INF-8628))
 ...
 Module State                              : 0x01 (ModuleLowPwr)
 LowPwrAllowRequestHW                      : Off
 LowPwrRequestSW                           : On

SFF-8636 testing
================

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 ...
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) enabled
 Power set                                 : Off
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.7733 mW / -1.12 dBm
 Transmit avg optical power (Channel 2)    : 0.7649 mW / -1.16 dBm
 Transmit avg optical power (Channel 3)    : 0.7790 mW / -1.08 dBm
 Transmit avg optical power (Channel 4)    : 0.7837 mW / -1.06 dBm
 Rcvr signal avg optical power(Channel 1)  : 0.9302 mW / -0.31 dBm
 Rcvr signal avg optical power(Channel 2)  : 0.9079 mW / -0.42 dBm
 Rcvr signal avg optical power(Channel 3)  : 0.8993 mW / -0.46 dBm
 Rcvr signal avg optical power(Channel 4)  : 0.8778 mW / -0.57 dBm

The module is not in low power mode, as it is not forced by hardware
(Power override is on) or by software (Power set is off).

The power mode can be queried from the kernel. In case Power override
was off, the kernel would need to take into account the state of the
LPMode signal, which is not visible to user space.

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy high
 power-mode high

Change the power mode policy to 'auto':

 # ethtool --set-module swp13 power-mode-policy auto

Query the power mode again:

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) not enabled
 Power set                                 : On
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 2)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 3)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 4)    : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 1)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 2)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 3)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 4)  : 0.0000 mW / -inf dBm

Put the associated port administratively up which will instruct the host
to transition the module to high power mode:

 # ip link set dev swp13 up

Query the power mode again:

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy auto
 power-mode high

Verify with the data read from the EEPROM:

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 ...
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) enabled
 Power set                                 : Off
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.7934 mW / -1.01 dBm
 Transmit avg optical power (Channel 2)    : 0.7859 mW / -1.05 dBm
 Transmit avg optical power (Channel 3)    : 0.7885 mW / -1.03 dBm
 Transmit avg optical power (Channel 4)    : 0.7985 mW / -0.98 dBm
 Rcvr signal avg optical power(Channel 1)  : 0.9325 mW / -0.30 dBm
 Rcvr signal avg optical power(Channel 2)  : 0.9034 mW / -0.44 dBm
 Rcvr signal avg optical power(Channel 3)  : 0.9086 mW / -0.42 dBm
 Rcvr signal avg optical power(Channel 4)  : 0.8885 mW / -0.51 dBm

Put the associated port administratively down which will instruct the
host to transition the module to low power mode:

 # ip link set dev swp13 down

Query the power mode again:

 $ ethtool --show-module swp13
 Module parameters for swp13:
 power-mode-policy auto
 power-mode low

Verify with the data read from the EEPROM:

 # ethtool -m swp13
 Identifier                                : 0x11 (QSFP28)
 ...
 Extended identifier description           : 5.0W max. Power consumption,  High Power Class (> 3.5 W) not enabled
 Power set                                 : On
 Power override                            : On
 ...
 Transmit avg optical power (Channel 1)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 2)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 3)    : 0.0000 mW / -inf dBm
 Transmit avg optical power (Channel 4)    : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 1)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 2)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 3)  : 0.0000 mW / -inf dBm
 Rcvr signal avg optical power(Channel 4)  : 0.0000 mW / -inf dBm

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst |  71 ++++++++++-
 include/linux/ethtool.h                      |  22 ++++
 include/uapi/linux/ethtool.h                 |  23 ++++
 include/uapi/linux/ethtool_netlink.h         |  17 +++
 net/ethtool/Makefile                         |   2 +-
 net/ethtool/module.c                         | 180 +++++++++++++++++++++++++++
 net/ethtool/netlink.c                        |  19 +++
 net/ethtool/netlink.h                        |   4 +
 8 files changed, 335 insertions(+), 3 deletions(-)
 create mode 100644 net/ethtool/module.c

(limited to 'include/linux')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index d9b55b7a1a4d..d6fd4b2e243c 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -41,6 +41,11 @@ In the message structure descriptions below, if an attribute name is suffixed
 with "+", parent nest can contain multiple attributes of the same type. This
 implements an array of entries.
 
+Attributes that need to be filled-in by device drivers and that are dumped to
+user space based on whether they are valid or not should not use zero as a
+valid value. This avoids the need to explicitly signal the validity of the
+attribute in the device driver API.
+
 
 Request header
 ==============
@@ -179,7 +184,7 @@ according to message purpose:
 
 Userspace to kernel:
 
-  ===================================== ================================
+  ===================================== =================================
   ``ETHTOOL_MSG_STRSET_GET``            get string set
   ``ETHTOOL_MSG_LINKINFO_GET``          get link settings
   ``ETHTOOL_MSG_LINKINFO_SET``          set link settings
@@ -213,7 +218,9 @@ Userspace to kernel:
   ``ETHTOOL_MSG_MODULE_EEPROM_GET``     read SFP module EEPROM
   ``ETHTOOL_MSG_STATS_GET``             get standard statistics
   ``ETHTOOL_MSG_PHC_VCLOCKS_GET``       get PHC virtual clocks info
-  ===================================== ================================
+  ``ETHTOOL_MSG_MODULE_SET``            set transceiver module parameters
+  ``ETHTOOL_MSG_MODULE_GET``            get transceiver module parameters
+  ===================================== =================================
 
 Kernel to userspace:
 
@@ -252,6 +259,7 @@ Kernel to userspace:
   ``ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY``  read SFP module EEPROM
   ``ETHTOOL_MSG_STATS_GET_REPLY``          standard statistics
   ``ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY``    PHC virtual clocks info
+  ``ETHTOOL_MSG_MODULE_GET_REPLY``         transceiver module parameters
   ======================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
@@ -1521,6 +1529,63 @@ Kernel response contents:
   ``ETHTOOL_A_PHC_VCLOCKS_INDEX``       s32     PHC index array
   ====================================  ======  ==========================
 
+MODULE_GET
+==========
+
+Gets transceiver module parameters.
+
+Request contents:
+
+  =====================================  ======  ==========================
+  ``ETHTOOL_A_MODULE_HEADER``            nested  request header
+  =====================================  ======  ==========================
+
+Kernel response contents:
+
+  ======================================  ======  ==========================
+  ``ETHTOOL_A_MODULE_HEADER``             nested  reply header
+  ``ETHTOOL_A_MODULE_POWER_MODE_POLICY``  u8      power mode policy
+  ``ETHTOOL_A_MODULE_POWER_MODE``         u8      operational power mode
+  ======================================  ======  ==========================
+
+The optional ``ETHTOOL_A_MODULE_POWER_MODE_POLICY`` attribute encodes the
+transceiver module power mode policy enforced by the host. The default policy
+is driver-dependent, but "auto" is the recommended default and it should be
+implemented by new drivers and drivers where conformance to a legacy behavior
+is not critical.
+
+The optional ``ETHTHOOL_A_MODULE_POWER_MODE`` attribute encodes the operational
+power mode policy of the transceiver module. It is only reported when a module
+is plugged-in. Possible values are:
+
+.. kernel-doc:: include/uapi/linux/ethtool.h
+    :identifiers: ethtool_module_power_mode
+
+MODULE_SET
+==========
+
+Sets transceiver module parameters.
+
+Request contents:
+
+  ======================================  ======  ==========================
+  ``ETHTOOL_A_MODULE_HEADER``             nested  request header
+  ``ETHTOOL_A_MODULE_POWER_MODE_POLICY``  u8      power mode policy
+  ======================================  ======  ==========================
+
+When set, the optional ``ETHTOOL_A_MODULE_POWER_MODE_POLICY`` attribute is used
+to set the transceiver module power policy enforced by the host. Possible
+values are:
+
+.. kernel-doc:: include/uapi/linux/ethtool.h
+    :identifiers: ethtool_module_power_mode_policy
+
+For SFF-8636 modules, low power mode is forced by the host according to table
+6-10 in revision 2.10a of the specification.
+
+For CMIS modules, low power mode is forced by the host according to table 6-12
+in revision 5.0 of the specification.
+
 Request translation
 ===================
 
@@ -1620,4 +1685,6 @@ are netlink only.
   n/a                                 ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``
   n/a                                 ``ETHTOOL_MSG_TUNNEL_INFO_GET``
   n/a                                 ``ETHTOOL_MSG_PHC_VCLOCKS_GET``
+  n/a                                 ``ETHTOOL_MSG_MODULE_GET``
+  n/a                                 ``ETHTOOL_MSG_MODULE_SET``
   =================================== =====================================
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 849524b55d89..9adf8d2c3144 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -415,6 +415,17 @@ struct ethtool_module_eeprom {
 	u8	*data;
 };
 
+/**
+ * struct ethtool_module_power_mode_params - module power mode parameters
+ * @policy: The power mode policy enforced by the host for the plug-in module.
+ * @mode: The operational power mode of the plug-in module. Should be filled by
+ *	device drivers on get operations.
+ */
+struct ethtool_module_power_mode_params {
+	enum ethtool_module_power_mode_policy policy;
+	enum ethtool_module_power_mode mode;
+};
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @cap_link_lanes_supported: indicates if the driver supports lanes
@@ -580,6 +591,11 @@ struct ethtool_module_eeprom {
  * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics.
  * @get_rmon_stats: Query some of the RMON (RFC 2819) statistics.
  *	Set %ranges to a pointer to zero-terminated array of byte ranges.
+ * @get_module_power_mode: Get the power mode policy for the plug-in module
+ *	used by the network device and its operational power mode, if
+ *	plugged-in.
+ * @set_module_power_mode: Set the power mode policy for the plug-in module
+ *	used by the network device.
  *
  * All operations are optional (i.e. the function pointer may be set
  * to %NULL) and callers must take this into account.  Callers must
@@ -705,6 +721,12 @@ struct ethtool_ops {
 	void	(*get_rmon_stats)(struct net_device *dev,
 				  struct ethtool_rmon_stats *rmon_stats,
 				  const struct ethtool_rmon_hist_range **ranges);
+	int	(*get_module_power_mode)(struct net_device *dev,
+					 struct ethtool_module_power_mode_params *params,
+					 struct netlink_ext_ack *extack);
+	int	(*set_module_power_mode)(struct net_device *dev,
+					 const struct ethtool_module_power_mode_params *params,
+					 struct netlink_ext_ack *extack);
 };
 
 int ethtool_check_ops(const struct ethtool_ops *ops);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index b6db6590baf0..6de61d53ca5d 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -706,6 +706,29 @@ enum ethtool_stringset {
 	ETH_SS_COUNT
 };
 
+/**
+ * enum ethtool_module_power_mode_policy - plug-in module power mode policy
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host
+ *	to high power mode when the first port using it is put administratively
+ *	up and to low power mode when the last port using it is put
+ *	administratively down.
+ */
+enum ethtool_module_power_mode_policy {
+	ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1,
+	ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO,
+};
+
+/**
+ * enum ethtool_module_power_mode - plug-in module power mode
+ * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode.
+ */
+enum ethtool_module_power_mode {
+	ETHTOOL_MODULE_POWER_MODE_LOW = 1,
+	ETHTOOL_MODULE_POWER_MODE_HIGH,
+};
+
 /**
  * struct ethtool_gstrings - string set for data tagging
  * @cmd: Command number = %ETHTOOL_GSTRINGS
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 5545f1ca9237..ca5fbb59fa42 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -47,6 +47,8 @@ enum {
 	ETHTOOL_MSG_MODULE_EEPROM_GET,
 	ETHTOOL_MSG_STATS_GET,
 	ETHTOOL_MSG_PHC_VCLOCKS_GET,
+	ETHTOOL_MSG_MODULE_GET,
+	ETHTOOL_MSG_MODULE_SET,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -90,6 +92,8 @@ enum {
 	ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY,
 	ETHTOOL_MSG_STATS_GET_REPLY,
 	ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY,
+	ETHTOOL_MSG_MODULE_GET_REPLY,
+	ETHTOOL_MSG_MODULE_NTF,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -833,6 +837,19 @@ enum {
 	ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1)
 };
 
+/* MODULE */
+
+enum {
+	ETHTOOL_A_MODULE_UNSPEC,
+	ETHTOOL_A_MODULE_HEADER,		/* nest - _A_HEADER_* */
+	ETHTOOL_A_MODULE_POWER_MODE_POLICY,	/* u8 */
+	ETHTOOL_A_MODULE_POWER_MODE,		/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_MODULE_CNT,
+	ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 0a19470efbfb..b76432e70e6b 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
 		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
-		   tunnels.o fec.o eeprom.o stats.o phc_vclocks.o
+		   tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
new file mode 100644
index 000000000000..bc2cef11bbda
--- /dev/null
+++ b/net/ethtool/module.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+struct module_req_info {
+	struct ethnl_req_info base;
+};
+
+struct module_reply_data {
+	struct ethnl_reply_data	base;
+	struct ethtool_module_power_mode_params power;
+};
+
+#define MODULE_REPDATA(__reply_base) \
+	container_of(__reply_base, struct module_reply_data, base)
+
+/* MODULE_GET */
+
+const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1] = {
+	[ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int module_get_power_mode(struct net_device *dev,
+				 struct module_reply_data *data,
+				 struct netlink_ext_ack *extack)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+
+	if (!ops->get_module_power_mode)
+		return 0;
+
+	return ops->get_module_power_mode(dev, &data->power, extack);
+}
+
+static int module_prepare_data(const struct ethnl_req_info *req_base,
+			       struct ethnl_reply_data *reply_base,
+			       struct genl_info *info)
+{
+	struct module_reply_data *data = MODULE_REPDATA(reply_base);
+	struct netlink_ext_ack *extack = info ? info->extack : NULL;
+	struct net_device *dev = reply_base->dev;
+	int ret;
+
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = module_get_power_mode(dev, data, extack);
+	if (ret < 0)
+		goto out_complete;
+
+out_complete:
+	ethnl_ops_complete(dev);
+	return ret;
+}
+
+static int module_reply_size(const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	struct module_reply_data *data = MODULE_REPDATA(reply_base);
+	int len = 0;
+
+	if (data->power.policy)
+		len += nla_total_size(sizeof(u8));	/* _MODULE_POWER_MODE_POLICY */
+
+	if (data->power.mode)
+		len += nla_total_size(sizeof(u8));	/* _MODULE_POWER_MODE */
+
+	return len;
+}
+
+static int module_fill_reply(struct sk_buff *skb,
+			     const struct ethnl_req_info *req_base,
+			     const struct ethnl_reply_data *reply_base)
+{
+	const struct module_reply_data *data = MODULE_REPDATA(reply_base);
+
+	if (data->power.policy &&
+	    nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE_POLICY,
+		       data->power.policy))
+		return -EMSGSIZE;
+
+	if (data->power.mode &&
+	    nla_put_u8(skb, ETHTOOL_A_MODULE_POWER_MODE, data->power.mode))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+const struct ethnl_request_ops ethnl_module_request_ops = {
+	.request_cmd		= ETHTOOL_MSG_MODULE_GET,
+	.reply_cmd		= ETHTOOL_MSG_MODULE_GET_REPLY,
+	.hdr_attr		= ETHTOOL_A_MODULE_HEADER,
+	.req_info_size		= sizeof(struct module_req_info),
+	.reply_data_size	= sizeof(struct module_reply_data),
+
+	.prepare_data		= module_prepare_data,
+	.reply_size		= module_reply_size,
+	.fill_reply		= module_fill_reply,
+};
+
+/* MODULE_SET */
+
+const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1] = {
+	[ETHTOOL_A_MODULE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+	[ETHTOOL_A_MODULE_POWER_MODE_POLICY] =
+		NLA_POLICY_RANGE(NLA_U8, ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH,
+				 ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO),
+};
+
+static int module_set_power_mode(struct net_device *dev, struct nlattr **tb,
+				 bool *p_mod, struct netlink_ext_ack *extack)
+{
+	struct ethtool_module_power_mode_params power = {};
+	struct ethtool_module_power_mode_params power_new;
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	int ret;
+
+	if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY])
+		return 0;
+
+	if (!ops->get_module_power_mode || !ops->set_module_power_mode) {
+		NL_SET_ERR_MSG_ATTR(extack,
+				    tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY],
+				    "Setting power mode policy is not supported by this device");
+		return -EOPNOTSUPP;
+	}
+
+	power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]);
+	ret = ops->get_module_power_mode(dev, &power, extack);
+	if (ret < 0)
+		return ret;
+
+	if (power_new.policy == power.policy)
+		return 0;
+	*p_mod = true;
+
+	return ops->set_module_power_mode(dev, &power_new, extack);
+}
+
+int ethnl_set_module(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ethnl_req_info req_info = {};
+	struct nlattr **tb = info->attrs;
+	struct net_device *dev;
+	bool mod = false;
+	int ret;
+
+	ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_MODULE_HEADER],
+					 genl_info_net(info), info->extack,
+					 true);
+	if (ret < 0)
+		return ret;
+	dev = req_info.dev;
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	ret = module_set_power_mode(dev, tb, &mod, info->extack);
+	if (ret < 0)
+		goto out_ops;
+
+	if (!mod)
+		goto out_ops;
+
+	ethtool_notify(dev, ETHTOOL_MSG_MODULE_NTF, NULL);
+
+out_ops:
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 1797a0a90019..38b44c0291b1 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -282,6 +282,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
 	[ETHTOOL_MSG_MODULE_EEPROM_GET]	= &ethnl_module_eeprom_request_ops,
 	[ETHTOOL_MSG_STATS_GET]		= &ethnl_stats_request_ops,
 	[ETHTOOL_MSG_PHC_VCLOCKS_GET]	= &ethnl_phc_vclocks_request_ops,
+	[ETHTOOL_MSG_MODULE_GET]	= &ethnl_module_request_ops,
 };
 
 static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -593,6 +594,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
 	[ETHTOOL_MSG_PAUSE_NTF]		= &ethnl_pause_request_ops,
 	[ETHTOOL_MSG_EEE_NTF]		= &ethnl_eee_request_ops,
 	[ETHTOOL_MSG_FEC_NTF]		= &ethnl_fec_request_ops,
+	[ETHTOOL_MSG_MODULE_NTF]	= &ethnl_module_request_ops,
 };
 
 /* default notification handler */
@@ -686,6 +688,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
 	[ETHTOOL_MSG_PAUSE_NTF]		= ethnl_default_notify,
 	[ETHTOOL_MSG_EEE_NTF]		= ethnl_default_notify,
 	[ETHTOOL_MSG_FEC_NTF]		= ethnl_default_notify,
+	[ETHTOOL_MSG_MODULE_NTF]	= ethnl_default_notify,
 };
 
 void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
@@ -999,6 +1002,22 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.policy = ethnl_phc_vclocks_get_policy,
 		.maxattr = ARRAY_SIZE(ethnl_phc_vclocks_get_policy) - 1,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_MODULE_GET,
+		.doit	= ethnl_default_doit,
+		.start	= ethnl_default_start,
+		.dumpit	= ethnl_default_dumpit,
+		.done	= ethnl_default_done,
+		.policy = ethnl_module_get_policy,
+		.maxattr = ARRAY_SIZE(ethnl_module_get_policy) - 1,
+	},
+	{
+		.cmd	= ETHTOOL_MSG_MODULE_SET,
+		.flags	= GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_set_module,
+		.policy = ethnl_module_set_policy,
+		.maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index e8987e28036f..836ee7157848 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -337,6 +337,7 @@ extern const struct ethnl_request_ops ethnl_fec_request_ops;
 extern const struct ethnl_request_ops ethnl_module_eeprom_request_ops;
 extern const struct ethnl_request_ops ethnl_stats_request_ops;
 extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
+extern const struct ethnl_request_ops ethnl_module_request_ops;
 
 extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
 extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -373,6 +374,8 @@ extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
 extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
 extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
 extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1];
+extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1];
+extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
 
 int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
@@ -391,6 +394,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
 int ethnl_tunnel_info_start(struct netlink_callback *cb);
 int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
 int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
+int ethnl_set_module(struct sk_buff *skb, struct genl_info *info);
 
 extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
 extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
-- 
cgit v1.2.3


From 3dfb51126064b594470b9c0b278188fbc9194709 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 6 Oct 2021 13:46:46 +0300
Subject: ethtool: Add transceiver module extended state

Add an extended state and sub-state to describe link issues related to
transceiver modules.

The 'ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY' extended sub-state
tells user space that port is unable to gain a carrier because the CMIS
Module State Machine did not reach the ModuleReady (Fully Operational)
state. For example, if the module is stuck at ModuleLowPwr or
ModuleFault state. In case of the latter, user space can read the fault
reason from the module's EEPROM and potentially reset it.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst | 10 ++++++++++
 include/linux/ethtool.h                      |  1 +
 include/uapi/linux/ethtool.h                 |  6 ++++++
 3 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index d6fd4b2e243c..7b598c7e3912 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -528,6 +528,8 @@ Link extended states:
                                                         power required from cable or module
 
   ``ETHTOOL_LINK_EXT_STATE_OVERHEAT``                   The module is overheated
+
+  ``ETHTOOL_LINK_EXT_STATE_MODULE``                     Transceiver module issue
   ================================================      ============================================
 
 Link extended substates:
@@ -621,6 +623,14 @@ Link extended substates:
   ``ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE``   Cable test failure
   ===================================================   ============================================
 
+  Transceiver module issue substates:
+
+  ===================================================   ============================================
+  ``ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY``   The CMIS Module State Machine did not reach
+                                                        the ModuleReady state. For example, if the
+                                                        module is stuck at ModuleFault state
+  ===================================================   ============================================
+
 DEBUG_GET
 =========
 
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 9adf8d2c3144..845a0ffc16ee 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -94,6 +94,7 @@ struct ethtool_link_ext_state_info {
 		enum ethtool_link_ext_substate_link_logical_mismatch link_logical_mismatch;
 		enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity;
 		enum ethtool_link_ext_substate_cable_issue cable_issue;
+		enum ethtool_link_ext_substate_module module;
 		u8 __link_ext_substate;
 	};
 };
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 6de61d53ca5d..a2223b685451 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -603,6 +603,7 @@ enum ethtool_link_ext_state {
 	ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE,
 	ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED,
 	ETHTOOL_LINK_EXT_STATE_OVERHEAT,
+	ETHTOOL_LINK_EXT_STATE_MODULE,
 };
 
 /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */
@@ -649,6 +650,11 @@ enum ethtool_link_ext_substate_cable_issue {
 	ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
 };
 
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */
+enum ethtool_link_ext_substate_module {
+	ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1,
+};
+
 #define ETH_GSTRING_LEN		32
 
 /**
-- 
cgit v1.2.3


From 79365f36d1de87286bb4fc0abcb2a01678ef4bef Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Wed, 6 Oct 2021 13:19:20 +0100
Subject: net: mdio: add mdiobus_modify_changed()

Add mdiobus_modify_changed() helper to reflect the phylib and similar
equivalents. This will avoid this functionality being open-coded, as
has already happened in phylink, and it looks like other users will be
appearing soon.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/mdio_bus.c | 22 ++++++++++++++++++++++
 include/linux/mdio.h       |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 6f4b4e5df639..d8b68145f6b4 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -926,6 +926,28 @@ int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask, u16 set)
 }
 EXPORT_SYMBOL_GPL(mdiobus_modify);
 
+/**
+ * mdiobus_modify_changed - Convenience function for modifying a given mdio
+ *	device register and returning if it changed
+ * @bus: the mii_bus struct
+ * @addr: the phy address
+ * @regnum: register number to write
+ * @mask: bit mask of bits to clear
+ * @set: bit mask of bits to set
+ */
+int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum,
+			   u16 mask, u16 set)
+{
+	int err;
+
+	mutex_lock(&bus->mdio_lock);
+	err = __mdiobus_modify_changed(bus, addr, regnum, mask, set);
+	mutex_unlock(&bus->mdio_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mdiobus_modify_changed);
+
 /**
  * mdio_bus_match - determine if given MDIO driver supports the given
  *		    MDIO device
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 5e6dc38f418e..f622888a4ba8 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -349,6 +349,8 @@ int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask,
 		   u16 set);
+int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum,
+			   u16 mask, u16 set);
 
 static inline u32 mdiobus_c45_addr(int devad, u16 regnum)
 {
-- 
cgit v1.2.3


From af8cc9600bbf2251b04c56139f7c83f87c3f878a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 23 Sep 2021 14:10:51 -0300
Subject: futex: Split out syscalls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Put the syscalls in their own little file.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: André Almeida <andrealmeid@collabora.com>
Link: https://lore.kernel.org/r/20210923171111.300673-3-andrealmeid@collabora.com
---
 include/linux/syscalls.h |   2 +-
 kernel/futex/Makefile    |   2 +-
 kernel/futex/core.c      | 532 ++++++++++-------------------------------------
 kernel/futex/futex.h     |  58 ++++++
 kernel/futex/syscalls.c  | 279 +++++++++++++++++++++++++
 kernel/sys_ni.c          |   2 +-
 6 files changed, 455 insertions(+), 420 deletions(-)
 create mode 100644 kernel/futex/futex.h
 create mode 100644 kernel/futex/syscalls.c

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 252243c7783d..25979682ade5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -610,7 +610,7 @@ asmlinkage long sys_waitid(int which, pid_t pid,
 asmlinkage long sys_set_tid_address(int __user *tidptr);
 asmlinkage long sys_unshare(unsigned long unshare_flags);
 
-/* kernel/futex.c */
+/* kernel/futex/syscalls.c */
 asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 			  const struct __kernel_timespec __user *utime,
 			  u32 __user *uaddr2, u32 val3);
diff --git a/kernel/futex/Makefile b/kernel/futex/Makefile
index b89ba3fba343..ff9a9605a8d6 100644
--- a/kernel/futex/Makefile
+++ b/kernel/futex/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 
-obj-y += core.o
+obj-y += core.o syscalls.o
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index f9bc9aa0ce1e..69d98929f2f5 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -34,14 +34,12 @@
 #include <linux/compat.h>
 #include <linux/jhash.h>
 #include <linux/pagemap.h>
-#include <linux/syscalls.h>
 #include <linux/freezer.h>
 #include <linux/memblock.h>
 #include <linux/fault-inject.h>
-#include <linux/time_namespace.h>
-
-#include <asm/futex.h>
+#include <linux/slab.h>
 
+#include "futex.h"
 #include "../locking/rtmutex_common.h"
 
 /*
@@ -144,27 +142,10 @@
  * double_lock_hb() and double_unlock_hb(), respectively.
  */
 
-#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
-#define futex_cmpxchg_enabled 1
-#else
-static int  __read_mostly futex_cmpxchg_enabled;
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
+int  __read_mostly futex_cmpxchg_enabled;
 #endif
 
-/*
- * Futex flags used to encode options to functions and preserve them across
- * restarts.
- */
-#ifdef CONFIG_MMU
-# define FLAGS_SHARED		0x01
-#else
-/*
- * NOMMU does not have per process address space. Let the compiler optimize
- * code away.
- */
-# define FLAGS_SHARED		0x00
-#endif
-#define FLAGS_CLOCKRT		0x02
-#define FLAGS_HAS_TIMEOUT	0x04
 
 /*
  * Priority Inheritance state:
@@ -329,7 +310,7 @@ static int __init setup_fail_futex(char *str)
 }
 __setup("fail_futex=", setup_fail_futex);
 
-static bool should_fail_futex(bool fshared)
+bool should_fail_futex(bool fshared)
 {
 	if (fail_futex.ignore_private && !fshared)
 		return false;
@@ -358,17 +339,8 @@ late_initcall(fail_futex_debugfs);
 
 #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
 
-#else
-static inline bool should_fail_futex(bool fshared)
-{
-	return false;
-}
 #endif /* CONFIG_FAIL_FUTEX */
 
-#ifdef CONFIG_COMPAT
-static void compat_exit_robust_list(struct task_struct *curr);
-#endif
-
 /*
  * Reflects a new waiter being added to the waitqueue.
  */
@@ -1647,8 +1619,7 @@ double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
 /*
  * Wake up waiters matching bitset queued on this futex (uaddr).
  */
-static int
-futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
+int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
 {
 	struct futex_hash_bucket *hb;
 	struct futex_q *this, *next;
@@ -1743,9 +1714,8 @@ static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr)
  * Wake up all waiters hashed on the physical page that is mapped
  * to this virtual address:
  */
-static int
-futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
-	      int nr_wake, int nr_wake2, int op)
+int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
+		  int nr_wake, int nr_wake2, int op)
 {
 	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	struct futex_hash_bucket *hb1, *hb2;
@@ -2124,9 +2094,8 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
  *  - >=0 - on success, the number of tasks requeued or woken;
  *  -  <0 - on error
  */
-static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
-			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
-			 u32 *cmpval, int requeue_pi)
+int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
+		  int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi)
 {
 	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
 	int task_count = 0, ret;
@@ -2926,8 +2895,7 @@ retry_private:
 	return ret;
 }
 
-static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
-		      ktime_t *abs_time, u32 bitset)
+int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
 {
 	struct hrtimer_sleeper timeout, *to;
 	struct restart_block *restart;
@@ -3015,8 +2983,7 @@ static long futex_wait_restart(struct restart_block *restart)
  *
  * Also serves as futex trylock_pi()'ing, and due semantics.
  */
-static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
-			 ktime_t *time, int trylock)
+int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock)
 {
 	struct hrtimer_sleeper timeout, *to;
 	struct task_struct *exiting = NULL;
@@ -3186,7 +3153,7 @@ uaddr_faulted:
  * This is the in-kernel slowpath: we look up the PI state (if any),
  * and do the rt-mutex unlock.
  */
-static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
+int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
 {
 	u32 curval, uval, vpid = task_pid_vnr(current);
 	union futex_key key = FUTEX_KEY_INIT;
@@ -3403,9 +3370,9 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  *  -  0 - On success;
  *  - <0 - On error
  */
-static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
-				 u32 val, ktime_t *abs_time, u32 bitset,
-				 u32 __user *uaddr2)
+int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
+			  u32 val, ktime_t *abs_time, u32 bitset,
+			  u32 __user *uaddr2)
 {
 	struct hrtimer_sleeper timeout, *to;
 	struct rt_mutex_waiter rt_waiter;
@@ -3539,87 +3506,6 @@ out:
 	return ret;
 }
 
-/*
- * Support for robust futexes: the kernel cleans up held futexes at
- * thread exit time.
- *
- * Implementation: user-space maintains a per-thread list of locks it
- * is holding. Upon do_exit(), the kernel carefully walks this list,
- * and marks all locks that are owned by this thread with the
- * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
- * always manipulated with the lock held, so the list is private and
- * per-thread. Userspace also maintains a per-thread 'list_op_pending'
- * field, to allow the kernel to clean up if the thread dies after
- * acquiring the lock, but just before it could have added itself to
- * the list. There can only be one such pending lock.
- */
-
-/**
- * sys_set_robust_list() - Set the robust-futex list head of a task
- * @head:	pointer to the list-head
- * @len:	length of the list-head, as userspace expects
- */
-SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
-		size_t, len)
-{
-	if (!futex_cmpxchg_enabled)
-		return -ENOSYS;
-	/*
-	 * The kernel knows only one size for now:
-	 */
-	if (unlikely(len != sizeof(*head)))
-		return -EINVAL;
-
-	current->robust_list = head;
-
-	return 0;
-}
-
-/**
- * sys_get_robust_list() - Get the robust-futex list head of a task
- * @pid:	pid of the process [zero for current task]
- * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
- * @len_ptr:	pointer to a length field, the kernel fills in the header size
- */
-SYSCALL_DEFINE3(get_robust_list, int, pid,
-		struct robust_list_head __user * __user *, head_ptr,
-		size_t __user *, len_ptr)
-{
-	struct robust_list_head __user *head;
-	unsigned long ret;
-	struct task_struct *p;
-
-	if (!futex_cmpxchg_enabled)
-		return -ENOSYS;
-
-	rcu_read_lock();
-
-	ret = -ESRCH;
-	if (!pid)
-		p = current;
-	else {
-		p = find_task_by_vpid(pid);
-		if (!p)
-			goto err_unlock;
-	}
-
-	ret = -EPERM;
-	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-		goto err_unlock;
-
-	head = p->robust_list;
-	rcu_read_unlock();
-
-	if (put_user(sizeof(*head), len_ptr))
-		return -EFAULT;
-	return put_user(head, head_ptr);
-
-err_unlock:
-	rcu_read_unlock();
-
-	return ret;
-}
-
 /* Constants for the pending_op argument of handle_futex_death */
 #define HANDLE_DEATH_PENDING	true
 #define HANDLE_DEATH_LIST	false
@@ -3821,227 +3707,16 @@ static void exit_robust_list(struct task_struct *curr)
 	}
 }
 
-static void futex_cleanup(struct task_struct *tsk)
-{
-	if (unlikely(tsk->robust_list)) {
-		exit_robust_list(tsk);
-		tsk->robust_list = NULL;
-	}
-
 #ifdef CONFIG_COMPAT
-	if (unlikely(tsk->compat_robust_list)) {
-		compat_exit_robust_list(tsk);
-		tsk->compat_robust_list = NULL;
-	}
-#endif
-
-	if (unlikely(!list_empty(&tsk->pi_state_list)))
-		exit_pi_state_list(tsk);
-}
-
-/**
- * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
- * @tsk:	task to set the state on
- *
- * Set the futex exit state of the task lockless. The futex waiter code
- * observes that state when a task is exiting and loops until the task has
- * actually finished the futex cleanup. The worst case for this is that the
- * waiter runs through the wait loop until the state becomes visible.
- *
- * This is called from the recursive fault handling path in do_exit().
- *
- * This is best effort. Either the futex exit code has run already or
- * not. If the OWNER_DIED bit has been set on the futex then the waiter can
- * take it over. If not, the problem is pushed back to user space. If the
- * futex exit code did not run yet, then an already queued waiter might
- * block forever, but there is nothing which can be done about that.
- */
-void futex_exit_recursive(struct task_struct *tsk)
-{
-	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
-	if (tsk->futex_state == FUTEX_STATE_EXITING)
-		mutex_unlock(&tsk->futex_exit_mutex);
-	tsk->futex_state = FUTEX_STATE_DEAD;
-}
-
-static void futex_cleanup_begin(struct task_struct *tsk)
-{
-	/*
-	 * Prevent various race issues against a concurrent incoming waiter
-	 * including live locks by forcing the waiter to block on
-	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
-	 * attach_to_pi_owner().
-	 */
-	mutex_lock(&tsk->futex_exit_mutex);
-
-	/*
-	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
-	 *
-	 * This ensures that all subsequent checks of tsk->futex_state in
-	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
-	 * tsk->pi_lock held.
-	 *
-	 * It guarantees also that a pi_state which was queued right before
-	 * the state change under tsk->pi_lock by a concurrent waiter must
-	 * be observed in exit_pi_state_list().
-	 */
-	raw_spin_lock_irq(&tsk->pi_lock);
-	tsk->futex_state = FUTEX_STATE_EXITING;
-	raw_spin_unlock_irq(&tsk->pi_lock);
-}
-
-static void futex_cleanup_end(struct task_struct *tsk, int state)
-{
-	/*
-	 * Lockless store. The only side effect is that an observer might
-	 * take another loop until it becomes visible.
-	 */
-	tsk->futex_state = state;
-	/*
-	 * Drop the exit protection. This unblocks waiters which observed
-	 * FUTEX_STATE_EXITING to reevaluate the state.
-	 */
-	mutex_unlock(&tsk->futex_exit_mutex);
-}
-
-void futex_exec_release(struct task_struct *tsk)
-{
-	/*
-	 * The state handling is done for consistency, but in the case of
-	 * exec() there is no way to prevent further damage as the PID stays
-	 * the same. But for the unlikely and arguably buggy case that a
-	 * futex is held on exec(), this provides at least as much state
-	 * consistency protection which is possible.
-	 */
-	futex_cleanup_begin(tsk);
-	futex_cleanup(tsk);
-	/*
-	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
-	 * exec a new binary.
-	 */
-	futex_cleanup_end(tsk, FUTEX_STATE_OK);
-}
-
-void futex_exit_release(struct task_struct *tsk)
-{
-	futex_cleanup_begin(tsk);
-	futex_cleanup(tsk);
-	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
-}
-
-long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
-		u32 __user *uaddr2, u32 val2, u32 val3)
-{
-	int cmd = op & FUTEX_CMD_MASK;
-	unsigned int flags = 0;
-
-	if (!(op & FUTEX_PRIVATE_FLAG))
-		flags |= FLAGS_SHARED;
-
-	if (op & FUTEX_CLOCK_REALTIME) {
-		flags |= FLAGS_CLOCKRT;
-		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
-		    cmd != FUTEX_LOCK_PI2)
-			return -ENOSYS;
-	}
-
-	switch (cmd) {
-	case FUTEX_LOCK_PI:
-	case FUTEX_LOCK_PI2:
-	case FUTEX_UNLOCK_PI:
-	case FUTEX_TRYLOCK_PI:
-	case FUTEX_WAIT_REQUEUE_PI:
-	case FUTEX_CMP_REQUEUE_PI:
-		if (!futex_cmpxchg_enabled)
-			return -ENOSYS;
-	}
-
-	switch (cmd) {
-	case FUTEX_WAIT:
-		val3 = FUTEX_BITSET_MATCH_ANY;
-		fallthrough;
-	case FUTEX_WAIT_BITSET:
-		return futex_wait(uaddr, flags, val, timeout, val3);
-	case FUTEX_WAKE:
-		val3 = FUTEX_BITSET_MATCH_ANY;
-		fallthrough;
-	case FUTEX_WAKE_BITSET:
-		return futex_wake(uaddr, flags, val, val3);
-	case FUTEX_REQUEUE:
-		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
-	case FUTEX_CMP_REQUEUE:
-		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
-	case FUTEX_WAKE_OP:
-		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
-	case FUTEX_LOCK_PI:
-		flags |= FLAGS_CLOCKRT;
-		fallthrough;
-	case FUTEX_LOCK_PI2:
-		return futex_lock_pi(uaddr, flags, timeout, 0);
-	case FUTEX_UNLOCK_PI:
-		return futex_unlock_pi(uaddr, flags);
-	case FUTEX_TRYLOCK_PI:
-		return futex_lock_pi(uaddr, flags, NULL, 1);
-	case FUTEX_WAIT_REQUEUE_PI:
-		val3 = FUTEX_BITSET_MATCH_ANY;
-		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
-					     uaddr2);
-	case FUTEX_CMP_REQUEUE_PI:
-		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
-	}
-	return -ENOSYS;
-}
-
-static __always_inline bool futex_cmd_has_timeout(u32 cmd)
-{
-	switch (cmd) {
-	case FUTEX_WAIT:
-	case FUTEX_LOCK_PI:
-	case FUTEX_LOCK_PI2:
-	case FUTEX_WAIT_BITSET:
-	case FUTEX_WAIT_REQUEUE_PI:
-		return true;
-	}
-	return false;
-}
-
-static __always_inline int
-futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
-{
-	if (!timespec64_valid(ts))
-		return -EINVAL;
-
-	*t = timespec64_to_ktime(*ts);
-	if (cmd == FUTEX_WAIT)
-		*t = ktime_add_safe(ktime_get(), *t);
-	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
-		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
-	return 0;
-}
-
-SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
-		const struct __kernel_timespec __user *, utime,
-		u32 __user *, uaddr2, u32, val3)
+static void __user *futex_uaddr(struct robust_list __user *entry,
+				compat_long_t futex_offset)
 {
-	int ret, cmd = op & FUTEX_CMD_MASK;
-	ktime_t t, *tp = NULL;
-	struct timespec64 ts;
-
-	if (utime && futex_cmd_has_timeout(cmd)) {
-		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
-			return -EFAULT;
-		if (get_timespec64(&ts, utime))
-			return -EFAULT;
-		ret = futex_init_timeout(cmd, op, &ts, &t);
-		if (ret)
-			return ret;
-		tp = &t;
-	}
+	compat_uptr_t base = ptr_to_compat(entry);
+	void __user *uaddr = compat_ptr(base + futex_offset);
 
-	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+	return uaddr;
 }
 
-#ifdef CONFIG_COMPAT
 /*
  * Fetch a robust-list pointer. Bit 0 signals PI futexes:
  */
@@ -4058,15 +3733,6 @@ compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **ent
 	return 0;
 }
 
-static void __user *futex_uaddr(struct robust_list __user *entry,
-				compat_long_t futex_offset)
-{
-	compat_uptr_t base = ptr_to_compat(entry);
-	void __user *uaddr = compat_ptr(base + futex_offset);
-
-	return uaddr;
-}
-
 /*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
@@ -4143,83 +3809,115 @@ static void compat_exit_robust_list(struct task_struct *curr)
 		handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING);
 	}
 }
+#endif
 
-COMPAT_SYSCALL_DEFINE2(set_robust_list,
-		struct compat_robust_list_head __user *, head,
-		compat_size_t, len)
+static void futex_cleanup(struct task_struct *tsk)
 {
-	if (!futex_cmpxchg_enabled)
-		return -ENOSYS;
-
-	if (unlikely(len != sizeof(*head)))
-		return -EINVAL;
+	if (unlikely(tsk->robust_list)) {
+		exit_robust_list(tsk);
+		tsk->robust_list = NULL;
+	}
 
-	current->compat_robust_list = head;
+#ifdef CONFIG_COMPAT
+	if (unlikely(tsk->compat_robust_list)) {
+		compat_exit_robust_list(tsk);
+		tsk->compat_robust_list = NULL;
+	}
+#endif
 
-	return 0;
+	if (unlikely(!list_empty(&tsk->pi_state_list)))
+		exit_pi_state_list(tsk);
 }
 
-COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
-			compat_uptr_t __user *, head_ptr,
-			compat_size_t __user *, len_ptr)
+/**
+ * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD
+ * @tsk:	task to set the state on
+ *
+ * Set the futex exit state of the task lockless. The futex waiter code
+ * observes that state when a task is exiting and loops until the task has
+ * actually finished the futex cleanup. The worst case for this is that the
+ * waiter runs through the wait loop until the state becomes visible.
+ *
+ * This is called from the recursive fault handling path in do_exit().
+ *
+ * This is best effort. Either the futex exit code has run already or
+ * not. If the OWNER_DIED bit has been set on the futex then the waiter can
+ * take it over. If not, the problem is pushed back to user space. If the
+ * futex exit code did not run yet, then an already queued waiter might
+ * block forever, but there is nothing which can be done about that.
+ */
+void futex_exit_recursive(struct task_struct *tsk)
 {
-	struct compat_robust_list_head __user *head;
-	unsigned long ret;
-	struct task_struct *p;
-
-	if (!futex_cmpxchg_enabled)
-		return -ENOSYS;
-
-	rcu_read_lock();
-
-	ret = -ESRCH;
-	if (!pid)
-		p = current;
-	else {
-		p = find_task_by_vpid(pid);
-		if (!p)
-			goto err_unlock;
-	}
-
-	ret = -EPERM;
-	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
-		goto err_unlock;
-
-	head = p->compat_robust_list;
-	rcu_read_unlock();
-
-	if (put_user(sizeof(*head), len_ptr))
-		return -EFAULT;
-	return put_user(ptr_to_compat(head), head_ptr);
+	/* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */
+	if (tsk->futex_state == FUTEX_STATE_EXITING)
+		mutex_unlock(&tsk->futex_exit_mutex);
+	tsk->futex_state = FUTEX_STATE_DEAD;
+}
 
-err_unlock:
-	rcu_read_unlock();
+static void futex_cleanup_begin(struct task_struct *tsk)
+{
+	/*
+	 * Prevent various race issues against a concurrent incoming waiter
+	 * including live locks by forcing the waiter to block on
+	 * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in
+	 * attach_to_pi_owner().
+	 */
+	mutex_lock(&tsk->futex_exit_mutex);
 
-	return ret;
+	/*
+	 * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock.
+	 *
+	 * This ensures that all subsequent checks of tsk->futex_state in
+	 * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with
+	 * tsk->pi_lock held.
+	 *
+	 * It guarantees also that a pi_state which was queued right before
+	 * the state change under tsk->pi_lock by a concurrent waiter must
+	 * be observed in exit_pi_state_list().
+	 */
+	raw_spin_lock_irq(&tsk->pi_lock);
+	tsk->futex_state = FUTEX_STATE_EXITING;
+	raw_spin_unlock_irq(&tsk->pi_lock);
 }
-#endif /* CONFIG_COMPAT */
 
-#ifdef CONFIG_COMPAT_32BIT_TIME
-SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
-		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
-		u32, val3)
+static void futex_cleanup_end(struct task_struct *tsk, int state)
 {
-	int ret, cmd = op & FUTEX_CMD_MASK;
-	ktime_t t, *tp = NULL;
-	struct timespec64 ts;
+	/*
+	 * Lockless store. The only side effect is that an observer might
+	 * take another loop until it becomes visible.
+	 */
+	tsk->futex_state = state;
+	/*
+	 * Drop the exit protection. This unblocks waiters which observed
+	 * FUTEX_STATE_EXITING to reevaluate the state.
+	 */
+	mutex_unlock(&tsk->futex_exit_mutex);
+}
 
-	if (utime && futex_cmd_has_timeout(cmd)) {
-		if (get_old_timespec32(&ts, utime))
-			return -EFAULT;
-		ret = futex_init_timeout(cmd, op, &ts, &t);
-		if (ret)
-			return ret;
-		tp = &t;
-	}
+void futex_exec_release(struct task_struct *tsk)
+{
+	/*
+	 * The state handling is done for consistency, but in the case of
+	 * exec() there is no way to prevent further damage as the PID stays
+	 * the same. But for the unlikely and arguably buggy case that a
+	 * futex is held on exec(), this provides at least as much state
+	 * consistency protection which is possible.
+	 */
+	futex_cleanup_begin(tsk);
+	futex_cleanup(tsk);
+	/*
+	 * Reset the state to FUTEX_STATE_OK. The task is alive and about
+	 * exec a new binary.
+	 */
+	futex_cleanup_end(tsk, FUTEX_STATE_OK);
+}
 
-	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+void futex_exit_release(struct task_struct *tsk)
+{
+	futex_cleanup_begin(tsk);
+	futex_cleanup(tsk);
+	futex_cleanup_end(tsk, FUTEX_STATE_DEAD);
 }
-#endif /* CONFIG_COMPAT_32BIT_TIME */
 
 static void __init futex_detect_cmpxchg(void)
 {
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
new file mode 100644
index 000000000000..7bb4ca8bf32f
--- /dev/null
+++ b/kernel/futex/futex.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _FUTEX_H
+#define _FUTEX_H
+
+#include <asm/futex.h>
+
+/*
+ * Futex flags used to encode options to functions and preserve them across
+ * restarts.
+ */
+#ifdef CONFIG_MMU
+# define FLAGS_SHARED		0x01
+#else
+/*
+ * NOMMU does not have per process address space. Let the compiler optimize
+ * code away.
+ */
+# define FLAGS_SHARED		0x00
+#endif
+#define FLAGS_CLOCKRT		0x02
+#define FLAGS_HAS_TIMEOUT	0x04
+
+#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
+#define futex_cmpxchg_enabled 1
+#else
+extern int  __read_mostly futex_cmpxchg_enabled;
+#endif
+
+#ifdef CONFIG_FAIL_FUTEX
+extern bool should_fail_futex(bool fshared);
+#else
+static inline bool should_fail_futex(bool fshared)
+{
+	return false;
+}
+#endif
+
+extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
+				 val, ktime_t *abs_time, u32 bitset, u32 __user
+				 *uaddr2);
+
+extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
+			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
+			 u32 *cmpval, int requeue_pi);
+
+extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
+		      ktime_t *abs_time, u32 bitset);
+
+extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);
+
+extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
+			 u32 __user *uaddr2, int nr_wake, int nr_wake2, int op);
+
+extern int futex_unlock_pi(u32 __user *uaddr, unsigned int flags);
+
+extern int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock);
+
+#endif /* _FUTEX_H */
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
new file mode 100644
index 000000000000..6e7e36c640a1
--- /dev/null
+++ b/kernel/futex/syscalls.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/time_namespace.h>
+
+#include "futex.h"
+
+/*
+ * Support for robust futexes: the kernel cleans up held futexes at
+ * thread exit time.
+ *
+ * Implementation: user-space maintains a per-thread list of locks it
+ * is holding. Upon do_exit(), the kernel carefully walks this list,
+ * and marks all locks that are owned by this thread with the
+ * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
+ * always manipulated with the lock held, so the list is private and
+ * per-thread. Userspace also maintains a per-thread 'list_op_pending'
+ * field, to allow the kernel to clean up if the thread dies after
+ * acquiring the lock, but just before it could have added itself to
+ * the list. There can only be one such pending lock.
+ */
+
+/**
+ * sys_set_robust_list() - Set the robust-futex list head of a task
+ * @head:	pointer to the list-head
+ * @len:	length of the list-head, as userspace expects
+ */
+SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
+		size_t, len)
+{
+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
+	/*
+	 * The kernel knows only one size for now:
+	 */
+	if (unlikely(len != sizeof(*head)))
+		return -EINVAL;
+
+	current->robust_list = head;
+
+	return 0;
+}
+
+/**
+ * sys_get_robust_list() - Get the robust-futex list head of a task
+ * @pid:	pid of the process [zero for current task]
+ * @head_ptr:	pointer to a list-head pointer, the kernel fills it in
+ * @len_ptr:	pointer to a length field, the kernel fills in the header size
+ */
+SYSCALL_DEFINE3(get_robust_list, int, pid,
+		struct robust_list_head __user * __user *, head_ptr,
+		size_t __user *, len_ptr)
+{
+	struct robust_list_head __user *head;
+	unsigned long ret;
+	struct task_struct *p;
+
+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
+
+	rcu_read_lock();
+
+	ret = -ESRCH;
+	if (!pid)
+		p = current;
+	else {
+		p = find_task_by_vpid(pid);
+		if (!p)
+			goto err_unlock;
+	}
+
+	ret = -EPERM;
+	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+		goto err_unlock;
+
+	head = p->robust_list;
+	rcu_read_unlock();
+
+	if (put_user(sizeof(*head), len_ptr))
+		return -EFAULT;
+	return put_user(head, head_ptr);
+
+err_unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+		u32 __user *uaddr2, u32 val2, u32 val3)
+{
+	int cmd = op & FUTEX_CMD_MASK;
+	unsigned int flags = 0;
+
+	if (!(op & FUTEX_PRIVATE_FLAG))
+		flags |= FLAGS_SHARED;
+
+	if (op & FUTEX_CLOCK_REALTIME) {
+		flags |= FLAGS_CLOCKRT;
+		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
+		    cmd != FUTEX_LOCK_PI2)
+			return -ENOSYS;
+	}
+
+	switch (cmd) {
+	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
+	case FUTEX_UNLOCK_PI:
+	case FUTEX_TRYLOCK_PI:
+	case FUTEX_WAIT_REQUEUE_PI:
+	case FUTEX_CMP_REQUEUE_PI:
+		if (!futex_cmpxchg_enabled)
+			return -ENOSYS;
+	}
+
+	switch (cmd) {
+	case FUTEX_WAIT:
+		val3 = FUTEX_BITSET_MATCH_ANY;
+		fallthrough;
+	case FUTEX_WAIT_BITSET:
+		return futex_wait(uaddr, flags, val, timeout, val3);
+	case FUTEX_WAKE:
+		val3 = FUTEX_BITSET_MATCH_ANY;
+		fallthrough;
+	case FUTEX_WAKE_BITSET:
+		return futex_wake(uaddr, flags, val, val3);
+	case FUTEX_REQUEUE:
+		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
+	case FUTEX_CMP_REQUEUE:
+		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
+	case FUTEX_WAKE_OP:
+		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
+	case FUTEX_LOCK_PI:
+		flags |= FLAGS_CLOCKRT;
+		fallthrough;
+	case FUTEX_LOCK_PI2:
+		return futex_lock_pi(uaddr, flags, timeout, 0);
+	case FUTEX_UNLOCK_PI:
+		return futex_unlock_pi(uaddr, flags);
+	case FUTEX_TRYLOCK_PI:
+		return futex_lock_pi(uaddr, flags, NULL, 1);
+	case FUTEX_WAIT_REQUEUE_PI:
+		val3 = FUTEX_BITSET_MATCH_ANY;
+		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
+					     uaddr2);
+	case FUTEX_CMP_REQUEUE_PI:
+		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
+	}
+	return -ENOSYS;
+}
+
+static __always_inline bool futex_cmd_has_timeout(u32 cmd)
+{
+	switch (cmd) {
+	case FUTEX_WAIT:
+	case FUTEX_LOCK_PI:
+	case FUTEX_LOCK_PI2:
+	case FUTEX_WAIT_BITSET:
+	case FUTEX_WAIT_REQUEUE_PI:
+		return true;
+	}
+	return false;
+}
+
+static __always_inline int
+futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
+{
+	if (!timespec64_valid(ts))
+		return -EINVAL;
+
+	*t = timespec64_to_ktime(*ts);
+	if (cmd == FUTEX_WAIT)
+		*t = ktime_add_safe(ktime_get(), *t);
+	else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
+		*t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
+	return 0;
+}
+
+SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+		const struct __kernel_timespec __user *, utime,
+		u32 __user *, uaddr2, u32, val3)
+{
+	int ret, cmd = op & FUTEX_CMD_MASK;
+	ktime_t t, *tp = NULL;
+	struct timespec64 ts;
+
+	if (utime && futex_cmd_has_timeout(cmd)) {
+		if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
+			return -EFAULT;
+		if (get_timespec64(&ts, utime))
+			return -EFAULT;
+		ret = futex_init_timeout(cmd, op, &ts, &t);
+		if (ret)
+			return ret;
+		tp = &t;
+	}
+
+	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(set_robust_list,
+		struct compat_robust_list_head __user *, head,
+		compat_size_t, len)
+{
+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
+
+	if (unlikely(len != sizeof(*head)))
+		return -EINVAL;
+
+	current->compat_robust_list = head;
+
+	return 0;
+}
+
+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+			compat_uptr_t __user *, head_ptr,
+			compat_size_t __user *, len_ptr)
+{
+	struct compat_robust_list_head __user *head;
+	unsigned long ret;
+	struct task_struct *p;
+
+	if (!futex_cmpxchg_enabled)
+		return -ENOSYS;
+
+	rcu_read_lock();
+
+	ret = -ESRCH;
+	if (!pid)
+		p = current;
+	else {
+		p = find_task_by_vpid(pid);
+		if (!p)
+			goto err_unlock;
+	}
+
+	ret = -EPERM;
+	if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+		goto err_unlock;
+
+	head = p->compat_robust_list;
+	rcu_read_unlock();
+
+	if (put_user(sizeof(*head), len_ptr))
+		return -EFAULT;
+	return put_user(ptr_to_compat(head), head_ptr);
+
+err_unlock:
+	rcu_read_unlock();
+
+	return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
+		const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
+		u32, val3)
+{
+	int ret, cmd = op & FUTEX_CMD_MASK;
+	ktime_t t, *tp = NULL;
+	struct timespec64 ts;
+
+	if (utime && futex_cmd_has_timeout(cmd)) {
+		if (get_old_timespec32(&ts, utime))
+			return -EFAULT;
+		ret = futex_init_timeout(cmd, op, &ts, &t);
+		if (ret)
+			return ret;
+		tp = &t;
+	}
+
+	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+}
+#endif /* CONFIG_COMPAT_32BIT_TIME */
+
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index f43d89d92860..13ee8334ab6e 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -143,7 +143,7 @@ COND_SYSCALL(capset);
 /* __ARCH_WANT_SYS_CLONE3 */
 COND_SYSCALL(clone3);
 
-/* kernel/futex.c */
+/* kernel/futex/syscalls.c */
 COND_SYSCALL(futex);
 COND_SYSCALL(futex_time32);
 COND_SYSCALL(set_robust_list);
-- 
cgit v1.2.3


From bf69bad38cf63d980e8a603f8d1bd1f85b5ed3d9 Mon Sep 17 00:00:00 2001
From: André Almeida <andrealmeid@collabora.com>
Date: Thu, 23 Sep 2021 14:11:05 -0300
Subject: futex: Implement sys_futex_waitv()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add support to wait on multiple futexes. This is the interface
implemented by this syscall:

futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes,
	    unsigned int flags, struct timespec *timeout, clockid_t clockid)

struct futex_waitv {
	__u64 val;
	__u64 uaddr;
	__u32 flags;
	__u32 __reserved;
};

Given an array of struct futex_waitv, wait on each uaddr. The thread
wakes if a futex_wake() is performed at any uaddr. The syscall returns
immediately if any waiter has *uaddr != val. *timeout is an optional
absolute timeout value for the operation. This syscall supports only
64bit sized timeout structs. The flags argument of the syscall should be
empty, but it can be used for future extensions. Flags for shared
futexes, sizes, etc. should be used on the individual flags of each
waiter.

__reserved is used for explicit padding and should be 0, but it might be
used for future extensions. If the userspace uses 32-bit pointers, it
should make sure to explicitly cast it when assigning to waitv::uaddr.

Returns the array index of one of the woken futexes. There’s no given
information of how many were woken, or any particular attribute of it
(if it’s the first woken, if it is of the smaller index...).

Signed-off-by: André Almeida <andrealmeid@collabora.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210923171111.300673-17-andrealmeid@collabora.com
---
 MAINTAINERS                       |   1 +
 include/linux/syscalls.h          |   5 +
 include/uapi/asm-generic/unistd.h |   5 +-
 include/uapi/linux/futex.h        |  25 +++++
 kernel/futex/futex.h              |  15 +++
 kernel/futex/syscalls.c           | 119 ++++++++++++++++++++++
 kernel/futex/waitwake.c           | 201 ++++++++++++++++++++++++++++++++++++++
 kernel/sys_ni.c                   |   1 +
 8 files changed, 371 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index b3094cb1afdd..310fb018acf9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7718,6 +7718,7 @@ M:	Ingo Molnar <mingo@redhat.com>
 R:	Peter Zijlstra <peterz@infradead.org>
 R:	Darren Hart <dvhart@infradead.org>
 R:	Davidlohr Bueso <dave@stgolabs.net>
+R:	André Almeida <andrealmeid@collabora.com>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 25979682ade5..528a478dbda8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -58,6 +58,7 @@ struct mq_attr;
 struct compat_stat;
 struct old_timeval32;
 struct robust_list_head;
+struct futex_waitv;
 struct getcpu_cache;
 struct old_linux_dirent;
 struct perf_event_attr;
@@ -623,6 +624,10 @@ asmlinkage long sys_get_robust_list(int pid,
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 
+asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
+				unsigned int nr_futexes, unsigned int flags,
+				struct __kernel_timespec __user *timeout, clockid_t clockid);
+
 /* kernel/hrtimer.c */
 asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
 			      struct __kernel_timespec __user *rmtp);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 1c5fb86d455a..4557a8b6086f 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
 #define __NR_process_mrelease 448
 __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 
+#define __NR_futex_waitv 449
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+
 #undef __NR_syscalls
-#define __NR_syscalls 449
+#define __NR_syscalls 450
 
 /*
  * 32 bit systems traditionally used different
diff --git a/include/uapi/linux/futex.h b/include/uapi/linux/futex.h
index 235e5b2facaa..71a5df8d2689 100644
--- a/include/uapi/linux/futex.h
+++ b/include/uapi/linux/futex.h
@@ -43,6 +43,31 @@
 #define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
 					 FUTEX_PRIVATE_FLAG)
 
+/*
+ * Flags to specify the bit length of the futex word for futex2 syscalls.
+ * Currently, only 32 is supported.
+ */
+#define FUTEX_32		2
+
+/*
+ * Max numbers of elements in a futex_waitv array
+ */
+#define FUTEX_WAITV_MAX		128
+
+/**
+ * struct futex_waitv - A waiter for vectorized wait
+ * @val:	Expected value at uaddr
+ * @uaddr:	User address to wait on
+ * @flags:	Flags for this waiter
+ * @__reserved:	Reserved member to preserve data alignment. Should be 0.
+ */
+struct futex_waitv {
+	__u64 val;
+	__u64 uaddr;
+	__u32 flags;
+	__u32 __reserved;
+};
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h
index 465f7bd5caef..948fcf317681 100644
--- a/kernel/futex/futex.h
+++ b/kernel/futex/futex.h
@@ -268,6 +268,21 @@ extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
 extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
 		      ktime_t *abs_time, u32 bitset);
 
+/**
+ * struct futex_vector - Auxiliary struct for futex_waitv()
+ * @w: Userspace provided data
+ * @q: Kernel side data
+ *
+ * Struct used to build an array with all data need for futex_waitv()
+ */
+struct futex_vector {
+	struct futex_waitv w;
+	struct futex_q q;
+};
+
+extern int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
+			       struct hrtimer_sleeper *to);
+
 extern int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset);
 
 extern int futex_wake_op(u32 __user *uaddr1, unsigned int flags,
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
index 6e7e36c640a1..6f91a07a6a83 100644
--- a/kernel/futex/syscalls.c
+++ b/kernel/futex/syscalls.c
@@ -199,6 +199,125 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
 	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
 }
 
+/* Mask of available flags for each futex in futex_waitv list */
+#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
+
+/**
+ * futex_parse_waitv - Parse a waitv array from userspace
+ * @futexv:	Kernel side list of waiters to be filled
+ * @uwaitv:     Userspace list to be parsed
+ * @nr_futexes: Length of futexv
+ *
+ * Return: Error code on failure, 0 on success
+ */
+static int futex_parse_waitv(struct futex_vector *futexv,
+			     struct futex_waitv __user *uwaitv,
+			     unsigned int nr_futexes)
+{
+	struct futex_waitv aux;
+	unsigned int i;
+
+	for (i = 0; i < nr_futexes; i++) {
+		if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
+			return -EFAULT;
+
+		if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
+			return -EINVAL;
+
+		if (!(aux.flags & FUTEX_32))
+			return -EINVAL;
+
+		futexv[i].w.flags = aux.flags;
+		futexv[i].w.val = aux.val;
+		futexv[i].w.uaddr = aux.uaddr;
+		futexv[i].q = futex_q_init;
+	}
+
+	return 0;
+}
+
+/**
+ * sys_futex_waitv - Wait on a list of futexes
+ * @waiters:    List of futexes to wait on
+ * @nr_futexes: Length of futexv
+ * @flags:      Flag for timeout (monotonic/realtime)
+ * @timeout:	Optional absolute timeout.
+ * @clockid:	Clock to be used for the timeout, realtime or monotonic.
+ *
+ * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes
+ * if a futex_wake() is performed at any uaddr. The syscall returns immediately
+ * if any waiter has *uaddr != val. *timeout is an optional timeout value for
+ * the operation. Each waiter has individual flags. The `flags` argument for
+ * the syscall should be used solely for specifying the timeout as realtime, if
+ * needed. Flags for private futexes, sizes, etc. should be used on the
+ * individual flags of each waiter.
+ *
+ * Returns the array index of one of the woken futexes. No further information
+ * is provided: any number of other futexes may also have been woken by the
+ * same event, and if more than one futex was woken, the retrned index may
+ * refer to any one of them. (It is not necessaryily the futex with the
+ * smallest index, nor the one most recently woken, nor...)
+ */
+
+SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
+		unsigned int, nr_futexes, unsigned int, flags,
+		struct __kernel_timespec __user *, timeout, clockid_t, clockid)
+{
+	struct hrtimer_sleeper to;
+	struct futex_vector *futexv;
+	struct timespec64 ts;
+	ktime_t time;
+	int ret;
+
+	/* This syscall supports no flags for now */
+	if (flags)
+		return -EINVAL;
+
+	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
+		return -EINVAL;
+
+	if (timeout) {
+		int flag_clkid = 0, flag_init = 0;
+
+		if (clockid == CLOCK_REALTIME) {
+			flag_clkid = FLAGS_CLOCKRT;
+			flag_init = FUTEX_CLOCK_REALTIME;
+		}
+
+		if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
+			return -EINVAL;
+
+		if (get_timespec64(&ts, timeout))
+			return -EFAULT;
+
+		/*
+		 * Since there's no opcode for futex_waitv, use
+		 * FUTEX_WAIT_BITSET that uses absolute timeout as well
+		 */
+		ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
+		if (ret)
+			return ret;
+
+		futex_setup_timer(&time, &to, flag_clkid, 0);
+	}
+
+	futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
+	if (!futexv)
+		return -ENOMEM;
+
+	ret = futex_parse_waitv(futexv, waiters, nr_futexes);
+	if (!ret)
+		ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL);
+
+	if (timeout) {
+		hrtimer_cancel(&to.timer);
+		destroy_hrtimer_on_stack(&to.timer);
+	}
+
+	kfree(futexv);
+	return ret;
+}
+
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE2(set_robust_list,
 		struct compat_robust_list_head __user *, head,
diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c
index 36880784aa11..4ce0923f1ce3 100644
--- a/kernel/futex/waitwake.c
+++ b/kernel/futex/waitwake.c
@@ -357,6 +357,207 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q,
 	__set_current_state(TASK_RUNNING);
 }
 
+/**
+ * unqueue_multiple - Remove various futexes from their hash bucket
+ * @v:	   The list of futexes to unqueue
+ * @count: Number of futexes in the list
+ *
+ * Helper to unqueue a list of futexes. This can't fail.
+ *
+ * Return:
+ *  - >=0 - Index of the last futex that was awoken;
+ *  - -1  - No futex was awoken
+ */
+static int unqueue_multiple(struct futex_vector *v, int count)
+{
+	int ret = -1, i;
+
+	for (i = 0; i < count; i++) {
+		if (!futex_unqueue(&v[i].q))
+			ret = i;
+	}
+
+	return ret;
+}
+
+/**
+ * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes
+ * @vs:		The futex list to wait on
+ * @count:	The size of the list
+ * @woken:	Index of the last woken futex, if any. Used to notify the
+ *		caller that it can return this index to userspace (return parameter)
+ *
+ * Prepare multiple futexes in a single step and enqueue them. This may fail if
+ * the futex list is invalid or if any futex was already awoken. On success the
+ * task is ready to interruptible sleep.
+ *
+ * Return:
+ *  -  1 - One of the futexes was woken by another thread
+ *  -  0 - Success
+ *  - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL
+ */
+static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken)
+{
+	struct futex_hash_bucket *hb;
+	bool retry = false;
+	int ret, i;
+	u32 uval;
+
+	/*
+	 * Enqueuing multiple futexes is tricky, because we need to enqueue
+	 * each futex on the list before dealing with the next one to avoid
+	 * deadlocking on the hash bucket. But, before enqueuing, we need to
+	 * make sure that current->state is TASK_INTERRUPTIBLE, so we don't
+	 * lose any wake events, which cannot be done before the get_futex_key
+	 * of the next key, because it calls get_user_pages, which can sleep.
+	 * Thus, we fetch the list of futexes keys in two steps, by first
+	 * pinning all the memory keys in the futex key, and only then we read
+	 * each key and queue the corresponding futex.
+	 *
+	 * Private futexes doesn't need to recalculate hash in retry, so skip
+	 * get_futex_key() when retrying.
+	 */
+retry:
+	for (i = 0; i < count; i++) {
+		if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
+			continue;
+
+		ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
+				    !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
+				    &vs[i].q.key, FUTEX_READ);
+
+		if (unlikely(ret))
+			return ret;
+	}
+
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	for (i = 0; i < count; i++) {
+		u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
+		struct futex_q *q = &vs[i].q;
+		u32 val = (u32)vs[i].w.val;
+
+		hb = futex_q_lock(q);
+		ret = futex_get_value_locked(&uval, uaddr);
+
+		if (!ret && uval == val) {
+			/*
+			 * The bucket lock can't be held while dealing with the
+			 * next futex. Queue each futex at this moment so hb can
+			 * be unlocked.
+			 */
+			futex_queue(q, hb);
+			continue;
+		}
+
+		futex_q_unlock(hb);
+		__set_current_state(TASK_RUNNING);
+
+		/*
+		 * Even if something went wrong, if we find out that a futex
+		 * was woken, we don't return error and return this index to
+		 * userspace
+		 */
+		*woken = unqueue_multiple(vs, i);
+		if (*woken >= 0)
+			return 1;
+
+		if (ret) {
+			/*
+			 * If we need to handle a page fault, we need to do so
+			 * without any lock and any enqueued futex (otherwise
+			 * we could lose some wakeup). So we do it here, after
+			 * undoing all the work done so far. In success, we
+			 * retry all the work.
+			 */
+			if (get_user(uval, uaddr))
+				return -EFAULT;
+
+			retry = true;
+			goto retry;
+		}
+
+		if (uval != val)
+			return -EWOULDBLOCK;
+	}
+
+	return 0;
+}
+
+/**
+ * futex_sleep_multiple - Check sleeping conditions and sleep
+ * @vs:    List of futexes to wait for
+ * @count: Length of vs
+ * @to:    Timeout
+ *
+ * Sleep if and only if the timeout hasn't expired and no futex on the list has
+ * been woken up.
+ */
+static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count,
+				 struct hrtimer_sleeper *to)
+{
+	if (to && !to->task)
+		return;
+
+	for (; count; count--, vs++) {
+		if (!READ_ONCE(vs->q.lock_ptr))
+			return;
+	}
+
+	freezable_schedule();
+}
+
+/**
+ * futex_wait_multiple - Prepare to wait on and enqueue several futexes
+ * @vs:		The list of futexes to wait on
+ * @count:	The number of objects
+ * @to:		Timeout before giving up and returning to userspace
+ *
+ * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function
+ * sleeps on a group of futexes and returns on the first futex that is
+ * wake, or after the timeout has elapsed.
+ *
+ * Return:
+ *  - >=0 - Hint to the futex that was awoken
+ *  - <0  - On error
+ */
+int futex_wait_multiple(struct futex_vector *vs, unsigned int count,
+			struct hrtimer_sleeper *to)
+{
+	int ret, hint = 0;
+
+	if (to)
+		hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
+
+	while (1) {
+		ret = futex_wait_multiple_setup(vs, count, &hint);
+		if (ret) {
+			if (ret > 0) {
+				/* A futex was woken during setup */
+				ret = hint;
+			}
+			return ret;
+		}
+
+		futex_sleep_multiple(vs, count, to);
+
+		__set_current_state(TASK_RUNNING);
+
+		ret = unqueue_multiple(vs, count);
+		if (ret >= 0)
+			return ret;
+
+		if (to && !to->task)
+			return -ETIMEDOUT;
+		else if (signal_pending(current))
+			return -ERESTARTSYS;
+		/*
+		 * The final case is a spurious wakeup, for
+		 * which just retry.
+		 */
+	}
+}
+
 /**
  * futex_wait_setup() - Prepare to wait on a futex
  * @uaddr:	the futex userspace address
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 13ee8334ab6e..d1944258cfc0 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -150,6 +150,7 @@ COND_SYSCALL(set_robust_list);
 COND_SYSCALL_COMPAT(set_robust_list);
 COND_SYSCALL(get_robust_list);
 COND_SYSCALL_COMPAT(get_robust_list);
+COND_SYSCALL(futex_waitv);
 
 /* kernel/hrtimer.c */
 
-- 
cgit v1.2.3


From 9b3c4ab3045e953670c7de9c1165fae5358a7237 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 21 Sep 2021 21:54:32 +0200
Subject: sched,rcu: Rework try_invoke_on_locked_down_task()

Give try_invoke_on_locked_down_task() a saner name and have it return
an int so that the caller might distinguish between different reasons
of failure.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Tested-by: Vasily Gorbik <gor@linux.ibm.com> # on s390
Link: https://lkml.kernel.org/r/20210929152428.649944917@infradead.org
---
 include/linux/wait.h    |  3 ++-
 kernel/rcu/tasks.h      | 12 ++++++------
 kernel/rcu/tree_stall.h |  8 ++++----
 kernel/sched/core.c     |  6 +++---
 4 files changed, 15 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 93dab0e9580f..2d0df57c9902 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1160,6 +1160,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
 		(wait)->flags = 0;						\
 	} while (0)
 
-bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg);
+typedef int (*task_call_f)(struct task_struct *p, void *arg);
+extern int task_call_func(struct task_struct *p, task_call_f func, void *arg);
 
 #endif /* _LINUX_WAIT_H */
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 806160c44b17..171bc848e8e3 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -928,7 +928,7 @@ reset_ipi:
 }
 
 /* Callback function for scheduler to check locked-down task.  */
-static bool trc_inspect_reader(struct task_struct *t, void *arg)
+static int trc_inspect_reader(struct task_struct *t, void *arg)
 {
 	int cpu = task_cpu(t);
 	bool in_qs = false;
@@ -939,7 +939,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
 
 		// If no chance of heavyweight readers, do it the hard way.
 		if (!ofl && !IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB))
-			return false;
+			return -EINVAL;
 
 		// If heavyweight readers are enabled on the remote task,
 		// we can inspect its state despite its currently running.
@@ -947,7 +947,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
 		n_heavy_reader_attempts++;
 		if (!ofl && // Check for "running" idle tasks on offline CPUs.
 		    !rcu_dynticks_zero_in_eqs(cpu, &t->trc_reader_nesting))
-			return false; // No quiescent state, do it the hard way.
+			return -EINVAL; // No quiescent state, do it the hard way.
 		n_heavy_reader_updates++;
 		if (ofl)
 			n_heavy_reader_ofl_updates++;
@@ -962,7 +962,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
 	t->trc_reader_checked = true;
 
 	if (in_qs)
-		return true;  // Already in quiescent state, done!!!
+		return 0;  // Already in quiescent state, done!!!
 
 	// The task is in a read-side critical section, so set up its
 	// state so that it will awaken the grace-period kthread upon exit
@@ -970,7 +970,7 @@ static bool trc_inspect_reader(struct task_struct *t, void *arg)
 	atomic_inc(&trc_n_readers_need_end); // One more to wait on.
 	WARN_ON_ONCE(READ_ONCE(t->trc_reader_special.b.need_qs));
 	WRITE_ONCE(t->trc_reader_special.b.need_qs, true);
-	return true;
+	return 0;
 }
 
 /* Attempt to extract the state for the specified task. */
@@ -992,7 +992,7 @@ static void trc_wait_for_one_reader(struct task_struct *t,
 
 	// Attempt to nail down the task for inspection.
 	get_task_struct(t);
-	if (try_invoke_on_locked_down_task(t, trc_inspect_reader, NULL)) {
+	if (!task_call_func(t, trc_inspect_reader, NULL)) {
 		put_task_struct(t);
 		return;
 	}
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index 677ee3d8671b..5e2fa6fd97f1 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -240,16 +240,16 @@ struct rcu_stall_chk_rdr {
  * Report out the state of a not-running task that is stalling the
  * current RCU grace period.
  */
-static bool check_slow_task(struct task_struct *t, void *arg)
+static int check_slow_task(struct task_struct *t, void *arg)
 {
 	struct rcu_stall_chk_rdr *rscrp = arg;
 
 	if (task_curr(t))
-		return false; // It is running, so decline to inspect it.
+		return -EBUSY; // It is running, so decline to inspect it.
 	rscrp->nesting = t->rcu_read_lock_nesting;
 	rscrp->rs = t->rcu_read_unlock_special;
 	rscrp->on_blkd_list = !list_empty(&t->rcu_node_entry);
-	return true;
+	return 0;
 }
 
 /*
@@ -283,7 +283,7 @@ static int rcu_print_task_stall(struct rcu_node *rnp, unsigned long flags)
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	while (i) {
 		t = ts[--i];
-		if (!try_invoke_on_locked_down_task(t, check_slow_task, &rscr))
+		if (task_call_func(t, check_slow_task, &rscr))
 			pr_cont(" P%d", t->pid);
 		else
 			pr_cont(" P%d/%d:%c%c%c%c",
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8a9aeac3469f..74db3c3afa6d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4110,7 +4110,7 @@ out:
 }
 
 /**
- * try_invoke_on_locked_down_task - Invoke a function on task in fixed state
+ * task_call_func - Invoke a function on task in fixed state
  * @p: Process for which the function is to be invoked, can be @current.
  * @func: Function to invoke.
  * @arg: Argument to function.
@@ -4123,12 +4123,12 @@ out:
  * Returns:
  *   Whatever @func returns
  */
-bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg)
+int task_call_func(struct task_struct *p, task_call_f func, void *arg)
 {
 	struct rq *rq = NULL;
 	unsigned int state;
 	struct rq_flags rf;
-	bool ret = false;
+	int ret;
 
 	raw_spin_lock_irqsave(&p->pi_lock, rf.flags);
 
-- 
cgit v1.2.3


From e330fb14590c5c80f7195c3d8c9b4bcf79e1a5cd Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 6 Oct 2021 18:06:54 -0700
Subject: of: net: move of_net under net/

Rob suggests to move of_net.c from under drivers/of/ somewhere
to the networking code.

Suggested-by: Rob Herring <robh@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/Kconfig    |   2 +-
 drivers/net/ethernet/arc/Kconfig    |   4 +-
 drivers/net/ethernet/ezchip/Kconfig |   2 +-
 drivers/net/ethernet/litex/Kconfig  |   2 +-
 drivers/net/ethernet/mscc/Kconfig   |   2 +-
 drivers/of/Kconfig                  |   4 -
 drivers/of/Makefile                 |   1 -
 drivers/of/of_net.c                 | 145 ------------------------------------
 include/linux/of_net.h              |   2 +-
 net/core/Makefile                   |   1 +
 net/core/net-sysfs.c                |   2 +-
 net/core/of_net.c                   | 145 ++++++++++++++++++++++++++++++++++++
 12 files changed, 154 insertions(+), 158 deletions(-)
 delete mode 100644 drivers/of/of_net.c
 create mode 100644 net/core/of_net.c

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 4786f0504691..899c8a2a34b6 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -168,7 +168,7 @@ config SUNLANCE
 
 config AMD_XGBE
 	tristate "AMD 10GbE Ethernet driver"
-	depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM
+	depends on (OF_ADDRESS || ACPI || PCI) && HAS_IOMEM
 	depends on X86 || ARM64 || COMPILE_TEST
 	depends on PTP_1588_CLOCK_OPTIONAL
 	select BITREVERSE
diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig
index 37a41773dd43..840a9ce7ba1c 100644
--- a/drivers/net/ethernet/arc/Kconfig
+++ b/drivers/net/ethernet/arc/Kconfig
@@ -25,7 +25,7 @@ config ARC_EMAC_CORE
 config ARC_EMAC
 	tristate "ARC EMAC support"
 	select ARC_EMAC_CORE
-	depends on OF_IRQ && OF_NET
+	depends on OF_IRQ
 	depends on ARC || COMPILE_TEST
 	help
 	  On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x
@@ -35,7 +35,7 @@ config ARC_EMAC
 config EMAC_ROCKCHIP
 	tristate "Rockchip EMAC support"
 	select ARC_EMAC_CORE
-	depends on OF_IRQ && OF_NET && REGULATOR
+	depends on OF_IRQ && REGULATOR
 	depends on ARCH_ROCKCHIP || COMPILE_TEST
 	help
 	  Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers.
diff --git a/drivers/net/ethernet/ezchip/Kconfig b/drivers/net/ethernet/ezchip/Kconfig
index 38aa824efb25..9241b9b1c7a3 100644
--- a/drivers/net/ethernet/ezchip/Kconfig
+++ b/drivers/net/ethernet/ezchip/Kconfig
@@ -18,7 +18,7 @@ if NET_VENDOR_EZCHIP
 
 config EZCHIP_NPS_MANAGEMENT_ENET
 	tristate "EZchip NPS management enet support"
-	depends on OF_IRQ && OF_NET
+	depends on OF_IRQ
 	depends on HAS_IOMEM
 	help
 	  Simple LAN device for debug or management purposes.
diff --git a/drivers/net/ethernet/litex/Kconfig b/drivers/net/ethernet/litex/Kconfig
index 63bf01d28f0c..f99adbf26ab4 100644
--- a/drivers/net/ethernet/litex/Kconfig
+++ b/drivers/net/ethernet/litex/Kconfig
@@ -17,7 +17,7 @@ if NET_VENDOR_LITEX
 
 config LITEX_LITEETH
 	tristate "LiteX Ethernet support"
-	depends on OF_NET
+	depends on OF
 	help
 	  If you wish to compile a kernel for hardware with a LiteX LiteEth
 	  device then you should answer Y to this.
diff --git a/drivers/net/ethernet/mscc/Kconfig b/drivers/net/ethernet/mscc/Kconfig
index b6a73d151dec..8dd8c7f425d2 100644
--- a/drivers/net/ethernet/mscc/Kconfig
+++ b/drivers/net/ethernet/mscc/Kconfig
@@ -28,7 +28,7 @@ config MSCC_OCELOT_SWITCH
 	depends on BRIDGE || BRIDGE=n
 	depends on NET_SWITCHDEV
 	depends on HAS_IOMEM
-	depends on OF_NET
+	depends on OF
 	select MSCC_OCELOT_SWITCH_LIB
 	select GENERIC_PHY
 	help
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 3dfeae8912df..80b5fd44ab1c 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -70,10 +70,6 @@ config OF_IRQ
 	def_bool y
 	depends on !SPARC && IRQ_DOMAIN
 
-config OF_NET
-	depends on NETDEVICES
-	def_bool y
-
 config OF_RESERVED_MEM
 	def_bool OF_EARLY_FLATTREE
 
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index c13b982084a3..e0360a44306e 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -7,7 +7,6 @@ obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
 obj-$(CONFIG_OF_PROMTREE) += pdt.o
 obj-$(CONFIG_OF_ADDRESS)  += address.o
 obj-$(CONFIG_OF_IRQ)    += irq.o
-obj-$(CONFIG_OF_NET)	+= of_net.o
 obj-$(CONFIG_OF_UNITTEST) += unittest.o
 obj-$(CONFIG_OF_RESERVED_MEM) += of_reserved_mem.o
 obj-$(CONFIG_OF_RESOLVE)  += resolver.o
diff --git a/drivers/of/of_net.c b/drivers/of/of_net.c
deleted file mode 100644
index dbac3a172a11..000000000000
--- a/drivers/of/of_net.c
+++ /dev/null
@@ -1,145 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * OF helpers for network devices.
- *
- * Initially copied out of arch/powerpc/kernel/prom_parse.c
- */
-#include <linux/etherdevice.h>
-#include <linux/kernel.h>
-#include <linux/of_net.h>
-#include <linux/of_platform.h>
-#include <linux/phy.h>
-#include <linux/export.h>
-#include <linux/device.h>
-#include <linux/nvmem-consumer.h>
-
-/**
- * of_get_phy_mode - Get phy mode for given device_node
- * @np:	Pointer to the given device_node
- * @interface: Pointer to the result
- *
- * The function gets phy interface string from property 'phy-mode' or
- * 'phy-connection-type'. The index in phy_modes table is set in
- * interface and 0 returned. In case of error interface is set to
- * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
- */
-int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
-{
-	const char *pm;
-	int err, i;
-
-	*interface = PHY_INTERFACE_MODE_NA;
-
-	err = of_property_read_string(np, "phy-mode", &pm);
-	if (err < 0)
-		err = of_property_read_string(np, "phy-connection-type", &pm);
-	if (err < 0)
-		return err;
-
-	for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
-		if (!strcasecmp(pm, phy_modes(i))) {
-			*interface = i;
-			return 0;
-		}
-
-	return -ENODEV;
-}
-EXPORT_SYMBOL_GPL(of_get_phy_mode);
-
-static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
-{
-	struct property *pp = of_find_property(np, name, NULL);
-
-	if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
-		memcpy(addr, pp->value, ETH_ALEN);
-		return 0;
-	}
-	return -ENODEV;
-}
-
-static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
-{
-	struct platform_device *pdev = of_find_device_by_node(np);
-	struct nvmem_cell *cell;
-	const void *mac;
-	size_t len;
-	int ret;
-
-	/* Try lookup by device first, there might be a nvmem_cell_lookup
-	 * associated with a given device.
-	 */
-	if (pdev) {
-		ret = nvmem_get_mac_address(&pdev->dev, addr);
-		put_device(&pdev->dev);
-		return ret;
-	}
-
-	cell = of_nvmem_cell_get(np, "mac-address");
-	if (IS_ERR(cell))
-		return PTR_ERR(cell);
-
-	mac = nvmem_cell_read(cell, &len);
-	nvmem_cell_put(cell);
-
-	if (IS_ERR(mac))
-		return PTR_ERR(mac);
-
-	if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
-		kfree(mac);
-		return -EINVAL;
-	}
-
-	memcpy(addr, mac, ETH_ALEN);
-	kfree(mac);
-
-	return 0;
-}
-
-/**
- * of_get_mac_address()
- * @np:		Caller's Device Node
- * @addr:	Pointer to a six-byte array for the result
- *
- * Search the device tree for the best MAC address to use.  'mac-address' is
- * checked first, because that is supposed to contain to "most recent" MAC
- * address. If that isn't set, then 'local-mac-address' is checked next,
- * because that is the default address. If that isn't set, then the obsolete
- * 'address' is checked, just in case we're using an old device tree. If any
- * of the above isn't set, then try to get MAC address from nvmem cell named
- * 'mac-address'.
- *
- * Note that the 'address' property is supposed to contain a virtual address of
- * the register set, but some DTS files have redefined that property to be the
- * MAC address.
- *
- * All-zero MAC addresses are rejected, because those could be properties that
- * exist in the device tree, but were not set by U-Boot.  For example, the
- * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
- * addresses.  Some older U-Boots only initialized 'local-mac-address'.  In
- * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
- * but is all zeros.
- *
- * Return: 0 on success and errno in case of error.
-*/
-int of_get_mac_address(struct device_node *np, u8 *addr)
-{
-	int ret;
-
-	if (!np)
-		return -ENODEV;
-
-	ret = of_get_mac_addr(np, "mac-address", addr);
-	if (!ret)
-		return 0;
-
-	ret = of_get_mac_addr(np, "local-mac-address", addr);
-	if (!ret)
-		return 0;
-
-	ret = of_get_mac_addr(np, "address", addr);
-	if (!ret)
-		return 0;
-
-	return of_get_mac_addr_nvmem(np, addr);
-}
-EXPORT_SYMBOL(of_get_mac_address);
diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index daef3b0d9270..cf31188329b5 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -8,7 +8,7 @@
 
 #include <linux/phy.h>
 
-#ifdef CONFIG_OF_NET
+#ifdef CONFIG_OF
 #include <linux/of.h>
 
 struct net_device;
diff --git a/net/core/Makefile b/net/core/Makefile
index 35ced6201814..4268846f2f47 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -36,3 +36,4 @@ obj-$(CONFIG_FAILOVER) += failover.o
 obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_BPF_SYSCALL) += sock_map.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
+obj-$(CONFIG_OF)	+= of_net.o
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f6197774048b..ae001c2ca2af 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1869,7 +1869,7 @@ static struct class net_class __ro_after_init = {
 	.get_ownership = net_get_ownership,
 };
 
-#ifdef CONFIG_OF_NET
+#ifdef CONFIG_OF
 static int of_dev_node_match(struct device *dev, const void *data)
 {
 	for (; dev; dev = dev->parent) {
diff --git a/net/core/of_net.c b/net/core/of_net.c
new file mode 100644
index 000000000000..dbac3a172a11
--- /dev/null
+++ b/net/core/of_net.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * OF helpers for network devices.
+ *
+ * Initially copied out of arch/powerpc/kernel/prom_parse.c
+ */
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/phy.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/nvmem-consumer.h>
+
+/**
+ * of_get_phy_mode - Get phy mode for given device_node
+ * @np:	Pointer to the given device_node
+ * @interface: Pointer to the result
+ *
+ * The function gets phy interface string from property 'phy-mode' or
+ * 'phy-connection-type'. The index in phy_modes table is set in
+ * interface and 0 returned. In case of error interface is set to
+ * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV.
+ */
+int of_get_phy_mode(struct device_node *np, phy_interface_t *interface)
+{
+	const char *pm;
+	int err, i;
+
+	*interface = PHY_INTERFACE_MODE_NA;
+
+	err = of_property_read_string(np, "phy-mode", &pm);
+	if (err < 0)
+		err = of_property_read_string(np, "phy-connection-type", &pm);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++)
+		if (!strcasecmp(pm, phy_modes(i))) {
+			*interface = i;
+			return 0;
+		}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(of_get_phy_mode);
+
+static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr)
+{
+	struct property *pp = of_find_property(np, name, NULL);
+
+	if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) {
+		memcpy(addr, pp->value, ETH_ALEN);
+		return 0;
+	}
+	return -ENODEV;
+}
+
+static int of_get_mac_addr_nvmem(struct device_node *np, u8 *addr)
+{
+	struct platform_device *pdev = of_find_device_by_node(np);
+	struct nvmem_cell *cell;
+	const void *mac;
+	size_t len;
+	int ret;
+
+	/* Try lookup by device first, there might be a nvmem_cell_lookup
+	 * associated with a given device.
+	 */
+	if (pdev) {
+		ret = nvmem_get_mac_address(&pdev->dev, addr);
+		put_device(&pdev->dev);
+		return ret;
+	}
+
+	cell = of_nvmem_cell_get(np, "mac-address");
+	if (IS_ERR(cell))
+		return PTR_ERR(cell);
+
+	mac = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(mac))
+		return PTR_ERR(mac);
+
+	if (len != ETH_ALEN || !is_valid_ether_addr(mac)) {
+		kfree(mac);
+		return -EINVAL;
+	}
+
+	memcpy(addr, mac, ETH_ALEN);
+	kfree(mac);
+
+	return 0;
+}
+
+/**
+ * of_get_mac_address()
+ * @np:		Caller's Device Node
+ * @addr:	Pointer to a six-byte array for the result
+ *
+ * Search the device tree for the best MAC address to use.  'mac-address' is
+ * checked first, because that is supposed to contain to "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address. If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree. If any
+ * of the above isn't set, then try to get MAC address from nvmem cell named
+ * 'mac-address'.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the device tree, but were not set by U-Boot.  For example, the
+ * DTS could define 'mac-address' and 'local-mac-address', with zero MAC
+ * addresses.  Some older U-Boots only initialized 'local-mac-address'.  In
+ * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists
+ * but is all zeros.
+ *
+ * Return: 0 on success and errno in case of error.
+*/
+int of_get_mac_address(struct device_node *np, u8 *addr)
+{
+	int ret;
+
+	if (!np)
+		return -ENODEV;
+
+	ret = of_get_mac_addr(np, "mac-address", addr);
+	if (!ret)
+		return 0;
+
+	ret = of_get_mac_addr(np, "local-mac-address", addr);
+	if (!ret)
+		return 0;
+
+	ret = of_get_mac_addr(np, "address", addr);
+	if (!ret)
+		return 0;
+
+	return of_get_mac_addr_nvmem(np, addr);
+}
+EXPORT_SYMBOL(of_get_mac_address);
-- 
cgit v1.2.3


From d466effe282ddbab6acb6c3120c1de0ee1b86d57 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 6 Oct 2021 18:06:55 -0700
Subject: of: net: add a helper for loading netdev->dev_addr

Commit 406f42fa0d3c ("net-next: When a bond have a massive amount
of VLANs...") introduced a rbtree for faster Ethernet address look
up. To maintain netdev->dev_addr in this tree we need to make all
the writes to it got through appropriate helpers.

There are roughly 40 places where netdev->dev_addr is passed
as the destination to a of_get_mac_address() call. Add a helper
which takes a dev pointer instead, so it can call an appropriate
helper.

Note that of_get_mac_address() already assumes the address is
6 bytes long (ETH_ALEN) so use eth_hw_addr_set().

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/of_net.h |  6 ++++++
 net/core/of_net.c      | 25 +++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index cf31188329b5..0797e2edb8c2 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -14,6 +14,7 @@
 struct net_device;
 extern int of_get_phy_mode(struct device_node *np, phy_interface_t *interface);
 extern int of_get_mac_address(struct device_node *np, u8 *mac);
+int of_get_ethdev_address(struct device_node *np, struct net_device *dev);
 extern struct net_device *of_find_net_device_by_node(struct device_node *np);
 #else
 static inline int of_get_phy_mode(struct device_node *np,
@@ -27,6 +28,11 @@ static inline int of_get_mac_address(struct device_node *np, u8 *mac)
 	return -ENODEV;
 }
 
+static inline int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
+{
+	return -ENODEV;
+}
+
 static inline struct net_device *of_find_net_device_by_node(struct device_node *np)
 {
 	return NULL;
diff --git a/net/core/of_net.c b/net/core/of_net.c
index dbac3a172a11..f1a9bf7578e7 100644
--- a/net/core/of_net.c
+++ b/net/core/of_net.c
@@ -143,3 +143,28 @@ int of_get_mac_address(struct device_node *np, u8 *addr)
 	return of_get_mac_addr_nvmem(np, addr);
 }
 EXPORT_SYMBOL(of_get_mac_address);
+
+/**
+ * of_get_ethdev_address()
+ * @np:		Caller's Device Node
+ * @dev:	Pointer to netdevice which address will be updated
+ *
+ * Search the device tree for the best MAC address to use.
+ * If found set @dev->dev_addr to that address.
+ *
+ * See documentation of of_get_mac_address() for more information on how
+ * the best address is determined.
+ *
+ * Return: 0 on success and errno in case of error.
+ */
+int of_get_ethdev_address(struct device_node *np, struct net_device *dev)
+{
+	u8 addr[ETH_ALEN];
+	int ret;
+
+	ret = of_get_mac_address(np, addr);
+	if (!ret)
+		eth_hw_addr_set(dev, addr);
+	return ret;
+}
+EXPORT_SYMBOL(of_get_ethdev_address);
-- 
cgit v1.2.3


From 433baf0719d6a81d0587ea27545a120a3880abf6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 6 Oct 2021 18:06:57 -0700
Subject: device property: move mac addr helpers to eth.c

Move the mac address helpers out, eth.c already contains
a bunch of similar helpers.

Suggested-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/base/property.c     | 63 ---------------------------------------------
 include/linux/etherdevice.h |  6 +++++
 include/linux/property.h    |  4 ---
 net/ethernet/eth.c          | 63 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 69 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 453918eb7390..f1f35b48ab8b 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -15,7 +15,6 @@
 #include <linux/of_graph.h>
 #include <linux/of_irq.h>
 #include <linux/property.h>
-#include <linux/etherdevice.h>
 #include <linux/phy.h>
 
 struct fwnode_handle *dev_fwnode(struct device *dev)
@@ -935,68 +934,6 @@ int device_get_phy_mode(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(device_get_phy_mode);
 
-static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode,
-				 const char *name, char *addr,
-				 int alen)
-{
-	int ret = fwnode_property_read_u8_array(fwnode, name, addr, alen);
-
-	if (ret == 0 && alen == ETH_ALEN && is_valid_ether_addr(addr))
-		return addr;
-	return NULL;
-}
-
-/**
- * fwnode_get_mac_address - Get the MAC from the firmware node
- * @fwnode:	Pointer to the firmware node
- * @addr:	Address of buffer to store the MAC in
- * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
- *
- * Search the firmware node for the best MAC address to use.  'mac-address' is
- * checked first, because that is supposed to contain to "most recent" MAC
- * address. If that isn't set, then 'local-mac-address' is checked next,
- * because that is the default address.  If that isn't set, then the obsolete
- * 'address' is checked, just in case we're using an old device tree.
- *
- * Note that the 'address' property is supposed to contain a virtual address of
- * the register set, but some DTS files have redefined that property to be the
- * MAC address.
- *
- * All-zero MAC addresses are rejected, because those could be properties that
- * exist in the firmware tables, but were not updated by the firmware.  For
- * example, the DTS could define 'mac-address' and 'local-mac-address', with
- * zero MAC addresses.  Some older U-Boots only initialized 'local-mac-address'.
- * In this case, the real MAC is in 'local-mac-address', and 'mac-address'
- * exists but is all zeros.
-*/
-void *fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen)
-{
-	char *res;
-
-	res = fwnode_get_mac_addr(fwnode, "mac-address", addr, alen);
-	if (res)
-		return res;
-
-	res = fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen);
-	if (res)
-		return res;
-
-	return fwnode_get_mac_addr(fwnode, "address", addr, alen);
-}
-EXPORT_SYMBOL(fwnode_get_mac_address);
-
-/**
- * device_get_mac_address - Get the MAC for a given device
- * @dev:	Pointer to the device
- * @addr:	Address of buffer to store the MAC in
- * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
- */
-void *device_get_mac_address(struct device *dev, char *addr, int alen)
-{
-	return fwnode_get_mac_address(dev_fwnode(dev), addr, alen);
-}
-EXPORT_SYMBOL(device_get_mac_address);
-
 /**
  * fwnode_irq_get - Get IRQ directly from a fwnode
  * @fwnode:	Pointer to the firmware node
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index e7b2e5fd8d24..39f0758274ae 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -26,9 +26,15 @@
 
 #ifdef __KERNEL__
 struct device;
+struct fwnode_handle;
+
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
 unsigned char *arch_get_platform_mac_address(void);
 int nvmem_get_mac_address(struct device *dev, void *addrbuf);
+void *device_get_mac_address(struct device *dev, char *addr, int alen);
+void *fwnode_get_mac_address(struct fwnode_handle *fwnode,
+			     char *addr, int alen);
+
 u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
diff --git a/include/linux/property.h b/include/linux/property.h
index 357513a977e5..4fb081684255 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -389,11 +389,7 @@ const void *device_get_match_data(struct device *dev);
 
 int device_get_phy_mode(struct device *dev);
 
-void *device_get_mac_address(struct device *dev, char *addr, int alen);
-
 int fwnode_get_phy_mode(struct fwnode_handle *fwnode);
-void *fwnode_get_mac_address(struct fwnode_handle *fwnode,
-			     char *addr, int alen);
 struct fwnode_handle *fwnode_graph_get_next_endpoint(
 	const struct fwnode_handle *fwnode, struct fwnode_handle *prev);
 struct fwnode_handle *
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index b57530c231a6..9ea45aae04ee 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -51,6 +51,7 @@
 #include <linux/if_ether.h>
 #include <linux/of_net.h>
 #include <linux/pci.h>
+#include <linux/property.h>
 #include <net/dst.h>
 #include <net/arp.h>
 #include <net/sock.h>
@@ -558,3 +559,65 @@ int nvmem_get_mac_address(struct device *dev, void *addrbuf)
 	return 0;
 }
 EXPORT_SYMBOL(nvmem_get_mac_address);
+
+static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode,
+				 const char *name, char *addr,
+				 int alen)
+{
+	int ret = fwnode_property_read_u8_array(fwnode, name, addr, alen);
+
+	if (ret == 0 && alen == ETH_ALEN && is_valid_ether_addr(addr))
+		return addr;
+	return NULL;
+}
+
+/**
+ * fwnode_get_mac_address - Get the MAC from the firmware node
+ * @fwnode:	Pointer to the firmware node
+ * @addr:	Address of buffer to store the MAC in
+ * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
+ *
+ * Search the firmware node for the best MAC address to use.  'mac-address' is
+ * checked first, because that is supposed to contain to "most recent" MAC
+ * address. If that isn't set, then 'local-mac-address' is checked next,
+ * because that is the default address.  If that isn't set, then the obsolete
+ * 'address' is checked, just in case we're using an old device tree.
+ *
+ * Note that the 'address' property is supposed to contain a virtual address of
+ * the register set, but some DTS files have redefined that property to be the
+ * MAC address.
+ *
+ * All-zero MAC addresses are rejected, because those could be properties that
+ * exist in the firmware tables, but were not updated by the firmware.  For
+ * example, the DTS could define 'mac-address' and 'local-mac-address', with
+ * zero MAC addresses.  Some older U-Boots only initialized 'local-mac-address'.
+ * In this case, the real MAC is in 'local-mac-address', and 'mac-address'
+ * exists but is all zeros.
+ */
+void *fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen)
+{
+	char *res;
+
+	res = fwnode_get_mac_addr(fwnode, "mac-address", addr, alen);
+	if (res)
+		return res;
+
+	res = fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen);
+	if (res)
+		return res;
+
+	return fwnode_get_mac_addr(fwnode, "address", addr, alen);
+}
+EXPORT_SYMBOL(fwnode_get_mac_address);
+
+/**
+ * device_get_mac_address - Get the MAC for a given device
+ * @dev:	Pointer to the device
+ * @addr:	Address of buffer to store the MAC in
+ * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
+ */
+void *device_get_mac_address(struct device *dev, char *addr, int alen)
+{
+	return fwnode_get_mac_address(dev_fwnode(dev), addr, alen);
+}
+EXPORT_SYMBOL(device_get_mac_address);
-- 
cgit v1.2.3


From 8017c4d8173cfe086420dc5710d631cabd03ef67 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 6 Oct 2021 18:06:58 -0700
Subject: eth: fwnode: change the return type of mac address helpers

fwnode_get_mac_address() and device_get_mac_address()
return a pointer to the buffer that was passed to them
on success or NULL on failure. None of the callers
care about the actual value, only if it's NULL or not.

These semantics differ from of_get_mac_address() which
returns an int so to avoid confusion make the device
helpers return an errno.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apm/xgene-v2/main.c          |  2 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_main.c  |  2 +-
 drivers/net/ethernet/broadcom/genet/bcmgenet.c    |  2 +-
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c |  6 ++--
 drivers/net/ethernet/faraday/ftgmac100.c          |  4 +--
 drivers/net/ethernet/hisilicon/hns/hns_enet.c     |  2 +-
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c   |  2 +-
 drivers/net/ethernet/microchip/enc28j60.c         |  2 +-
 drivers/net/ethernet/qualcomm/emac/emac.c         |  2 +-
 drivers/net/ethernet/socionext/netsec.c           | 10 +++---
 include/linux/etherdevice.h                       |  5 ++-
 net/ethernet/eth.c                                | 39 ++++++++++++-----------
 12 files changed, 38 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
index 80399c8980bd..c7253ecc0fa5 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -36,7 +36,7 @@ static int xge_get_resources(struct xge_pdata *pdata)
 		return -ENOMEM;
 	}
 
-	if (!device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
+	if (device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
 		eth_hw_addr_random(ndev);
 
 	memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 5f1fc6582d74..268e099aa5e1 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -1731,7 +1731,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 		xgene_get_port_id_acpi(dev, pdata);
 #endif
 
-	if (!device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
+	if (device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
 		eth_hw_addr_random(ndev);
 
 	memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 02fe98cbabb0..30c5dcaea802 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4084,7 +4084,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	if (pd && !IS_ERR_OR_NULL(pd->mac_address))
 		eth_hw_addr_set(dev, pd->mac_address);
 	else
-		if (!device_get_mac_address(&pdev->dev, dev->dev_addr, ETH_ALEN))
+		if (device_get_mac_address(&pdev->dev, dev->dev_addr, ETH_ALEN))
 			if (has_acpi_companion(&pdev->dev))
 				bcmgenet_get_hw_addr(priv, dev->dev_addr);
 
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index db66d4beb28a..77ce81633cdc 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1387,10 +1387,10 @@ static int acpi_get_mac_address(struct device *dev, struct acpi_device *adev,
 				u8 *dst)
 {
 	u8 mac[ETH_ALEN];
-	u8 *addr;
+	int ret;
 
-	addr = fwnode_get_mac_address(acpi_fwnode_handle(adev), mac, ETH_ALEN);
-	if (!addr) {
+	ret = fwnode_get_mac_address(acpi_fwnode_handle(adev), mac, ETH_ALEN);
+	if (ret) {
 		dev_err(dev, "MAC address invalid: %pM\n", mac);
 		return -EINVAL;
 	}
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index ab9267225573..8de9c99a18fb 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -182,10 +182,8 @@ static void ftgmac100_initial_mac(struct ftgmac100 *priv)
 	u8 mac[ETH_ALEN];
 	unsigned int m;
 	unsigned int l;
-	void *addr;
 
-	addr = device_get_mac_address(priv->dev, mac, ETH_ALEN);
-	if (addr) {
+	if (!device_get_mac_address(priv->dev, mac, ETH_ALEN)) {
 		eth_hw_addr_set(priv->netdev, mac);
 		dev_info(priv->dev, "Read MAC address %pM from device tree\n",
 			 mac);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 2c4801e49aa1..12b916399ba7 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1212,7 +1212,7 @@ static void hns_init_mac_addr(struct net_device *ndev)
 {
 	struct hns_nic_priv *priv = netdev_priv(ndev);
 
-	if (!device_get_mac_address(priv->dev, ndev->dev_addr, ETH_ALEN)) {
+	if (device_get_mac_address(priv->dev, ndev->dev_addr, ETH_ALEN)) {
 		eth_hw_addr_random(ndev);
 		dev_warn(priv->dev, "No valid mac, use random mac %pM",
 			 ndev->dev_addr);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 3197526455d9..b84f8b6fe9f4 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6081,7 +6081,7 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
 	char hw_mac_addr[ETH_ALEN] = {0};
 	char fw_mac_addr[ETH_ALEN];
 
-	if (fwnode_get_mac_address(fwnode, fw_mac_addr, ETH_ALEN)) {
+	if (!fwnode_get_mac_address(fwnode, fw_mac_addr, ETH_ALEN)) {
 		*mac_from = "firmware node";
 		eth_hw_addr_set(dev, fw_mac_addr);
 		return;
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index bf77e8adffbf..fa62311d326a 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1572,7 +1572,7 @@ static int enc28j60_probe(struct spi_device *spi)
 		goto error_irq;
 	}
 
-	if (device_get_mac_address(&spi->dev, macaddr, sizeof(macaddr)))
+	if (!device_get_mac_address(&spi->dev, macaddr, sizeof(macaddr)))
 		eth_hw_addr_set(dev, macaddr);
 	else
 		eth_hw_addr_random(dev);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index fbfabfc5cc51..2e913508fbeb 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -549,7 +549,7 @@ static int emac_probe_resources(struct platform_device *pdev,
 	int ret = 0;
 
 	/* get mac address */
-	if (device_get_mac_address(&pdev->dev, maddr, ETH_ALEN))
+	if (!device_get_mac_address(&pdev->dev, maddr, ETH_ALEN))
 		eth_hw_addr_set(netdev, maddr);
 	else
 		eth_hw_addr_random(netdev);
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index c7e56dc0a494..f8dd7fa5f632 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1978,10 +1978,10 @@ static int netsec_register_mdio(struct netsec_priv *priv, u32 phy_addr)
 static int netsec_probe(struct platform_device *pdev)
 {
 	struct resource *mmio_res, *eeprom_res, *irq_res;
-	u8 *mac, macbuf[ETH_ALEN];
 	struct netsec_priv *priv;
 	u32 hw_ver, phy_addr = 0;
 	struct net_device *ndev;
+	u8 macbuf[ETH_ALEN];
 	int ret;
 
 	mmio_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2034,12 +2034,12 @@ static int netsec_probe(struct platform_device *pdev)
 		goto free_ndev;
 	}
 
-	mac = device_get_mac_address(&pdev->dev, macbuf, sizeof(macbuf));
-	if (mac)
-		eth_hw_addr_set(ndev, mac);
+	ret = device_get_mac_address(&pdev->dev, macbuf, sizeof(macbuf));
+	if (!ret)
+		eth_hw_addr_set(ndev, macbuf);
 
 	if (priv->eeprom_base &&
-	    (!mac || !is_valid_ether_addr(ndev->dev_addr))) {
+	    (ret || !is_valid_ether_addr(ndev->dev_addr))) {
 		void __iomem *macp = priv->eeprom_base +
 					NETSEC_EEPROM_MAC_ADDRESS;
 
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 39f0758274ae..8299f1cd9175 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -31,9 +31,8 @@ struct fwnode_handle;
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
 unsigned char *arch_get_platform_mac_address(void);
 int nvmem_get_mac_address(struct device *dev, void *addrbuf);
-void *device_get_mac_address(struct device *dev, char *addr, int alen);
-void *fwnode_get_mac_address(struct fwnode_handle *fwnode,
-			     char *addr, int alen);
+int device_get_mac_address(struct device *dev, char *addr, int alen);
+int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen);
 
 u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 9ea45aae04ee..70692f5b514c 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -560,15 +560,21 @@ int nvmem_get_mac_address(struct device *dev, void *addrbuf)
 }
 EXPORT_SYMBOL(nvmem_get_mac_address);
 
-static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode,
-				 const char *name, char *addr,
-				 int alen)
+static int fwnode_get_mac_addr(struct fwnode_handle *fwnode,
+			       const char *name, char *addr, int alen)
 {
-	int ret = fwnode_property_read_u8_array(fwnode, name, addr, alen);
+	int ret;
 
-	if (ret == 0 && alen == ETH_ALEN && is_valid_ether_addr(addr))
-		return addr;
-	return NULL;
+	if (alen != ETH_ALEN)
+		return -EINVAL;
+
+	ret = fwnode_property_read_u8_array(fwnode, name, addr, alen);
+	if (ret)
+		return ret;
+
+	if (!is_valid_ether_addr(addr))
+		return -EINVAL;
+	return 0;
 }
 
 /**
@@ -594,19 +600,14 @@ static void *fwnode_get_mac_addr(struct fwnode_handle *fwnode,
  * In this case, the real MAC is in 'local-mac-address', and 'mac-address'
  * exists but is all zeros.
  */
-void *fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen)
+int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen)
 {
-	char *res;
-
-	res = fwnode_get_mac_addr(fwnode, "mac-address", addr, alen);
-	if (res)
-		return res;
-
-	res = fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen);
-	if (res)
-		return res;
+	if (!fwnode_get_mac_addr(fwnode, "mac-address", addr, alen) ||
+	    !fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen) ||
+	    !fwnode_get_mac_addr(fwnode, "address", addr, alen))
+		return 0;
 
-	return fwnode_get_mac_addr(fwnode, "address", addr, alen);
+	return -ENOENT;
 }
 EXPORT_SYMBOL(fwnode_get_mac_address);
 
@@ -616,7 +617,7 @@ EXPORT_SYMBOL(fwnode_get_mac_address);
  * @addr:	Address of buffer to store the MAC in
  * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
  */
-void *device_get_mac_address(struct device *dev, char *addr, int alen)
+int device_get_mac_address(struct device *dev, char *addr, int alen)
 {
 	return fwnode_get_mac_address(dev_fwnode(dev), addr, alen);
 }
-- 
cgit v1.2.3


From 0a14501ed818ff51eed237bbe5009d0d784e4450 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 6 Oct 2021 18:06:59 -0700
Subject: eth: fwnode: remove the addr len from mac helpers

All callers pass in ETH_ALEN and the function itself
will return -EINVAL for any other address length.
Just assume it's ETH_ALEN like all other mac address
helpers (nvm, of, platform).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apm/xgene-v2/main.c          |  2 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_main.c  |  2 +-
 drivers/net/ethernet/broadcom/genet/bcmgenet.c    |  2 +-
 drivers/net/ethernet/cavium/thunder/thunder_bgx.c |  2 +-
 drivers/net/ethernet/faraday/ftgmac100.c          |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_enet.c     |  2 +-
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c   |  2 +-
 drivers/net/ethernet/microchip/enc28j60.c         |  2 +-
 drivers/net/ethernet/qualcomm/emac/emac.c         |  2 +-
 drivers/net/ethernet/smsc/smsc911x.c              |  2 +-
 drivers/net/ethernet/socionext/netsec.c           |  2 +-
 drivers/net/wireless/ath/ath10k/core.c            |  2 +-
 include/linux/etherdevice.h                       |  4 ++--
 net/ethernet/eth.c                                | 21 ++++++++-------------
 14 files changed, 22 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
index c7253ecc0fa5..d1ebd153b7a8 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -36,7 +36,7 @@ static int xge_get_resources(struct xge_pdata *pdata)
 		return -ENOMEM;
 	}
 
-	if (device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
+	if (device_get_mac_address(dev, ndev->dev_addr))
 		eth_hw_addr_random(ndev);
 
 	memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 268e099aa5e1..4a5bf13ffae2 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -1731,7 +1731,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 		xgene_get_port_id_acpi(dev, pdata);
 #endif
 
-	if (device_get_mac_address(dev, ndev->dev_addr, ETH_ALEN))
+	if (device_get_mac_address(dev, ndev->dev_addr))
 		eth_hw_addr_random(ndev);
 
 	memcpy(ndev->perm_addr, ndev->dev_addr, ndev->addr_len);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 30c5dcaea802..e61b687d33ba 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -4084,7 +4084,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	if (pd && !IS_ERR_OR_NULL(pd->mac_address))
 		eth_hw_addr_set(dev, pd->mac_address);
 	else
-		if (device_get_mac_address(&pdev->dev, dev->dev_addr, ETH_ALEN))
+		if (device_get_mac_address(&pdev->dev, dev->dev_addr))
 			if (has_acpi_companion(&pdev->dev))
 				bcmgenet_get_hw_addr(priv, dev->dev_addr);
 
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 77ce81633cdc..574a32f23f96 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -1389,7 +1389,7 @@ static int acpi_get_mac_address(struct device *dev, struct acpi_device *adev,
 	u8 mac[ETH_ALEN];
 	int ret;
 
-	ret = fwnode_get_mac_address(acpi_fwnode_handle(adev), mac, ETH_ALEN);
+	ret = fwnode_get_mac_address(acpi_fwnode_handle(adev), mac);
 	if (ret) {
 		dev_err(dev, "MAC address invalid: %pM\n", mac);
 		return -EINVAL;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 8de9c99a18fb..86c2986395de 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -183,7 +183,7 @@ static void ftgmac100_initial_mac(struct ftgmac100 *priv)
 	unsigned int m;
 	unsigned int l;
 
-	if (!device_get_mac_address(priv->dev, mac, ETH_ALEN)) {
+	if (!device_get_mac_address(priv->dev, mac)) {
 		eth_hw_addr_set(priv->netdev, mac);
 		dev_info(priv->dev, "Read MAC address %pM from device tree\n",
 			 mac);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 12b916399ba7..1195f64fb161 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1212,7 +1212,7 @@ static void hns_init_mac_addr(struct net_device *ndev)
 {
 	struct hns_nic_priv *priv = netdev_priv(ndev);
 
-	if (device_get_mac_address(priv->dev, ndev->dev_addr, ETH_ALEN)) {
+	if (device_get_mac_address(priv->dev, ndev->dev_addr)) {
 		eth_hw_addr_random(ndev);
 		dev_warn(priv->dev, "No valid mac, use random mac %pM",
 			 ndev->dev_addr);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index b84f8b6fe9f4..ad3be55cce68 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -6081,7 +6081,7 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
 	char hw_mac_addr[ETH_ALEN] = {0};
 	char fw_mac_addr[ETH_ALEN];
 
-	if (!fwnode_get_mac_address(fwnode, fw_mac_addr, ETH_ALEN)) {
+	if (!fwnode_get_mac_address(fwnode, fw_mac_addr)) {
 		*mac_from = "firmware node";
 		eth_hw_addr_set(dev, fw_mac_addr);
 		return;
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index fa62311d326a..cca8aa70cfc9 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1572,7 +1572,7 @@ static int enc28j60_probe(struct spi_device *spi)
 		goto error_irq;
 	}
 
-	if (!device_get_mac_address(&spi->dev, macaddr, sizeof(macaddr)))
+	if (!device_get_mac_address(&spi->dev, macaddr))
 		eth_hw_addr_set(dev, macaddr);
 	else
 		eth_hw_addr_random(dev);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 2e913508fbeb..b1b324f45fe7 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -549,7 +549,7 @@ static int emac_probe_resources(struct platform_device *pdev,
 	int ret = 0;
 
 	/* get mac address */
-	if (!device_get_mac_address(&pdev->dev, maddr, ETH_ALEN))
+	if (!device_get_mac_address(&pdev->dev, maddr))
 		eth_hw_addr_set(netdev, maddr);
 	else
 		eth_hw_addr_random(netdev);
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index d47308ace075..fa387510c189 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -2375,7 +2375,7 @@ static int smsc911x_probe_config(struct smsc911x_platform_config *config,
 		phy_interface = PHY_INTERFACE_MODE_NA;
 	config->phy_interface = phy_interface;
 
-	device_get_mac_address(dev, config->mac, ETH_ALEN);
+	device_get_mac_address(dev, config->mac);
 
 	err = device_property_read_u32(dev, "reg-io-width", &width);
 	if (err == -ENXIO)
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index f8dd7fa5f632..7e3dd07ac94e 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -2034,7 +2034,7 @@ static int netsec_probe(struct platform_device *pdev)
 		goto free_ndev;
 	}
 
-	ret = device_get_mac_address(&pdev->dev, macbuf, sizeof(macbuf));
+	ret = device_get_mac_address(&pdev->dev, macbuf);
 	if (!ret)
 		eth_hw_addr_set(ndev, macbuf);
 
diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 2f9be182fbfb..c21e05549f61 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -3224,7 +3224,7 @@ static int ath10k_core_probe_fw(struct ath10k *ar)
 		ath10k_debug_print_board_info(ar);
 	}
 
-	device_get_mac_address(ar->dev, ar->mac_addr, sizeof(ar->mac_addr));
+	device_get_mac_address(ar->dev, ar->mac_addr);
 
 	ret = ath10k_core_init_firmware_features(ar);
 	if (ret) {
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 8299f1cd9175..bb612c7382e3 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -31,8 +31,8 @@ struct fwnode_handle;
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
 unsigned char *arch_get_platform_mac_address(void);
 int nvmem_get_mac_address(struct device *dev, void *addrbuf);
-int device_get_mac_address(struct device *dev, char *addr, int alen);
-int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen);
+int device_get_mac_address(struct device *dev, char *addr);
+int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr);
 
 u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 70692f5b514c..29447a61d3ec 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -561,14 +561,11 @@ int nvmem_get_mac_address(struct device *dev, void *addrbuf)
 EXPORT_SYMBOL(nvmem_get_mac_address);
 
 static int fwnode_get_mac_addr(struct fwnode_handle *fwnode,
-			       const char *name, char *addr, int alen)
+			       const char *name, char *addr)
 {
 	int ret;
 
-	if (alen != ETH_ALEN)
-		return -EINVAL;
-
-	ret = fwnode_property_read_u8_array(fwnode, name, addr, alen);
+	ret = fwnode_property_read_u8_array(fwnode, name, addr, ETH_ALEN);
 	if (ret)
 		return ret;
 
@@ -581,7 +578,6 @@ static int fwnode_get_mac_addr(struct fwnode_handle *fwnode,
  * fwnode_get_mac_address - Get the MAC from the firmware node
  * @fwnode:	Pointer to the firmware node
  * @addr:	Address of buffer to store the MAC in
- * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
  *
  * Search the firmware node for the best MAC address to use.  'mac-address' is
  * checked first, because that is supposed to contain to "most recent" MAC
@@ -600,11 +596,11 @@ static int fwnode_get_mac_addr(struct fwnode_handle *fwnode,
  * In this case, the real MAC is in 'local-mac-address', and 'mac-address'
  * exists but is all zeros.
  */
-int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr, int alen)
+int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr)
 {
-	if (!fwnode_get_mac_addr(fwnode, "mac-address", addr, alen) ||
-	    !fwnode_get_mac_addr(fwnode, "local-mac-address", addr, alen) ||
-	    !fwnode_get_mac_addr(fwnode, "address", addr, alen))
+	if (!fwnode_get_mac_addr(fwnode, "mac-address", addr) ||
+	    !fwnode_get_mac_addr(fwnode, "local-mac-address", addr) ||
+	    !fwnode_get_mac_addr(fwnode, "address", addr))
 		return 0;
 
 	return -ENOENT;
@@ -615,10 +611,9 @@ EXPORT_SYMBOL(fwnode_get_mac_address);
  * device_get_mac_address - Get the MAC for a given device
  * @dev:	Pointer to the device
  * @addr:	Address of buffer to store the MAC in
- * @alen:	Length of the buffer pointed to by addr, should be ETH_ALEN
  */
-int device_get_mac_address(struct device *dev, char *addr, int alen)
+int device_get_mac_address(struct device *dev, char *addr)
 {
-	return fwnode_get_mac_address(dev_fwnode(dev), addr, alen);
+	return fwnode_get_mac_address(dev_fwnode(dev), addr);
 }
 EXPORT_SYMBOL(device_get_mac_address);
-- 
cgit v1.2.3


From d9eb44904e87c8ad1da0240849dbab638bacb799 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 6 Oct 2021 18:07:00 -0700
Subject: eth: fwnode: add a helper for loading netdev->dev_addr

Commit 406f42fa0d3c ("net-next: When a bond have a massive amount
of VLANs...") introduced a rbtree for faster Ethernet address look
up. To maintain netdev->dev_addr in this tree we need to make all
the writes to it got through appropriate helpers.

There is a handful of drivers which pass netdev->dev_addr as
the destination buffer to device_get_mac_address(). Add a helper
which takes a dev pointer instead, so it can call an appropriate
helper.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  1 +
 include/linux/property.h    |  1 +
 net/ethernet/eth.c          | 20 ++++++++++++++++++++
 3 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index bb612c7382e3..a8bb64cf4079 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -32,6 +32,7 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
 unsigned char *arch_get_platform_mac_address(void);
 int nvmem_get_mac_address(struct device *dev, void *addrbuf);
 int device_get_mac_address(struct device *dev, char *addr);
+int device_get_ethdev_address(struct device *dev, struct net_device *netdev);
 int fwnode_get_mac_address(struct fwnode_handle *fwnode, char *addr);
 
 u32 eth_get_headlen(const struct net_device *dev, const void *data, u32 len);
diff --git a/include/linux/property.h b/include/linux/property.h
index 4fb081684255..88fa726a76df 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 
 struct device;
+struct net_device;
 
 enum dev_prop_type {
 	DEV_PROP_U8,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 29447a61d3ec..d7b8fa10fabb 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -617,3 +617,23 @@ int device_get_mac_address(struct device *dev, char *addr)
 	return fwnode_get_mac_address(dev_fwnode(dev), addr);
 }
 EXPORT_SYMBOL(device_get_mac_address);
+
+/**
+ * device_get_ethdev_address - Set netdev's MAC address from a given device
+ * @dev:	Pointer to the device
+ * @netdev:	Pointer to netdev to write the address to
+ *
+ * Wrapper around device_get_mac_address() which writes the address
+ * directly to netdev->dev_addr.
+ */
+int device_get_ethdev_address(struct device *dev, struct net_device *netdev)
+{
+	u8 addr[ETH_ALEN];
+	int ret;
+
+	ret = device_get_mac_address(dev, addr);
+	if (!ret)
+		eth_hw_addr_set(netdev, addr);
+	return ret;
+}
+EXPORT_SYMBOL(device_get_ethdev_address);
-- 
cgit v1.2.3


From bdc7ca008e1f5539e891187032cb2cbbc3decb5e Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 7 Oct 2021 14:14:13 +0200
Subject: spi: Remove unused function spi_busnum_to_master()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The last user is gone since commit

	2962db71c703 ("staging/fbtft: Remove fbtft_device")

in 2019.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20211007121415.2401638-3-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 Documentation/spi/spi-summary.rst |  8 --------
 drivers/spi/spi.c                 | 35 -----------------------------------
 include/linux/spi/spi.h           |  2 --
 3 files changed, 45 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/spi/spi-summary.rst b/Documentation/spi/spi-summary.rst
index d4239025461d..aab5d07cb3d7 100644
--- a/Documentation/spi/spi-summary.rst
+++ b/Documentation/spi/spi-summary.rst
@@ -336,14 +336,6 @@ certainly includes SPI devices hooked up through the card connectors!
 Non-static Configurations
 ^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Developer boards often play by different rules than product boards, and one
-example is the potential need to hotplug SPI devices and/or controllers.
-
-For those cases you might need to use spi_busnum_to_master() to look
-up the spi bus master, and will likely need spi_new_device() to provide the
-board info based on the board that was hotplugged.  Of course, you'd later
-call at least spi_unregister_device() when that board is removed.
-
 When Linux includes support for MMC/SD/SDIO/DataFlash cards through SPI, those
 configurations will also be dynamic.  Fortunately, such devices all support
 basic device identification probes, so they should hotplug normally.
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ff4254dc64af..cc4ac42aa93d 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3033,41 +3033,6 @@ int spi_controller_resume(struct spi_controller *ctlr)
 }
 EXPORT_SYMBOL_GPL(spi_controller_resume);
 
-static int __spi_controller_match(struct device *dev, const void *data)
-{
-	struct spi_controller *ctlr;
-	const u16 *bus_num = data;
-
-	ctlr = container_of(dev, struct spi_controller, dev);
-	return ctlr->bus_num == *bus_num;
-}
-
-/**
- * spi_busnum_to_master - look up master associated with bus_num
- * @bus_num: the master's bus number
- * Context: can sleep
- *
- * This call may be used with devices that are registered after
- * arch init time.  It returns a refcounted pointer to the relevant
- * spi_controller (which the caller must release), or NULL if there is
- * no such master registered.
- *
- * Return: the SPI master structure on success, else NULL.
- */
-struct spi_controller *spi_busnum_to_master(u16 bus_num)
-{
-	struct device		*dev;
-	struct spi_controller	*ctlr = NULL;
-
-	dev = class_find_device(&spi_master_class, NULL, &bus_num,
-				__spi_controller_match);
-	if (dev)
-		ctlr = container_of(dev, struct spi_controller, dev);
-	/* reference got in class_find_device */
-	return ctlr;
-}
-EXPORT_SYMBOL_GPL(spi_busnum_to_master);
-
 /*-------------------------------------------------------------------------*/
 
 /* Core methods for SPI resource management */
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 8371bca13729..f8e322a46616 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -760,8 +760,6 @@ extern int devm_spi_register_controller(struct device *dev,
 					struct spi_controller *ctlr);
 extern void spi_unregister_controller(struct spi_controller *ctlr);
 
-extern struct spi_controller *spi_busnum_to_master(u16 busnum);
-
 /*
  * SPI resource management while processing a SPI message
  */
-- 
cgit v1.2.3


From da21fde0fdb393c2fbe0ae0735cc826cd55fd46f Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 7 Oct 2021 14:14:15 +0200
Subject: spi: Make several public functions private to spi.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All these functions have no callers apart from drivers/spi/spi.c. So
drop their declarations in include/linux/spi/spi.h and don't export
them.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20211007121415.2401638-5-u.kleine-koenig@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 41 +++++++++++++-------------------------
 include/linux/spi/spi.h | 53 -------------------------------------------------
 2 files changed, 14 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 397643104576..50591de16e0f 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -285,9 +285,9 @@ static const struct attribute_group *spi_master_groups[] = {
 	NULL,
 };
 
-void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
-				       struct spi_transfer *xfer,
-				       struct spi_controller *ctlr)
+static void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
+					      struct spi_transfer *xfer,
+					      struct spi_controller *ctlr)
 {
 	unsigned long flags;
 	int l2len = min(fls(xfer->len), SPI_STATISTICS_HISTO_SIZE) - 1;
@@ -310,7 +310,6 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
 
 	spin_unlock_irqrestore(&stats->lock, flags);
 }
-EXPORT_SYMBOL_GPL(spi_statistics_add_transfer_stats);
 
 /* modalias support makes "modprobe $MODALIAS" new-style hotplug work,
  * and the sysfs version makes coldplug work too.
@@ -501,7 +500,7 @@ static DEFINE_MUTEX(spi_add_lock);
  *
  * Return: a pointer to the new device, or NULL.
  */
-struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
+static struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 {
 	struct spi_device	*spi;
 
@@ -526,7 +525,6 @@ struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
 	device_initialize(&spi->dev);
 	return spi;
 }
-EXPORT_SYMBOL_GPL(spi_alloc_device);
 
 static void spi_dev_set_name(struct spi_device *spi)
 {
@@ -621,7 +619,7 @@ static int __spi_add_device(struct spi_device *spi)
  *
  * Return: 0 on success; negative errno on failure
  */
-int spi_add_device(struct spi_device *spi)
+static int spi_add_device(struct spi_device *spi)
 {
 	struct spi_controller *ctlr = spi->controller;
 	struct device *dev = ctlr->dev.parent;
@@ -642,7 +640,6 @@ int spi_add_device(struct spi_device *spi)
 	mutex_unlock(&spi_add_lock);
 	return status;
 }
-EXPORT_SYMBOL_GPL(spi_add_device);
 
 static int spi_add_device_locked(struct spi_device *spi)
 {
@@ -833,9 +830,8 @@ int spi_register_board_info(struct spi_board_info const *info, unsigned n)
  * This may get enhanced in the future to allocate from a memory pool
  * of the @spi_device or @spi_controller to avoid repeated allocations.
  */
-void *spi_res_alloc(struct spi_device *spi,
-		    spi_res_release_t release,
-		    size_t size, gfp_t gfp)
+static void *spi_res_alloc(struct spi_device *spi, spi_res_release_t release,
+			   size_t size, gfp_t gfp)
 {
 	struct spi_res *sres;
 
@@ -848,14 +844,13 @@ void *spi_res_alloc(struct spi_device *spi,
 
 	return sres->data;
 }
-EXPORT_SYMBOL_GPL(spi_res_alloc);
 
 /**
  * spi_res_free - free an spi resource
  * @res: pointer to the custom data of a resource
  *
  */
-void spi_res_free(void *res)
+static void spi_res_free(void *res)
 {
 	struct spi_res *sres = container_of(res, struct spi_res, data);
 
@@ -865,28 +860,26 @@ void spi_res_free(void *res)
 	WARN_ON(!list_empty(&sres->entry));
 	kfree(sres);
 }
-EXPORT_SYMBOL_GPL(spi_res_free);
 
 /**
  * spi_res_add - add a spi_res to the spi_message
  * @message: the spi message
  * @res:     the spi_resource
  */
-void spi_res_add(struct spi_message *message, void *res)
+static void spi_res_add(struct spi_message *message, void *res)
 {
 	struct spi_res *sres = container_of(res, struct spi_res, data);
 
 	WARN_ON(!list_empty(&sres->entry));
 	list_add_tail(&sres->entry, &message->resources);
 }
-EXPORT_SYMBOL_GPL(spi_res_add);
 
 /**
  * spi_res_release - release all spi resources for this message
  * @ctlr:  the @spi_controller
  * @message: the @spi_message
  */
-void spi_res_release(struct spi_controller *ctlr, struct spi_message *message)
+static void spi_res_release(struct spi_controller *ctlr, struct spi_message *message)
 {
 	struct spi_res *res, *tmp;
 
@@ -899,7 +892,6 @@ void spi_res_release(struct spi_controller *ctlr, struct spi_message *message)
 		kfree(res);
 	}
 }
-EXPORT_SYMBOL_GPL(spi_res_release);
 
 /*-------------------------------------------------------------------------*/
 
@@ -3157,7 +3149,7 @@ static void __spi_replace_transfers_release(struct spi_controller *ctlr,
  * Returns: pointer to @spi_replaced_transfers,
  *          PTR_ERR(...) in case of errors.
  */
-struct spi_replaced_transfers *spi_replace_transfers(
+static struct spi_replaced_transfers *spi_replace_transfers(
 	struct spi_message *msg,
 	struct spi_transfer *xfer_first,
 	size_t remove,
@@ -3249,7 +3241,6 @@ struct spi_replaced_transfers *spi_replace_transfers(
 
 	return rxfer;
 }
-EXPORT_SYMBOL_GPL(spi_replace_transfers);
 
 static int __spi_split_transfer_maxsize(struct spi_controller *ctlr,
 					struct spi_message *msg,
@@ -3799,7 +3790,7 @@ EXPORT_SYMBOL_GPL(spi_async);
  *
  * Return: zero on success, else a negative error code.
  */
-int spi_async_locked(struct spi_device *spi, struct spi_message *message)
+static int spi_async_locked(struct spi_device *spi, struct spi_message *message)
 {
 	struct spi_controller *ctlr = spi->controller;
 	int ret;
@@ -3818,7 +3809,6 @@ int spi_async_locked(struct spi_device *spi, struct spi_message *message)
 	return ret;
 
 }
-EXPORT_SYMBOL_GPL(spi_async_locked);
 
 /*-------------------------------------------------------------------------*/
 
@@ -4076,18 +4066,15 @@ EXPORT_SYMBOL_GPL(spi_write_then_read);
 
 /*-------------------------------------------------------------------------*/
 
-#if IS_ENABLED(CONFIG_OF)
+#if IS_ENABLED(CONFIG_OF_DYNAMIC)
 /* must call put_device() when done with returned spi_device device */
-struct spi_device *of_find_spi_device_by_node(struct device_node *node)
+static struct spi_device *of_find_spi_device_by_node(struct device_node *node)
 {
 	struct device *dev = bus_find_device_by_of_node(&spi_bus_type, node);
 
 	return dev ? to_spi_device(dev) : NULL;
 }
-EXPORT_SYMBOL_GPL(of_find_spi_device_by_node);
-#endif /* IS_ENABLED(CONFIG_OF) */
 
-#if IS_ENABLED(CONFIG_OF_DYNAMIC)
 /* the spi controllers are not using spi_bus, so we find it with another way */
 static struct spi_controller *of_find_spi_controller_by_node(struct device_node *node)
 {
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index f8e322a46616..29e21d49aafc 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -78,10 +78,6 @@ struct spi_statistics {
 	unsigned long transfers_split_maxsize;
 };
 
-void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
-				       struct spi_transfer *xfer,
-				       struct spi_controller *ctlr);
-
 #define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count)	\
 	do {							\
 		unsigned long flags;				\
@@ -783,15 +779,6 @@ struct spi_res {
 	unsigned long long      data[]; /* guarantee ull alignment */
 };
 
-extern void *spi_res_alloc(struct spi_device *spi,
-			   spi_res_release_t release,
-			   size_t size, gfp_t gfp);
-extern void spi_res_add(struct spi_message *message, void *res);
-extern void spi_res_free(void *res);
-
-extern void spi_res_release(struct spi_controller *ctlr,
-			    struct spi_message *message);
-
 /*---------------------------------------------------------------------------*/
 
 /*
@@ -1109,8 +1096,6 @@ static inline void spi_message_free(struct spi_message *m)
 
 extern int spi_setup(struct spi_device *spi);
 extern int spi_async(struct spi_device *spi, struct spi_message *message);
-extern int spi_async_locked(struct spi_device *spi,
-			    struct spi_message *message);
 extern int spi_slave_abort(struct spi_device *spi);
 
 static inline size_t
@@ -1193,15 +1178,6 @@ struct spi_replaced_transfers {
 	struct spi_transfer inserted_transfers[];
 };
 
-extern struct spi_replaced_transfers *spi_replace_transfers(
-	struct spi_message *msg,
-	struct spi_transfer *xfer_first,
-	size_t remove,
-	size_t insert,
-	spi_replaced_release_t release,
-	size_t extradatasize,
-	gfp_t gfp);
-
 /*---------------------------------------------------------------------------*/
 
 /* SPI transfer transformation methods */
@@ -1473,19 +1449,7 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
  * use spi_new_device() to describe each device.  You can also call
  * spi_unregister_device() to start making that device vanish, but
  * normally that would be handled by spi_unregister_controller().
- *
- * You can also use spi_alloc_device() and spi_add_device() to use a two
- * stage registration sequence for each spi_device.  This gives the caller
- * some more control over the spi_device structure before it is registered,
- * but requires that caller to initialize fields that would otherwise
- * be defined using the board info.
  */
-extern struct spi_device *
-spi_alloc_device(struct spi_controller *ctlr);
-
-extern int
-spi_add_device(struct spi_device *spi);
-
 extern struct spi_device *
 spi_new_device(struct spi_controller *, struct spi_board_info *);
 
@@ -1500,23 +1464,6 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer)
 	return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers);
 }
 
-/* OF support code */
-#if IS_ENABLED(CONFIG_OF)
-
-/* must call put_device() when done with returned spi_device device */
-extern struct spi_device *
-of_find_spi_device_by_node(struct device_node *node);
-
-#else
-
-static inline struct spi_device *
-of_find_spi_device_by_node(struct device_node *node)
-{
-	return NULL;
-}
-
-#endif /* IS_ENABLED(CONFIG_OF) */
-
 /* Compatibility layer */
 #define spi_master			spi_controller
 
-- 
cgit v1.2.3


From 424953cf3c6657f1e67e1a2c5d6e3bb518ea4e9a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 Sep 2021 09:50:27 +0200
Subject: qcom_scm: hide Kconfig symbol

Now that SCM can be a loadable module, we have to add another
dependency to avoid link failures when ipa or adreno-gpu are
built-in:

aarch64-linux-ld: drivers/net/ipa/ipa_main.o: in function `ipa_probe':
ipa_main.c:(.text+0xfc4): undefined reference to `qcom_scm_is_available'

ld.lld: error: undefined symbol: qcom_scm_is_available
>>> referenced by adreno_gpu.c
>>>               gpu/drm/msm/adreno/adreno_gpu.o:(adreno_zap_shader_load) in archive drivers/built-in.a

This can happen when CONFIG_ARCH_QCOM is disabled and we don't select
QCOM_MDT_LOADER, but some other module selects QCOM_SCM. Ideally we'd
use a similar dependency here to what we have for QCOM_RPROC_COMMON,
but that causes dependency loops from other things selecting QCOM_SCM.

This appears to be an endless problem, so try something different this
time:

 - CONFIG_QCOM_SCM becomes a hidden symbol that nothing 'depends on'
   but that is simply selected by all of its users

 - All the stubs in include/linux/qcom_scm.h can go away

 - arm-smccc.h needs to provide a stub for __arm_smccc_smc() to
   allow compile-testing QCOM_SCM on all architectures.

 - To avoid a circular dependency chain involving RESET_CONTROLLER
   and PINCTRL_SUNXI, drop the 'select RESET_CONTROLLER' statement.
   According to my testing this still builds fine, and the QCOM
   platform selects this symbol already.

Acked-by: Kalle Valo <kvalo@codeaurora.org>
Acked-by: Alex Elder <elder@linaro.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/firmware/Kconfig                   |  5 +--
 drivers/gpu/drm/msm/Kconfig                |  4 +-
 drivers/iommu/Kconfig                      |  3 +-
 drivers/iommu/arm/arm-smmu/Makefile        |  3 +-
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c |  3 +-
 drivers/media/platform/Kconfig             |  2 +-
 drivers/mmc/host/Kconfig                   |  2 +-
 drivers/net/ipa/Kconfig                    |  1 +
 drivers/net/wireless/ath/ath10k/Kconfig    |  2 +-
 drivers/pinctrl/qcom/Kconfig               |  3 +-
 include/linux/arm-smccc.h                  | 10 +++++
 include/linux/qcom_scm.h                   | 71 ------------------------------
 12 files changed, 24 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 220a58cf0a44..cda7d7162cbb 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -203,10 +203,7 @@ config INTEL_STRATIX10_RSU
 	  Say Y here if you want Intel RSU support.
 
 config QCOM_SCM
-	tristate "Qcom SCM driver"
-	depends on ARM || ARM64
-	depends on HAVE_ARM_SMCCC
-	select RESET_CONTROLLER
+	tristate
 
 config QCOM_SCM_DOWNLOAD_MODE_DEFAULT
 	bool "Qualcomm download mode enabled by default"
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index e9c6af78b1d7..3ddf739a6f9b 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -17,7 +17,7 @@ config DRM_MSM
 	select DRM_SCHED
 	select SHMEM
 	select TMPFS
-	select QCOM_SCM if ARCH_QCOM
+	select QCOM_SCM
 	select WANT_DEV_COREDUMP
 	select SND_SOC_HDMI_CODEC if SND_SOC
 	select SYNC_FILE
@@ -55,7 +55,7 @@ config DRM_MSM_GPU_SUDO
 
 config DRM_MSM_HDMI_HDCP
 	bool "Enable HDMI HDCP support in MSM DRM driver"
-	depends on DRM_MSM && QCOM_SCM
+	depends on DRM_MSM
 	default y
 	help
 	  Choose this option to enable HDCP state machine
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 124c41adeca1..c5c71b7ab7e8 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -308,7 +308,6 @@ config APPLE_DART
 config ARM_SMMU
 	tristate "ARM Ltd. System MMU (SMMU) Support"
 	depends on ARM64 || ARM || (COMPILE_TEST && !GENERIC_ATOMIC64)
-	depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU if ARM
@@ -438,7 +437,7 @@ config QCOM_IOMMU
 	# Note: iommu drivers cannot (yet?) be built as modules
 	bool "Qualcomm IOMMU Support"
 	depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64)
-	depends on QCOM_SCM=y
+	select QCOM_SCM
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select ARM_DMA_USE_IOMMU
diff --git a/drivers/iommu/arm/arm-smmu/Makefile b/drivers/iommu/arm/arm-smmu/Makefile
index e240a7bcf310..b0cc01aa20c9 100644
--- a/drivers/iommu/arm/arm-smmu/Makefile
+++ b/drivers/iommu/arm/arm-smmu/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
 obj-$(CONFIG_ARM_SMMU) += arm_smmu.o
-arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o arm-smmu-qcom.o
+arm_smmu-objs += arm-smmu.o arm-smmu-impl.o arm-smmu-nvidia.o
+arm_smmu-$(CONFIG_ARM_SMMU_QCOM) += arm-smmu-qcom.o
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 9f465e146799..2c25cce38060 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -215,7 +215,8 @@ struct arm_smmu_device *arm_smmu_impl_init(struct arm_smmu_device *smmu)
 	    of_device_is_compatible(np, "nvidia,tegra186-smmu"))
 		return nvidia_smmu_impl_init(smmu);
 
-	smmu = qcom_smmu_impl_init(smmu);
+	if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM))
+		smmu = qcom_smmu_impl_init(smmu);
 
 	if (of_device_is_compatible(np, "marvell,ap806-smmu-500"))
 		smmu->impl = &mrvl_mmu500_impl;
diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 157c924686e4..80321e03809a 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig
@@ -565,7 +565,7 @@ config VIDEO_QCOM_VENUS
 	depends on VIDEO_DEV && VIDEO_V4L2 && QCOM_SMEM
 	depends on (ARCH_QCOM && IOMMU_DMA) || COMPILE_TEST
 	select QCOM_MDT_LOADER if ARCH_QCOM
-	select QCOM_SCM if ARCH_QCOM
+	select QCOM_SCM
 	select VIDEOBUF2_DMA_CONTIG
 	select V4L2_MEM2MEM_DEV
 	help
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 71313961cc54..95b3511b0560 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -547,7 +547,7 @@ config MMC_SDHCI_MSM
 	depends on MMC_SDHCI_PLTFM
 	select MMC_SDHCI_IO_ACCESSORS
 	select MMC_CQHCI
-	select QCOM_SCM if MMC_CRYPTO && ARCH_QCOM
+	select QCOM_SCM if MMC_CRYPTO
 	help
 	  This selects the Secure Digital Host Controller Interface (SDHCI)
 	  support present in Qualcomm SOCs. The controller supports
diff --git a/drivers/net/ipa/Kconfig b/drivers/net/ipa/Kconfig
index 8f99cfa14680..d037682fb7ad 100644
--- a/drivers/net/ipa/Kconfig
+++ b/drivers/net/ipa/Kconfig
@@ -4,6 +4,7 @@ config QCOM_IPA
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on QCOM_RPROC_COMMON || (QCOM_RPROC_COMMON=n && COMPILE_TEST)
 	select QCOM_MDT_LOADER if ARCH_QCOM
+	select QCOM_SCM
 	select QCOM_QMI_HELPERS
 	help
 	  Choose Y or M here to include support for the Qualcomm
diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig
index 741289e385d5..ca007b800f75 100644
--- a/drivers/net/wireless/ath/ath10k/Kconfig
+++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -44,7 +44,7 @@ config ATH10K_SNOC
 	tristate "Qualcomm ath10k SNOC support"
 	depends on ATH10K
 	depends on ARCH_QCOM || COMPILE_TEST
-	depends on QCOM_SCM || !QCOM_SCM #if QCOM_SCM=m this can't be =y
+	select QCOM_SCM
 	select QCOM_QMI_HELPERS
 	help
 	  This module adds support for integrated WCN3990 chip connected
diff --git a/drivers/pinctrl/qcom/Kconfig b/drivers/pinctrl/qcom/Kconfig
index 32ea2a8ec02b..5ff4207df66e 100644
--- a/drivers/pinctrl/qcom/Kconfig
+++ b/drivers/pinctrl/qcom/Kconfig
@@ -3,7 +3,8 @@ if (ARCH_QCOM || COMPILE_TEST)
 
 config PINCTRL_MSM
 	tristate "Qualcomm core pin controller driver"
-	depends on GPIOLIB && (QCOM_SCM || !QCOM_SCM) #if QCOM_SCM=m this can't be =y
+	depends on GPIOLIB
+	select QCOM_SCM
 	select PINMUX
 	select PINCONF
 	select GENERIC_PINCONF
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 7d1cabe15262..63ccb5252190 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -321,10 +321,20 @@ asmlinkage unsigned long __arm_smccc_sve_check(unsigned long x0);
  * from register 0 to 3 on return from the SMC instruction.  An optional
  * quirk structure provides vendor specific behavior.
  */
+#ifdef CONFIG_HAVE_ARM_SMCCC
 asmlinkage void __arm_smccc_smc(unsigned long a0, unsigned long a1,
 			unsigned long a2, unsigned long a3, unsigned long a4,
 			unsigned long a5, unsigned long a6, unsigned long a7,
 			struct arm_smccc_res *res, struct arm_smccc_quirk *quirk);
+#else
+static inline void __arm_smccc_smc(unsigned long a0, unsigned long a1,
+			unsigned long a2, unsigned long a3, unsigned long a4,
+			unsigned long a5, unsigned long a6, unsigned long a7,
+			struct arm_smccc_res *res, struct arm_smccc_quirk *quirk)
+{
+	*res = (struct arm_smccc_res){};
+}
+#endif
 
 /**
  * __arm_smccc_hvc() - make HVC calls
diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index c0475d1c9885..81cad9e1e412 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -61,7 +61,6 @@ enum qcom_scm_ice_cipher {
 #define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE)
 #define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC)
 
-#if IS_ENABLED(CONFIG_QCOM_SCM)
 extern bool qcom_scm_is_available(void);
 
 extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus);
@@ -115,74 +114,4 @@ extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
 extern int qcom_scm_lmh_profile_change(u32 profile_id);
 extern bool qcom_scm_lmh_dcvsh_available(void);
 
-#else
-
-#include <linux/errno.h>
-
-static inline bool qcom_scm_is_available(void) { return false; }
-
-static inline int qcom_scm_set_cold_boot_addr(void *entry,
-		const cpumask_t *cpus) { return -ENODEV; }
-static inline int qcom_scm_set_warm_boot_addr(void *entry,
-		const cpumask_t *cpus) { return -ENODEV; }
-static inline void qcom_scm_cpu_power_down(u32 flags) {}
-static inline u32 qcom_scm_set_remote_state(u32 state,u32 id)
-		{ return -ENODEV; }
-
-static inline int qcom_scm_pas_init_image(u32 peripheral, const void *metadata,
-		size_t size) { return -ENODEV; }
-static inline int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr,
-		phys_addr_t size) { return -ENODEV; }
-static inline int qcom_scm_pas_auth_and_reset(u32 peripheral)
-		{ return -ENODEV; }
-static inline int qcom_scm_pas_shutdown(u32 peripheral) { return -ENODEV; }
-static inline bool qcom_scm_pas_supported(u32 peripheral) { return false; }
-
-static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val)
-		{ return -ENODEV; }
-static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val)
-		{ return -ENODEV; }
-
-static inline bool qcom_scm_restore_sec_cfg_available(void) { return false; }
-static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare)
-		{ return -ENODEV; }
-static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size)
-		{ return -ENODEV; }
-static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare)
-		{ return -ENODEV; }
-extern inline int qcom_scm_mem_protect_video_var(u32 cp_start, u32 cp_size,
-						 u32 cp_nonpixel_start,
-						 u32 cp_nonpixel_size)
-		{ return -ENODEV; }
-static inline int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz,
-		unsigned int *src, const struct qcom_scm_vmperm *newvm,
-		unsigned int dest_cnt) { return -ENODEV; }
-
-static inline bool qcom_scm_ocmem_lock_available(void) { return false; }
-static inline int qcom_scm_ocmem_lock(enum qcom_scm_ocmem_client id, u32 offset,
-		u32 size, u32 mode) { return -ENODEV; }
-static inline int qcom_scm_ocmem_unlock(enum qcom_scm_ocmem_client id,
-		u32 offset, u32 size) { return -ENODEV; }
-
-static inline bool qcom_scm_ice_available(void) { return false; }
-static inline int qcom_scm_ice_invalidate_key(u32 index) { return -ENODEV; }
-static inline int qcom_scm_ice_set_key(u32 index, const u8 *key, u32 key_size,
-				       enum qcom_scm_ice_cipher cipher,
-				       u32 data_unit_size) { return -ENODEV; }
-
-static inline bool qcom_scm_hdcp_available(void) { return false; }
-static inline int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt,
-		u32 *resp) { return -ENODEV; }
-
-static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
-		{ return -ENODEV; }
-
-static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
-				     u64 limit_node, u32 node_id, u64 version)
-		{ return -ENODEV; }
-
-static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; }
-
-static inline bool qcom_scm_lmh_dcvsh_available(void) { return -ENODEV; }
-#endif
 #endif
-- 
cgit v1.2.3


From ba882580f211dbe4fee7f010c9d38dd879db83a6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 7 Oct 2021 11:18:46 -0700
Subject: eth: platform: add a helper for loading netdev->dev_addr

Commit 406f42fa0d3c ("net-next: When a bond have a massive amount
of VLANs...") introduced a rbtree for faster Ethernet address look
up. To maintain netdev->dev_addr in this tree we need to make all
the writes to it got through appropriate helpers.

There is a handful of drivers which pass netdev->dev_addr as
the destination buffer to eth_platform_get_mac_address().
Add a helper which takes a dev pointer instead, so it can call
an appropriate helper.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h |  1 +
 net/ethernet/eth.c          | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index e75116f48cd1..3cf546d2ffd1 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -29,6 +29,7 @@ struct device;
 struct fwnode_handle;
 
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
+int platform_get_ethdev_address(struct device *dev, struct net_device *netdev);
 unsigned char *arch_get_platform_mac_address(void);
 int nvmem_get_mac_address(struct device *dev, void *addrbuf);
 int device_get_mac_address(struct device *dev, char *addr);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 182de70ac258..c7d9e08107cb 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -523,6 +523,26 @@ int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr)
 }
 EXPORT_SYMBOL(eth_platform_get_mac_address);
 
+/**
+ * platform_get_ethdev_address - Set netdev's MAC address from a given device
+ * @dev:	Pointer to the device
+ * @netdev:	Pointer to netdev to write the address to
+ *
+ * Wrapper around eth_platform_get_mac_address() which writes the address
+ * directly to netdev->dev_addr.
+ */
+int platform_get_ethdev_address(struct device *dev, struct net_device *netdev)
+{
+	u8 addr[ETH_ALEN] __aligned(2);
+	int ret;
+
+	ret = eth_platform_get_mac_address(dev, addr);
+	if (!ret)
+		eth_hw_addr_set(netdev, addr);
+	return ret;
+}
+EXPORT_SYMBOL(platform_get_ethdev_address);
+
 /**
  * nvmem_get_mac_address - Obtain the MAC address from an nvmem cell named
  * 'mac-address' associated with given device.
-- 
cgit v1.2.3


From 75ea27d0d62281c31ee259c872dfdeb072cf5e39 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <atenart@kernel.org>
Date: Thu, 7 Oct 2021 18:16:50 +0200
Subject: net: introduce a function to check if a netdev name is in use

__dev_get_by_name is currently used to either retrieve a net device
reference using its name or to check if a name is already used by a
registered net device (per ns). In the later case there is no need to
return a reference to a net device.

Introduce a new helper, netdev_name_in_use, to check if a name is
currently used by a registered net device without leaking a reference
the corresponding net device. This helper uses netdev_name_node_lookup
instead of __dev_get_by_name as we don't need the extra logic retrieving
a reference to the corresponding net device.

Signed-off-by: Antoine Tenart <atenart@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 14 ++++++++++----
 2 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d79163208dfd..15f4a658e436 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2955,6 +2955,7 @@ struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
 struct net_device *dev_get_by_name(struct net *net, const char *name);
 struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
 struct net_device *__dev_get_by_name(struct net *net, const char *name);
+bool netdev_name_in_use(struct net *net, const char *name);
 int dev_alloc_name(struct net_device *dev, const char *name);
 int dev_open(struct net_device *dev, struct netlink_ext_ack *extack);
 void dev_close(struct net_device *dev);
diff --git a/net/core/dev.c b/net/core/dev.c
index 16ab09b6a7f8..1594cd2955ba 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -303,6 +303,12 @@ static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
 	return NULL;
 }
 
+bool netdev_name_in_use(struct net *net, const char *name)
+{
+	return netdev_name_node_lookup(net, name);
+}
+EXPORT_SYMBOL(netdev_name_in_use);
+
 int netdev_name_node_alt_create(struct net_device *dev, const char *name)
 {
 	struct netdev_name_node *name_node;
@@ -1133,7 +1139,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
 	}
 
 	snprintf(buf, IFNAMSIZ, name, i);
-	if (!__dev_get_by_name(net, buf))
+	if (!netdev_name_in_use(net, buf))
 		return i;
 
 	/* It is possible to run out of possible slots
@@ -1187,7 +1193,7 @@ static int dev_get_valid_name(struct net *net, struct net_device *dev,
 
 	if (strchr(name, '%'))
 		return dev_alloc_name_ns(net, dev, name);
-	else if (__dev_get_by_name(net, name))
+	else if (netdev_name_in_use(net, name))
 		return -EEXIST;
 	else if (dev->name != name)
 		strlcpy(dev->name, name, IFNAMSIZ);
@@ -11153,7 +11159,7 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 	 * we can use it in the destination network namespace.
 	 */
 	err = -EEXIST;
-	if (__dev_get_by_name(net, dev->name)) {
+	if (netdev_name_in_use(net, dev->name)) {
 		/* We get here if we can't use the current device name */
 		if (!pat)
 			goto out;
@@ -11506,7 +11512,7 @@ static void __net_exit default_device_exit(struct net *net)
 
 		/* Push remaining network devices to init_net */
 		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
-		if (__dev_get_by_name(&init_net, fb_name))
+		if (netdev_name_in_use(&init_net, fb_name))
 			snprintf(fb_name, IFNAMSIZ, "dev%%d");
 		err = dev_change_net_namespace(dev, &init_net, fb_name);
 		if (err) {
-- 
cgit v1.2.3


From 0258b5fd7c7124b87e185a1a9322d2c66b1876b7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 22 Sep 2021 11:24:02 -0500
Subject: coredump: Limit coredumps to a single thread group

Today when a signal is delivered with a handler of SIG_DFL whose
default behavior is to generate a core dump not only that process but
every process that shares the mm is killed.

In the case of vfork this looks like a real world problem.  Consider
the following well defined sequence.

	if (vfork() == 0) {
		execve(...);
		_exit(EXIT_FAILURE);
	}

If a signal that generates a core dump is received after vfork but
before the execve changes the mm the process that called vfork will
also be killed (as the mm is shared).

Similarly if the execve fails after the point of no return the kernel
delivers SIGSEGV which will kill both the exec'ing process and because
the mm is shared the process that called vfork as well.

As far as I can tell this behavior is a violation of people's
reasonable expectations, POSIX, and is unnecessarily fragile when the
system is low on memory.

Solve this by making a userspace visible change to only kill a single
process/thread group.  This is possible because Jann Horn recently
modified[1] the coredump code so that the mm can safely be modified
while the coredump is happening.  With LinuxThreads long gone I don't
expect anyone to have a notice this behavior change in practice.

To accomplish this move the core_state pointer from mm_struct to
signal_struct, which allows different thread groups to coredump
simultatenously.

In zap_threads remove the work to kill anything except for the current
thread group.

v2: Remove core_state from the VM_BUG_ON_MM print to fix
    compile failure when CONFIG_DEBUG_VM is enabled.
    Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>

[1] a07279c9a8cd ("binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot")
Fixes: d89f3847def4 ("[PATCH] thread-aware coredumps, 2.5.43-C3")
History-tree: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Link: https://lkml.kernel.org/r/87y27mvnke.fsf@disp2133
Link: https://lkml.kernel.org/r/20211007144701.67592574@canb.auug.org.au
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/binfmt_elf.c              |  4 +--
 fs/binfmt_elf_fdpic.c        |  2 +-
 fs/coredump.c                | 84 +++++---------------------------------------
 fs/proc/array.c              |  6 ++--
 include/linux/mm_types.h     | 13 -------
 include/linux/sched/signal.h | 13 +++++++
 kernel/exit.c                | 13 +++----
 kernel/fork.c                |  1 -
 mm/debug.c                   |  4 +--
 9 files changed, 34 insertions(+), 106 deletions(-)

(limited to 'include/linux')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 69d900a8473d..796e5327ee7d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1834,7 +1834,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	/*
 	 * Allocate a structure for each thread.
 	 */
-	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
+	for (ct = &dump_task->signal->core_state->dumper; ct; ct = ct->next) {
 		t = kzalloc(offsetof(struct elf_thread_core_info,
 				     notes[info->thread_notes]),
 			    GFP_KERNEL);
@@ -2024,7 +2024,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
 	if (!elf_note_info_init(info))
 		return 0;
 
-	for (ct = current->mm->core_state->dumper.next;
+	for (ct = current->signal->core_state->dumper.next;
 					ct; ct = ct->next) {
 		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
 		if (!ets)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 6d8fd6030cbb..c6f588dc4a9d 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1494,7 +1494,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
 		goto end_coredump;
 
-	for (ct = current->mm->core_state->dumper.next;
+	for (ct = current->signal->core_state->dumper.next;
 					ct; ct = ct->next) {
 		tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
 					     ct->task, &thread_status_size);
diff --git a/fs/coredump.c b/fs/coredump.c
index d576287fb88b..a6b3c196cdef 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -369,99 +369,34 @@ static int zap_process(struct task_struct *start, int exit_code, int flags)
 	return nr;
 }
 
-static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+static int zap_threads(struct task_struct *tsk,
 			struct core_state *core_state, int exit_code)
 {
-	struct task_struct *g, *p;
-	unsigned long flags;
 	int nr = -EAGAIN;
 
 	spin_lock_irq(&tsk->sighand->siglock);
 	if (!signal_group_exit(tsk->signal)) {
-		mm->core_state = core_state;
+		tsk->signal->core_state = core_state;
 		tsk->signal->group_exit_task = tsk;
 		nr = zap_process(tsk, exit_code, 0);
 		clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
+		tsk->flags |= PF_DUMPCORE;
+		atomic_set(&core_state->nr_threads, nr);
 	}
 	spin_unlock_irq(&tsk->sighand->siglock);
-	if (unlikely(nr < 0))
-		return nr;
-
-	tsk->flags |= PF_DUMPCORE;
-	if (atomic_read(&mm->mm_users) == nr + 1)
-		goto done;
-	/*
-	 * We should find and kill all tasks which use this mm, and we should
-	 * count them correctly into ->nr_threads. We don't take tasklist
-	 * lock, but this is safe wrt:
-	 *
-	 * fork:
-	 *	None of sub-threads can fork after zap_process(leader). All
-	 *	processes which were created before this point should be
-	 *	visible to zap_threads() because copy_process() adds the new
-	 *	process to the tail of init_task.tasks list, and lock/unlock
-	 *	of ->siglock provides a memory barrier.
-	 *
-	 * do_exit:
-	 *	The caller holds mm->mmap_lock. This means that the task which
-	 *	uses this mm can't pass coredump_task_exit(), so it can't exit
-	 *	or clear its ->mm.
-	 *
-	 * de_thread:
-	 *	It does list_replace_rcu(&leader->tasks, &current->tasks),
-	 *	we must see either old or new leader, this does not matter.
-	 *	However, it can change p->sighand, so lock_task_sighand(p)
-	 *	must be used. Since p->mm != NULL and we hold ->mmap_lock
-	 *	it can't fail.
-	 *
-	 *	Note also that "g" can be the old leader with ->mm == NULL
-	 *	and already unhashed and thus removed from ->thread_group.
-	 *	This is OK, __unhash_process()->list_del_rcu() does not
-	 *	clear the ->next pointer, we will find the new leader via
-	 *	next_thread().
-	 */
-	rcu_read_lock();
-	for_each_process(g) {
-		if (g == tsk->group_leader)
-			continue;
-		if (g->flags & PF_KTHREAD)
-			continue;
-
-		for_each_thread(g, p) {
-			if (unlikely(!p->mm))
-				continue;
-			if (unlikely(p->mm == mm)) {
-				lock_task_sighand(p, &flags);
-				nr += zap_process(p, exit_code,
-							SIGNAL_GROUP_EXIT);
-				unlock_task_sighand(p, &flags);
-			}
-			break;
-		}
-	}
-	rcu_read_unlock();
-done:
-	atomic_set(&core_state->nr_threads, nr);
 	return nr;
 }
 
 static int coredump_wait(int exit_code, struct core_state *core_state)
 {
 	struct task_struct *tsk = current;
-	struct mm_struct *mm = tsk->mm;
 	int core_waiters = -EBUSY;
 
 	init_completion(&core_state->startup);
 	core_state->dumper.task = tsk;
 	core_state->dumper.next = NULL;
 
-	if (mmap_write_lock_killable(mm))
-		return -EINTR;
-
-	if (!mm->core_state)
-		core_waiters = zap_threads(tsk, mm, core_state, exit_code);
-	mmap_write_unlock(mm);
-
+	core_waiters = zap_threads(tsk, core_state, exit_code);
 	if (core_waiters > 0) {
 		struct core_thread *ptr;
 
@@ -483,7 +418,7 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
 	return core_waiters;
 }
 
-static void coredump_finish(struct mm_struct *mm, bool core_dumped)
+static void coredump_finish(bool core_dumped)
 {
 	struct core_thread *curr, *next;
 	struct task_struct *task;
@@ -493,9 +428,10 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
 		current->signal->group_exit_code |= 0x80;
 	current->signal->group_exit_task = NULL;
 	current->signal->flags = SIGNAL_GROUP_EXIT;
+	next = current->signal->core_state->dumper.next;
+	current->signal->core_state = NULL;
 	spin_unlock_irq(&current->sighand->siglock);
 
-	next = mm->core_state->dumper.next;
 	while ((curr = next) != NULL) {
 		next = curr->next;
 		task = curr->task;
@@ -507,8 +443,6 @@ static void coredump_finish(struct mm_struct *mm, bool core_dumped)
 		curr->task = NULL;
 		wake_up_process(task);
 	}
-
-	mm->core_state = NULL;
 }
 
 static bool dump_interrupted(void)
@@ -839,7 +773,7 @@ fail_dropcount:
 fail_unlock:
 	kfree(argv);
 	kfree(cn.corename);
-	coredump_finish(mm, core_dumped);
+	coredump_finish(core_dumped);
 	revert_creds(old_cred);
 fail_creds:
 	put_cred(cred);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 49be8c8ef555..520c51be1e57 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -408,9 +408,9 @@ static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 		   cpumask_pr_args(&task->cpus_mask));
 }
 
-static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
+static inline void task_core_dumping(struct seq_file *m, struct task_struct *task)
 {
-	seq_put_decimal_ull(m, "CoreDumping:\t", !!mm->core_state);
+	seq_put_decimal_ull(m, "CoreDumping:\t", !!task->signal->core_state);
 	seq_putc(m, '\n');
 }
 
@@ -436,7 +436,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 
 	if (mm) {
 		task_mem(m, mm);
-		task_core_dumping(m, mm);
+		task_core_dumping(m, task);
 		task_thp_status(m, mm);
 		mmput(mm);
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..1039f6ae922c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -387,17 +387,6 @@ struct vm_area_struct {
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 } __randomize_layout;
 
-struct core_thread {
-	struct task_struct *task;
-	struct core_thread *next;
-};
-
-struct core_state {
-	atomic_t nr_threads;
-	struct core_thread dumper;
-	struct completion startup;
-};
-
 struct kioctx_table;
 struct mm_struct {
 	struct {
@@ -518,8 +507,6 @@ struct mm_struct {
 
 		unsigned long flags; /* Must use atomic bitops to access */
 
-		struct core_state *core_state; /* coredumping support */
-
 #ifdef CONFIG_AIO
 		spinlock_t			ioctx_lock;
 		struct kioctx_table __rcu	*ioctx_table;
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index e5f4ce622ee6..a8fe2a593a3a 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -72,6 +72,17 @@ struct multiprocess_signals {
 	struct hlist_node node;
 };
 
+struct core_thread {
+	struct task_struct *task;
+	struct core_thread *next;
+};
+
+struct core_state {
+	atomic_t nr_threads;
+	struct core_thread dumper;
+	struct completion startup;
+};
+
 /*
  * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
@@ -110,6 +121,8 @@ struct signal_struct {
 	int			group_stop_count;
 	unsigned int		flags; /* see SIGNAL_* flags below */
 
+	struct core_state *core_state; /* coredumping support */
+
 	/*
 	 * PR_SET_CHILD_SUBREAPER marks a process, like a service
 	 * manager, to re-parent orphan (double-forking) child processes
diff --git a/kernel/exit.c b/kernel/exit.c
index 774e6b5061b8..2b355e926c13 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -342,23 +342,18 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
 static void coredump_task_exit(struct task_struct *tsk)
 {
 	struct core_state *core_state;
-	struct mm_struct *mm;
-
-	mm = tsk->mm;
-	if (!mm)
-		return;
 
 	/*
 	 * Serialize with any possible pending coredump.
-	 * We must hold mmap_lock around checking core_state
+	 * We must hold siglock around checking core_state
 	 * and setting PF_POSTCOREDUMP.  The core-inducing thread
 	 * will increment ->nr_threads for each thread in the
 	 * group without PF_POSTCOREDUMP set.
 	 */
-	mmap_read_lock(mm);
+	spin_lock_irq(&tsk->sighand->siglock);
 	tsk->flags |= PF_POSTCOREDUMP;
-	core_state = mm->core_state;
-	mmap_read_unlock(mm);
+	core_state = tsk->signal->core_state;
+	spin_unlock_irq(&tsk->sighand->siglock);
 	if (core_state) {
 		struct core_thread self;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 9bd9f2da9e41..c8adb76982f7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1044,7 +1044,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	seqcount_init(&mm->write_protect_seq);
 	mmap_init_lock(mm);
 	INIT_LIST_HEAD(&mm->mmlist);
-	mm->core_state = NULL;
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
diff --git a/mm/debug.c b/mm/debug.c
index e73fe0a8ec3d..aa5fe4d5c4b4 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -214,7 +214,7 @@ void dump_mm(const struct mm_struct *mm)
 		"start_code %lx end_code %lx start_data %lx end_data %lx\n"
 		"start_brk %lx brk %lx start_stack %lx\n"
 		"arg_start %lx arg_end %lx env_start %lx env_end %lx\n"
-		"binfmt %px flags %lx core_state %px\n"
+		"binfmt %px flags %lx\n"
 #ifdef CONFIG_AIO
 		"ioctx_table %px\n"
 #endif
@@ -246,7 +246,7 @@ void dump_mm(const struct mm_struct *mm)
 		mm->start_code, mm->end_code, mm->start_data, mm->end_data,
 		mm->start_brk, mm->brk, mm->start_stack,
 		mm->arg_start, mm->arg_end, mm->env_start, mm->env_end,
-		mm->binfmt, mm->flags, mm->core_state,
+		mm->binfmt, mm->flags,
 #ifdef CONFIG_AIO
 		mm->ioctx_table,
 #endif
-- 
cgit v1.2.3


From 5bded8259ee3815a91791462dfb3312480779c3d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 7 Oct 2021 19:47:11 +0300
Subject: net: dsa: mv88e6xxx: isolate the ATU databases of standalone and
 bridged ports

Similar to commit 6087175b7991 ("net: dsa: mt7530: use independent VLAN
learning on VLAN-unaware bridges"), software forwarding between an
unoffloaded LAG port (a bonding interface with an unsupported policy)
and a mv88e6xxx user port directly under a bridge is broken.

We adopt the same strategy, which is to make the standalone ports not
find any ATU entry learned on a bridge port.

Theory: the mv88e6xxx ATU is looked up by FID and MAC address. There are
as many FIDs as VIDs (4096). The FID is derived from the VID when
possible (the VTU maps a VID to a FID), with a fallback to the port
based default FID value when not (802.1Q Mode is disabled on the port,
or the classified VID isn't present in the VTU).

The mv88e6xxx driver makes the following use of FIDs and VIDs:

- the port's DefaultVID (to which untagged & pvid-tagged packets get
  classified) is 0 and is absent from the VTU, so this kind of packets is
  processed in FID 0, the default FID assigned by mv88e6xxx_setup_port.

- every time a bridge VLAN is created, mv88e6xxx_port_vlan_join() ->
  mv88e6xxx_atu_new() associates a FID with that VID which increases
  linearly starting from 1. Like this:

  bridge vlan add dev lan0 vid 100 # FID 1
  bridge vlan add dev lan1 vid 100 # still FID 1
  bridge vlan add dev lan2 vid 1024 # FID 2

The FID allocation made by the driver is sub-optimal for the following
reasons:

(a) A standalone port has a DefaultPVID of 0 and a default FID of 0 too.
    A VLAN-unaware bridged port has a DefaultPVID of 0 and a default FID
    of 0 too. The difference is that the bridged ports may learn ATU
    entries, while the standalone port has the requirement that it must
    not, and must not find them either. Standalone ports must not use
    the same FID as ports belonging to a bridge. All standalone ports
    can use the same FID, since the ATU will never have an entry in
    that FID.

(b) Multiple VLAN-unaware bridges will all use a DefaultPVID of 0 and a
    default FID of 0 on all their ports. The FDBs will not be isolated
    between these bridges. Every VLAN-unaware bridge must use the same
    FID on all its ports, different from the FID of other bridge ports.

(c) Each bridge VLAN uses a unique FID which is useful for Independent
    VLAN Learning, but the same VLAN ID on multiple VLAN-aware bridges
    will result in the same FID being used by mv88e6xxx_atu_new().
    The correct behavior is for VLAN 1 in br0 to have a different FID
    compared to VLAN 1 in br1.

This patch cannot fix all the above. Traditionally the DSA framework did
not care about this, and the reality is that DSA core involvement is
needed for the aforementioned issues to be solved. The only thing we can
solve here is an issue which does not require API changes, and that is
issue (a), aka use a different FID for standalone ports vs ports under
VLAN-unaware bridges.

The first step is deciding what VID and FID to use for standalone ports,
and what VID and FID for bridged ports. The 0/0 pair for standalone
ports is what they used up till now, let's keep using that. For bridged
ports, there are 2 cases:

- VLAN-aware ports will never end up using the port default FID, because
  packets will always be classified to a VID in the VTU or dropped
  otherwise. The FID is the one associated with the VID in the VTU.

- On VLAN-unaware ports, we _could_ leave their DefaultVID (pvid) at
  zero (just as in the case of standalone ports), and just change the
  port's default FID from 0 to a different number (say 1).

However, Tobias points out that there is one more requirement to cater to:
cross-chip bridging. The Marvell DSA header does not carry the FID in
it, only the VID. So once a packet crosses a DSA link, if it has a VID
of zero it will get classified to the default FID of that cascade port.
Relying on a port default FID for upstream cascade ports results in
contradictions: a default FID of 0 breaks ATU isolation of bridged ports
on the downstream switch, a default FID of 1 breaks standalone ports on
the downstream switch.

So not only must standalone ports have different FIDs compared to
bridged ports, they must also have different DefaultVID values.
IEEE 802.1Q defines two reserved VID values: 0 and 4095. So we simply
choose 4095 as the DefaultVID of ports belonging to VLAN-unaware
bridges, and VID 4095 maps to FID 1.

For the xmit operation to look up the same ATU database, we need to put
VID 4095 in DSA tags sent to ports belonging to VLAN-unaware bridges
too. All shared ports are configured to map this VID to the bridging
FID, because they are members of that VLAN in the VTU. Shared ports
don't need to have 802.1QMode enabled in any way, they always parse the
VID from the DSA header, they don't need to look at the 802.1Q header.

We install VID 4095 to the VTU in mv88e6xxx_setup_port(), with the
mention that mv88e6xxx_vtu_setup() which was located right below that
call was flushing the VTU so those entries wouldn't be preserved.
So we need to relocate the VTU flushing prior to the port initialization
during ->setup(). Also note that this is why it is safe to assume that
VID 4095 will get associated with FID 1: the user ports haven't been
created, so there is no avenue for the user to create a bridge VLAN
which could otherwise race with the creation of another FID which would
otherwise use up the non-reserved FID value of 1.

[ Currently mv88e6xxx_port_vlan_join() doesn't have the option of
  specifying a preferred FID, it always calls mv88e6xxx_atu_new(). ]

mv88e6xxx_port_db_load_purge() is the function to access the ATU for
FDB/MDB entries, and it used to determine the FID to use for
VLAN-unaware FDB entries (VID=0) using mv88e6xxx_port_get_fid().
But the driver only called mv88e6xxx_port_set_fid() once, during probe,
so no surprises, the port FID was always 0, the call to get_fid() was
redundant. As much as I would have wanted to not touch that code, the
logic is broken when we add a new FID which is not the port-based
default. Now the port-based default FID only corresponds to standalone
ports, and FDB/MDB entries belong to the bridging service. So while in
the future, when the DSA API will support FDB isolation, we will have to
figure out the FID based on the bridge number, for now there's a single
bridging FID, so hardcode that.

Lastly, the tagger needs to check, when it is transmitting a VLAN
untagged skb, whether it is sending it towards a bridged or a standalone
port. When we see it is bridged we assume the bridge is VLAN-unaware.
Not because it cannot be VLAN-aware but:

- if we are transmitting from a VLAN-aware bridge we are likely doing so
  using TX forwarding offload. That code path guarantees that skbs have
  a vlan hwaccel tag in them, so we would not enter the "else" branch
  of the "if (skb->protocol == htons(ETH_P_8021Q))" condition.

- if we are transmitting on behalf of a VLAN-aware bridge but with no TX
  forwarding offload (no PVT support, out of space in the PVT, whatever),
  we would indeed be transmitting with VLAN 4095 instead of the bridge
  device's pvid. However we would be injecting a "From CPU" frame, and
  the switch won't learn from that - it only learns from "Forward" frames.
  So it is inconsequential for address learning. And VLAN 4095 is
  absolutely enough for the frame to exit the switch, since we never
  remove that VLAN from any port.

Fixes: 57e661aae6a8 ("net: dsa: mv88e6xxx: Link aggregation support")
Reported-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 MAINTAINERS                      |  1 +
 drivers/net/dsa/mv88e6xxx/chip.c | 67 ++++++++++++++++++++++++++++++++--------
 drivers/net/dsa/mv88e6xxx/chip.h |  3 ++
 include/linux/dsa/mv88e6xxx.h    | 13 ++++++++
 net/dsa/tag_dsa.c                | 12 +++++--
 5 files changed, 80 insertions(+), 16 deletions(-)
 create mode 100644 include/linux/dsa/mv88e6xxx.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index a4a0c2baaf27..17f652b2f653 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11153,6 +11153,7 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/net/dsa/marvell.txt
 F:	Documentation/networking/devlink/mv88e6xxx.rst
 F:	drivers/net/dsa/mv88e6xxx/
+F:	include/linux/dsa/mv88e6xxx.h
 F:	include/linux/platform_data/mv88e6xxx.h
 
 MARVELL ARMADA 3700 PHY DRIVERS
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d672112afffd..d7b29792732b 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -12,6 +12,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/delay.h>
+#include <linux/dsa/mv88e6xxx.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/if_bridge.h>
@@ -1681,13 +1682,17 @@ static int mv88e6xxx_port_commit_pvid(struct mv88e6xxx_chip *chip, int port)
 {
 	struct dsa_port *dp = dsa_to_port(chip->ds, port);
 	struct mv88e6xxx_port *p = &chip->ports[port];
+	u16 pvid = MV88E6XXX_VID_STANDALONE;
 	bool drop_untagged = false;
-	u16 pvid = 0;
 	int err;
 
-	if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) {
-		pvid = p->bridge_pvid.vid;
-		drop_untagged = !p->bridge_pvid.valid;
+	if (dp->bridge_dev) {
+		if (br_vlan_enabled(dp->bridge_dev)) {
+			pvid = p->bridge_pvid.vid;
+			drop_untagged = !p->bridge_pvid.valid;
+		} else {
+			pvid = MV88E6XXX_VID_BRIDGED;
+		}
 	}
 
 	err = mv88e6xxx_port_set_pvid(chip, port, pvid);
@@ -1754,11 +1759,15 @@ static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port,
 	u16 fid;
 	int err;
 
-	/* Null VLAN ID corresponds to the port private database */
+	/* Ports have two private address databases: one for when the port is
+	 * standalone and one for when the port is under a bridge and the
+	 * 802.1Q mode is disabled. When the port is standalone, DSA wants its
+	 * address database to remain 100% empty, so we never load an ATU entry
+	 * into a standalone port's database. Therefore, translate the null
+	 * VLAN ID into the port's database used for VLAN-unaware bridging.
+	 */
 	if (vid == 0) {
-		err = mv88e6xxx_port_get_fid(chip, port, &fid);
-		if (err)
-			return err;
+		fid = MV88E6XXX_FID_BRIDGED;
 	} else {
 		err = mv88e6xxx_vtu_get(chip, vid, &vlan);
 		if (err)
@@ -2434,7 +2443,16 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port,
 	int err;
 
 	mv88e6xxx_reg_lock(chip);
+
 	err = mv88e6xxx_bridge_map(chip, br);
+	if (err)
+		goto unlock;
+
+	err = mv88e6xxx_port_commit_pvid(chip, port);
+	if (err)
+		goto unlock;
+
+unlock:
 	mv88e6xxx_reg_unlock(chip);
 
 	return err;
@@ -2444,11 +2462,20 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
 					struct net_device *br)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
+	int err;
 
 	mv88e6xxx_reg_lock(chip);
+
 	if (mv88e6xxx_bridge_map(chip, br) ||
 	    mv88e6xxx_port_vlan_map(chip, port))
 		dev_err(ds->dev, "failed to remap in-chip Port VLAN\n");
+
+	err = mv88e6xxx_port_commit_pvid(chip, port);
+	if (err)
+		dev_err(ds->dev,
+			"port %d failed to restore standalone pvid: %pe\n",
+			port, ERR_PTR(err));
+
 	mv88e6xxx_reg_unlock(chip);
 }
 
@@ -2894,6 +2921,20 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 	if (err)
 		return err;
 
+	/* Associate MV88E6XXX_VID_BRIDGED with MV88E6XXX_FID_BRIDGED in the
+	 * ATU by virtue of the fact that mv88e6xxx_atu_new() will pick it as
+	 * the first free FID after MV88E6XXX_FID_STANDALONE. This will be used
+	 * as the private PVID on ports under a VLAN-unaware bridge.
+	 * Shared (DSA and CPU) ports must also be members of it, to translate
+	 * the VID from the DSA tag into MV88E6XXX_FID_BRIDGED, instead of
+	 * relying on their port default FID.
+	 */
+	err = mv88e6xxx_port_vlan_join(chip, port, MV88E6XXX_VID_BRIDGED,
+				       MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_UNTAGGED,
+				       false);
+	if (err)
+		return err;
+
 	if (chip->info->ops->port_set_jumbo_size) {
 		err = chip->info->ops->port_set_jumbo_size(chip, port, 10218);
 		if (err)
@@ -2966,7 +3007,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 	 * database, and allow bidirectional communication between the
 	 * CPU and DSA port(s), and the other ports.
 	 */
-	err = mv88e6xxx_port_set_fid(chip, port, 0);
+	err = mv88e6xxx_port_set_fid(chip, port, MV88E6XXX_FID_STANDALONE);
 	if (err)
 		return err;
 
@@ -3156,6 +3197,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
 		}
 	}
 
+	err = mv88e6xxx_vtu_setup(chip);
+	if (err)
+		goto unlock;
+
 	/* Setup Switch Port Registers */
 	for (i = 0; i < mv88e6xxx_num_ports(chip); i++) {
 		if (dsa_is_unused_port(ds, i))
@@ -3185,10 +3230,6 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
 	if (err)
 		goto unlock;
 
-	err = mv88e6xxx_vtu_setup(chip);
-	if (err)
-		goto unlock;
-
 	err = mv88e6xxx_pvt_setup(chip);
 	if (err)
 		goto unlock;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 33d067e8396d..8271b8aa7b71 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -21,6 +21,9 @@
 #define EDSA_HLEN		8
 #define MV88E6XXX_N_FID		4096
 
+#define MV88E6XXX_FID_STANDALONE	0
+#define MV88E6XXX_FID_BRIDGED		1
+
 /* PVT limits for 4-bit port and 5-bit switch */
 #define MV88E6XXX_MAX_PVT_SWITCHES	32
 #define MV88E6XXX_MAX_PVT_PORTS		16
diff --git a/include/linux/dsa/mv88e6xxx.h b/include/linux/dsa/mv88e6xxx.h
new file mode 100644
index 000000000000..8c3d45eca46b
--- /dev/null
+++ b/include/linux/dsa/mv88e6xxx.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright 2021 NXP
+ */
+
+#ifndef _NET_DSA_TAG_MV88E6XXX_H
+#define _NET_DSA_TAG_MV88E6XXX_H
+
+#include <linux/if_vlan.h>
+
+#define MV88E6XXX_VID_STANDALONE	0
+#define MV88E6XXX_VID_BRIDGED		(VLAN_N_VID - 1)
+
+#endif
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 68d5ddc3ef35..b3da4b2ea11c 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -45,6 +45,7 @@
  *   6    6       2        2      4    2       N
  */
 
+#include <linux/dsa/mv88e6xxx.h>
 #include <linux/etherdevice.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -164,16 +165,21 @@ static struct sk_buff *dsa_xmit_ll(struct sk_buff *skb, struct net_device *dev,
 			dsa_header[2] &= ~0x10;
 		}
 	} else {
+		struct net_device *br = dp->bridge_dev;
+		u16 vid;
+
+		vid = br ? MV88E6XXX_VID_BRIDGED : MV88E6XXX_VID_STANDALONE;
+
 		skb_push(skb, DSA_HLEN + extra);
 		dsa_alloc_etype_header(skb, DSA_HLEN + extra);
 
-		/* Construct untagged DSA tag. */
+		/* Construct DSA header from untagged frame. */
 		dsa_header = dsa_etype_header_pos_tx(skb) + extra;
 
 		dsa_header[0] = (cmd << 6) | tag_dev;
 		dsa_header[1] = tag_port << 3;
-		dsa_header[2] = 0;
-		dsa_header[3] = 0;
+		dsa_header[2] = vid >> 8;
+		dsa_header[3] = vid & 0xff;
 	}
 
 	return skb;
-- 
cgit v1.2.3


From 8208461d3912e3e97e31bcbd4ce716e4a251a5dd Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Fri, 8 Oct 2021 15:24:27 +0300
Subject: net/mlx5: Add ifc bits to support optional counters

Adding bth_opcode field and the relevant bits. This field will be used
to capture and count congestion notification packets (CNP).

Adding source_vhca_port support bit.
This field will be used to check the capability to use the
source_vhca_port as a match criteria in cases of dual port.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 include/linux/mlx5/mlx5_ifc.h | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 96f5fb2af811..34254dbe7117 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -342,7 +342,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
 	u8         outer_geneve_oam[0x1];
 	u8         outer_geneve_protocol_type[0x1];
 	u8         outer_geneve_opt_len[0x1];
-	u8         reserved_at_1e[0x1];
+	u8         source_vhca_port[0x1];
 	u8         source_eswitch_port[0x1];
 
 	u8         inner_dmac[0x1];
@@ -393,6 +393,14 @@ struct mlx5_ifc_flow_table_fields_supported_bits {
 	u8         metadata_reg_c_0[0x1];
 };
 
+struct mlx5_ifc_flow_table_fields_supported_2_bits {
+	u8         reserved_at_0[0xe];
+	u8         bth_opcode[0x1];
+	u8         reserved_at_f[0x11];
+
+	u8         reserved_at_20[0x60];
+};
+
 struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         ft_support[0x1];
 	u8         reserved_at_1[0x1];
@@ -539,7 +547,7 @@ struct mlx5_ifc_fte_match_set_misc_bits {
 	union mlx5_ifc_gre_key_bits gre_key;
 
 	u8         vxlan_vni[0x18];
-	u8         reserved_at_b8[0x8];
+	u8         bth_opcode[0x8];
 
 	u8         geneve_vni[0x18];
 	u8         reserved_at_d8[0x7];
@@ -756,7 +764,15 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_transmit_sniffer;
 
-	u8         reserved_at_e00[0x1200];
+	u8         reserved_at_e00[0x700];
+
+	struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_receive_rdma;
+
+	u8         reserved_at_1580[0x280];
+
+	struct mlx5_ifc_flow_table_fields_supported_2_bits ft_field_support_2_nic_transmit_rdma;
+
+	u8         reserved_at_1880[0x780];
 
 	u8         sw_steering_nic_rx_action_drop_icm_address[0x40];
 
-- 
cgit v1.2.3


From b8dfed636fc6239396c3a2ae5f812505906cf215 Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Fri, 8 Oct 2021 15:24:28 +0300
Subject: net/mlx5: Add priorities for counters in RDMA namespaces

Add additional flow steering priorities in the RDMA namespace.
This allows adding flow counters to count filtered RDMA traffic and then
continue processing in the regular RDMA steering flow.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 54 +++++++++++++++++++----
 include/linux/mlx5/device.h                       |  2 +
 include/linux/mlx5/fs.h                           |  2 +
 3 files changed, 50 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fe501ba88bea..71a08f84d49d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -99,6 +99,9 @@
 #define LEFTOVERS_NUM_LEVELS 1
 #define LEFTOVERS_NUM_PRIOS 1
 
+#define RDMA_RX_COUNTERS_PRIO_NUM_LEVELS 1
+#define RDMA_TX_COUNTERS_PRIO_NUM_LEVELS 1
+
 #define BY_PASS_PRIO_NUM_LEVELS 1
 #define BY_PASS_MIN_LEVEL (ETHTOOL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\
 			   LEFTOVERS_NUM_PRIOS)
@@ -206,34 +209,63 @@ static struct init_tree_node egress_root_fs = {
 	}
 };
 
-#define RDMA_RX_BYPASS_PRIO 0
-#define RDMA_RX_KERNEL_PRIO 1
+enum {
+	RDMA_RX_COUNTERS_PRIO,
+	RDMA_RX_BYPASS_PRIO,
+	RDMA_RX_KERNEL_PRIO,
+};
+
+#define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS
+#define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1)
+#define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2)
+
 static struct init_tree_node rdma_rx_root_fs = {
 	.type = FS_TYPE_NAMESPACE,
-	.ar_size = 2,
+	.ar_size = 3,
 	.children = (struct init_tree_node[]) {
+		[RDMA_RX_COUNTERS_PRIO] =
+		ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				ADD_MULTIPLE_PRIO(MLX5_RDMA_RX_NUM_COUNTERS_PRIOS,
+						  RDMA_RX_COUNTERS_PRIO_NUM_LEVELS))),
 		[RDMA_RX_BYPASS_PRIO] =
-		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS, 0,
+		ADD_PRIO(0, RDMA_RX_BYPASS_MIN_LEVEL, 0,
 			 FS_CHAINING_CAPS,
 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
 				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_REGULAR_PRIOS,
 						  BY_PASS_PRIO_NUM_LEVELS))),
 		[RDMA_RX_KERNEL_PRIO] =
-		ADD_PRIO(0, MLX5_BY_PASS_NUM_REGULAR_PRIOS + 1, 0,
+		ADD_PRIO(0, RDMA_RX_KERNEL_MIN_LEVEL, 0,
 			 FS_CHAINING_CAPS,
 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_SWITCH_DOMAIN,
 				ADD_MULTIPLE_PRIO(1, 1))),
 	}
 };
 
+enum {
+	RDMA_TX_COUNTERS_PRIO,
+	RDMA_TX_BYPASS_PRIO,
+};
+
+#define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS
+#define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1)
+
 static struct init_tree_node rdma_tx_root_fs = {
 	.type = FS_TYPE_NAMESPACE,
-	.ar_size = 1,
+	.ar_size = 2,
 	.children = (struct init_tree_node[]) {
-		ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+		[RDMA_TX_COUNTERS_PRIO] =
+		ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0,
+			 FS_CHAINING_CAPS,
+			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
+				ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS,
+						  RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))),
+		[RDMA_TX_BYPASS_PRIO] =
+		ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0,
 			 FS_CHAINING_CAPS_RDMA_TX,
 			 ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF,
-				ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+				ADD_MULTIPLE_PRIO(RDMA_TX_BYPASS_MIN_LEVEL,
 						  BY_PASS_PRIO_NUM_LEVELS))),
 	}
 };
@@ -2215,6 +2247,12 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 		prio = RDMA_RX_KERNEL_PRIO;
 	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX) {
 		root_ns = steering->rdma_tx_root_ns;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS) {
+		root_ns = steering->rdma_rx_root_ns;
+		prio = RDMA_RX_COUNTERS_PRIO;
+	} else if (type == MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS) {
+		root_ns = steering->rdma_tx_root_ns;
+		prio = RDMA_TX_COUNTERS_PRIO;
 	} else { /* Must be NIC RX */
 		root_ns = steering->root_ns;
 		prio = type;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 66eaf0aa7f69..ed0230ff9422 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1456,6 +1456,8 @@ static inline u16 mlx5_to_sw_pkey_sz(int pkey_sz)
 	return MLX5_MIN_PKEY_TABLE_SIZE << pkey_sz;
 }
 
+#define MLX5_RDMA_RX_NUM_COUNTERS_PRIOS 2
+#define MLX5_RDMA_TX_NUM_COUNTERS_PRIOS 1
 #define MLX5_BY_PASS_NUM_REGULAR_PRIOS 16
 #define MLX5_BY_PASS_NUM_DONT_TRAP_PRIOS 16
 #define MLX5_BY_PASS_NUM_MULTICAST_PRIOS 1
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0106c67e8ccb..f2c3da2006d9 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -83,6 +83,8 @@ enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_RDMA_RX,
 	MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
 	MLX5_FLOW_NAMESPACE_RDMA_TX,
+	MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS,
+	MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS,
 };
 
 enum {
-- 
cgit v1.2.3


From 0392dd51f9c78d46109a408f27dc820300dcd8bd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 4 Oct 2021 10:10:10 -0400
Subject: SUNRPC: Per-rpc_clnt task PIDs

The current range of RPC task PIDs is 0..65535. That's not adequate
for distinguishing tasks across multiple rpc_clnts running high
throughput workloads.

To help relieve this situation and to reduce the bottleneck of
having a single atomic for assigning all RPC task PIDs, assign task
PIDs per rpc_clnt.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/sched.c          | 12 ++++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index a4661646adc9..267b7aeaf1a6 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -40,6 +40,7 @@ struct rpc_clnt {
 	unsigned int		cl_clid;	/* client id */
 	struct list_head	cl_clients;	/* Global list of clients */
 	struct list_head	cl_tasks;	/* List of tasks */
+	atomic_t		cl_pid;		/* task PID counter */
 	spinlock_t		cl_lock;	/* spinlock */
 	struct rpc_xprt __rcu *	cl_xprt;	/* transport */
 	const struct rpc_procinfo *cl_procinfo;	/* procedure info */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b21457cec8a5..f4f311ea7a66 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -277,9 +277,17 @@ static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
 static void rpc_task_set_debuginfo(struct rpc_task *task)
 {
-	static atomic_t rpc_pid;
+	struct rpc_clnt *clnt = task->tk_client;
 
-	task->tk_pid = atomic_inc_return(&rpc_pid);
+	/* Might be a task carrying a reverse-direction operation */
+	if (!clnt) {
+		static atomic_t rpc_pid;
+
+		task->tk_pid = atomic_inc_return(&rpc_pid);
+		return;
+	}
+
+	task->tk_pid = atomic_inc_return(&clnt->cl_pid);
 }
 #else
 static inline void rpc_task_set_debuginfo(struct rpc_task *task)
-- 
cgit v1.2.3


From 0199215216978b612d4e8be11e878b87bc643033 Mon Sep 17 00:00:00 2001
From: Juhee Kang <claudiajkang@gmail.com>
Date: Sun, 10 Oct 2021 13:03:29 +0900
Subject: mlxsw: spectrum: use netif_is_macsec() instead of open code

Open code which is dev->priv_flags & IFF_MACSEC has already defined as
netif_is_macsec(). So use netif_is_macsec() instead of open code.
This patch doesn't change logic.

Signed-off-by: Juhee Kang <claudiajkang@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 15f4a658e436..0723c1314ea2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -5237,7 +5237,7 @@ static inline void netif_keep_dst(struct net_device *dev)
 static inline bool netif_reduces_vlan_mtu(struct net_device *dev)
 {
 	/* TODO: reserve and use an additional IFF bit, if we get more users */
-	return dev->priv_flags & IFF_MACSEC;
+	return netif_is_macsec(dev);
 }
 
 extern struct pernet_operations __net_initdata loopback_net_ops;
-- 
cgit v1.2.3


From bdac5c2b243f68ec15f8203c3348ae79fee8e8d8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 16 Sep 2021 15:23:20 +0900
Subject: bootconfig: Allocate xbc_data inside xbc_init()

Allocate 'xbc_data' in the xbc_init() so that it does
not need to care about the ownership of the copied
data.

Link: https://lkml.kernel.org/r/163177339986.682366.898762699429769117.stgit@devnote2

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h |  2 +-
 init/main.c                | 13 ++-----------
 lib/bootconfig.c           | 33 +++++++++++++++++++++------------
 tools/bootconfig/main.c    |  6 +++---
 4 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 537e1b991f11..62e09b788172 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -271,7 +271,7 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node,
 }
 
 /* XBC node initializer */
-int __init xbc_init(char *buf, const char **emsg, int *epos);
+int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos);
 
 
 /* XBC cleanup data structures */
diff --git a/init/main.c b/init/main.c
index 81a79a77db46..d894989d86bc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -409,7 +409,7 @@ static void __init setup_boot_config(void)
 	const char *msg;
 	int pos;
 	u32 size, csum;
-	char *data, *copy, *err;
+	char *data, *err;
 	int ret;
 
 	/* Cut out the bootconfig data even if we have no bootconfig option */
@@ -442,16 +442,7 @@ static void __init setup_boot_config(void)
 		return;
 	}
 
-	copy = memblock_alloc(size + 1, SMP_CACHE_BYTES);
-	if (!copy) {
-		pr_err("Failed to allocate memory for bootconfig\n");
-		return;
-	}
-
-	memcpy(copy, data, size);
-	copy[size] = '\0';
-
-	ret = xbc_init(copy, &msg, &pos);
+	ret = xbc_init(data, size, &msg, &pos);
 	if (ret < 0) {
 		if (pos < 0)
 			pr_err("Failed to init bootconfig: %s.\n", msg);
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 5ae248b29373..66b02fddfea8 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -789,6 +789,7 @@ static int __init xbc_verify_tree(void)
  */
 void __init xbc_destroy_all(void)
 {
+	memblock_free_ptr(xbc_data, xbc_data_size);
 	xbc_data = NULL;
 	xbc_data_size = 0;
 	xbc_node_num = 0;
@@ -799,19 +800,20 @@ void __init xbc_destroy_all(void)
 
 /**
  * xbc_init() - Parse given XBC file and build XBC internal tree
- * @buf: boot config text
+ * @data: The boot config text original data
+ * @size: The size of @data
  * @emsg: A pointer of const char * to store the error message
  * @epos: A pointer of int to store the error position
  *
- * This parses the boot config text in @buf. @buf must be a
- * null terminated string and smaller than XBC_DATA_MAX.
+ * This parses the boot config text in @data. @size must be smaller
+ * than XBC_DATA_MAX.
  * Return the number of stored nodes (>0) if succeeded, or -errno
  * if there is any error.
  * In error cases, @emsg will be updated with an error message and
  * @epos will be updated with the error position which is the byte offset
  * of @buf. If the error is not a parser error, @epos will be -1.
  */
-int __init xbc_init(char *buf, const char **emsg, int *epos)
+int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 {
 	char *p, *q;
 	int ret, c;
@@ -824,28 +826,35 @@ int __init xbc_init(char *buf, const char **emsg, int *epos)
 			*emsg = "Bootconfig is already initialized";
 		return -EBUSY;
 	}
-
-	ret = strlen(buf);
-	if (ret > XBC_DATA_MAX - 1 || ret == 0) {
+	if (size > XBC_DATA_MAX || size == 0) {
 		if (emsg)
-			*emsg = ret ? "Config data is too big" :
+			*emsg = size ? "Config data is too big" :
 				"Config data is empty";
 		return -ERANGE;
 	}
 
+	xbc_data = memblock_alloc(size + 1, SMP_CACHE_BYTES);
+	if (!xbc_data) {
+		if (emsg)
+			*emsg = "Failed to allocate bootconfig data";
+		return -ENOMEM;
+	}
+	memcpy(xbc_data, data, size);
+	xbc_data[size] = '\0';
+	xbc_data_size = size + 1;
+
 	xbc_nodes = memblock_alloc(sizeof(struct xbc_node) * XBC_NODE_MAX,
 				   SMP_CACHE_BYTES);
 	if (!xbc_nodes) {
 		if (emsg)
 			*emsg = "Failed to allocate bootconfig nodes";
+		xbc_destroy_all();
 		return -ENOMEM;
 	}
 	memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
-	xbc_data = buf;
-	xbc_data_size = ret + 1;
-	last_parent = NULL;
 
-	p = buf;
+	last_parent = NULL;
+	p = xbc_data;
 	do {
 		q = strpbrk(p, "{}=+;:\n#");
 		if (!q) {
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index fd67496a947f..7269c9e35335 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -229,7 +229,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
 		return -EINVAL;
 	}
 
-	ret = xbc_init(*buf, &msg, NULL);
+	ret = xbc_init(*buf, size, &msg, NULL);
 	/* Wrong data */
 	if (ret < 0) {
 		pr_err("parse error: %s.\n", msg);
@@ -269,7 +269,7 @@ static int init_xbc_with_error(char *buf, int len)
 	if (!copy)
 		return -ENOMEM;
 
-	ret = xbc_init(buf, &msg, &pos);
+	ret = xbc_init(buf, len, &msg, &pos);
 	if (ret < 0)
 		show_xbc_error(copy, msg, pos);
 	free(copy);
@@ -382,7 +382,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	memcpy(data, buf, size);
 
 	/* Check the data format */
-	ret = xbc_init(buf, &msg, &pos);
+	ret = xbc_init(buf, size, &msg, &pos);
 	if (ret < 0) {
 		show_xbc_error(data, msg, pos);
 		free(data);
-- 
cgit v1.2.3


From e306220cb7b7c2948f191414ab06851e143b54c1 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Thu, 16 Sep 2021 15:23:29 +0900
Subject: bootconfig: Add xbc_get_info() for the node information

Add xbc_get_info() API which allows user to get the
number of used xbc_nodes and the size of bootconfig
data. This is also useful for checking the bootconfig
is initialized or not.

Link: https://lkml.kernel.org/r/163177340877.682366.4360676589783197627.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h |  2 ++
 init/main.c                |  1 +
 lib/bootconfig.c           | 21 +++++++++++++++++++++
 tools/bootconfig/main.c    |  1 +
 4 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 62e09b788172..f955bb7eabbb 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -273,6 +273,8 @@ static inline int __init xbc_node_compose_key(struct xbc_node *node,
 /* XBC node initializer */
 int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos);
 
+/* XBC node and size information */
+int __init xbc_get_info(int *node_size, size_t *data_size);
 
 /* XBC cleanup data structures */
 void __init xbc_destroy_all(void);
diff --git a/init/main.c b/init/main.c
index d894989d86bc..afaed805de19 100644
--- a/init/main.c
+++ b/init/main.c
@@ -450,6 +450,7 @@ static void __init setup_boot_config(void)
 			pr_err("Failed to parse bootconfig: %s at %d.\n",
 				msg, pos);
 	} else {
+		xbc_get_info(&ret, NULL);
 		pr_info("Load bootconfig: %d bytes %d nodes\n", size, ret);
 		/* keys starting with "kernel." are passed via cmdline */
 		extra_command_line = xbc_make_cmdline("kernel");
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 66b02fddfea8..b088fe5c0001 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -34,6 +34,27 @@ static int xbc_err_pos __initdata;
 static int open_brace[XBC_DEPTH_MAX] __initdata;
 static int brace_index __initdata;
 
+/**
+ * xbc_get_info() - Get the information of loaded boot config
+ * node_size: A pointer to store the number of nodes.
+ * data_size: A pointer to store the size of bootconfig data.
+ *
+ * Get the number of used nodes in @node_size if it is not NULL,
+ * and the size of bootconfig data in @data_size if it is not NULL.
+ * Return 0 if the boot config is initialized, or return -ENODEV.
+ */
+int __init xbc_get_info(int *node_size, size_t *data_size)
+{
+	if (!xbc_data)
+		return -ENODEV;
+
+	if (node_size)
+		*node_size = xbc_node_num;
+	if (data_size)
+		*data_size = xbc_data_size;
+	return 0;
+}
+
 static int __init xbc_parse_error(const char *msg, const char *p)
 {
 	xbc_err_msg = msg;
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 7269c9e35335..4f2a8d884745 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -391,6 +391,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
 		return ret;
 	}
 	printf("Apply %s to %s\n", xbc_path, path);
+	xbc_get_info(&ret, NULL);
 	printf("\tNumber of nodes: %d\n", ret);
 	printf("\tSize: %u bytes\n", (unsigned int)size);
 	printf("\tChecksum: %d\n", (unsigned int)csum);
-- 
cgit v1.2.3


From 115d4d08aeb942133d025a425dd611092893d774 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 17 Sep 2021 19:02:39 +0900
Subject: bootconfig: Rename xbc_destroy_all() to xbc_exit()

Avoid using this noisy name and use more calm one.
This is just a name change. No functional change.

Link: https://lkml.kernel.org/r/163187295918.2366983.5231840238429996027.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h | 2 +-
 init/main.c                | 2 +-
 lib/bootconfig.c           | 8 ++++----
 tools/bootconfig/main.c    | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index f955bb7eabbb..7eb7a7f8ade7 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -277,7 +277,7 @@ int __init xbc_init(const char *buf, size_t size, const char **emsg, int *epos);
 int __init xbc_get_info(int *node_size, size_t *data_size);
 
 /* XBC cleanup data structures */
-void __init xbc_destroy_all(void);
+void __init xbc_exit(void);
 
 /* Debug dump functions */
 void __init xbc_debug_dump(void);
diff --git a/init/main.c b/init/main.c
index afaed805de19..f1428234e1e4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -462,7 +462,7 @@ static void __init setup_boot_config(void)
 
 static void __init exit_boot_config(void)
 {
-	xbc_destroy_all();
+	xbc_exit();
 }
 
 #else	/* !CONFIG_BOOT_CONFIG */
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index b088fe5c0001..a3ce5a0c3ca4 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -802,13 +802,13 @@ static int __init xbc_verify_tree(void)
 }
 
 /**
- * xbc_destroy_all() - Clean up all parsed bootconfig
+ * xbc_exit() - Clean up all parsed bootconfig
  *
  * This clears all data structures of parsed bootconfig on memory.
  * If you need to reuse xbc_init() with new boot config, you can
  * use this.
  */
-void __init xbc_destroy_all(void)
+void __init xbc_exit(void)
 {
 	memblock_free_ptr(xbc_data, xbc_data_size);
 	xbc_data = NULL;
@@ -869,7 +869,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 	if (!xbc_nodes) {
 		if (emsg)
 			*emsg = "Failed to allocate bootconfig nodes";
-		xbc_destroy_all();
+		xbc_exit();
 		return -ENOMEM;
 	}
 	memset(xbc_nodes, 0, sizeof(struct xbc_node) * XBC_NODE_MAX);
@@ -925,7 +925,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 			*epos = xbc_err_pos;
 		if (emsg)
 			*emsg = xbc_err_msg;
-		xbc_destroy_all();
+		xbc_exit();
 	} else
 		ret = xbc_node_num;
 
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 4f2a8d884745..4252c23bd35d 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -397,7 +397,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	printf("\tChecksum: %d\n", (unsigned int)csum);
 
 	/* TODO: Check the options by schema */
-	xbc_destroy_all();
+	xbc_exit();
 	free(buf);
 
 	/* Remove old boot config if exists */
-- 
cgit v1.2.3


From 9b81c9bfff4651abb28bfa6d83c8b879e467963b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 17 Sep 2021 19:02:53 +0900
Subject: bootconfig: Remove unused debug function

Remove unused xbc_debug_dump() from bootconfig for clean up
the code.

Link: https://lkml.kernel.org/r/163187297371.2366983.12943349701785875450.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h |  3 ---
 lib/bootconfig.c           | 21 ---------------------
 2 files changed, 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 7eb7a7f8ade7..85cdfd381877 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -279,7 +279,4 @@ int __init xbc_get_info(int *node_size, size_t *data_size);
 /* XBC cleanup data structures */
 void __init xbc_exit(void);
 
-/* Debug dump functions */
-void __init xbc_debug_dump(void);
-
 #endif
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index b7e5a32b30d3..953789171858 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -4,15 +4,12 @@
  * Masami Hiramatsu <mhiramat@kernel.org>
  */
 
-#define pr_fmt(fmt)    "bootconfig: " fmt
-
 #include <linux/bootconfig.h>
 #include <linux/bug.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/memblock.h>
-#include <linux/printk.h>
 #include <linux/string.h>
 
 /*
@@ -940,21 +937,3 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 
 	return ret;
 }
-
-/**
- * xbc_debug_dump() - Dump current XBC node list
- *
- * Dump the current XBC node list on printk buffer for debug.
- */
-void __init xbc_debug_dump(void)
-{
-	int i;
-
-	for (i = 0; i < xbc_node_num; i++) {
-		pr_debug("[%d] %s (%s) .next=%d, .child=%d .parent=%d\n", i,
-			xbc_node_get_data(xbc_nodes + i),
-			xbc_node_is_value(xbc_nodes + i) ? "value" : "key",
-			xbc_nodes[i].next, xbc_nodes[i].child,
-			xbc_nodes[i].parent);
-	}
-}
-- 
cgit v1.2.3


From 4f292c4886bfdfc2a7191ef4ff3f7aac69d1cc3f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 17 Sep 2021 19:03:08 +0900
Subject: bootconfig: Replace u16 and u32 with uint16_t and uint32_t

Replace u16 and u32 with uint16_t and uint32_t so
that the tools/bootconfig only needs <stdint.h>.

Link: https://lkml.kernel.org/r/163187298835.2366983.9838262576854319669.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h              | 12 ++++++------
 lib/bootconfig.c                        | 16 ++++++++--------
 tools/bootconfig/include/linux/kernel.h |  4 +---
 tools/bootconfig/main.c                 | 20 ++++++++++----------
 4 files changed, 25 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index 85cdfd381877..a6f8dc51f168 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -25,10 +25,10 @@
  * The checksum will be used with the BOOTCONFIG_MAGIC and the size for
  * embedding the bootconfig in the initrd image.
  */
-static inline __init u32 xbc_calc_checksum(void *data, u32 size)
+static inline __init uint32_t xbc_calc_checksum(void *data, uint32_t size)
 {
 	unsigned char *p = data;
-	u32 ret = 0;
+	uint32_t ret = 0;
 
 	while (size--)
 		ret += *p++;
@@ -38,10 +38,10 @@ static inline __init u32 xbc_calc_checksum(void *data, u32 size)
 
 /* XBC tree node */
 struct xbc_node {
-	u16 next;
-	u16 child;
-	u16 parent;
-	u16 data;
+	uint16_t next;
+	uint16_t child;
+	uint16_t parent;
+	uint16_t data;
 } __attribute__ ((__packed__));
 
 #define XBC_KEY		0
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 953789171858..a2f5f582181d 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -244,7 +244,7 @@ int __init xbc_node_compose_key_after(struct xbc_node *root,
 				      struct xbc_node *node,
 				      char *buf, size_t size)
 {
-	u16 keys[XBC_DEPTH_MAX];
+	uint16_t keys[XBC_DEPTH_MAX];
 	int depth = 0, ret = 0, total = 0;
 
 	if (!node || node == root)
@@ -359,21 +359,21 @@ const char * __init xbc_node_find_next_key_value(struct xbc_node *root,
 
 /* XBC parse and tree build */
 
-static int __init xbc_init_node(struct xbc_node *node, char *data, u32 flag)
+static int __init xbc_init_node(struct xbc_node *node, char *data, uint32_t flag)
 {
 	unsigned long offset = data - xbc_data;
 
 	if (WARN_ON(offset >= XBC_DATA_MAX))
 		return -EINVAL;
 
-	node->data = (u16)offset | flag;
+	node->data = (uint16_t)offset | flag;
 	node->child = 0;
 	node->next = 0;
 
 	return 0;
 }
 
-static struct xbc_node * __init xbc_add_node(char *data, u32 flag)
+static struct xbc_node * __init xbc_add_node(char *data, uint32_t flag)
 {
 	struct xbc_node *node;
 
@@ -403,7 +403,7 @@ static inline __init struct xbc_node *xbc_last_child(struct xbc_node *node)
 	return node;
 }
 
-static struct xbc_node * __init __xbc_add_sibling(char *data, u32 flag, bool head)
+static struct xbc_node * __init __xbc_add_sibling(char *data, uint32_t flag, bool head)
 {
 	struct xbc_node *sib, *node = xbc_add_node(data, flag);
 
@@ -430,17 +430,17 @@ static struct xbc_node * __init __xbc_add_sibling(char *data, u32 flag, bool hea
 	return node;
 }
 
-static inline struct xbc_node * __init xbc_add_sibling(char *data, u32 flag)
+static inline struct xbc_node * __init xbc_add_sibling(char *data, uint32_t flag)
 {
 	return __xbc_add_sibling(data, flag, false);
 }
 
-static inline struct xbc_node * __init xbc_add_head_sibling(char *data, u32 flag)
+static inline struct xbc_node * __init xbc_add_head_sibling(char *data, uint32_t flag)
 {
 	return __xbc_add_sibling(data, flag, true);
 }
 
-static inline __init struct xbc_node *xbc_add_child(char *data, u32 flag)
+static inline __init struct xbc_node *xbc_add_child(char *data, uint32_t flag)
 {
 	struct xbc_node *node = xbc_add_sibling(data, flag);
 
diff --git a/tools/bootconfig/include/linux/kernel.h b/tools/bootconfig/include/linux/kernel.h
index c4854b8e7023..39f306c18dd0 100644
--- a/tools/bootconfig/include/linux/kernel.h
+++ b/tools/bootconfig/include/linux/kernel.h
@@ -3,11 +3,9 @@
 #define _SKC_LINUX_KERNEL_H
 
 #include <stdlib.h>
+#include <stdint.h>
 #include <stdbool.h>
 
-typedef unsigned short u16;
-typedef unsigned int   u32;
-
 #define unlikely(cond)	(cond)
 
 #define __init
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index adc6c6e73fa9..fb7c9fb953d7 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -178,7 +178,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
 {
 	struct stat stat;
 	int ret;
-	u32 size = 0, csum = 0, rcsum;
+	uint32_t size = 0, csum = 0, rcsum;
 	char magic[BOOTCONFIG_MAGIC_LEN];
 	const char *msg;
 
@@ -202,11 +202,11 @@ static int load_xbc_from_initrd(int fd, char **buf)
 	if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0)
 		return pr_errno("Failed to lseek for size", -errno);
 
-	if (read(fd, &size, sizeof(u32)) < 0)
+	if (read(fd, &size, sizeof(uint32_t)) < 0)
 		return pr_errno("Failed to read size", -errno);
 	size = le32toh(size);
 
-	if (read(fd, &csum, sizeof(u32)) < 0)
+	if (read(fd, &csum, sizeof(uint32_t)) < 0)
 		return pr_errno("Failed to read checksum", -errno);
 	csum = le32toh(csum);
 
@@ -364,7 +364,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	size_t total_size;
 	struct stat stat;
 	const char *msg;
-	u32 size, csum;
+	uint32_t size, csum;
 	int pos, pad;
 	int ret, fd;
 
@@ -378,7 +378,7 @@ static int apply_xbc(const char *path, const char *xbc_path)
 
 	/* Backup the bootconfig data */
 	data = calloc(size + BOOTCONFIG_ALIGN +
-		      sizeof(u32) + sizeof(u32) + BOOTCONFIG_MAGIC_LEN, 1);
+		      sizeof(uint32_t) + sizeof(uint32_t) + BOOTCONFIG_MAGIC_LEN, 1);
 	if (!data)
 		return -ENOMEM;
 	memcpy(data, buf, size);
@@ -426,17 +426,17 @@ static int apply_xbc(const char *path, const char *xbc_path)
 	}
 
 	/* To align up the total size to BOOTCONFIG_ALIGN, get padding size */
-	total_size = stat.st_size + size + sizeof(u32) * 2 + BOOTCONFIG_MAGIC_LEN;
+	total_size = stat.st_size + size + sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN;
 	pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size;
 	size += pad;
 
 	/* Add a footer */
 	p = data + size;
-	*(u32 *)p = htole32(size);
-	p += sizeof(u32);
+	*(uint32_t *)p = htole32(size);
+	p += sizeof(uint32_t);
 
-	*(u32 *)p = htole32(csum);
-	p += sizeof(u32);
+	*(uint32_t *)p = htole32(csum);
+	p += sizeof(uint32_t);
 
 	memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
 	p += BOOTCONFIG_MAGIC_LEN;
-- 
cgit v1.2.3


From 4ee1b4cac236650979a5d9b745bb0a83efde3a46 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Fri, 17 Sep 2021 19:03:16 +0900
Subject: bootconfig: Cleanup dummy headers in tools/bootconfig

Cleanup dummy headers in tools/bootconfig/include except
for tools/bootconfig/include/linux/bootconfig.h.
For this change, I use __KERNEL__ macro to split kernel
header #include and introduce xbc_alloc_mem() and
xbc_free_mem().

Link: https://lkml.kernel.org/r/163187299574.2366983.18371329724128746091.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/bootconfig.h                  | 10 +++++++
 lib/bootconfig.c                            | 43 +++++++++++++++++++++++----
 tools/bootconfig/Makefile                   |  2 +-
 tools/bootconfig/include/linux/bootconfig.h | 45 ++++++++++++++++++++++++++++-
 tools/bootconfig/include/linux/bug.h        | 12 --------
 tools/bootconfig/include/linux/ctype.h      |  7 -----
 tools/bootconfig/include/linux/errno.h      |  7 -----
 tools/bootconfig/include/linux/kernel.h     | 14 ---------
 tools/bootconfig/include/linux/memblock.h   | 11 -------
 tools/bootconfig/include/linux/string.h     | 32 --------------------
 tools/bootconfig/main.c                     |  1 -
 11 files changed, 93 insertions(+), 91 deletions(-)
 delete mode 100644 tools/bootconfig/include/linux/bug.h
 delete mode 100644 tools/bootconfig/include/linux/ctype.h
 delete mode 100644 tools/bootconfig/include/linux/errno.h
 delete mode 100644 tools/bootconfig/include/linux/kernel.h
 delete mode 100644 tools/bootconfig/include/linux/memblock.h
 delete mode 100644 tools/bootconfig/include/linux/string.h

(limited to 'include/linux')

diff --git a/include/linux/bootconfig.h b/include/linux/bootconfig.h
index a6f8dc51f168..a4665c7ab07c 100644
--- a/include/linux/bootconfig.h
+++ b/include/linux/bootconfig.h
@@ -7,8 +7,18 @@
  * Author: Masami Hiramatsu <mhiramat@kernel.org>
  */
 
+#ifdef __KERNEL__
 #include <linux/kernel.h>
 #include <linux/types.h>
+#else /* !__KERNEL__ */
+/*
+ * NOTE: This is only for tools/bootconfig, because tools/bootconfig will
+ * run the parser sanity test.
+ * This does NOT mean linux/bootconfig.h is available in the user space.
+ * However, if you change this file, please make sure the tools/bootconfig
+ * has no issue on building and running.
+ */
+#endif
 
 #define BOOTCONFIG_MAGIC	"#BOOTCONFIG\n"
 #define BOOTCONFIG_MAGIC_LEN	12
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index a2f5f582181d..a056ae137750 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -4,6 +4,7 @@
  * Masami Hiramatsu <mhiramat@kernel.org>
  */
 
+#ifdef __KERNEL__
 #include <linux/bootconfig.h>
 #include <linux/bug.h>
 #include <linux/ctype.h>
@@ -11,6 +12,16 @@
 #include <linux/kernel.h>
 #include <linux/memblock.h>
 #include <linux/string.h>
+#else /* !__KERNEL__ */
+/*
+ * NOTE: This is only for tools/bootconfig, because tools/bootconfig will
+ * run the parser sanity test.
+ * This does NOT mean lib/bootconfig.c is available in the user space.
+ * However, if you change this file, please make sure the tools/bootconfig
+ * has no issue on building and running.
+ */
+#include <linux/bootconfig.h>
+#endif
 
 /*
  * Extra Boot Config (XBC) is given as tree-structured ascii text of
@@ -31,6 +42,29 @@ static int xbc_err_pos __initdata;
 static int open_brace[XBC_DEPTH_MAX] __initdata;
 static int brace_index __initdata;
 
+#ifdef __KERNEL__
+static inline void *xbc_alloc_mem(size_t size)
+{
+	return memblock_alloc(size, SMP_CACHE_BYTES);
+}
+
+static inline void xbc_free_mem(void *addr, size_t size)
+{
+	memblock_free_ptr(addr, size);
+}
+
+#else /* !__KERNEL__ */
+
+static inline void *xbc_alloc_mem(size_t size)
+{
+	return malloc(size);
+}
+
+static inline void xbc_free_mem(void *addr, size_t size)
+{
+	free(addr);
+}
+#endif
 /**
  * xbc_get_info() - Get the information of loaded boot config
  * node_size: A pointer to store the number of nodes.
@@ -859,11 +893,11 @@ static int __init xbc_parse_tree(void)
  */
 void __init xbc_exit(void)
 {
-	memblock_free_ptr(xbc_data, xbc_data_size);
+	xbc_free_mem(xbc_data, xbc_data_size);
 	xbc_data = NULL;
 	xbc_data_size = 0;
 	xbc_node_num = 0;
-	memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
+	xbc_free_mem(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
 	xbc_nodes = NULL;
 	brace_index = 0;
 }
@@ -902,7 +936,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 		return -ERANGE;
 	}
 
-	xbc_data = memblock_alloc(size + 1, SMP_CACHE_BYTES);
+	xbc_data = xbc_alloc_mem(size + 1);
 	if (!xbc_data) {
 		if (emsg)
 			*emsg = "Failed to allocate bootconfig data";
@@ -912,8 +946,7 @@ int __init xbc_init(const char *data, size_t size, const char **emsg, int *epos)
 	xbc_data[size] = '\0';
 	xbc_data_size = size + 1;
 
-	xbc_nodes = memblock_alloc(sizeof(struct xbc_node) * XBC_NODE_MAX,
-				   SMP_CACHE_BYTES);
+	xbc_nodes = xbc_alloc_mem(sizeof(struct xbc_node) * XBC_NODE_MAX);
 	if (!xbc_nodes) {
 		if (emsg)
 			*emsg = "Failed to allocate bootconfig nodes";
diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile
index f1eec3ccbe18..566c3e0ee561 100644
--- a/tools/bootconfig/Makefile
+++ b/tools/bootconfig/Makefile
@@ -17,7 +17,7 @@ ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
 
 all: $(ALL_PROGRAMS) test
 
-$(OUTPUT)bootconfig: main.c $(LIBSRC)
+$(OUTPUT)bootconfig: main.c include/linux/bootconfig.h $(LIBSRC)
 	$(CC) $(filter %.c,$^) $(CFLAGS) -o $@
 
 test: $(ALL_PROGRAMS) test-bootconfig.sh
diff --git a/tools/bootconfig/include/linux/bootconfig.h b/tools/bootconfig/include/linux/bootconfig.h
index de7f30f99af3..6784296a0692 100644
--- a/tools/bootconfig/include/linux/bootconfig.h
+++ b/tools/bootconfig/include/linux/bootconfig.h
@@ -2,10 +2,53 @@
 #ifndef _BOOTCONFIG_LINUX_BOOTCONFIG_H
 #define _BOOTCONFIG_LINUX_BOOTCONFIG_H
 
-#include "../../../../include/linux/bootconfig.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+
 
 #ifndef fallthrough
 # define fallthrough
 #endif
 
+#define WARN_ON(cond)	\
+	((cond) ? printf("Internal warning(%s:%d, %s): %s\n",	\
+			__FILE__, __LINE__, __func__, #cond) : 0)
+
+#define unlikely(cond)	(cond)
+
+/* Copied from lib/string.c */
+static inline char *skip_spaces(const char *str)
+{
+	while (isspace(*str))
+		++str;
+	return (char *)str;
+}
+
+static inline char *strim(char *s)
+{
+	size_t size;
+	char *end;
+
+	size = strlen(s);
+	if (!size)
+		return s;
+
+	end = s + size - 1;
+	while (end >= s && isspace(*end))
+		end--;
+	*(end + 1) = '\0';
+
+	return skip_spaces(s);
+}
+
+#define __init
+#define __initdata
+
+#include "../../../../include/linux/bootconfig.h"
+
 #endif
diff --git a/tools/bootconfig/include/linux/bug.h b/tools/bootconfig/include/linux/bug.h
deleted file mode 100644
index 7b65a389c0dd..000000000000
--- a/tools/bootconfig/include/linux/bug.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _SKC_LINUX_BUG_H
-#define _SKC_LINUX_BUG_H
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#define WARN_ON(cond)	\
-	((cond) ? printf("Internal warning(%s:%d, %s): %s\n",	\
-			__FILE__, __LINE__, __func__, #cond) : 0)
-
-#endif
diff --git a/tools/bootconfig/include/linux/ctype.h b/tools/bootconfig/include/linux/ctype.h
deleted file mode 100644
index c56ecc136448..000000000000
--- a/tools/bootconfig/include/linux/ctype.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _SKC_LINUX_CTYPE_H
-#define _SKC_LINUX_CTYPE_H
-
-#include <ctype.h>
-
-#endif
diff --git a/tools/bootconfig/include/linux/errno.h b/tools/bootconfig/include/linux/errno.h
deleted file mode 100644
index 5d9f91ec2fda..000000000000
--- a/tools/bootconfig/include/linux/errno.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _SKC_LINUX_ERRNO_H
-#define _SKC_LINUX_ERRNO_H
-
-#include <asm/errno.h>
-
-#endif
diff --git a/tools/bootconfig/include/linux/kernel.h b/tools/bootconfig/include/linux/kernel.h
deleted file mode 100644
index 39f306c18dd0..000000000000
--- a/tools/bootconfig/include/linux/kernel.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _SKC_LINUX_KERNEL_H
-#define _SKC_LINUX_KERNEL_H
-
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdbool.h>
-
-#define unlikely(cond)	(cond)
-
-#define __init
-#define __initdata
-
-#endif
diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h
deleted file mode 100644
index f2e506f7d57f..000000000000
--- a/tools/bootconfig/include/linux/memblock.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _XBC_LINUX_MEMBLOCK_H
-#define _XBC_LINUX_MEMBLOCK_H
-
-#include <stdlib.h>
-
-#define SMP_CACHE_BYTES	0
-#define memblock_alloc(size, align)	malloc(size)
-#define memblock_free_ptr(paddr, size)	free(paddr)
-
-#endif
diff --git a/tools/bootconfig/include/linux/string.h b/tools/bootconfig/include/linux/string.h
deleted file mode 100644
index 8267af75153a..000000000000
--- a/tools/bootconfig/include/linux/string.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _SKC_LINUX_STRING_H
-#define _SKC_LINUX_STRING_H
-
-#include <string.h>
-
-/* Copied from lib/string.c */
-static inline char *skip_spaces(const char *str)
-{
-	while (isspace(*str))
-		++str;
-	return (char *)str;
-}
-
-static inline char *strim(char *s)
-{
-	size_t size;
-	char *end;
-
-	size = strlen(s);
-	if (!size)
-		return s;
-
-	end = s + size - 1;
-	while (end >= s && isspace(*end))
-		end--;
-	*(end + 1) = '\0';
-
-	return skip_spaces(s);
-}
-
-#endif
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index fb7c9fb953d7..156b62a163c5 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -12,7 +12,6 @@
 #include <errno.h>
 #include <endian.h>
 
-#include <linux/kernel.h>
 #include <linux/bootconfig.h>
 
 #define pr_err(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
-- 
cgit v1.2.3


From bf2928c7a284e31f9f91a004b5dca09c522157c0 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 10 Sep 2021 08:22:06 +0200
Subject: PCI/VPD: Add pci_read/write_vpd_any()

In certain cases we need a variant of pci_read_vpd()/pci_write_vpd() that
does not check against dev->vpd.len. Such cases are:

  - Reading VPD if dev->vpd.len isn't set yet (in pci_vpd_size())

  - Devices that map non-VPD information to arbitrary places in VPD address
    space (example: Chelsio T3 EEPROM write-protect flag)

Therefore add pci_read_vpd_any() and pci_write_vpd_any() that check against
PCI_VPD_MAX_SIZE only.

Link: https://lore.kernel.org/r/93ecce28-a158-f02a-d134-8afcaced8efe@gmail.com
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/pci/vpd.c   | 72 +++++++++++++++++++++++++++++++++++++----------------
 include/linux/pci.h |  2 ++
 2 files changed, 52 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c
index 4be24890132e..0a804715d98e 100644
--- a/drivers/pci/vpd.c
+++ b/drivers/pci/vpd.c
@@ -156,9 +156,10 @@ static int pci_vpd_wait(struct pci_dev *dev, bool set)
 }
 
 static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count,
-			    void *arg)
+			    void *arg, bool check_size)
 {
 	struct pci_vpd *vpd = &dev->vpd;
+	unsigned int max_len = check_size ? vpd->len : PCI_VPD_MAX_SIZE;
 	int ret = 0;
 	loff_t end = pos + count;
 	u8 *buf = arg;
@@ -169,11 +170,11 @@ static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count,
 	if (pos < 0)
 		return -EINVAL;
 
-	if (pos > vpd->len)
+	if (pos >= max_len)
 		return 0;
 
-	if (end > vpd->len) {
-		end = vpd->len;
+	if (end > max_len) {
+		end = max_len;
 		count = end - pos;
 	}
 
@@ -217,9 +218,10 @@ static ssize_t pci_vpd_read(struct pci_dev *dev, loff_t pos, size_t count,
 }
 
 static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count,
-			     const void *arg)
+			     const void *arg, bool check_size)
 {
 	struct pci_vpd *vpd = &dev->vpd;
+	unsigned int max_len = check_size ? vpd->len : PCI_VPD_MAX_SIZE;
 	const u8 *buf = arg;
 	loff_t end = pos + count;
 	int ret = 0;
@@ -230,7 +232,7 @@ static ssize_t pci_vpd_write(struct pci_dev *dev, loff_t pos, size_t count,
 	if (pos < 0 || (pos & 3) || (count & 3))
 		return -EINVAL;
 
-	if (end > vpd->len)
+	if (end > max_len)
 		return -EINVAL;
 
 	if (mutex_lock_killable(&vpd->lock))
@@ -381,6 +383,24 @@ static int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
 	return -ENOENT;
 }
 
+static ssize_t __pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf,
+			      bool check_size)
+{
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		dev = pci_get_func0_dev(dev);
+		if (!dev)
+			return -ENODEV;
+
+		ret = pci_vpd_read(dev, pos, count, buf, check_size);
+		pci_dev_put(dev);
+		return ret;
+	}
+
+	return pci_vpd_read(dev, pos, count, buf, check_size);
+}
+
 /**
  * pci_read_vpd - Read one entry from Vital Product Data
  * @dev:	PCI device struct
@@ -389,6 +409,20 @@ static int pci_vpd_find_info_keyword(const u8 *buf, unsigned int off,
  * @buf:	pointer to where to store result
  */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
+{
+	return __pci_read_vpd(dev, pos, count, buf, true);
+}
+EXPORT_SYMBOL(pci_read_vpd);
+
+/* Same, but allow to access any address */
+ssize_t pci_read_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
+{
+	return __pci_read_vpd(dev, pos, count, buf, false);
+}
+EXPORT_SYMBOL(pci_read_vpd_any);
+
+static ssize_t __pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count,
+			       const void *buf, bool check_size)
 {
 	ssize_t ret;
 
@@ -397,14 +431,13 @@ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf)
 		if (!dev)
 			return -ENODEV;
 
-		ret = pci_vpd_read(dev, pos, count, buf);
+		ret = pci_vpd_write(dev, pos, count, buf, check_size);
 		pci_dev_put(dev);
 		return ret;
 	}
 
-	return pci_vpd_read(dev, pos, count, buf);
+	return pci_vpd_write(dev, pos, count, buf, check_size);
 }
-EXPORT_SYMBOL(pci_read_vpd);
 
 /**
  * pci_write_vpd - Write entry to Vital Product Data
@@ -415,22 +448,17 @@ EXPORT_SYMBOL(pci_read_vpd);
  */
 ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf)
 {
-	ssize_t ret;
-
-	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
-		dev = pci_get_func0_dev(dev);
-		if (!dev)
-			return -ENODEV;
-
-		ret = pci_vpd_write(dev, pos, count, buf);
-		pci_dev_put(dev);
-		return ret;
-	}
-
-	return pci_vpd_write(dev, pos, count, buf);
+	return __pci_write_vpd(dev, pos, count, buf, true);
 }
 EXPORT_SYMBOL(pci_write_vpd);
 
+/* Same, but allow to access any address */
+ssize_t pci_write_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, const void *buf)
+{
+	return __pci_write_vpd(dev, pos, count, buf, false);
+}
+EXPORT_SYMBOL(pci_write_vpd_any);
+
 int pci_vpd_find_ro_info_keyword(const void *buf, unsigned int len,
 				 const char *kw, unsigned int *size)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..9649bd9e468d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1350,6 +1350,8 @@ void pci_unlock_rescan_remove(void);
 /* Vital Product Data routines */
 ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
 ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
+ssize_t pci_read_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
+ssize_t pci_write_vpd_any(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
 
 /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
 resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx);
-- 
cgit v1.2.3


From f614fb60a1983819e83796198de2b607fba75e99 Mon Sep 17 00:00:00 2001
From: Wenbin Mei <wenbin.mei@mediatek.com>
Date: Fri, 17 Sep 2021 20:48:02 +0800
Subject: mmc: core: Add host specific tuning support for eMMC HS400 mode

This adds a ->execute_hs400_tuning() host callback to enable optional
support for host specific tuning for eMMC HS400 mode. Additionally, share
mmc_get_ext_csd() through the public host headerfile, to allow it to be
used by the host drivers, which is needed to support the HS400 tuning.

Signed-off-by: Wenbin Mei <wenbin.mei@mediatek.com>
Link: https://lore.kernel.org/r/20210917124803.22871-3-wenbin.mei@mediatek.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/core/mmc.c     | 8 ++++++++
 drivers/mmc/core/mmc_ops.h | 1 -
 include/linux/mmc/host.h   | 4 ++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 29e58ffae379..b1c1716dacf0 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -1224,6 +1224,14 @@ static int mmc_select_hs400(struct mmc_card *card)
 	mmc_set_timing(host, MMC_TIMING_MMC_HS400);
 	mmc_set_bus_speed(card);
 
+	if (host->ops->execute_hs400_tuning) {
+		mmc_retune_disable(host);
+		err = host->ops->execute_hs400_tuning(host, card);
+		mmc_retune_enable(host);
+		if (err)
+			goto out_err;
+	}
+
 	if (host->ops->hs400_complete)
 		host->ops->hs400_complete(host);
 
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index ae25ffc2e870..e5e94567a9a9 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -38,7 +38,6 @@ int mmc_spi_read_ocr(struct mmc_host *host, int highcap, u32 *ocrp);
 int mmc_spi_set_crc(struct mmc_host *host, int use_crc);
 int mmc_bus_test(struct mmc_card *card, u8 bus_width);
 int mmc_can_ext_csd(struct mmc_card *card);
-int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
 int mmc_switch_status(struct mmc_card *card, bool crc_err_fatal);
 bool mmc_prepare_busy_cmd(struct mmc_host *host, struct mmc_command *cmd,
 			  unsigned int timeout_ms);
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0c0c9a0fdf57..b6c082e2664c 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -162,6 +162,9 @@ struct mmc_host_ops {
 	/* Prepare HS400 target operating frequency depending host driver */
 	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
 
+	/* Execute HS400 tuning depending host driver */
+	int	(*execute_hs400_tuning)(struct mmc_host *host, struct mmc_card *card);
+
 	/* Prepare switch to DDR during the HS400 init sequence */
 	int	(*hs400_prepare_ddr)(struct mmc_host *host);
 
@@ -634,5 +637,6 @@ static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
 
 int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
 int mmc_send_abort_tuning(struct mmc_host *host, u32 opcode);
+int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
 
 #endif /* LINUX_MMC_HOST_H */
-- 
cgit v1.2.3


From 0ae93b99beb283438aa571a6add4eab0c077d576 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 7 Oct 2021 16:17:24 -0400
Subject: SUNRPC: Simplify the SVC dispatch code path

Micro-optimization: The last user of the generic SVC dispatch code
path has been removed, so svc_process_common() can be simplified.
This declutters the hot path so that the by-far most common case
(a dispatch function exists) is made the /only/ path.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc.h |  5 +----
 net/sunrpc/svc.c           | 51 ++--------------------------------------------
 2 files changed, 3 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6263410c948a..4205a6ef4770 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -443,10 +443,7 @@ struct svc_version {
 	/* Need xprt with congestion control */
 	bool			vs_need_cong_ctrl;
 
-	/* Override dispatch function (e.g. when caching replies).
-	 * A return value of 0 means drop the request. 
-	 * vs_dispatch == NULL means use default dispatcher.
-	 */
+	/* Dispatch function */
 	int			(*vs_dispatch)(struct svc_rqst *, __be32 *);
 };
 
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 08ca797bb8a4..e0dd6e6a4602 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1186,45 +1186,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
 static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
 #endif
 
-static int
-svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
-{
-	struct kvec *argv = &rqstp->rq_arg.head[0];
-	struct kvec *resv = &rqstp->rq_res.head[0];
-	const struct svc_procedure *procp = rqstp->rq_procinfo;
-
-	/*
-	 * Decode arguments
-	 * XXX: why do we ignore the return value?
-	 */
-	if (procp->pc_decode &&
-	    !procp->pc_decode(rqstp, argv->iov_base)) {
-		*statp = rpc_garbage_args;
-		return 1;
-	}
-
-	*statp = procp->pc_func(rqstp);
-
-	if (*statp == rpc_drop_reply ||
-	    test_bit(RQ_DROPME, &rqstp->rq_flags))
-		return 0;
-
-	if (rqstp->rq_auth_stat != rpc_auth_ok)
-		return 1;
-
-	if (*statp != rpc_success)
-		return 1;
-
-	/* Encode reply */
-	if (procp->pc_encode &&
-	    !procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
-		dprintk("svc: failed to encode reply\n");
-		/* serv->sv_stats->rpcsystemerr++; */
-		*statp = rpc_system_err;
-	}
-	return 1;
-}
-
 __be32
 svc_generic_init_request(struct svc_rqst *rqstp,
 		const struct svc_program *progp,
@@ -1392,16 +1353,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 		svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
 
 	/* Call the function that processes the request. */
-	if (!process.dispatch) {
-		if (!svc_generic_dispatch(rqstp, statp))
-			goto release_dropit;
-		if (*statp == rpc_garbage_args)
-			goto err_garbage;
-	} else {
-		dprintk("svc: calling dispatcher\n");
-		if (!process.dispatch(rqstp, statp))
-			goto release_dropit; /* Release reply info */
-	}
+	if (!process.dispatch(rqstp, statp))
+		goto release_dropit;
 
 	if (rqstp->rq_auth_stat != rpc_auth_ok)
 		goto err_release_bad_auth;
-- 
cgit v1.2.3


From 0bc73ad46a76ed6ece4dcacb28858e7b38561e1c Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 26 Sep 2021 17:55:41 +0300
Subject: net/mlx5e: Mutually exclude RX-FCS and RX-port-timestamp

Due to current HW arch limitations, RX-FCS (scattering FCS frame field
to software) and RX-port-timestamp (improved timestamp accuracy on the
receive side) can't work together.
RX-port-timestamp is not controlled by the user and it is enabled by
default when supported by the HW/FW.
This patch sets RX-port-timestamp opposite to RX-FCS configuration.

Fixes: 102722fc6832 ("net/mlx5e: Add support for RXFCS feature flag")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 57 +++++++++++++++++++++--
 include/linux/mlx5/mlx5_ifc.h                     | 10 +++-
 2 files changed, 60 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 336aa07313da..09c8b71b186c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3325,20 +3325,67 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable)
 	return mlx5_set_port_fcs(mdev, !enable);
 }
 
+static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
+{
+	u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {};
+	bool supported, curr_state;
+	int err;
+
+	if (!MLX5_CAP_GEN(mdev, ports_check))
+		return 0;
+
+	err = mlx5_query_ports_check(mdev, in, sizeof(in));
+	if (err)
+		return err;
+
+	supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap);
+	curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc);
+
+	if (!supported || enable == curr_state)
+		return 0;
+
+	MLX5_SET(pcmr_reg, in, local_port, 1);
+	MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable);
+
+	return mlx5_set_ports_check(mdev, in, sizeof(in));
+}
+
 static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_channels *chs = &priv->channels;
+	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
 	mutex_lock(&priv->state_lock);
 
-	priv->channels.params.scatter_fcs_en = enable;
-	err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable);
-	if (err)
-		priv->channels.params.scatter_fcs_en = !enable;
+	if (enable) {
+		err = mlx5e_set_rx_port_ts(mdev, false);
+		if (err)
+			goto out;
 
-	mutex_unlock(&priv->state_lock);
+		chs->params.scatter_fcs_en = true;
+		err = mlx5e_modify_channels_scatter_fcs(chs, true);
+		if (err) {
+			chs->params.scatter_fcs_en = false;
+			mlx5e_set_rx_port_ts(mdev, true);
+		}
+	} else {
+		chs->params.scatter_fcs_en = false;
+		err = mlx5e_modify_channels_scatter_fcs(chs, false);
+		if (err) {
+			chs->params.scatter_fcs_en = true;
+			goto out;
+		}
+		err = mlx5e_set_rx_port_ts(mdev, true);
+		if (err) {
+			mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err);
+			err = 0;
+		}
+	}
 
+out:
+	mutex_unlock(&priv->state_lock);
 	return err;
 }
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3638d09ba77..993204a6c1a1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9475,16 +9475,22 @@ struct mlx5_ifc_pcmr_reg_bits {
 	u8         reserved_at_0[0x8];
 	u8         local_port[0x8];
 	u8         reserved_at_10[0x10];
+
 	u8         entropy_force_cap[0x1];
 	u8         entropy_calc_cap[0x1];
 	u8         entropy_gre_calc_cap[0x1];
-	u8         reserved_at_23[0x1b];
+	u8         reserved_at_23[0xf];
+	u8         rx_ts_over_crc_cap[0x1];
+	u8         reserved_at_33[0xb];
 	u8         fcs_cap[0x1];
 	u8         reserved_at_3f[0x1];
+
 	u8         entropy_force[0x1];
 	u8         entropy_calc[0x1];
 	u8         entropy_gre_calc[0x1];
-	u8         reserved_at_43[0x1b];
+	u8         reserved_at_43[0xf];
+	u8         rx_ts_over_crc[0x1];
+	u8         reserved_at_53[0xb];
 	u8         fcs_chk[0x1];
 	u8         reserved_at_5f[0x1];
 };
-- 
cgit v1.2.3


From 8e9028b3790ddb02c407e1a4da0ab7cf82790e7e Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 12 Oct 2021 15:42:59 -0500
Subject: PCI: Return NULL for to_pci_driver(NULL)

to_pci_driver() takes a pointer to a struct device_driver and uses
container_of() to find the struct pci_driver that contains it.

If given a NULL pointer to a struct device_driver, return a NULL pci_driver
pointer instead of applying container_of() to NULL.

This simplifies callers that would otherwise have to check for a NULL
pointer first.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..a2d62165a7f7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -900,7 +900,10 @@ struct pci_driver {
 	struct pci_dynids	dynids;
 };
 
-#define	to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
+static inline struct pci_driver *to_pci_driver(struct device_driver *drv)
+{
+    return drv ? container_of(drv, struct pci_driver, driver) : NULL;
+}
 
 /**
  * PCI_DEVICE - macro used to describe a specific PCI device
-- 
cgit v1.2.3


From 28da0555c3b542d605e4ca26eea6a740cf2c9174 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 22 Sep 2021 17:37:25 +0300
Subject: net: dsa: move sja1110_process_meta_tstamp inside the tagging
 protocol driver

The problem is that DSA tagging protocols really must not depend on the
switch driver, because this creates a circular dependency at insmod
time, and the switch driver will effectively not load when the tagging
protocol driver is missing.

The code was structured in the way it was for a reason, though. The DSA
driver-facing API for PTP timestamping relies on the assumption that
two-step TX timestamps are provided by the hardware in an out-of-band
manner, typically by raising an interrupt and making that timestamp
available inside some sort of FIFO which is to be accessed over
SPI/MDIO/etc.

So the API puts .port_txtstamp into dsa_switch_ops, because it is
expected that the switch driver needs to save some state (like put the
skb into a queue until its TX timestamp arrives).

On SJA1110, TX timestamps are provided by the switch as Ethernet
packets, so this makes them be received and processed by the tagging
protocol driver. This in itself is great, because the timestamps are
full 64-bit and do not require reconstruction, and since Ethernet is the
fastest I/O method available to/from the switch, PTP timestamps arrive
very quickly, no matter how bottlenecked the SPI connection is, because
SPI interaction is not needed at all.

DSA's code structure and strict isolation between the tagging protocol
driver and the switch driver break the natural code organization.

When the tagging protocol driver receives a packet which is classified
as a metadata packet containing timestamps, it passes those timestamps
one by one to the switch driver, which then proceeds to compare them
based on the recorded timestamp ID that was generated in .port_txtstamp.

The communication between the tagging protocol and the switch driver is
done through a method exported by the switch driver, sja1110_process_meta_tstamp.
To satisfy build requirements, we force a dependency to build the
tagging protocol driver as a module when the switch driver is a module.
However, as explained in the first paragraph, that causes the circular
dependency.

To solve this, move the skb queue from struct sja1105_private :: struct
sja1105_ptp_data to struct sja1105_private :: struct sja1105_tagger_data.
The latter is a data structure for which hacks have already been put
into place to be able to create persistent storage per switch that is
accessible from the tagging protocol driver (see sja1105_setup_ports).

With the skb queue directly accessible from the tagging protocol driver,
we can now move sja1110_process_meta_tstamp into the tagging driver
itself, and avoid exporting a symbol.

Fixes: 566b18c8b752 ("net: dsa: sja1105: implement TX timestamping for SJA1110")
Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_ptp.c | 45 +++++------------------------------
 drivers/net/dsa/sja1105/sja1105_ptp.h | 19 ---------------
 include/linux/dsa/sja1105.h           | 29 +++++++++++-----------
 net/dsa/tag_sja1105.c                 | 43 +++++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 691f6dd7e669..54396992a919 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -64,6 +64,7 @@ enum sja1105_ptp_clk_mode {
 static int sja1105_change_rxtstamping(struct sja1105_private *priv,
 				      bool on)
 {
+	struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 	struct sja1105_general_params_entry *general_params;
 	struct sja1105_table *table;
@@ -79,7 +80,7 @@ static int sja1105_change_rxtstamping(struct sja1105_private *priv,
 		priv->tagger_data.stampable_skb = NULL;
 	}
 	ptp_cancel_worker_sync(ptp_data->clock);
-	skb_queue_purge(&ptp_data->skb_txtstamp_queue);
+	skb_queue_purge(&tagger_data->skb_txtstamp_queue);
 	skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
 
 	return sja1105_static_config_reload(priv, SJA1105_RX_HWTSTAMPING);
@@ -452,40 +453,6 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
 	return priv->info->rxtstamp(ds, port, skb);
 }
 
-void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
-				 enum sja1110_meta_tstamp dir, u64 tstamp)
-{
-	struct sja1105_private *priv = ds->priv;
-	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
-	struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
-	struct skb_shared_hwtstamps shwt = {0};
-
-	/* We don't care about RX timestamps on the CPU port */
-	if (dir == SJA1110_META_TSTAMP_RX)
-		return;
-
-	spin_lock(&ptp_data->skb_txtstamp_queue.lock);
-
-	skb_queue_walk_safe(&ptp_data->skb_txtstamp_queue, skb, skb_tmp) {
-		if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
-			continue;
-
-		__skb_unlink(skb, &ptp_data->skb_txtstamp_queue);
-		skb_match = skb;
-
-		break;
-	}
-
-	spin_unlock(&ptp_data->skb_txtstamp_queue.lock);
-
-	if (WARN_ON(!skb_match))
-		return;
-
-	shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
-	skb_complete_tx_timestamp(skb_match, &shwt);
-}
-EXPORT_SYMBOL_GPL(sja1110_process_meta_tstamp);
-
 /* In addition to cloning the skb which is done by the common
  * sja1105_port_txtstamp, we need to generate a timestamp ID and save the
  * packet to the TX timestamping queue.
@@ -494,7 +461,6 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 {
 	struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
 	struct sja1105_private *priv = ds->priv;
-	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 	struct sja1105_port *sp = &priv->ports[port];
 	u8 ts_id;
 
@@ -510,7 +476,7 @@ void sja1110_txtstamp(struct dsa_switch *ds, int port, struct sk_buff *skb)
 
 	spin_unlock(&sp->data->meta_lock);
 
-	skb_queue_tail(&ptp_data->skb_txtstamp_queue, clone);
+	skb_queue_tail(&sp->data->skb_txtstamp_queue, clone);
 }
 
 /* Called from dsa_skb_tx_timestamp. This callback is just to clone
@@ -953,7 +919,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds)
 	/* Only used on SJA1105 */
 	skb_queue_head_init(&ptp_data->skb_rxtstamp_queue);
 	/* Only used on SJA1110 */
-	skb_queue_head_init(&ptp_data->skb_txtstamp_queue);
+	skb_queue_head_init(&tagger_data->skb_txtstamp_queue);
 	spin_lock_init(&tagger_data->meta_lock);
 
 	ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev);
@@ -971,6 +937,7 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds)
 void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
 {
 	struct sja1105_private *priv = ds->priv;
+	struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
 	struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
 
 	if (IS_ERR_OR_NULL(ptp_data->clock))
@@ -978,7 +945,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
 
 	del_timer_sync(&ptp_data->extts_timer);
 	ptp_cancel_worker_sync(ptp_data->clock);
-	skb_queue_purge(&ptp_data->skb_txtstamp_queue);
+	skb_queue_purge(&tagger_data->skb_txtstamp_queue);
 	skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
 	ptp_clock_unregister(ptp_data->clock);
 	ptp_data->clock = NULL;
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 3c874bb4c17b..3ae6b9fdd492 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -8,21 +8,6 @@
 
 #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
 
-/* Timestamps are in units of 8 ns clock ticks (equivalent to
- * a fixed 125 MHz clock).
- */
-#define SJA1105_TICK_NS			8
-
-static inline s64 ns_to_sja1105_ticks(s64 ns)
-{
-	return ns / SJA1105_TICK_NS;
-}
-
-static inline s64 sja1105_ticks_to_ns(s64 ticks)
-{
-	return ticks * SJA1105_TICK_NS;
-}
-
 /* Calculate the first base_time in the future that satisfies this
  * relationship:
  *
@@ -77,10 +62,6 @@ struct sja1105_ptp_data {
 	struct timer_list extts_timer;
 	/* Used only on SJA1105 to reconstruct partial timestamps */
 	struct sk_buff_head skb_rxtstamp_queue;
-	/* Used on SJA1110 where meta frames are generated only for
-	 * 2-step TX timestamps
-	 */
-	struct sk_buff_head skb_txtstamp_queue;
 	struct ptp_clock_info caps;
 	struct ptp_clock *clock;
 	struct sja1105_ptp_cmd cmd;
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 171106202fe5..0485ab2fcc46 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -48,6 +48,10 @@ struct sja1105_tagger_data {
 	spinlock_t meta_lock;
 	unsigned long state;
 	u8 ts_id;
+	/* Used on SJA1110 where meta frames are generated only for
+	 * 2-step TX timestamps
+	 */
+	struct sk_buff_head skb_txtstamp_queue;
 };
 
 struct sja1105_skb_cb {
@@ -69,25 +73,20 @@ struct sja1105_port {
 	bool hwts_tx_en;
 };
 
-enum sja1110_meta_tstamp {
-	SJA1110_META_TSTAMP_TX = 0,
-	SJA1110_META_TSTAMP_RX = 1,
-};
-
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP)
-
-void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port, u8 ts_id,
-				 enum sja1110_meta_tstamp dir, u64 tstamp);
-
-#else
+/* Timestamps are in units of 8 ns clock ticks (equivalent to
+ * a fixed 125 MHz clock).
+ */
+#define SJA1105_TICK_NS			8
 
-static inline void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
-					       u8 ts_id, enum sja1110_meta_tstamp dir,
-					       u64 tstamp)
+static inline s64 ns_to_sja1105_ticks(s64 ns)
 {
+	return ns / SJA1105_TICK_NS;
 }
 
-#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_PTP) */
+static inline s64 sja1105_ticks_to_ns(s64 ticks)
+{
+	return ticks * SJA1105_TICK_NS;
+}
 
 #if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
 
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index c054f48541c8..2edede9ddac9 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -4,6 +4,7 @@
 #include <linux/if_vlan.h>
 #include <linux/dsa/sja1105.h>
 #include <linux/dsa/8021q.h>
+#include <linux/skbuff.h>
 #include <linux/packing.h>
 #include "dsa_priv.h"
 
@@ -53,6 +54,11 @@
 #define SJA1110_TX_TRAILER_LEN			4
 #define SJA1110_MAX_PADDING_LEN			15
 
+enum sja1110_meta_tstamp {
+	SJA1110_META_TSTAMP_TX = 0,
+	SJA1110_META_TSTAMP_RX = 1,
+};
+
 /* Similar to is_link_local_ether_addr(hdr->h_dest) but also covers PTP */
 static inline bool sja1105_is_link_local(const struct sk_buff *skb)
 {
@@ -520,6 +526,43 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 					      is_meta);
 }
 
+static void sja1110_process_meta_tstamp(struct dsa_switch *ds, int port,
+					u8 ts_id, enum sja1110_meta_tstamp dir,
+					u64 tstamp)
+{
+	struct sk_buff *skb, *skb_tmp, *skb_match = NULL;
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct skb_shared_hwtstamps shwt = {0};
+	struct sja1105_port *sp = dp->priv;
+
+	if (!dsa_port_is_sja1105(dp))
+		return;
+
+	/* We don't care about RX timestamps on the CPU port */
+	if (dir == SJA1110_META_TSTAMP_RX)
+		return;
+
+	spin_lock(&sp->data->skb_txtstamp_queue.lock);
+
+	skb_queue_walk_safe(&sp->data->skb_txtstamp_queue, skb, skb_tmp) {
+		if (SJA1105_SKB_CB(skb)->ts_id != ts_id)
+			continue;
+
+		__skb_unlink(skb, &sp->data->skb_txtstamp_queue);
+		skb_match = skb;
+
+		break;
+	}
+
+	spin_unlock(&sp->data->skb_txtstamp_queue.lock);
+
+	if (WARN_ON(!skb_match))
+		return;
+
+	shwt.hwtstamp = ns_to_ktime(sja1105_ticks_to_ns(tstamp));
+	skb_complete_tx_timestamp(skb_match, &shwt);
+}
+
 static struct sk_buff *sja1110_rcv_meta(struct sk_buff *skb, u16 rx_header)
 {
 	u8 *buf = dsa_etype_header_pos_rx(skb) + SJA1110_HEADER_LEN;
-- 
cgit v1.2.3


From 4ac0567e40b334b54988e3c28a2425ff9c8bdd35 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 22 Sep 2021 17:37:26 +0300
Subject: net: dsa: sja1105: break dependency between dsa_port_is_sja1105 and
 switch driver

It's nice to be able to test a tagging protocol with dsa_loop, but not
at the cost of losing the ability of building the tagging protocol and
switch driver as modules, because as things stand, there is a circular
dependency between the two. Tagging protocol drivers cannot depend on
switch drivers, that is a hard fact.

The reasoning behind the blamed patch was that accessing dp->priv should
first make sure that the structure behind that pointer is what we really
think it is.

Currently the "sja1105" and "sja1110" tagging protocols only operate
with the sja1105 switch driver, just like any other tagging protocol and
switch combination. The only way to mix and match them is by modifying
the code, and this applies to dsa_loop as well (by default that uses
DSA_TAG_PROTO_NONE). So while in principle there is an issue, in
practice there isn't one.

Until we extend dsa_loop to allow user space configuration, treat the
problem as a non-issue and just say that DSA ports found by tag_sja1105
are always sja1105 ports, which is in fact true. But keep the
dsa_port_is_sja1105 function so that it's easy to patch it during
testing, and rely on dead code elimination.

Fixes: 994d2cbb08ca ("net: dsa: tag_sja1105: be dsa_loop-safe")
Link: https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_main.c |  3 +--
 include/linux/dsa/sja1105.h            | 15 +--------------
 net/dsa/Kconfig                        |  1 -
 3 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 7c0db80eff00..924c3f129992 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -3117,7 +3117,7 @@ static void sja1105_teardown(struct dsa_switch *ds)
 	sja1105_static_config_free(&priv->static_config);
 }
 
-const struct dsa_switch_ops sja1105_switch_ops = {
+static const struct dsa_switch_ops sja1105_switch_ops = {
 	.get_tag_protocol	= sja1105_get_tag_protocol,
 	.setup			= sja1105_setup,
 	.teardown		= sja1105_teardown,
@@ -3166,7 +3166,6 @@ const struct dsa_switch_ops sja1105_switch_ops = {
 	.port_bridge_tx_fwd_offload = dsa_tag_8021q_bridge_tx_fwd_offload,
 	.port_bridge_tx_fwd_unoffload = dsa_tag_8021q_bridge_tx_fwd_unoffload,
 };
-EXPORT_SYMBOL_GPL(sja1105_switch_ops);
 
 static const struct of_device_id sja1105_dt_ids[];
 
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 0485ab2fcc46..9e07079528a5 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -88,22 +88,9 @@ static inline s64 sja1105_ticks_to_ns(s64 ticks)
 	return ticks * SJA1105_TICK_NS;
 }
 
-#if IS_ENABLED(CONFIG_NET_DSA_SJA1105)
-
-extern const struct dsa_switch_ops sja1105_switch_ops;
-
-static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
-{
-	return dp->ds->ops == &sja1105_switch_ops;
-}
-
-#else
-
 static inline bool dsa_port_is_sja1105(struct dsa_port *dp)
 {
-	return false;
+	return true;
 }
 
-#endif
-
 #endif /* _NET_DSA_SJA1105_H */
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 548285539752..bca1b5d66df2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -138,7 +138,6 @@ config NET_DSA_TAG_LAN9303
 
 config NET_DSA_TAG_SJA1105
 	tristate "Tag driver for NXP SJA1105 switches"
-	depends on NET_DSA_SJA1105 || !NET_DSA_SJA1105
 	select PACKING
 	help
 	  Say Y or M if you want to enable support for tagging frames with the
-- 
cgit v1.2.3


From deab6b1cd9789bb9bd466d5e76aecb8b336259b4 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 Oct 2021 14:40:40 +0300
Subject: net: dsa: tag_ocelot: break circular dependency with ocelot switch
 lib driver

As explained here:
https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/
DSA tagging protocol drivers cannot depend on symbols exported by switch
drivers, because this creates a circular dependency that breaks module
autoloading.

The tag_ocelot.c file depends on the ocelot_ptp_rew_op() function
exported by the common ocelot switch lib. This function looks at
OCELOT_SKB_CB(skb) and computes how to populate the REW_OP field of the
DSA tag, for PTP timestamping (the command: one-step/two-step, and the
TX timestamp identifier).

None of that requires deep insight into the driver, it is quite
stateless, as it only depends upon the skb->cb. So let's make it a
static inline function and put it in include/linux/dsa/ocelot.h, a
file that despite its name is used by the ocelot switch driver for
populating the injection header too - since commit 40d3f295b5fe ("net:
mscc: ocelot: use common tag parsing code with DSA").

With that function declared as static inline, its body is expanded
inside each call site, so the dependency is broken and the DSA tagger
can be built without the switch library, upon which the felix driver
depends.

Fixes: 39e5308b3250 ("net: mscc: ocelot: support PTP Sync one-step timestamping")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mscc/ocelot.c     | 17 ----------------
 drivers/net/ethernet/mscc/ocelot_net.c |  1 +
 include/linux/dsa/ocelot.h             | 37 ++++++++++++++++++++++++++++++++++
 include/soc/mscc/ocelot.h              | 24 ----------------------
 net/dsa/Kconfig                        |  2 --
 net/dsa/tag_ocelot.c                   |  1 -
 net/dsa/tag_ocelot_8021q.c             |  1 +
 7 files changed, 39 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index f0044329e3d7..a08e4f530c1c 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -601,23 +601,6 @@ static int ocelot_port_add_txtstamp_skb(struct ocelot *ocelot, int port,
 	return 0;
 }
 
-u32 ocelot_ptp_rew_op(struct sk_buff *skb)
-{
-	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
-	u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd;
-	u32 rew_op = 0;
-
-	if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) {
-		rew_op = ptp_cmd;
-		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
-	} else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) {
-		rew_op = ptp_cmd;
-	}
-
-	return rew_op;
-}
-EXPORT_SYMBOL(ocelot_ptp_rew_op);
-
 static bool ocelot_ptp_is_onestep_sync(struct sk_buff *skb,
 				       unsigned int ptp_class)
 {
diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c
index 2a85bcb5d0c2..2545727fd5b2 100644
--- a/drivers/net/ethernet/mscc/ocelot_net.c
+++ b/drivers/net/ethernet/mscc/ocelot_net.c
@@ -8,6 +8,7 @@
  * Copyright 2020-2021 NXP
  */
 
+#include <linux/dsa/ocelot.h>
 #include <linux/if_bridge.h>
 #include <linux/of_net.h>
 #include <linux/phy/phy.h>
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 435777a0073c..50641a7529ad 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -6,6 +6,26 @@
 #define _NET_DSA_TAG_OCELOT_H
 
 #include <linux/packing.h>
+#include <linux/skbuff.h>
+
+struct ocelot_skb_cb {
+	struct sk_buff *clone;
+	unsigned int ptp_class; /* valid only for clones */
+	u8 ptp_cmd;
+	u8 ts_id;
+};
+
+#define OCELOT_SKB_CB(skb) \
+	((struct ocelot_skb_cb *)((skb)->cb))
+
+#define IFH_TAG_TYPE_C			0
+#define IFH_TAG_TYPE_S			1
+
+#define IFH_REW_OP_NOOP			0x0
+#define IFH_REW_OP_DSCP			0x1
+#define IFH_REW_OP_ONE_STEP_PTP		0x2
+#define IFH_REW_OP_TWO_STEP_PTP		0x3
+#define IFH_REW_OP_ORIGIN_PTP		0x5
 
 #define OCELOT_TAG_LEN			16
 #define OCELOT_SHORT_PREFIX_LEN		4
@@ -215,4 +235,21 @@ static inline void ocelot_ifh_set_vid(void *injection, u64 vid)
 	packing(injection, &vid, 11, 0, OCELOT_TAG_LEN, PACK, 0);
 }
 
+/* Determine the PTP REW_OP to use for injecting the given skb */
+static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb)
+{
+	struct sk_buff *clone = OCELOT_SKB_CB(skb)->clone;
+	u8 ptp_cmd = OCELOT_SKB_CB(skb)->ptp_cmd;
+	u32 rew_op = 0;
+
+	if (ptp_cmd == IFH_REW_OP_TWO_STEP_PTP && clone) {
+		rew_op = ptp_cmd;
+		rew_op |= OCELOT_SKB_CB(clone)->ts_id << 3;
+	} else if (ptp_cmd == IFH_REW_OP_ORIGIN_PTP) {
+		rew_op = ptp_cmd;
+	}
+
+	return rew_op;
+}
+
 #endif
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index cabacef8731c..66b2e65c1179 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -89,15 +89,6 @@
 /* Source PGIDs, one per physical port */
 #define PGID_SRC			80
 
-#define IFH_TAG_TYPE_C			0
-#define IFH_TAG_TYPE_S			1
-
-#define IFH_REW_OP_NOOP			0x0
-#define IFH_REW_OP_DSCP			0x1
-#define IFH_REW_OP_ONE_STEP_PTP		0x2
-#define IFH_REW_OP_TWO_STEP_PTP		0x3
-#define IFH_REW_OP_ORIGIN_PTP		0x5
-
 #define OCELOT_NUM_TC			8
 
 #define OCELOT_SPEED_2500		0
@@ -695,16 +686,6 @@ struct ocelot_policer {
 	u32 burst; /* bytes */
 };
 
-struct ocelot_skb_cb {
-	struct sk_buff *clone;
-	unsigned int ptp_class; /* valid only for clones */
-	u8 ptp_cmd;
-	u8 ts_id;
-};
-
-#define OCELOT_SKB_CB(skb) \
-	((struct ocelot_skb_cb *)((skb)->cb))
-
 #define ocelot_read_ix(ocelot, reg, gi, ri) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi) + reg##_RSZ * (ri))
 #define ocelot_read_gix(ocelot, reg, gi) __ocelot_read_ix(ocelot, reg, reg##_GSZ * (gi))
 #define ocelot_read_rix(ocelot, reg, ri) __ocelot_read_ix(ocelot, reg, reg##_RSZ * (ri))
@@ -765,7 +746,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
 int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
 void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
 
-u32 ocelot_ptp_rew_op(struct sk_buff *skb);
 #else
 
 static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp)
@@ -789,10 +769,6 @@ static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp)
 {
 }
 
-static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb)
-{
-	return 0;
-}
 #endif
 
 /* Hardware initialization */
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index bca1b5d66df2..d166377d7085 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -101,8 +101,6 @@ config NET_DSA_TAG_RTL4_A
 
 config NET_DSA_TAG_OCELOT
 	tristate "Tag driver for Ocelot family of switches, using NPI port"
-	depends on MSCC_OCELOT_SWITCH_LIB || \
-		   (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
 	select PACKING
 	help
 	  Say Y or M if you want to enable NPI tagging for the Ocelot switches
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 8025ed778d33..605b51ca6921 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -2,7 +2,6 @@
 /* Copyright 2019 NXP
  */
 #include <linux/dsa/ocelot.h>
-#include <soc/mscc/ocelot.h>
 #include "dsa_priv.h"
 
 static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev,
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 59072930cb02..1e4e66ea6796 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -9,6 +9,7 @@
  *   that on egress
  */
 #include <linux/dsa/8021q.h>
+#include <linux/dsa/ocelot.h>
 #include <soc/mscc/ocelot.h>
 #include <soc/mscc/ocelot_ptp.h>
 #include "dsa_priv.h"
-- 
cgit v1.2.3


From 49f885b2d97093451410e7279aa29d81e094e108 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 Oct 2021 14:40:41 +0300
Subject: net: dsa: tag_ocelot_8021q: break circular dependency with ocelot
 switch lib

Michael reported that when using the "ocelot-8021q" tagging protocol,
the switch driver module must be manually loaded before the tagging
protocol can be loaded/is available.

This appears to be the same problem described here:
https://lore.kernel.org/netdev/20210908220834.d7gmtnwrorhharna@skbuf/
where due to the fact that DSA tagging protocols make use of symbols
exported by the switch drivers, circular dependencies appear and this
breaks module autoloading.

The ocelot_8021q driver needs the ocelot_can_inject() and
ocelot_port_inject_frame() functions from the switch library. Previously
the wrong approach was taken to solve that dependency: shims were
provided for the case where the ocelot switch library was compiled out,
but that turns out to be insufficient, because the dependency when the
switch lib _is_ compiled is problematic too.

We cannot declare ocelot_can_inject() and ocelot_port_inject_frame() as
static inline functions, because these access I/O functions like
__ocelot_write_ix() which is called by ocelot_write_rix(). Making those
static inline basically means exposing the whole guts of the ocelot
switch library, not ideal...

We already have one tagging protocol driver which calls into the switch
driver during xmit but not using any exported symbol: sja1105_defer_xmit.
We can do the same thing here: create a kthread worker and one work item
per skb, and let the switch driver itself do the register accesses to
send the skb, and then consume it.

Fixes: 0a6f17c6ae21 ("net: dsa: tag_ocelot_8021q: add support for PTP timestamping")
Reported-by: Michael Walle <michael@walle.cc>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/ocelot/felix.c | 96 ++++++++++++++++++++++++++++++++++++++++--
 drivers/net/dsa/ocelot/felix.h |  1 +
 include/linux/dsa/ocelot.h     | 12 ++++++
 include/soc/mscc/ocelot.h      | 27 ------------
 net/dsa/Kconfig                |  2 -
 net/dsa/tag_ocelot_8021q.c     | 38 +++++++++++------
 6 files changed, 130 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 50ef20724958..f8603e068e7c 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1074,6 +1074,73 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports)
 	return 0;
 }
 
+#define work_to_xmit_work(w) \
+		container_of((w), struct felix_deferred_xmit_work, work)
+
+static void felix_port_deferred_xmit(struct kthread_work *work)
+{
+	struct felix_deferred_xmit_work *xmit_work = work_to_xmit_work(work);
+	struct dsa_switch *ds = xmit_work->dp->ds;
+	struct sk_buff *skb = xmit_work->skb;
+	u32 rew_op = ocelot_ptp_rew_op(skb);
+	struct ocelot *ocelot = ds->priv;
+	int port = xmit_work->dp->index;
+	int retries = 10;
+
+	do {
+		if (ocelot_can_inject(ocelot, 0))
+			break;
+
+		cpu_relax();
+	} while (--retries);
+
+	if (!retries) {
+		dev_err(ocelot->dev, "port %d failed to inject skb\n",
+			port);
+		kfree_skb(skb);
+		return;
+	}
+
+	ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
+
+	consume_skb(skb);
+	kfree(xmit_work);
+}
+
+static int felix_port_setup_tagger_data(struct dsa_switch *ds, int port)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct ocelot *ocelot = ds->priv;
+	struct felix *felix = ocelot_to_felix(ocelot);
+	struct felix_port *felix_port;
+
+	if (!dsa_port_is_user(dp))
+		return 0;
+
+	felix_port = kzalloc(sizeof(*felix_port), GFP_KERNEL);
+	if (!felix_port)
+		return -ENOMEM;
+
+	felix_port->xmit_worker = felix->xmit_worker;
+	felix_port->xmit_work_fn = felix_port_deferred_xmit;
+
+	dp->priv = felix_port;
+
+	return 0;
+}
+
+static void felix_port_teardown_tagger_data(struct dsa_switch *ds, int port)
+{
+	struct dsa_port *dp = dsa_to_port(ds, port);
+	struct felix_port *felix_port = dp->priv;
+
+	if (!felix_port)
+		return;
+
+	dp->priv = NULL;
+	kfree(felix_port);
+}
+
 /* Hardware initialization done here so that we can allocate structures with
  * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing
  * us to allocate structures twice (leak memory) and map PCI memory twice
@@ -1102,6 +1169,12 @@ static int felix_setup(struct dsa_switch *ds)
 		}
 	}
 
+	felix->xmit_worker = kthread_create_worker(0, "felix_xmit");
+	if (IS_ERR(felix->xmit_worker)) {
+		err = PTR_ERR(felix->xmit_worker);
+		goto out_deinit_timestamp;
+	}
+
 	for (port = 0; port < ds->num_ports; port++) {
 		if (dsa_is_unused_port(ds, port))
 			continue;
@@ -1112,6 +1185,14 @@ static int felix_setup(struct dsa_switch *ds)
 		 * bits of vlan tag.
 		 */
 		felix_port_qos_map_init(ocelot, port);
+
+		err = felix_port_setup_tagger_data(ds, port);
+		if (err) {
+			dev_err(ds->dev,
+				"port %d failed to set up tagger data: %pe\n",
+				port, ERR_PTR(err));
+			goto out_deinit_ports;
+		}
 	}
 
 	err = ocelot_devlink_sb_register(ocelot);
@@ -1138,9 +1219,13 @@ out_deinit_ports:
 		if (dsa_is_unused_port(ds, port))
 			continue;
 
+		felix_port_teardown_tagger_data(ds, port);
 		ocelot_deinit_port(ocelot, port);
 	}
 
+	kthread_destroy_worker(felix->xmit_worker);
+
+out_deinit_timestamp:
 	ocelot_deinit_timestamp(ocelot);
 	ocelot_deinit(ocelot);
 
@@ -1164,17 +1249,20 @@ static void felix_teardown(struct dsa_switch *ds)
 		felix_del_tag_protocol(ds, port, felix->tag_proto);
 	}
 
-	ocelot_devlink_sb_unregister(ocelot);
-	ocelot_deinit_timestamp(ocelot);
-	ocelot_deinit(ocelot);
-
 	for (port = 0; port < ocelot->num_phys_ports; port++) {
 		if (dsa_is_unused_port(ds, port))
 			continue;
 
+		felix_port_teardown_tagger_data(ds, port);
 		ocelot_deinit_port(ocelot, port);
 	}
 
+	kthread_destroy_worker(felix->xmit_worker);
+
+	ocelot_devlink_sb_unregister(ocelot);
+	ocelot_deinit_timestamp(ocelot);
+	ocelot_deinit(ocelot);
+
 	if (felix->info->mdio_bus_free)
 		felix->info->mdio_bus_free(ocelot);
 }
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 54024b6f9498..be3e42e135c0 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -62,6 +62,7 @@ struct felix {
 	resource_size_t			switch_base;
 	resource_size_t			imdio_base;
 	enum dsa_tag_protocol		tag_proto;
+	struct kthread_worker		*xmit_worker;
 };
 
 struct net_device *felix_port_to_netdev(struct ocelot *ocelot, int port);
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index 50641a7529ad..8ae999f587c4 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -5,6 +5,7 @@
 #ifndef _NET_DSA_TAG_OCELOT_H
 #define _NET_DSA_TAG_OCELOT_H
 
+#include <linux/kthread.h>
 #include <linux/packing.h>
 #include <linux/skbuff.h>
 
@@ -160,6 +161,17 @@ struct ocelot_skb_cb {
  *         +------+------+------+------+------+------+------+------+
  */
 
+struct felix_deferred_xmit_work {
+	struct dsa_port *dp;
+	struct sk_buff *skb;
+	struct kthread_work work;
+};
+
+struct felix_port {
+	void (*xmit_work_fn)(struct kthread_work *work);
+	struct kthread_worker *xmit_worker;
+};
+
 static inline void ocelot_xfh_get_rew_val(void *extraction, u64 *rew_val)
 {
 	packing(extraction, rew_val, 116, 85, OCELOT_TAG_LEN, UNPACK, 0);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 66b2e65c1179..d7055b41982d 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -737,8 +737,6 @@ u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target,
 void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target,
 			      u32 val, u32 reg, u32 offset);
 
-#if IS_ENABLED(CONFIG_MSCC_OCELOT_SWITCH_LIB)
-
 /* Packet I/O */
 bool ocelot_can_inject(struct ocelot *ocelot, int grp);
 void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
@@ -746,31 +744,6 @@ void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp,
 int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb);
 void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp);
 
-#else
-
-static inline bool ocelot_can_inject(struct ocelot *ocelot, int grp)
-{
-	return false;
-}
-
-static inline void ocelot_port_inject_frame(struct ocelot *ocelot, int port,
-					    int grp, u32 rew_op,
-					    struct sk_buff *skb)
-{
-}
-
-static inline int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp,
-					struct sk_buff **skb)
-{
-	return -EIO;
-}
-
-static inline void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp)
-{
-}
-
-#endif
-
 /* Hardware initialization */
 int ocelot_regfields_init(struct ocelot *ocelot,
 			  const struct reg_field *const regfields);
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index d166377d7085..d8ee15f1c7a9 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -112,8 +112,6 @@ config NET_DSA_TAG_OCELOT
 
 config NET_DSA_TAG_OCELOT_8021Q
 	tristate "Tag driver for Ocelot family of switches, using VLAN"
-	depends on MSCC_OCELOT_SWITCH_LIB || \
-	          (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
 	help
 	  Say Y or M if you want to enable support for tagging frames with a
 	  custom VLAN-based header. Frames that require timestamping, such as
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 1e4e66ea6796..d05c352f96e5 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -10,10 +10,31 @@
  */
 #include <linux/dsa/8021q.h>
 #include <linux/dsa/ocelot.h>
-#include <soc/mscc/ocelot.h>
-#include <soc/mscc/ocelot_ptp.h>
 #include "dsa_priv.h"
 
+static struct sk_buff *ocelot_defer_xmit(struct dsa_port *dp,
+					 struct sk_buff *skb)
+{
+	struct felix_deferred_xmit_work *xmit_work;
+	struct felix_port *felix_port = dp->priv;
+
+	xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
+	if (!xmit_work)
+		return NULL;
+
+	/* Calls felix_port_deferred_xmit in felix.c */
+	kthread_init_work(&xmit_work->work, felix_port->xmit_work_fn);
+	/* Increase refcount so the kfree_skb in dsa_slave_xmit
+	 * won't really free the packet.
+	 */
+	xmit_work->dp = dp;
+	xmit_work->skb = skb_get(skb);
+
+	kthread_queue_work(felix_port->xmit_worker, &xmit_work->work);
+
+	return NULL;
+}
+
 static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 				   struct net_device *netdev)
 {
@@ -21,18 +42,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
-	struct ocelot *ocelot = dp->ds->priv;
-	int port = dp->index;
-	u32 rew_op = 0;
 
-	rew_op = ocelot_ptp_rew_op(skb);
-	if (rew_op) {
-		if (!ocelot_can_inject(ocelot, 0))
-			return NULL;
-
-		ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb);
-		return NULL;
-	}
+	if (ocelot_ptp_rew_op(skb))
+		return ocelot_defer_xmit(dp, skb);
 
 	return dsa_8021q_xmit(skb, netdev, ETH_P_8021Q,
 			      ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
-- 
cgit v1.2.3


From 5008062f1c3f5af3acf86164aa6fcc77b0c7bdce Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Wed, 13 Oct 2021 14:19:56 +0100
Subject: nvmem: core: add nvmem cell post processing callback

Some NVMEM providers have certain nvmem cells encoded, which requires
post processing before actually using it.

For example mac-address is stored in either in ascii or delimited or reverse-order.

Having a post-process callback hook to provider drivers would enable them to
do this vendor specific post processing before nvmem consumers see it.

Tested-by: Joakim Zhang <qiangqing.zhang@nxp.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20211013131957.30271-3-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 9 +++++++++
 include/linux/nvmem-provider.h | 5 +++++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index f37d7838d2e3..d7013c9b0fae 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -38,6 +38,7 @@ struct nvmem_device {
 	unsigned int		nkeepout;
 	nvmem_reg_read_t	reg_read;
 	nvmem_reg_write_t	reg_write;
+	nvmem_cell_post_process_t cell_post_process;
 	struct gpio_desc	*wp_gpio;
 	void *priv;
 };
@@ -796,6 +797,7 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 	nvmem->type = config->type;
 	nvmem->reg_read = config->reg_read;
 	nvmem->reg_write = config->reg_write;
+	nvmem->cell_post_process = config->cell_post_process;
 	nvmem->keepout = config->keepout;
 	nvmem->nkeepout = config->nkeepout;
 	if (config->of_node)
@@ -1440,6 +1442,13 @@ static int __nvmem_cell_read(struct nvmem_device *nvmem,
 	if (cell->bit_offset || cell->nbits)
 		nvmem_shift_read_buffer_in_place(cell, buf);
 
+	if (nvmem->cell_post_process) {
+		rc = nvmem->cell_post_process(nvmem->priv, id,
+					      cell->offset, buf, cell->bytes);
+		if (rc)
+			return rc;
+	}
+
 	if (len)
 		*len = cell->bytes;
 
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 104505e9028f..98efb7b5660d 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -19,6 +19,9 @@ typedef int (*nvmem_reg_read_t)(void *priv, unsigned int offset,
 				void *val, size_t bytes);
 typedef int (*nvmem_reg_write_t)(void *priv, unsigned int offset,
 				 void *val, size_t bytes);
+/* used for vendor specific post processing of cell data */
+typedef int (*nvmem_cell_post_process_t)(void *priv, const char *id, unsigned int offset,
+					  void *buf, size_t bytes);
 
 enum nvmem_type {
 	NVMEM_TYPE_UNKNOWN = 0,
@@ -62,6 +65,7 @@ struct nvmem_keepout {
  * @no_of_node:	Device should not use the parent's of_node even if it's !NULL.
  * @reg_read:	Callback to read data.
  * @reg_write:	Callback to write data.
+ * @cell_post_process:	Callback for vendor specific post processing of cell data
  * @size:	Device size.
  * @word_size:	Minimum read/write access granularity.
  * @stride:	Minimum read/write access stride.
@@ -92,6 +96,7 @@ struct nvmem_config {
 	bool			no_of_node;
 	nvmem_reg_read_t	reg_read;
 	nvmem_reg_write_t	reg_write;
+	nvmem_cell_post_process_t cell_post_process;
 	int	size;
 	int	word_size;
 	int	stride;
-- 
cgit v1.2.3


From 16c663642c7ec03cd4cee5fec520bb69e97babe4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 12 Oct 2021 11:57:22 -0400
Subject: SUNRPC: Replace the "__be32 *p" parameter to .pc_decode

The passed-in value of the "__be32 *p" parameter is now unused in
every server-side XDR decoder, and can be removed.

Note also that there is a line in each decoder that sets up a local
pointer to a struct xdr_stream. Passing that pointer from the
dispatcher instead saves one line per decoder function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svc.c             |  3 +--
 fs/lockd/xdr.c             | 27 ++++++++++-----------------
 fs/lockd/xdr4.c            | 27 ++++++++++-----------------
 fs/nfsd/nfs2acl.c          | 12 ++++++------
 fs/nfsd/nfs3acl.c          |  8 ++++----
 fs/nfsd/nfs3xdr.c          | 45 +++++++++++++++------------------------------
 fs/nfsd/nfs4xdr.c          |  4 ++--
 fs/nfsd/nfsd.h             |  3 ++-
 fs/nfsd/nfssvc.c           |  7 +++----
 fs/nfsd/nfsxdr.c           | 30 ++++++++++--------------------
 fs/nfsd/xdr.h              | 21 +++++++++++----------
 fs/nfsd/xdr3.h             | 31 ++++++++++++++++---------------
 fs/nfsd/xdr4.h             |  2 +-
 include/linux/lockd/xdr.h  | 19 ++++++++++---------
 include/linux/lockd/xdr4.h | 19 +++++++++----------
 include/linux/sunrpc/svc.h |  3 ++-
 16 files changed, 112 insertions(+), 149 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b632be3ad57b..9a82471bda07 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -780,11 +780,10 @@ module_exit(exit_nlm);
 static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	const struct svc_procedure *procp = rqstp->rq_procinfo;
-	struct kvec *argv = rqstp->rq_arg.head;
 	struct kvec *resv = rqstp->rq_res.head;
 
 	svcxdr_init_decode(rqstp);
-	if (!procp->pc_decode(rqstp, argv->iov_base))
+	if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream))
 		goto out_decode_err;
 
 	*statp = procp->pc_func(rqstp);
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 9235e60b1769..895f15222104 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -146,15 +146,14 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
  */
 
 int
-nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	return 1;
 }
 
 int
-nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
@@ -171,9 +170,8 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
@@ -197,9 +195,8 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
@@ -218,9 +215,8 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
@@ -233,9 +229,8 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_res *resp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &resp->cookie))
@@ -247,10 +242,10 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_reboot *argp = rqstp->rq_argp;
+	__be32 *p;
 	u32 len;
 
 	if (xdr_stream_decode_u32(xdr, &len) < 0)
@@ -273,9 +268,8 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -301,9 +295,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 98e957e4566c..573c7d580a5e 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -145,15 +145,14 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
  */
 
 int
-nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	return 1;
 }
 
 int
-nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
@@ -170,9 +169,8 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
@@ -196,9 +194,8 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
@@ -216,9 +213,8 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
@@ -231,9 +227,8 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_res *resp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &resp->cookie))
@@ -245,10 +240,10 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_reboot *argp = rqstp->rq_argp;
+	__be32 *p;
 	u32 len;
 
 	if (xdr_stream_decode_u32(xdr, &len) < 0)
@@ -271,9 +266,8 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
@@ -299,9 +293,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4b43929c1f25..0069c0fdb94f 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -188,9 +188,9 @@ out:
  * XDR decode functions
  */
 
-static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &argp->fh))
@@ -201,9 +201,9 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 	return 1;
 }
 
-static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &argp->fh))
@@ -222,9 +222,9 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
 	return 1;
 }
 
-static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_accessargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &args->fh))
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 5dfe7644a517..b1e352ed2436 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -127,9 +127,9 @@ out:
  * XDR decode functions
  */
 
-static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_getaclargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
@@ -140,9 +140,9 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
 	return 1;
 }
 
-static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh))
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 267e56f218af..5f744f03cda7 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -557,18 +557,16 @@ void fill_post_wcc(struct svc_fh *fhp)
  */
 
 int
-nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_fhandle *args = rqstp->rq_argp;
 
 	return svcxdr_decode_nfs_fh3(xdr, &args->fh);
 }
 
 int
-nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_sattrargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_nfs_fh3(xdr, &args->fh) &&
@@ -577,18 +575,16 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_diropargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len);
 }
 
 int
-nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_accessargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
@@ -600,9 +596,8 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_readargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
@@ -616,9 +611,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
@@ -649,9 +643,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_createargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
@@ -674,9 +667,8 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_createargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_diropargs3(xdr, &args->fh,
@@ -685,9 +677,8 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 	struct kvec *tail = rqstp->rq_arg.tail;
@@ -713,9 +704,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_mknodargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
@@ -742,9 +732,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_renameargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_diropargs3(xdr, &args->ffh,
@@ -754,9 +743,8 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_linkargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_nfs_fh3(xdr, &args->ffh) &&
@@ -765,9 +753,8 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
@@ -784,9 +771,8 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	u32 dircount;
 
@@ -807,9 +793,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd3_commitargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a54b2845473b..fc0f154f1172 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5412,14 +5412,14 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 }
 
 int
-nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
 
 	/* svcxdr_tmp_alloc */
 	args->to_free = NULL;
 
-	args->xdr = &rqstp->rq_arg_stream;
+	args->xdr = xdr;
 	args->ops = args->iops;
 	args->rqstp = rqstp;
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 9664303afdaf..6e8ad5f9757c 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -78,7 +78,8 @@ extern const struct seq_operations nfs_exports_op;
  */
 struct nfsd_voidargs { };
 struct nfsd_voidres { };
-int		nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p);
+int		nfssvc_decode_voidarg(struct svc_rqst *rqstp,
+				      struct xdr_stream *xdr);
 int		nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ccb59e91011b..7cd13e9474ff 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1004,7 +1004,6 @@ out:
 int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	const struct svc_procedure *proc = rqstp->rq_procinfo;
-	struct kvec *argv = &rqstp->rq_arg.head[0];
 	struct kvec *resv = &rqstp->rq_res.head[0];
 	__be32 *p;
 
@@ -1015,7 +1014,7 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	rqstp->rq_cachetype = proc->pc_cachetype;
 
 	svcxdr_init_decode(rqstp);
-	if (!proc->pc_decode(rqstp, argv->iov_base))
+	if (!proc->pc_decode(rqstp, &rqstp->rq_arg_stream))
 		goto out_decode_err;
 
 	switch (nfsd_cache_lookup(rqstp)) {
@@ -1065,13 +1064,13 @@ out_encode_err:
 /**
  * nfssvc_decode_voidarg - Decode void arguments
  * @rqstp: Server RPC transaction context
- * @p: buffer containing arguments to decode
+ * @xdr: XDR stream positioned at arguments to decode
  *
  * Return values:
  *   %0: Arguments were not valid
  *   %1: Decoding was successful
  */
-int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+int nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	return 1;
 }
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index ddcc18adfeb1..08e899180ee4 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -273,18 +273,16 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
  */
 
 int
-nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_fhandle *args = rqstp->rq_argp;
 
 	return svcxdr_decode_fhandle(xdr, &args->fh);
 }
 
 int
-nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_sattrargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_fhandle(xdr, &args->fh) &&
@@ -292,18 +290,16 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_diropargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len);
 }
 
 int
-nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_readargs *args = rqstp->rq_argp;
 	u32 totalcount;
 
@@ -321,9 +317,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_writeargs *args = rqstp->rq_argp;
 	u32 beginoffset, totalcount;
 
@@ -350,9 +345,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_createargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_diropargs(xdr, &args->fh,
@@ -361,9 +355,8 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_renameargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_diropargs(xdr, &args->ffh,
@@ -373,9 +366,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_linkargs *args = rqstp->rq_argp;
 
 	return svcxdr_decode_fhandle(xdr, &args->ffh) &&
@@ -384,9 +376,8 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_symlinkargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 
@@ -405,9 +396,8 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct nfsd_readdirargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &args->fh))
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 80fd6d7f3404..804f9af94d6d 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -141,16 +141,17 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-int nfssvc_decode_fhandleargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_readargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_createargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
-int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
+int nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
 int nfssvc_encode_statres(struct svc_rqst *, __be32 *);
 int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *);
 int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 712c117300cb..60a8909205e5 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -265,21 +265,22 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-int nfs3svc_decode_fhandleargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
-int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
+int nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
 int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *);
 int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
 int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 3e4052e3bd50..1d1b8771bdcf 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -756,7 +756,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index a98309c0121c..170ad6f5596a 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -96,18 +96,19 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-int	nlmsvc_decode_testargs(struct svc_rqst *, __be32 *);
+int	nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+
 int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_res(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_void(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_notify(struct svc_rqst *, __be32 *);
-int	nlmsvc_decode_reboot(struct svc_rqst *, __be32 *);
 
 #endif /* LOCKD_XDR_H */
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 5ae766f26e04..68e14e0f2b1f 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -22,21 +22,20 @@
 #define	nlm4_fbig		cpu_to_be32(NLM_FBIG)
 #define	nlm4_failed		cpu_to_be32(NLM_FAILED)
 
+int	nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-
-int	nlm4svc_decode_testargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_res(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_void(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_notify(struct svc_rqst *, __be32 *);
-int	nlm4svc_decode_reboot(struct svc_rqst *, __be32 *);
 
 extern const struct rpc_version nlm_version4;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 4205a6ef4770..da3c5bc43d85 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -454,7 +454,8 @@ struct svc_procedure {
 	/* process the request: */
 	__be32			(*pc_func)(struct svc_rqst *);
 	/* XDR decode args: */
-	int			(*pc_decode)(struct svc_rqst *, __be32 *data);
+	int			(*pc_decode)(struct svc_rqst *rqstp,
+					     struct xdr_stream *xdr);
 	/* XDR encode result: */
 	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
 	/* XDR free result: */
-- 
cgit v1.2.3


From c44b31c263798ec34614dd394c31ef1a2e7e716e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 12 Oct 2021 11:57:28 -0400
Subject: SUNRPC: Change return value type of .pc_decode

Returning an undecorated integer is an age-old trope, but it's
not clear (even to previous experts in this code) that the only
valid return values are 1 and 0. These functions do not return
a negative errno, rpc_stat value, or a positive length.

Document there are only two valid return values by having
.pc_decode return only true or false.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/xdr.c             |  96 ++++++++++++++++++------------------
 fs/lockd/xdr4.c            |  97 +++++++++++++++++++------------------
 fs/nfsd/nfs2acl.c          |  30 ++++++------
 fs/nfsd/nfs3acl.c          |  22 ++++-----
 fs/nfsd/nfs3xdr.c          | 118 ++++++++++++++++++++++-----------------------
 fs/nfsd/nfs4xdr.c          |  24 ++++-----
 fs/nfsd/nfsd.h             |   2 +-
 fs/nfsd/nfssvc.c           |   6 +--
 fs/nfsd/nfsxdr.c           |  62 ++++++++++++------------
 fs/nfsd/xdr.h              |  20 ++++----
 fs/nfsd/xdr3.h             |  30 ++++++------
 fs/nfsd/xdr4.h             |   2 +-
 include/linux/lockd/xdr.h  |  18 +++----
 include/linux/lockd/xdr4.h |  18 +++----
 include/linux/sunrpc/svc.h |   2 +-
 15 files changed, 274 insertions(+), 273 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 895f15222104..622c2ca37dbf 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -145,103 +145,103 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
  * Decode Call arguments
  */
 
-int
+bool
 nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
 	if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return 0;
+		return false;
 	argp->monitor = 1;		/* monitor client by default */
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	argp->lock.fl.fl_type = F_UNLCK;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &resp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_decode_stats(xdr, &resp->status))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_reboot *argp = rqstp->rq_argp;
@@ -249,25 +249,25 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	u32 len;
 
 	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return 0;
+		return false;
 	if (len > SM_MAXSTRLEN)
-		return 0;
+		return false;
 	p = xdr_inline_decode(xdr, len);
 	if (!p)
-		return 0;
+		return false;
 	argp->len = len;
 	argp->mon = (char *)p;
 	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return 0;
+		return false;
 	p = xdr_inline_decode(xdr, SM_PRIV_SIZE);
 	if (!p)
-		return 0;
+		return false;
 	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
@@ -278,34 +278,34 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	lock->svid = ~(u32)0;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return 0;
+		return false;
 	if (!svcxdr_decode_fhandle(xdr, &lock->fh))
-		return 0;
+		return false;
 	if (!svcxdr_decode_owner(xdr, &lock->oh))
-		return 0;
+		return false;
 	/* XXX: Range checks are missing in the original code */
 	if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 573c7d580a5e..45551dee26b4 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -144,102 +144,103 @@ svcxdr_encode_testrply(struct xdr_stream *xdr, const struct nlm_res *resp)
  * Decode Call arguments
  */
 
-int
+bool
 nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
 	if (xdr_stream_decode_bool(xdr, &argp->reclaim) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return 0;
+		return false;
 	argp->monitor = 1;		/* monitor client by default */
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	u32 exclusive;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &argp->block) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_bool(xdr, &exclusive) < 0)
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	if (exclusive)
 		argp->lock.fl.fl_type = F_WRLCK;
-	return 1;
+
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_decode_lock(xdr, &argp->lock))
-		return 0;
+		return false;
 	argp->lock.fl.fl_type = F_UNLCK;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_cookie(xdr, &resp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_decode_stats(xdr, &resp->status))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_reboot *argp = rqstp->rq_argp;
@@ -247,25 +248,25 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	u32 len;
 
 	if (xdr_stream_decode_u32(xdr, &len) < 0)
-		return 0;
+		return false;
 	if (len > SM_MAXSTRLEN)
-		return 0;
+		return false;
 	p = xdr_inline_decode(xdr, len);
 	if (!p)
-		return 0;
+		return false;
 	argp->len = len;
 	argp->mon = (char *)p;
 	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return 0;
+		return false;
 	p = xdr_inline_decode(xdr, SM_PRIV_SIZE);
 	if (!p)
-		return 0;
+		return false;
 	memcpy(&argp->priv.data, p, sizeof(argp->priv.data));
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
@@ -276,34 +277,34 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	lock->svid = ~(u32)0;
 
 	if (!svcxdr_decode_cookie(xdr, &argp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return 0;
+		return false;
 	if (!svcxdr_decode_fhandle(xdr, &lock->fh))
-		return 0;
+		return false;
 	if (!svcxdr_decode_owner(xdr, &lock->oh))
-		return 0;
+		return false;
 	/* XXX: Range checks are missing in the original code */
 	if (xdr_stream_decode_u32(xdr, &argp->fsm_mode) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->fsm_access) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_args *argp = rqstp->rq_argp;
 	struct nlm_lock	*lock = &argp->lock;
 
 	if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->state) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 0069c0fdb94f..cf6ba5e7937e 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -188,51 +188,51 @@ out:
  * XDR decode functions
  */
 
-static int
+static bool
 nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_getaclargs *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &argp->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-static int
+static bool
 nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &argp->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
-		return 0;
+		return false;
 	if (argp->mask & ~NFS_ACL_MASK)
-		return 0;
+		return false;
 	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
 				   &argp->acl_access : NULL))
-		return 0;
+		return false;
 	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
 				   &argp->acl_default : NULL))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-static int
+static bool
 nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_accessargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->access) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index b1e352ed2436..9e9f6afb2e00 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -127,38 +127,38 @@ out:
  * XDR decode functions
  */
 
-static int
+static bool
 nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_getaclargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->mask) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-static int
+static bool
 nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_setaclargs *argp = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &argp->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &argp->mask) < 0)
-		return 0;
+		return false;
 	if (argp->mask & ~NFS_ACL_MASK)
-		return 0;
+		return false;
 	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_ACL) ?
 				   &argp->acl_access : NULL))
-		return 0;
+		return false;
 	if (!nfs_stream_decode_acl(xdr, NULL, (argp->mask & NFS_DFACL) ?
 				   &argp->acl_default : NULL))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 5f744f03cda7..1f3de46d24d4 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -556,7 +556,7 @@ void fill_post_wcc(struct svc_fh *fhp)
  * XDR decode functions
  */
 
-int
+bool
 nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_fhandle *args = rqstp->rq_argp;
@@ -564,7 +564,7 @@ nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	return svcxdr_decode_nfs_fh3(xdr, &args->fh);
 }
 
-int
+bool
 nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_sattrargs *args = rqstp->rq_argp;
@@ -574,7 +574,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_decode_sattrguard3(xdr, args);
 }
 
-int
+bool
 nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_diropargs *args = rqstp->rq_argp;
@@ -582,75 +582,75 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	return svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len);
 }
 
-int
+bool
 nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_accessargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->access) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_readargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_writeargs *args = rqstp->rq_argp;
 	u32 max_blocksize = svc_max_payload(rqstp);
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->stable) < 0)
-		return 0;
+		return false;
 
 	/* opaque data */
 	if (xdr_stream_decode_u32(xdr, &args->len) < 0)
-		return 0;
+		return false;
 
 	/* request sanity */
 	if (args->count != args->len)
-		return 0;
+		return false;
 	if (args->count > max_blocksize) {
 		args->count = max_blocksize;
 		args->len = max_blocksize;
 	}
 	if (!xdr_stream_subsegment(xdr, &args->payload, args->count))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_createargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->createmode) < 0)
-		return 0;
+		return false;
 	switch (args->createmode) {
 	case NFS3_CREATE_UNCHECKED:
 	case NFS3_CREATE_GUARDED:
@@ -658,15 +658,15 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	case NFS3_CREATE_EXCLUSIVE:
 		args->verf = xdr_inline_decode(xdr, NFS3_CREATEVERFSIZE);
 		if (!args->verf)
-			return 0;
+			return false;
 		break;
 	default:
-		return 0;
+		return false;
 	}
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_createargs *args = rqstp->rq_argp;
@@ -676,7 +676,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_decode_sattr3(rqstp, xdr, &args->attrs);
 }
 
-int
+bool
 nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_symlinkargs *args = rqstp->rq_argp;
@@ -685,33 +685,33 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	size_t remaining;
 
 	if (!svcxdr_decode_diropargs3(xdr, &args->ffh, &args->fname, &args->flen))
-		return 0;
+		return false;
 	if (!svcxdr_decode_sattr3(rqstp, xdr, &args->attrs))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
-		return 0;
+		return false;
 
 	/* request sanity */
 	remaining = head->iov_len + rqstp->rq_arg.page_len + tail->iov_len;
 	remaining -= xdr_stream_pos(xdr);
 	if (remaining < xdr_align_size(args->tlen))
-		return 0;
+		return false;
 
 	args->first.iov_base = xdr->p;
 	args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_mknodargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_diropargs3(xdr, &args->fh, &args->name, &args->len))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->ftype) < 0)
-		return 0;
+		return false;
 	switch (args->ftype) {
 	case NF3CHR:
 	case NF3BLK:
@@ -725,13 +725,13 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		/* Valid XDR but illegal file types */
 		break;
 	default:
-		return 0;
+		return false;
 	}
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_renameargs *args = rqstp->rq_argp;
@@ -742,7 +742,7 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 					 &args->tname, &args->tlen);
 }
 
-int
+bool
 nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_linkargs *args = rqstp->rq_argp;
@@ -752,59 +752,59 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 					 &args->tname, &args->tlen);
 }
 
-int
+bool
 nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
-		return 0;
+		return false;
 	args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
 	if (!args->verf)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_readdirargs *args = rqstp->rq_argp;
 	u32 dircount;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u64(xdr, &args->cookie) < 0)
-		return 0;
+		return false;
 	args->verf = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
 	if (!args->verf)
-		return 0;
+		return false;
 	/* dircount is ignored */
 	if (xdr_stream_decode_u32(xdr, &dircount) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_commitargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_nfs_fh3(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u64(xdr, &args->offset) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fc0f154f1172..dd1ee9ada7dd 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2322,7 +2322,7 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
 	return true;
 }
 
-static int
+static bool
 nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 {
 	struct nfsd4_op *op;
@@ -2335,25 +2335,25 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	int i;
 
 	if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0)
-		return 0;
+		return false;
 	max_reply += XDR_UNIT;
 	argp->tag = NULL;
 	if (unlikely(argp->taglen)) {
 		if (argp->taglen > NFSD4_MAX_TAGLEN)
-			return 0;
+			return false;
 		p = xdr_inline_decode(argp->xdr, argp->taglen);
 		if (!p)
-			return 0;
+			return false;
 		argp->tag = svcxdr_savemem(argp, p, argp->taglen);
 		if (!argp->tag)
-			return 0;
+			return false;
 		max_reply += xdr_align_size(argp->taglen);
 	}
 
 	if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
-		return 0;
+		return false;
 
 	/*
 	 * NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS
@@ -2361,14 +2361,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	 * nfsd4_proc can handle this is an NFS-level error.
 	 */
 	if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
-		return 1;
+		return true;
 
 	if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
 		argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
 		if (!argp->ops) {
 			argp->ops = argp->iops;
 			dprintk("nfsd: couldn't allocate room for COMPOUND\n");
-			return 0;
+			return false;
 		}
 	}
 
@@ -2380,7 +2380,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 		op->replay = NULL;
 
 		if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
-			return 0;
+			return false;
 		if (nfsd4_opnum_in_range(argp, op)) {
 			op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
 			if (op->status != nfs_ok)
@@ -2427,7 +2427,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 	if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
 		clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
 
-	return 1;
+	return true;
 }
 
 static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
@@ -5411,7 +5411,7 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
 	}
 }
 
-int
+bool
 nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd4_compoundargs *args = rqstp->rq_argp;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 6e8ad5f9757c..bfcddd4c7534 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -78,7 +78,7 @@ extern const struct seq_operations nfs_exports_op;
  */
 struct nfsd_voidargs { };
 struct nfsd_voidres { };
-int		nfssvc_decode_voidarg(struct svc_rqst *rqstp,
+bool		nfssvc_decode_voidarg(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr);
 int		nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 7cd13e9474ff..beb564e8a3db 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1067,10 +1067,10 @@ out_encode_err:
  * @xdr: XDR stream positioned at arguments to decode
  *
  * Return values:
- *   %0: Arguments were not valid
- *   %1: Decoding was successful
+ *   %false: Arguments were not valid
+ *   %true: Decoding was successful
  */
-int nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	return 1;
 }
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 08e899180ee4..b5817a41b3de 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -272,7 +272,7 @@ svcxdr_encode_fattr(struct svc_rqst *rqstp, struct xdr_stream *xdr,
  * XDR decode functions
  */
 
-int
+bool
 nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_fhandle *args = rqstp->rq_argp;
@@ -280,7 +280,7 @@ nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	return svcxdr_decode_fhandle(xdr, &args->fh);
 }
 
-int
+bool
 nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_sattrargs *args = rqstp->rq_argp;
@@ -289,7 +289,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
 }
 
-int
+bool
 nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_diropargs *args = rqstp->rq_argp;
@@ -297,54 +297,54 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	return svcxdr_decode_diropargs(xdr, &args->fh, &args->name, &args->len);
 }
 
-int
+bool
 nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_readargs *args = rqstp->rq_argp;
 	u32 totalcount;
 
 	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return 0;
+		return false;
 	/* totalcount is ignored */
 	if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_writeargs *args = rqstp->rq_argp;
 	u32 beginoffset, totalcount;
 
 	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return 0;
+		return false;
 	/* beginoffset is ignored */
 	if (xdr_stream_decode_u32(xdr, &beginoffset) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->offset) < 0)
-		return 0;
+		return false;
 	/* totalcount is ignored */
 	if (xdr_stream_decode_u32(xdr, &totalcount) < 0)
-		return 0;
+		return false;
 
 	/* opaque data */
 	if (xdr_stream_decode_u32(xdr, &args->len) < 0)
-		return 0;
+		return false;
 	if (args->len > NFSSVC_MAXBLKSIZE_V2)
-		return 0;
+		return false;
 	if (!xdr_stream_subsegment(xdr, &args->payload, args->len))
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_createargs *args = rqstp->rq_argp;
@@ -354,7 +354,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
 }
 
-int
+bool
 nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_renameargs *args = rqstp->rq_argp;
@@ -365,7 +365,7 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 					&args->tname, &args->tlen);
 }
 
-int
+bool
 nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_linkargs *args = rqstp->rq_argp;
@@ -375,39 +375,39 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 					&args->tname, &args->tlen);
 }
 
-int
+bool
 nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_symlinkargs *args = rqstp->rq_argp;
 	struct kvec *head = rqstp->rq_arg.head;
 
 	if (!svcxdr_decode_diropargs(xdr, &args->ffh, &args->fname, &args->flen))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->tlen) < 0)
-		return 0;
+		return false;
 	if (args->tlen == 0)
-		return 0;
+		return false;
 
 	args->first.iov_len = head->iov_len - xdr_stream_pos(xdr);
 	args->first.iov_base = xdr_inline_decode(xdr, args->tlen);
 	if (!args->first.iov_base)
-		return 0;
+		return false;
 	return svcxdr_decode_sattr(rqstp, xdr, &args->attrs);
 }
 
-int
+bool
 nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_readdirargs *args = rqstp->rq_argp;
 
 	if (!svcxdr_decode_fhandle(xdr, &args->fh))
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->cookie) < 0)
-		return 0;
+		return false;
 	if (xdr_stream_decode_u32(xdr, &args->count) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
 
 /*
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 804f9af94d6d..31be7d30e64e 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -141,16 +141,16 @@ union nfsd_xdrstore {
 #define NFS2_SVC_XDRSIZE	sizeof(union nfsd_xdrstore)
 
 
-int nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 int nfssvc_encode_statres(struct svc_rqst *, __be32 *);
 int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 60a8909205e5..ef72bc4868da 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -265,21 +265,21 @@ union nfsd3_xdrstore {
 
 #define NFS3_SVC_XDRSIZE		sizeof(union nfsd3_xdrstore)
 
-int nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_fhandleargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_diropargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_readargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_writeargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_createargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_renameargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *);
 int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 1d1b8771bdcf..8812256cd520 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -756,7 +756,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index 170ad6f5596a..e1362244f909 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -96,15 +96,15 @@ struct nlm_reboot {
  */
 #define NLMSVC_XDRSIZE		sizeof(struct nlm_args)
 
-int	nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 68e14e0f2b1f..376b8f6a3763 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -22,15 +22,15 @@
 #define	nlm4_fbig		cpu_to_be32(NLM_FBIG)
 #define	nlm4_failed		cpu_to_be32(NLM_FAILED)
 
-int	nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_cancargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
 int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index da3c5bc43d85..d6109fa7a57b 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -454,7 +454,7 @@ struct svc_procedure {
 	/* process the request: */
 	__be32			(*pc_func)(struct svc_rqst *);
 	/* XDR decode args: */
-	int			(*pc_decode)(struct svc_rqst *rqstp,
+	bool			(*pc_decode)(struct svc_rqst *rqstp,
 					     struct xdr_stream *xdr);
 	/* XDR encode result: */
 	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
-- 
cgit v1.2.3


From fda494411485aff91768842c532f90fb8eb54943 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 13 Oct 2021 10:41:06 -0400
Subject: SUNRPC: Replace the "__be32 *p" parameter to .pc_encode

The passed-in value of the "__be32 *p" parameter is now unused in
every server-side XDR encoder, and can be removed.

Note also that there is a line in each encoder that sets up a local
pointer to a struct xdr_stream. Passing that pointer from the
dispatcher instead saves one line per encoder function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svc.c             |  3 +--
 fs/lockd/xdr.c             | 11 ++++-------
 fs/lockd/xdr4.c            | 11 ++++-------
 fs/nfs/callback_xdr.c      |  4 ++--
 fs/nfsd/nfs2acl.c          |  8 ++++----
 fs/nfsd/nfs3acl.c          |  8 ++++----
 fs/nfsd/nfs3xdr.c          | 46 ++++++++++++++++------------------------------
 fs/nfsd/nfs4xdr.c          |  7 ++++---
 fs/nfsd/nfsd.h             |  3 ++-
 fs/nfsd/nfssvc.c           |  9 +++------
 fs/nfsd/nfsxdr.c           | 22 ++++++++--------------
 fs/nfsd/xdr.h              | 14 +++++++-------
 fs/nfsd/xdr3.h             | 30 +++++++++++++++---------------
 fs/nfsd/xdr4.h             |  2 +-
 include/linux/lockd/xdr.h  |  8 ++++----
 include/linux/lockd/xdr4.h |  8 ++++----
 include/linux/sunrpc/svc.h |  3 ++-
 17 files changed, 85 insertions(+), 112 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 9a82471bda07..b220e1b91726 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -780,7 +780,6 @@ module_exit(exit_nlm);
 static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	const struct svc_procedure *procp = rqstp->rq_procinfo;
-	struct kvec *resv = rqstp->rq_res.head;
 
 	svcxdr_init_decode(rqstp);
 	if (!procp->pc_decode(rqstp, &rqstp->rq_arg_stream))
@@ -793,7 +792,7 @@ static int nlmsvc_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 		return 1;
 
 	svcxdr_init_encode(rqstp);
-	if (!procp->pc_encode(rqstp, resv->iov_base + resv->iov_len))
+	if (!procp->pc_encode(rqstp, &rqstp->rq_res_stream))
 		goto out_encode_err;
 
 	return 1;
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 622c2ca37dbf..2595b4d14cd4 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -314,15 +314,14 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 int
-nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	return 1;
 }
 
 int
-nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
@@ -330,9 +329,8 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
@@ -340,9 +338,8 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
+nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_cookie(xdr, &resp->cookie))
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 45551dee26b4..32231c21c22d 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -313,15 +313,14 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 int
-nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	return 1;
 }
 
 int
-nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
@@ -329,9 +328,8 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_cookie(xdr, &resp->cookie) &&
@@ -339,9 +337,8 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p)
+nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_cookie(xdr, &resp->cookie))
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 4c48d85f6517..286d330488a7 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
  * svc_process_common() looks for an XDR encoder to know when
  * not to drop a Reply.
  */
-static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
+static int nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	return xdr_ressize_check(rqstp, p);
+	return 1;
 }
 
 static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index cf6ba5e7937e..25592ba1ed50 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -240,9 +240,9 @@ nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 /* GETACL */
-static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct inode *inode;
@@ -280,9 +280,9 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 /* ACCESS */
-static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_accessres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9e9f6afb2e00..e186467b63ec 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -166,9 +166,9 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 /* GETACL */
-static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_getaclres *resp = rqstp->rq_resp;
 	struct dentry *dentry = resp->fh.fh_dentry;
 	struct kvec *head = rqstp->rq_res.head;
@@ -218,9 +218,9 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 /* SETACL */
-static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p)
+static int
+nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 1f3de46d24d4..63f0be4e44f7 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -813,9 +813,8 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 
 /* GETATTR */
 int
-nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -833,9 +832,8 @@ nfs3svc_encode_getattrres(struct svc_rqst *rqstp, __be32 *p)
 
 /* SETATTR, REMOVE, RMDIR */
 int
-nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
@@ -843,9 +841,9 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p)
 }
 
 /* LOOKUP */
-int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p)
+int
+nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -869,9 +867,8 @@ int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, __be32 *p)
 
 /* ACCESS */
 int
-nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_accessres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -893,9 +890,8 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p)
 
 /* READLINK */
 int
-nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
@@ -921,9 +917,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 
 /* READ */
 int
-nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_readres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
@@ -954,9 +949,8 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 
 /* WRITE */
 int
-nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_writeres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -982,9 +976,8 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
 int
-nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -1008,9 +1001,8 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p)
 
 /* RENAME */
 int
-nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_renameres *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
@@ -1020,9 +1012,8 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p)
 
 /* LINK */
 int
-nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_linkres *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_nfsstat3(xdr, resp->status) &&
@@ -1032,9 +1023,8 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p)
 
 /* READDIR */
 int
-nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_readdirres *resp = rqstp->rq_resp;
 	struct xdr_buf *dirlist = &resp->dirlist;
 
@@ -1286,9 +1276,8 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr,
 
 /* FSSTAT */
 int
-nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -1333,9 +1322,8 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr,
 
 /* FSINFO */
 int
-nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -1376,9 +1364,8 @@ svcxdr_encode_pathconf3resok(struct xdr_stream *xdr,
 
 /* PATHCONF */
 int
-nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
@@ -1400,9 +1387,8 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p)
 
 /* COMMIT */
 int
-nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd3_commitres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c6623080ad98..fc77db35f2e7 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5427,10 +5427,11 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 }
 
 int
-nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
-	struct xdr_buf *buf = resp->xdr->buf;
+	struct xdr_buf *buf = xdr->buf;
+	__be32 *p;
 
 	WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
 				 buf->tail[0].iov_len);
@@ -5443,7 +5444,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p)
 
 	*p++ = resp->cstate.status;
 
-	rqstp->rq_next_page = resp->xdr->page_ptr + 1;
+	rqstp->rq_next_page = xdr->page_ptr + 1;
 
 	*p++ = htonl(resp->taglen);
 	memcpy(p, resp->tag, resp->taglen);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index bfcddd4c7534..345f8247d5da 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -80,7 +80,8 @@ struct nfsd_voidargs { };
 struct nfsd_voidres { };
 bool		nfssvc_decode_voidarg(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr);
-int		nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
+int		nfssvc_encode_voidres(struct svc_rqst *rqstp,
+				      struct xdr_stream *xdr);
 
 /*
  * Function prototypes.
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index beb564e8a3db..ed6a28ecf278 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1004,8 +1004,6 @@ out:
 int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 {
 	const struct svc_procedure *proc = rqstp->rq_procinfo;
-	struct kvec *resv = &rqstp->rq_res.head[0];
-	__be32 *p;
 
 	/*
 	 * Give the xdr decoder a chance to change this if it wants
@@ -1030,14 +1028,13 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
 	 * Need to grab the location to store the status, as
 	 * NFSv4 does some encoding while processing
 	 */
-	p = resv->iov_base + resv->iov_len;
 	svcxdr_init_encode(rqstp);
 
 	*statp = proc->pc_func(rqstp);
 	if (*statp == rpc_drop_reply || test_bit(RQ_DROPME, &rqstp->rq_flags))
 		goto out_update_drop;
 
-	if (!proc->pc_encode(rqstp, p))
+	if (!proc->pc_encode(rqstp, &rqstp->rq_res_stream))
 		goto out_encode_err;
 
 	nfsd_cache_update(rqstp, rqstp->rq_cachetype, statp + 1);
@@ -1078,13 +1075,13 @@ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 /**
  * nfssvc_encode_voidres - Encode void results
  * @rqstp: Server RPC transaction context
- * @p: buffer in which to encode results
+ * @xdr: XDR stream into which to encode results
  *
  * Return values:
  *   %0: Local error while encoding
  *   %1: Encoding was successful
  */
-int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+int nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	return 1;
 }
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index b5817a41b3de..6aa8138ae2f7 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -415,18 +415,16 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 int
-nfssvc_encode_statres(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd_stat *resp = rqstp->rq_resp;
 
 	return svcxdr_encode_stat(xdr, resp->status);
 }
 
 int
-nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd_attrstat *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
@@ -442,9 +440,8 @@ nfssvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd_diropres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
@@ -462,9 +459,8 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
@@ -484,9 +480,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd_readres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
@@ -509,9 +504,8 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd_readdirres *resp = rqstp->rq_resp;
 	struct xdr_buf *dirlist = &resp->dirlist;
 
@@ -532,11 +526,11 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p)
 }
 
 int
-nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p)
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	struct xdr_stream *xdr = &rqstp->rq_res_stream;
 	struct nfsd_statfsres *resp = rqstp->rq_resp;
 	struct kstatfs	*stat = &resp->stats;
+	__be32 *p;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
 		return 0;
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 31be7d30e64e..1133fb3bf328 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -152,13 +152,13 @@ bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int nfssvc_encode_statres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_attrstatres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_readres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *);
-int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *);
+int nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset);
 int nfssvc_encode_entry(void *data, const char *name, int namlen,
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index ef72bc4868da..bb017fc7cba1 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -281,21 +281,21 @@ bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int nfs3svc_encode_getattrres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_lookupres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_readres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_createres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *);
+int nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 void nfs3svc_release_fhandle(struct svc_rqst *);
 void nfs3svc_release_fhandle2(struct svc_rqst *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 6aeb6755278f..3bd553925c35 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -758,7 +758,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
 bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
+int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index e1362244f909..d8bd26a5525e 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -106,9 +106,9 @@ bool	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int	nlmsvc_encode_testres(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_res(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_void(struct svc_rqst *, __be32 *);
-int	nlmsvc_encode_shareres(struct svc_rqst *, __be32 *);
+int	nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 #endif /* LOCKD_XDR_H */
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 376b8f6a3763..50677be3557d 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -32,10 +32,10 @@ bool	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int	nlm4svc_encode_testres(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_res(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_void(struct svc_rqst *, __be32 *);
-int	nlm4svc_encode_shareres(struct svc_rqst *, __be32 *);
+int	nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+int	nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 extern const struct rpc_version nlm_version4;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index d6109fa7a57b..85694cd0db66 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -457,7 +457,8 @@ struct svc_procedure {
 	bool			(*pc_decode)(struct svc_rqst *rqstp,
 					     struct xdr_stream *xdr);
 	/* XDR encode result: */
-	int			(*pc_encode)(struct svc_rqst *, __be32 *data);
+	int			(*pc_encode)(struct svc_rqst *rqstp,
+					     struct xdr_stream *xdr);
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
 	unsigned int		pc_argsize;	/* argument struct size */
-- 
cgit v1.2.3


From 130e2054d4a652a2bd79fb1557ddcd19c053cb37 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 13 Oct 2021 10:41:13 -0400
Subject: SUNRPC: Change return value type of .pc_encode

Returning an undecorated integer is an age-old trope, but it's
not clear (even to previous experts in this code) that the only
valid return values are 1 and 0. These functions do not return
a negative errno, rpc_stat value, or a positive length.

Document there are only two valid return values by having
.pc_encode return only true or false.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/xdr.c             |  18 ++---
 fs/lockd/xdr4.c            |  18 ++---
 fs/nfs/callback_xdr.c      |   4 +-
 fs/nfsd/nfs2acl.c          |   4 +-
 fs/nfsd/nfs3acl.c          |  18 ++---
 fs/nfsd/nfs3xdr.c          | 166 ++++++++++++++++++++++-----------------------
 fs/nfsd/nfs4xdr.c          |   4 +-
 fs/nfsd/nfsd.h             |   2 +-
 fs/nfsd/nfssvc.c           |   8 +--
 fs/nfsd/nfsxdr.c           |  60 ++++++++--------
 fs/nfsd/xdr.h              |  14 ++--
 fs/nfsd/xdr3.h             |  30 ++++----
 fs/nfsd/xdr4.h             |   2 +-
 include/linux/lockd/xdr.h  |   8 +--
 include/linux/lockd/xdr4.h |   8 +--
 include/linux/sunrpc/svc.h |   2 +-
 16 files changed, 183 insertions(+), 183 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 2595b4d14cd4..2fb5748dae0c 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -313,13 +313,13 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  * Encode Reply results
  */
 
-int
+bool
 nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	return 1;
+	return true;
 }
 
-int
+bool
 nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_resp;
@@ -328,7 +328,7 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_encode_testrply(xdr, resp);
 }
 
-int
+bool
 nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_resp;
@@ -337,18 +337,18 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_encode_stats(xdr, resp->status);
 }
 
-int
+bool
 nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_cookie(xdr, &resp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_encode_stats(xdr, resp->status))
-		return 0;
+		return false;
 	/* sequence */
 	if (xdr_stream_encode_u32(xdr, 0) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c
index 32231c21c22d..856267c0864b 100644
--- a/fs/lockd/xdr4.c
+++ b/fs/lockd/xdr4.c
@@ -312,13 +312,13 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  * Encode Reply results
  */
 
-int
+bool
 nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	return 1;
+	return true;
 }
 
-int
+bool
 nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_resp;
@@ -327,7 +327,7 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_encode_testrply(xdr, resp);
 }
 
-int
+bool
 nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_resp;
@@ -336,18 +336,18 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		svcxdr_encode_stats(xdr, resp->status);
 }
 
-int
+bool
 nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nlm_res *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_cookie(xdr, &resp->cookie))
-		return 0;
+		return false;
 	if (!svcxdr_encode_stats(xdr, resp->status))
-		return 0;
+		return false;
 	/* sequence */
 	if (xdr_stream_encode_u32(xdr, 0) < 0)
-		return 0;
+		return false;
 
-	return 1;
+	return true;
 }
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 286d330488a7..a67c41ec545f 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -67,9 +67,9 @@ static __be32 nfs4_callback_null(struct svc_rqst *rqstp)
  * svc_process_common() looks for an XDR encoder to know when
  * not to drop a Reply.
  */
-static int nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+static bool nfs4_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	return 1;
+	return true;
 }
 
 static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 25592ba1ed50..367551bddfc6 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -240,7 +240,7 @@ nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 /* GETACL */
-static int
+static bool
 nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_getaclres *resp = rqstp->rq_resp;
@@ -280,7 +280,7 @@ nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 }
 
 /* ACCESS */
-static int
+static bool
 nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_accessres *resp = rqstp->rq_resp;
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index e186467b63ec..35b2ebda14da 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -166,7 +166,7 @@ nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 /* GETACL */
-static int
+static bool
 nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_getaclres *resp = rqstp->rq_resp;
@@ -178,14 +178,14 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	int w;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		inode = d_inode(dentry);
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->mask) < 0)
-			return 0;
+			return false;
 
 		base = (char *)xdr->p - (char *)head->iov_base;
 
@@ -194,7 +194,7 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 			(resp->mask & NFS_DFACL) ? resp->acl_default : NULL);
 		while (w > 0) {
 			if (!*(rqstp->rq_next_page++))
-				return 0;
+				return false;
 			w -= PAGE_SIZE;
 		}
 
@@ -207,18 +207,18 @@ nfs3svc_encode_getaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 					  resp->mask & NFS_DFACL,
 					  NFS_ACL_DEFAULT);
 		if (n <= 0)
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* SETACL */
-static int
+static bool
 nfs3svc_encode_setaclres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 63f0be4e44f7..c3ac1b6aa3aa 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -812,26 +812,26 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  */
 
 /* GETATTR */
-int
+bool
 nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		lease_get_mtime(d_inode(resp->fh.fh_dentry), &resp->stat.mtime);
 		if (!svcxdr_encode_fattr3(rqstp, xdr, &resp->fh, &resp->stat))
-			return 0;
+			return false;
 		break;
 	}
 
-	return 1;
+	return true;
 }
 
 /* SETATTR, REMOVE, RMDIR */
-int
+bool
 nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_attrstat *resp = rqstp->rq_resp;
@@ -841,166 +841,166 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 }
 
 /* LOOKUP */
-int
+bool
 nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_nfs_fh3(xdr, &resp->fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->dirfh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* ACCESS */
-int
+bool
 nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_accessres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->access) < 0)
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* READLINK */
-int
+bool
 nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_readlinkres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->len) < 0)
-			return 0;
+			return false;
 		xdr_write_pages(xdr, resp->pages, 0, resp->len);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* READ */
-int
+bool
 nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_readres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return 0;
+			return false;
 		if (xdr_stream_encode_bool(xdr, resp->eof) < 0)
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return 0;
+			return false;
 		xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
 				resp->count);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* WRITE */
-int
+bool
 nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_writeres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->committed) < 0)
-			return 0;
+			return false;
 		if (!svcxdr_encode_writeverf3(xdr, resp->verf))
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* CREATE, MKDIR, SYMLINK, MKNOD */
-int
+bool
 nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_diropres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_fh3(xdr, &resp->fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->dirfh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* RENAME */
-int
+bool
 nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_renameres *resp = rqstp->rq_resp;
@@ -1011,7 +1011,7 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 }
 
 /* LINK */
-int
+bool
 nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_linkres *resp = rqstp->rq_resp;
@@ -1022,33 +1022,33 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 }
 
 /* READDIR */
-int
+bool
 nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_readdirres *resp = rqstp->rq_resp;
 	struct xdr_buf *dirlist = &resp->dirlist;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_cookieverf3(xdr, resp->verf))
-			return 0;
+			return false;
 		xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
 		/* no more entries */
 		if (xdr_stream_encode_item_absent(xdr) < 0)
-			return 0;
+			return false;
 		if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static __be32
@@ -1275,26 +1275,26 @@ svcxdr_encode_fsstat3resok(struct xdr_stream *xdr,
 }
 
 /* FSSTAT */
-int
+bool
 nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_fsstatres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_fsstat3resok(xdr, resp))
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static bool
@@ -1321,26 +1321,26 @@ svcxdr_encode_fsinfo3resok(struct xdr_stream *xdr,
 }
 
 /* FSINFO */
-int
+bool
 nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_fsinfores *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_fsinfo3resok(xdr, resp))
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 static bool
@@ -1363,49 +1363,49 @@ svcxdr_encode_pathconf3resok(struct xdr_stream *xdr,
 }
 
 /* PATHCONF */
-int
+bool
 nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_pathconfres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_pathconf3resok(xdr, resp))
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_post_op_attr(rqstp, xdr, &nfs3svc_null_fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /* COMMIT */
-int
+bool
 nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd3_commitres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_nfsstat3(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_writeverf3(xdr, resp->verf))
-			return 0;
+			return false;
 		break;
 	default:
 		if (!svcxdr_encode_wcc_data(rqstp, xdr, &resp->fh))
-			return 0;
+			return false;
 	}
 
-	return 1;
+	return true;
 }
 
 /*
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index fc77db35f2e7..9b609aac47e1 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5426,7 +5426,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	return nfsd4_decode_compound(args);
 }
 
-int
+bool
 nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd4_compoundres *resp = rqstp->rq_resp;
@@ -5452,5 +5452,5 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	*p++ = htonl(resp->opcnt);
 
 	nfsd4_sequence_done(resp);
-	return 1;
+	return true;
 }
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 345f8247d5da..498e5a489826 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -80,7 +80,7 @@ struct nfsd_voidargs { };
 struct nfsd_voidres { };
 bool		nfssvc_decode_voidarg(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr);
-int		nfssvc_encode_voidres(struct svc_rqst *rqstp,
+bool		nfssvc_encode_voidres(struct svc_rqst *rqstp,
 				      struct xdr_stream *xdr);
 
 /*
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ed6a28ecf278..362e819ff06a 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1078,12 +1078,12 @@ bool nfssvc_decode_voidarg(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  * @xdr: XDR stream into which to encode results
  *
  * Return values:
- *   %0: Local error while encoding
- *   %1: Encoding was successful
+ *   %false: Local error while encoding
+ *   %true: Encoding was successful
  */
-int nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
+bool nfssvc_encode_voidres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
-	return 1;
+	return true;
 }
 
 int nfsd_pool_stats_open(struct inode *inode, struct file *file)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6aa8138ae2f7..aba8520b4b8b 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -414,7 +414,7 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr)
  * XDR encode functions
  */
 
-int
+bool
 nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_stat *resp = rqstp->rq_resp;
@@ -422,110 +422,110 @@ nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	return svcxdr_encode_stat(xdr, resp->status);
 }
 
-int
+bool
 nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_attrstat *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-			return 0;
+			return false;
 		break;
 	}
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_diropres *resp = rqstp->rq_resp;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_fhandle(xdr, &resp->fh))
-			return 0;
+			return false;
 		if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-			return 0;
+			return false;
 		break;
 	}
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_readlinkres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (xdr_stream_encode_u32(xdr, resp->len) < 0)
-			return 0;
+			return false;
 		xdr_write_pages(xdr, &resp->page, 0, resp->len);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->len) < 0)
-			return 0;
+			return false;
 		break;
 	}
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_readres *resp = rqstp->rq_resp;
 	struct kvec *head = rqstp->rq_res.head;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		if (!svcxdr_encode_fattr(rqstp, xdr, &resp->fh, &resp->stat))
-			return 0;
+			return false;
 		if (xdr_stream_encode_u32(xdr, resp->count) < 0)
-			return 0;
+			return false;
 		xdr_write_pages(xdr, resp->pages, rqstp->rq_res.page_base,
 				resp->count);
 		if (svc_encode_result_payload(rqstp, head->iov_len, resp->count) < 0)
-			return 0;
+			return false;
 		break;
 	}
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_readdirres *resp = rqstp->rq_resp;
 	struct xdr_buf *dirlist = &resp->dirlist;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		xdr_write_pages(xdr, dirlist->pages, 0, dirlist->len);
 		/* no more entries */
 		if (xdr_stream_encode_item_absent(xdr) < 0)
-			return 0;
+			return false;
 		if (xdr_stream_encode_bool(xdr, resp->common.err == nfserr_eof) < 0)
-			return 0;
+			return false;
 		break;
 	}
 
-	return 1;
+	return true;
 }
 
-int
+bool
 nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 {
 	struct nfsd_statfsres *resp = rqstp->rq_resp;
@@ -533,12 +533,12 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 	__be32 *p;
 
 	if (!svcxdr_encode_stat(xdr, resp->status))
-		return 0;
+		return false;
 	switch (resp->status) {
 	case nfs_ok:
 		p = xdr_reserve_space(xdr, XDR_UNIT * 5);
 		if (!p)
-			return 0;
+			return false;
 		*p++ = cpu_to_be32(NFSSVC_MAXBLKSIZE_V2);
 		*p++ = cpu_to_be32(stat->f_bsize);
 		*p++ = cpu_to_be32(stat->f_blocks);
@@ -547,7 +547,7 @@ nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr)
 		break;
 	}
 
-	return 1;
+	return true;
 }
 
 /**
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 1133fb3bf328..528fb299430e 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -152,13 +152,13 @@ bool nfssvc_decode_linkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfssvc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_statres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_attrstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_diropres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_statfsres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfssvc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 void nfssvc_encode_nfscookie(struct nfsd_readdirres *resp, u32 offset);
 int nfssvc_encode_entry(void *data, const char *name, int namlen,
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index bb017fc7cba1..03fe4e21306c 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -281,21 +281,21 @@ bool nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool nfs3svc_decode_commitargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_getattrres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_wccstat(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_lookupres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_accessres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_readres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_writeres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_createres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_renameres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_linkres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_readdirres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs3svc_encode_commitres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 void nfs3svc_release_fhandle(struct svc_rqst *);
 void nfs3svc_release_fhandle2(struct svc_rqst *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 3bd553925c35..846ab6df9d48 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -758,7 +758,7 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 
 bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
 bool nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool nfs4svc_encode_compoundres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
 void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
 void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h
index d8bd26a5525e..398f70093cd3 100644
--- a/include/linux/lockd/xdr.h
+++ b/include/linux/lockd/xdr.h
@@ -106,9 +106,9 @@ bool	nlmsvc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlmsvc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlmsvc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int	nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlmsvc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 #endif /* LOCKD_XDR_H */
diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h
index 50677be3557d..9a6b55da8fd6 100644
--- a/include/linux/lockd/xdr4.h
+++ b/include/linux/lockd/xdr4.h
@@ -32,10 +32,10 @@ bool	nlm4svc_decode_reboot(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlm4svc_decode_shareargs(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 bool	nlm4svc_decode_notify(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
-int	nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
-int	nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_encode_testres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_encode_res(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_encode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr);
+bool	nlm4svc_encode_shareres(struct svc_rqst *rqstp, struct xdr_stream *xdr);
 
 extern const struct rpc_version nlm_version4;
 
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 85694cd0db66..0ae28ae6caf2 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -457,7 +457,7 @@ struct svc_procedure {
 	bool			(*pc_decode)(struct svc_rqst *rqstp,
 					     struct xdr_stream *xdr);
 	/* XDR encode result: */
-	int			(*pc_encode)(struct svc_rqst *rqstp,
+	bool			(*pc_encode)(struct svc_rqst *rqstp,
 					     struct xdr_stream *xdr);
 	/* XDR free result: */
 	void			(*pc_release)(struct svc_rqst *);
-- 
cgit v1.2.3


From 40af35fdf79c77e19c597e47cc8fe9a8e200de30 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 12 Oct 2021 09:06:32 -0700
Subject: netdevice: demote the type of some dev_addr_set() helpers

__dev_addr_set() and dev_addr_mod() and pretty low level,
let the arguments be void, there's no chance for confusion
in callers converted to use them. Keep u8 in dev_addr_set()
because some of the callers are converted from a loop
and we want to make sure assignments are not from an array
of a different type.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0723c1314ea2..f33af341bfb2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4643,7 +4643,7 @@ void __hw_addr_init(struct netdev_hw_addr_list *list);
 
 /* Functions used for device addresses handling */
 static inline void
-__dev_addr_set(struct net_device *dev, const u8 *addr, size_t len)
+__dev_addr_set(struct net_device *dev, const void *addr, size_t len)
 {
 	memcpy(dev->dev_addr, addr, len);
 }
@@ -4655,7 +4655,7 @@ static inline void dev_addr_set(struct net_device *dev, const u8 *addr)
 
 static inline void
 dev_addr_mod(struct net_device *dev, unsigned int offset,
-	     const u8 *addr, size_t len)
+	     const void *addr, size_t len)
 {
 	memcpy(&dev->dev_addr[offset], addr, len);
 }
-- 
cgit v1.2.3


From 06de2cd788bfa3b51137e9bd471d68029ab68103 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 12 Oct 2021 17:39:27 +0200
Subject: gpio: max730x: Make __max730x_remove() return void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An spi or i2c remove callback is only called for devices that probed
successfully. In this case this implies that __max730x_probe() set a
non-NULL driver data. So the check ts == NULL is never true. With this
check dropped, __max730x_remove() returns zero unconditionally. Make it
return void instead which makes it easier to see in the callers that
there is no error to handle.

Also the return value of i2c and spi remove callbacks is ignored anyway.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
---
 drivers/gpio/gpio-max7300.c | 4 +++-
 drivers/gpio/gpio-max7301.c | 4 +++-
 drivers/gpio/gpio-max730x.c | 6 +-----
 include/linux/spi/max7301.h | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-max7300.c b/drivers/gpio/gpio-max7300.c
index 19cc2ed6a3f5..b2b547dd6e84 100644
--- a/drivers/gpio/gpio-max7300.c
+++ b/drivers/gpio/gpio-max7300.c
@@ -50,7 +50,9 @@ static int max7300_probe(struct i2c_client *client,
 
 static int max7300_remove(struct i2c_client *client)
 {
-	return __max730x_remove(&client->dev);
+	__max730x_remove(&client->dev);
+
+	return 0;
 }
 
 static const struct i2c_device_id max7300_id[] = {
diff --git a/drivers/gpio/gpio-max7301.c b/drivers/gpio/gpio-max7301.c
index 1307c243b4e9..5862d73bf325 100644
--- a/drivers/gpio/gpio-max7301.c
+++ b/drivers/gpio/gpio-max7301.c
@@ -66,7 +66,9 @@ static int max7301_probe(struct spi_device *spi)
 
 static int max7301_remove(struct spi_device *spi)
 {
-	return __max730x_remove(&spi->dev);
+	__max730x_remove(&spi->dev);
+
+	return 0;
 }
 
 static const struct spi_device_id max7301_id[] = {
diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c
index b8c1fe20f49a..bb5cf14ae4c8 100644
--- a/drivers/gpio/gpio-max730x.c
+++ b/drivers/gpio/gpio-max730x.c
@@ -220,18 +220,14 @@ exit_destroy:
 }
 EXPORT_SYMBOL_GPL(__max730x_probe);
 
-int __max730x_remove(struct device *dev)
+void __max730x_remove(struct device *dev)
 {
 	struct max7301 *ts = dev_get_drvdata(dev);
 
-	if (ts == NULL)
-		return -ENODEV;
-
 	/* Power down the chip and disable IRQ output */
 	ts->write(dev, 0x04, 0x00);
 	gpiochip_remove(&ts->chip);
 	mutex_destroy(&ts->lock);
-	return 0;
 }
 EXPORT_SYMBOL_GPL(__max730x_remove);
 
diff --git a/include/linux/spi/max7301.h b/include/linux/spi/max7301.h
index 21449067aedb..e392c53758bc 100644
--- a/include/linux/spi/max7301.h
+++ b/include/linux/spi/max7301.h
@@ -31,6 +31,6 @@ struct max7301_platform_data {
 	u32		input_pullup_active;
 };
 
-extern int __max730x_remove(struct device *dev);
+extern void __max730x_remove(struct device *dev);
 extern int __max730x_probe(struct max7301 *ts);
 #endif
-- 
cgit v1.2.3


From 6312d52838b21f5c4a5afa1269a00df4364fd354 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Wed, 13 Oct 2021 15:57:43 +0200
Subject: marvell: octeontx2: build error: unknown type name 'u64'

Building an allmodconfig kernel arm64 kernel, the following build error
shows up:

In file included from drivers/crypto/marvell/octeontx2/cn10k_cpt.c:4:
include/linux/soc/marvell/octeontx2/asm.h:38:15: error: unknown type name 'u64'
   38 | static inline u64 otx2_atomic64_fetch_add(u64 incr, u64 *ptr)
      |               ^~~

Include linux/types.h in asm.h so the compiler knows what the type
'u64' are.

Fixes: af3826db74d1 ("octeontx2-pf: Use hardware register for CQE count")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Link: https://lore.kernel.org/r/20211013135743.3826594-1-anders.roxell@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/soc/marvell/octeontx2/asm.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h
index 0f79fd7f81a1..d683251a0b40 100644
--- a/include/linux/soc/marvell/octeontx2/asm.h
+++ b/include/linux/soc/marvell/octeontx2/asm.h
@@ -5,6 +5,7 @@
 #ifndef __SOC_OTX2_ASM_H
 #define __SOC_OTX2_ASM_H
 
+#include <linux/types.h>
 #if defined(CONFIG_ARM64)
 /*
  * otx2_lmt_flush is used for LMT store operation.
-- 
cgit v1.2.3


From 78b727d0281507fabf954573420790b21e34aefe Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Fri, 17 Sep 2021 11:04:34 +0800
Subject: clk: qcom: smd-rpm: Add QCM2290 RPM clock support

Add support for RPM-managed clocks on the QCM2290 platform.

Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Link: https://lore.kernel.org/r/20210917030434.19859-4-shawn.guo@linaro.org
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/qcom/clk-smd-rpm.c         | 59 ++++++++++++++++++++++++++++++++++
 include/dt-bindings/clock/qcom,rpmcc.h |  6 ++++
 include/linux/soc/qcom/smd-rpm.h       |  2 ++
 3 files changed, 67 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c
index 27fce64c2003..5776d85a1e5c 100644
--- a/drivers/clk/qcom/clk-smd-rpm.c
+++ b/drivers/clk/qcom/clk-smd-rpm.c
@@ -1077,6 +1077,64 @@ static const struct rpm_smd_clk_desc rpm_clk_sm6115 = {
 	.num_clks = ARRAY_SIZE(sm6115_clks),
 };
 
+/* QCM2290 */
+DEFINE_CLK_SMD_RPM_XO_BUFFER(qcm2290, ln_bb_clk2, ln_bb_clk2_a, 0x2, 19200000);
+DEFINE_CLK_SMD_RPM_XO_BUFFER(qcm2290, rf_clk3, rf_clk3_a, 6, 38400000);
+
+DEFINE_CLK_SMD_RPM(qcm2290, qpic_clk, qpic_a_clk, QCOM_SMD_RPM_QPIC_CLK, 0);
+DEFINE_CLK_SMD_RPM(qcm2290, hwkm_clk, hwkm_a_clk, QCOM_SMD_RPM_HWKM_CLK, 0);
+DEFINE_CLK_SMD_RPM(qcm2290, pka_clk, pka_a_clk, QCOM_SMD_RPM_PKA_CLK, 0);
+DEFINE_CLK_SMD_RPM(qcm2290, cpuss_gnoc_clk, cpuss_gnoc_a_clk,
+		   QCOM_SMD_RPM_MEM_CLK, 1);
+DEFINE_CLK_SMD_RPM(qcm2290, bimc_gpu_clk, bimc_gpu_a_clk,
+		   QCOM_SMD_RPM_MEM_CLK, 2);
+
+static struct clk_smd_rpm *qcm2290_clks[] = {
+	[RPM_SMD_XO_CLK_SRC] = &sdm660_bi_tcxo,
+	[RPM_SMD_XO_A_CLK_SRC] = &sdm660_bi_tcxo_a,
+	[RPM_SMD_SNOC_CLK] = &sm6125_snoc_clk,
+	[RPM_SMD_SNOC_A_CLK] = &sm6125_snoc_a_clk,
+	[RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk,
+	[RPM_SMD_BIMC_A_CLK] = &msm8916_bimc_a_clk,
+	[RPM_SMD_QDSS_CLK] = &sm6125_qdss_clk,
+	[RPM_SMD_QDSS_A_CLK] = &sm6125_qdss_a_clk,
+	[RPM_SMD_LN_BB_CLK2] = &qcm2290_ln_bb_clk2,
+	[RPM_SMD_LN_BB_CLK2_A] = &qcm2290_ln_bb_clk2_a,
+	[RPM_SMD_RF_CLK3] = &qcm2290_rf_clk3,
+	[RPM_SMD_RF_CLK3_A] = &qcm2290_rf_clk3_a,
+	[RPM_SMD_CNOC_CLK] = &sm6125_cnoc_clk,
+	[RPM_SMD_CNOC_A_CLK] = &sm6125_cnoc_a_clk,
+	[RPM_SMD_IPA_CLK] = &msm8976_ipa_clk,
+	[RPM_SMD_IPA_A_CLK] = &msm8976_ipa_a_clk,
+	[RPM_SMD_QUP_CLK] = &sm6125_qup_clk,
+	[RPM_SMD_QUP_A_CLK] = &sm6125_qup_a_clk,
+	[RPM_SMD_MMRT_CLK] = &sm6125_mmrt_clk,
+	[RPM_SMD_MMRT_A_CLK] = &sm6125_mmrt_a_clk,
+	[RPM_SMD_MMNRT_CLK] = &sm6125_mmnrt_clk,
+	[RPM_SMD_MMNRT_A_CLK] = &sm6125_mmnrt_a_clk,
+	[RPM_SMD_SNOC_PERIPH_CLK] = &sm6125_snoc_periph_clk,
+	[RPM_SMD_SNOC_PERIPH_A_CLK] = &sm6125_snoc_periph_a_clk,
+	[RPM_SMD_SNOC_LPASS_CLK] = &sm6125_snoc_lpass_clk,
+	[RPM_SMD_SNOC_LPASS_A_CLK] = &sm6125_snoc_lpass_a_clk,
+	[RPM_SMD_CE1_CLK] = &msm8992_ce1_clk,
+	[RPM_SMD_CE1_A_CLK] = &msm8992_ce1_a_clk,
+	[RPM_SMD_QPIC_CLK] = &qcm2290_qpic_clk,
+	[RPM_SMD_QPIC_CLK_A] = &qcm2290_qpic_a_clk,
+	[RPM_SMD_HWKM_CLK] = &qcm2290_hwkm_clk,
+	[RPM_SMD_HWKM_A_CLK] = &qcm2290_hwkm_a_clk,
+	[RPM_SMD_PKA_CLK] = &qcm2290_pka_clk,
+	[RPM_SMD_PKA_A_CLK] = &qcm2290_pka_a_clk,
+	[RPM_SMD_BIMC_GPU_CLK] = &qcm2290_bimc_gpu_clk,
+	[RPM_SMD_BIMC_GPU_A_CLK] = &qcm2290_bimc_gpu_a_clk,
+	[RPM_SMD_CPUSS_GNOC_CLK] = &qcm2290_cpuss_gnoc_clk,
+	[RPM_SMD_CPUSS_GNOC_A_CLK] = &qcm2290_cpuss_gnoc_a_clk,
+};
+
+static const struct rpm_smd_clk_desc rpm_clk_qcm2290 = {
+	.clks = qcm2290_clks,
+	.num_clks = ARRAY_SIZE(qcm2290_clks),
+};
+
 static const struct of_device_id rpm_smd_clk_match_table[] = {
 	{ .compatible = "qcom,rpmcc-mdm9607", .data = &rpm_clk_mdm9607 },
 	{ .compatible = "qcom,rpmcc-msm8226", .data = &rpm_clk_msm8974 },
@@ -1089,6 +1147,7 @@ static const struct of_device_id rpm_smd_clk_match_table[] = {
 	{ .compatible = "qcom,rpmcc-msm8994", .data = &rpm_clk_msm8994 },
 	{ .compatible = "qcom,rpmcc-msm8996", .data = &rpm_clk_msm8996 },
 	{ .compatible = "qcom,rpmcc-msm8998", .data = &rpm_clk_msm8998 },
+	{ .compatible = "qcom,rpmcc-qcm2290", .data = &rpm_clk_qcm2290 },
 	{ .compatible = "qcom,rpmcc-qcs404",  .data = &rpm_clk_qcs404  },
 	{ .compatible = "qcom,rpmcc-sdm660",  .data = &rpm_clk_sdm660  },
 	{ .compatible = "qcom,rpmcc-sm6115",  .data = &rpm_clk_sm6115  },
diff --git a/include/dt-bindings/clock/qcom,rpmcc.h b/include/dt-bindings/clock/qcom,rpmcc.h
index aa834d516234..fb624ff39273 100644
--- a/include/dt-bindings/clock/qcom,rpmcc.h
+++ b/include/dt-bindings/clock/qcom,rpmcc.h
@@ -159,5 +159,11 @@
 #define RPM_SMD_SNOC_PERIPH_A_CLK		113
 #define RPM_SMD_SNOC_LPASS_CLK			114
 #define RPM_SMD_SNOC_LPASS_A_CLK		115
+#define RPM_SMD_HWKM_CLK			116
+#define RPM_SMD_HWKM_A_CLK			117
+#define RPM_SMD_PKA_CLK				118
+#define RPM_SMD_PKA_A_CLK			119
+#define RPM_SMD_CPUSS_GNOC_CLK			120
+#define RPM_SMD_CPUSS_GNOC_A_CLK		121
 
 #endif
diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h
index 60e66fc9b6bf..860dd8cdf9f3 100644
--- a/include/linux/soc/qcom/smd-rpm.h
+++ b/include/linux/soc/qcom/smd-rpm.h
@@ -38,6 +38,8 @@ struct qcom_smd_rpm;
 #define QCOM_SMD_RPM_IPA_CLK	0x617069
 #define QCOM_SMD_RPM_CE_CLK	0x6563
 #define QCOM_SMD_RPM_AGGR_CLK	0x72676761
+#define QCOM_SMD_RPM_HWKM_CLK	0x6d6b7768
+#define QCOM_SMD_RPM_PKA_CLK	0x616b70
 
 int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm,
 		       int state,
-- 
cgit v1.2.3


From 9974cb5c879048f5144d0660c4932d98176213c4 Mon Sep 17 00:00:00 2001
From: Chen Wandun <chenwandun@huawei.com>
Date: Wed, 13 Oct 2021 17:47:02 +0800
Subject: net: delete redundant function declaration

The implement of function netdev_all_upper_get_next_dev_rcu has been
removed in:
	commit f1170fd462c6 ("net: Remove all_adj_list and its references")
so delete redundant declaration in header file.

Fixes: f1170fd462c6 ("net: Remove all_adj_list and its references")
Signed-off-by: Chen Wandun <chenwandun@huawei.com>
Link: https://lore.kernel.org/r/20211013094702.3931071-1-chenwandun@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f33af341bfb2..173984414f38 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4801,8 +4801,6 @@ struct netdev_nested_priv {
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 						     struct list_head **iter);
-struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
-						     struct list_head **iter);
 
 #ifdef CONFIG_LOCKDEP
 static LIST_HEAD(net_unlink_list);
-- 
cgit v1.2.3


From f0ada6da3a0d69682e21f1783d02676e0fbf1bc1 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 13 Oct 2021 17:37:07 +0300
Subject: device property: Add missed header in fwnode.h

When adding some stuff to the header file we must not rely on
implicit dependencies that are happen by luck or bugs in other
headers. Hence fwnode.h needs to use bits.h directly.

Fixes: c2c724c868c4 ("driver core: Add fw_devlink_parse_fwtree()")
Cc: Saravana Kannan <saravanak@google.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20211013143707.80222-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/fwnode.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 9f4ad719bfe3..3a532ba66f6c 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/bits.h>
 #include <linux/err.h>
 
 struct fwnode_operations;
-- 
cgit v1.2.3


From 5de62ea84abd732ded7c5569426fd71c0420f83e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 21 Sep 2021 22:16:02 +0200
Subject: sched,livepatch: Use wake_up_if_idle()

Make sure to prod idle CPUs so they call klp_update_patch_state().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Acked-by: Miroslav Benes <mbenes@suse.cz>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Tested-by: Petr Mladek <pmladek@suse.com>
Tested-by: Vasily Gorbik <gor@linux.ibm.com> # on s390
Link: https://lkml.kernel.org/r/20210929151723.162004989@infradead.org
---
 include/linux/sched/idle.h    | 4 ++++
 kernel/livepatch/transition.c | 5 ++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index 22873d276be6..d73d314d59c6 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -11,7 +11,11 @@ enum cpu_idle_type {
 	CPU_MAX_IDLE_TYPES
 };
 
+#ifdef CONFIG_SMP
 extern void wake_up_if_idle(int cpu);
+#else
+static inline void wake_up_if_idle(int cpu) { }
+#endif
 
 /*
  * Idle thread specific functions to determine the need_resched
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index 75251e9dbc3c..5683ac0d2566 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -413,8 +413,11 @@ void klp_try_complete_transition(void)
 	for_each_possible_cpu(cpu) {
 		task = idle_task(cpu);
 		if (cpu_online(cpu)) {
-			if (!klp_try_switch_task(task))
+			if (!klp_try_switch_task(task)) {
 				complete = false;
+				/* Make idle task go through the main loop. */
+				wake_up_if_idle(cpu);
+			}
 		} else if (task->patch_state != klp_target_state) {
 			/* offline idle tasks can be switched immediately */
 			clear_tsk_thread_flag(task, TIF_PATCH_PENDING);
-- 
cgit v1.2.3


From 4ef0c5c6b5ba1f38f0ea1cedad0cad722f00c14a Mon Sep 17 00:00:00 2001
From: Zhang Qiao <zhangqiao22@huawei.com>
Date: Wed, 15 Sep 2021 14:40:30 +0800
Subject: kernel/sched: Fix sched_fork() access an invalid sched_task_group

There is a small race between copy_process() and sched_fork()
where child->sched_task_group point to an already freed pointer.

	parent doing fork()      | someone moving the parent
				 | to another cgroup
  -------------------------------+-------------------------------
  copy_process()
      + dup_task_struct()<1>
				  parent move to another cgroup,
				  and free the old cgroup. <2>
      + sched_fork()
	+ __set_task_cpu()<3>
	+ task_fork_fair()
	  + sched_slice()<4>

In the worst case, this bug can lead to "use-after-free" and
cause panic as shown above:

  (1) parent copy its sched_task_group to child at <1>;

  (2) someone move the parent to another cgroup and free the old
      cgroup at <2>;

  (3) the sched_task_group and cfs_rq that belong to the old cgroup
      will be accessed at <3> and <4>, which cause a panic:

  [] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
  [] PGD 8000001fa0a86067 P4D 8000001fa0a86067 PUD 2029955067 PMD 0
  [] Oops: 0000 [#1] SMP PTI
  [] CPU: 7 PID: 648398 Comm: ebizzy Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0.x86_64+ #1
  [] RIP: 0010:sched_slice+0x84/0xc0

  [] Call Trace:
  []  task_fork_fair+0x81/0x120
  []  sched_fork+0x132/0x240
  []  copy_process.part.5+0x675/0x20e0
  []  ? __handle_mm_fault+0x63f/0x690
  []  _do_fork+0xcd/0x3b0
  []  do_syscall_64+0x5d/0x1d0
  []  entry_SYSCALL_64_after_hwframe+0x65/0xca
  [] RIP: 0033:0x7f04418cd7e1

Between cgroup_can_fork() and cgroup_post_fork(), the cgroup
membership and thus sched_task_group can't change. So update child's
sched_task_group at sched_post_fork() and move task_fork() and
__set_task_cpu() (where accees the sched_task_group) from sched_fork()
to sched_post_fork().

Fixes: 8323f26ce342 ("sched: Fix race in task_group")
Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lkml.kernel.org/r/20210915064030.2231-1-zhangqiao22@huawei.com
---
 include/linux/sched/task.h |  3 ++-
 kernel/fork.c              |  2 +-
 kernel/sched/core.c        | 43 ++++++++++++++++++++++---------------------
 3 files changed, 25 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ef02be869cf2..ba88a6987400 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -54,7 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_post_fork(struct task_struct *p);
+extern void sched_post_fork(struct task_struct *p,
+			    struct kernel_clone_args *kargs);
 extern void sched_dead(struct task_struct *p);
 
 void __noreturn do_task_dead(void);
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..0e4251dc5436 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2405,7 +2405,7 @@ static __latent_entropy struct task_struct *copy_process(
 	write_unlock_irq(&tasklist_lock);
 
 	proc_fork_connector(p);
-	sched_post_fork(p);
+	sched_post_fork(p, args);
 	cgroup_post_fork(p, args);
 	perf_event_fork(p);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3b55ef99c03f..935c2da00339 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4343,8 +4343,6 @@ int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
  */
 int sched_fork(unsigned long clone_flags, struct task_struct *p)
 {
-	unsigned long flags;
-
 	__sched_fork(clone_flags, p);
 	/*
 	 * We mark the process as NEW here. This guarantees that
@@ -4390,24 +4388,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	init_entity_runnable_average(&p->se);
 
-	/*
-	 * The child is not yet in the pid-hash so no cgroup attach races,
-	 * and the cgroup is pinned to this child due to cgroup_fork()
-	 * is ran before sched_fork().
-	 *
-	 * Silence PROVE_RCU.
-	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	rseq_migrate(p);
-	/*
-	 * We're setting the CPU for the first time, we don't migrate,
-	 * so use __set_task_cpu().
-	 */
-	__set_task_cpu(p, smp_processor_id());
-	if (p->sched_class->task_fork)
-		p->sched_class->task_fork(p);
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-
 #ifdef CONFIG_SCHED_INFO
 	if (likely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -4423,8 +4403,29 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	return 0;
 }
 
-void sched_post_fork(struct task_struct *p)
+void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 {
+	unsigned long flags;
+#ifdef CONFIG_CGROUP_SCHED
+	struct task_group *tg;
+#endif
+
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+#ifdef CONFIG_CGROUP_SCHED
+	tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
+			  struct task_group, css);
+	p->sched_task_group = autogroup_task_group(p, tg);
+#endif
+	rseq_migrate(p);
+	/*
+	 * We're setting the CPU for the first time, we don't migrate,
+	 * so use __set_task_cpu().
+	 */
+	__set_task_cpu(p, smp_processor_id());
+	if (p->sched_class->task_fork)
+		p->sched_class->task_fork(p);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
 	uclamp_post_fork(p);
 }
 
-- 
cgit v1.2.3


From 804bccba71a57e7e5deb507a4c8ebbab730909c0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 23 Sep 2021 19:54:50 -0700
Subject: sched: Fill unconditional hole induced by sched_entity

With struct sched_entity before the other sched entities, its alignment
won't induce a struct hole. This saves 64 bytes in defconfig task_struct:

Before:
	...
        unsigned int               rt_priority;          /*   120     4 */

        /* XXX 4 bytes hole, try to pack */

        /* --- cacheline 2 boundary (128 bytes) --- */
        const struct sched_class  * sched_class;         /*   128     8 */

        /* XXX 56 bytes hole, try to pack */

        /* --- cacheline 3 boundary (192 bytes) --- */
        struct sched_entity        se __attribute__((__aligned__(64))); /*   192   448 */
        /* --- cacheline 10 boundary (640 bytes) --- */
        struct sched_rt_entity     rt;                   /*   640    48 */
        struct sched_dl_entity     dl __attribute__((__aligned__(8))); /*   688   224 */
        /* --- cacheline 14 boundary (896 bytes) was 16 bytes ago --- */

After:
	...
        unsigned int               rt_priority;          /*   120     4 */

        /* XXX 4 bytes hole, try to pack */

        /* --- cacheline 2 boundary (128 bytes) --- */
        struct sched_entity        se __attribute__((__aligned__(64))); /*   128   448 */
        /* --- cacheline 9 boundary (576 bytes) --- */
        struct sched_rt_entity     rt;                   /*   576    48 */
        struct sched_dl_entity     dl __attribute__((__aligned__(8))); /*   624   224 */
        /* --- cacheline 13 boundary (832 bytes) was 16 bytes ago --- */

Summary diff:
-	/* size: 7040, cachelines: 110, members: 188 */
+	/* size: 6976, cachelines: 109, members: 188 */

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210924025450.4138503-1-keescook@chromium.org
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 193e16e2d0e4..343603f77f8b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -775,10 +775,10 @@ struct task_struct {
 	int				normal_prio;
 	unsigned int			rt_priority;
 
-	const struct sched_class	*sched_class;
 	struct sched_entity		se;
 	struct sched_rt_entity		rt;
 	struct sched_dl_entity		dl;
+	const struct sched_class	*sched_class;
 
 #ifdef CONFIG_SCHED_CORE
 	struct rb_node			core_node;
-- 
cgit v1.2.3


From e9bdcdbf6936dd1fbf419e00222dc9038b7812c2 Mon Sep 17 00:00:00 2001
From: Christian Brauner <christian.brauner@ubuntu.com>
Date: Mon, 11 Oct 2021 15:32:44 +0200
Subject: pid: add pidfd_get_task() helper

The number of system calls making use of pidfds is constantly
increasing. Some of those new system calls duplicate the code to turn a
pidfd into task_struct it refers to. Give them a simple helper for this.

Link: https://lore.kernel.org/r/20211004125050.1153693-2-christian.brauner@ubuntu.com
Link: https://lore.kernel.org/r/20211011133245.1703103-2-brauner@kernel.org
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Matthew Bobrowski <repnop@google.com>
Cc: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Minchan Kim <minchan@kernel.org>
Reviewed-by: Matthew Bobrowski <repnop@google.com>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
---
 include/linux/pid.h |  1 +
 kernel/pid.c        | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index af308e15f174..343abf22092e 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -78,6 +78,7 @@ struct file;
 
 extern struct pid *pidfd_pid(const struct file *file);
 struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
+struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
 int pidfd_create(struct pid *pid, unsigned int flags);
 
 static inline struct pid *get_pid(struct pid *pid)
diff --git a/kernel/pid.c b/kernel/pid.c
index efe87db44683..2fc0a16ec77b 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -539,6 +539,42 @@ struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
 	return pid;
 }
 
+/**
+ * pidfd_get_task() - Get the task associated with a pidfd
+ *
+ * @pidfd: pidfd for which to get the task
+ * @flags: flags associated with this pidfd
+ *
+ * Return the task associated with @pidfd. The function takes a reference on
+ * the returned task. The caller is responsible for releasing that reference.
+ *
+ * Currently, the process identified by @pidfd is always a thread-group leader.
+ * This restriction currently exists for all aspects of pidfds including pidfd
+ * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling
+ * (only supports thread group leaders).
+ *
+ * Return: On success, the task_struct associated with the pidfd.
+ *	   On error, a negative errno number will be returned.
+ */
+struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
+{
+	unsigned int f_flags;
+	struct pid *pid;
+	struct task_struct *task;
+
+	pid = pidfd_get_pid(pidfd, &f_flags);
+	if (IS_ERR(pid))
+		return ERR_CAST(pid);
+
+	task = get_pid_task(pid, PIDTYPE_TGID);
+	put_pid(pid);
+	if (!task)
+		return ERR_PTR(-ESRCH);
+
+	*flags = f_flags;
+	return task;
+}
+
 /**
  * pidfd_create() - Create a new pid file descriptor.
  *
-- 
cgit v1.2.3


From 6098475d4cb48d821bdf453c61118c56e26294f0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@kernel.org>
Date: Fri, 8 Oct 2021 14:31:57 +0100
Subject: spi: Fix deadlock when adding SPI controllers on SPI buses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we have a global spi_add_lock which we take when adding new
devices so that we can check that we're not trying to reuse a chip
select that's already controlled.  This means that if the SPI device is
itself a SPI controller and triggers the instantiation of further SPI
devices we trigger a deadlock as we try to register and instantiate
those devices while in the process of doing so for the parent controller
and hence already holding the global spi_add_lock.  Since we only care
about concurrency within a single SPI bus move the lock to be per
controller, avoiding the deadlock.

This can be easily triggered in the case of spi-mux.

Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 17 ++++++-----------
 include/linux/spi/spi.h |  3 +++
 2 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index aea037c65985..412a10586233 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -478,12 +478,6 @@ static LIST_HEAD(spi_controller_list);
  */
 static DEFINE_MUTEX(board_lock);
 
-/*
- * Prevents addition of devices with same chip select and
- * addition of devices below an unregistering controller.
- */
-static DEFINE_MUTEX(spi_add_lock);
-
 /**
  * spi_alloc_device - Allocate a new SPI device
  * @ctlr: Controller to which device is connected
@@ -636,9 +630,9 @@ int spi_add_device(struct spi_device *spi)
 	 * chipselect **BEFORE** we call setup(), else we'll trash
 	 * its configuration.  Lock against concurrent add() calls.
 	 */
-	mutex_lock(&spi_add_lock);
+	mutex_lock(&ctlr->add_lock);
 	status = __spi_add_device(spi);
-	mutex_unlock(&spi_add_lock);
+	mutex_unlock(&ctlr->add_lock);
 	return status;
 }
 EXPORT_SYMBOL_GPL(spi_add_device);
@@ -658,7 +652,7 @@ static int spi_add_device_locked(struct spi_device *spi)
 	/* Set the bus ID string */
 	spi_dev_set_name(spi);
 
-	WARN_ON(!mutex_is_locked(&spi_add_lock));
+	WARN_ON(!mutex_is_locked(&ctlr->add_lock));
 	return __spi_add_device(spi);
 }
 
@@ -2830,6 +2824,7 @@ int spi_register_controller(struct spi_controller *ctlr)
 	spin_lock_init(&ctlr->bus_lock_spinlock);
 	mutex_init(&ctlr->bus_lock_mutex);
 	mutex_init(&ctlr->io_mutex);
+	mutex_init(&ctlr->add_lock);
 	ctlr->bus_lock_flag = 0;
 	init_completion(&ctlr->xfer_completion);
 	if (!ctlr->max_dma_len)
@@ -2966,7 +2961,7 @@ void spi_unregister_controller(struct spi_controller *ctlr)
 
 	/* Prevent addition of new devices, unregister existing ones */
 	if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
-		mutex_lock(&spi_add_lock);
+		mutex_lock(&ctlr->add_lock);
 
 	device_for_each_child(&ctlr->dev, NULL, __unregister);
 
@@ -2997,7 +2992,7 @@ void spi_unregister_controller(struct spi_controller *ctlr)
 	mutex_unlock(&board_lock);
 
 	if (IS_ENABLED(CONFIG_SPI_DYNAMIC))
-		mutex_unlock(&spi_add_lock);
+		mutex_unlock(&ctlr->add_lock);
 }
 EXPORT_SYMBOL_GPL(spi_unregister_controller);
 
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 8371bca13729..6b0b686f6f90 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -531,6 +531,9 @@ struct spi_controller {
 	/* I/O mutex */
 	struct mutex		io_mutex;
 
+	/* Used to avoid adding the same CS twice */
+	struct mutex		add_lock;
+
 	/* lock and mutex for SPI bus locking */
 	spinlock_t		bus_lock_spinlock;
 	struct mutex		bus_lock_mutex;
-- 
cgit v1.2.3


From 9f37ab0412eba537377c38b1dde1a04fbd7b5264 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Thu, 14 Oct 2021 14:18:59 +0000
Subject: PCI/switchtec: Add check of event support

Not all events are supported by every gen/variant of the Switchtec
firmware. To solve this, since Gen4, a new bit in each event header
is introduced to indicate if an event is supported by the firmware.

Link: https://lore.kernel.org/r/20211014141859.11444-6-kelvin.cao@microchip.com
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Kelvin Cao <kelvin.cao@microchip.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/switch/switchtec.c | 8 +++++++-
 include/linux/switchtec.h      | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 236cb40cc7c5..38c2b036fb8e 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -1024,6 +1024,9 @@ static int event_ctl(struct switchtec_dev *stdev,
 		return PTR_ERR(reg);
 
 	hdr = ioread32(reg);
+	if (hdr & SWITCHTEC_EVENT_NOT_SUPP)
+		return -EOPNOTSUPP;
+
 	for (i = 0; i < ARRAY_SIZE(ctl->data); i++)
 		ctl->data[i] = ioread32(&reg[i + 1]);
 
@@ -1096,7 +1099,7 @@ static int ioctl_event_ctl(struct switchtec_dev *stdev,
 		for (ctl.index = 0; ctl.index < nr_idxs; ctl.index++) {
 			ctl.flags = event_flags;
 			ret = event_ctl(stdev, &ctl);
-			if (ret < 0)
+			if (ret < 0 && ret != -EOPNOTSUPP)
 				return ret;
 		}
 	} else {
@@ -1403,6 +1406,9 @@ static int mask_event(struct switchtec_dev *stdev, int eid, int idx)
 	hdr_reg = event_regs[eid].map_reg(stdev, off, idx);
 	hdr = ioread32(hdr_reg);
 
+	if (hdr & SWITCHTEC_EVENT_NOT_SUPP)
+		return 0;
+
 	if (!(hdr & SWITCHTEC_EVENT_OCCURRED && hdr & SWITCHTEC_EVENT_EN_IRQ))
 		return 0;
 
diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h
index 082f1d51957a..be24056ac00f 100644
--- a/include/linux/switchtec.h
+++ b/include/linux/switchtec.h
@@ -19,6 +19,7 @@
 #define SWITCHTEC_EVENT_EN_CLI   BIT(2)
 #define SWITCHTEC_EVENT_EN_IRQ   BIT(3)
 #define SWITCHTEC_EVENT_FATAL    BIT(4)
+#define SWITCHTEC_EVENT_NOT_SUPP BIT(31)
 
 #define SWITCHTEC_DMA_MRPC_EN	BIT(0)
 
-- 
cgit v1.2.3


From 766607570becbd26cab6d66a544dd8d0d964df5a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 14 Oct 2021 07:24:31 -0700
Subject: ethernet: constify references to netdev->dev_addr in drivers

This big patch sprinkles const on local variables and
function arguments which may refer to netdev->dev_addr.

Commit 406f42fa0d3c ("net-next: When a bond have a massive amount
of VLANs...") introduced a rbtree for faster Ethernet address look
up. To maintain netdev->dev_addr in this tree we need to make all
the writes to it got through appropriate helpers.

Some of the changes here are not strictly required - const
is sometimes cast off but pointer is not used for writing.
It seems like it's still better to add the const in case
the code changes later or relevant -W flags get enabled
for the build.

No functional changes.

Link: https://lore.kernel.org/r/20211014142432.449314-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/actions/owl-emac.c                      |  2 +-
 drivers/net/ethernet/adaptec/starfire.c                      | 10 +++++-----
 drivers/net/ethernet/alacritech/slicoss.c                    |  2 +-
 drivers/net/ethernet/alteon/acenic.c                         |  4 ++--
 drivers/net/ethernet/altera/altera_tse_main.c                |  2 +-
 drivers/net/ethernet/amd/nmclan_cs.c                         |  3 ++-
 drivers/net/ethernet/amd/xgbe/xgbe-dev.c                     |  2 +-
 drivers/net/ethernet/amd/xgbe/xgbe.h                         |  2 +-
 drivers/net/ethernet/apm/xgene-v2/mac.c                      |  2 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.c               |  2 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c            |  2 +-
 drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c            |  2 +-
 drivers/net/ethernet/apple/bmac.c                            |  8 ++++----
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h               |  6 +++---
 drivers/net/ethernet/aquantia/atlantic/aq_macsec.c           |  2 +-
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c    |  4 ++--
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c    |  4 ++--
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h    |  2 +-
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c |  4 ++--
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c    |  4 ++--
 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c     |  2 +-
 drivers/net/ethernet/broadcom/b44.c                          |  6 ++++--
 drivers/net/ethernet/broadcom/bcmsysport.c                   |  2 +-
 drivers/net/ethernet/broadcom/bgmac.c                        |  2 +-
 drivers/net/ethernet/broadcom/bnx2.c                         |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h                  |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c             |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h            |  3 ++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c             |  2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c                    |  2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c              |  4 ++--
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h              |  2 +-
 drivers/net/ethernet/broadcom/genet/bcmgenet.c               |  4 ++--
 drivers/net/ethernet/calxeda/xgmac.c                         |  2 +-
 drivers/net/ethernet/chelsio/cxgb/gmac.h                     |  2 +-
 drivers/net/ethernet/chelsio/cxgb/pm3393.c                   |  2 +-
 drivers/net/ethernet/chelsio/cxgb/vsc7326.c                  |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/common.h                  |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/xgmac.c                   |  2 +-
 drivers/net/ethernet/cisco/enic/enic_pp.c                    |  2 +-
 drivers/net/ethernet/dlink/dl2k.c                            |  2 +-
 drivers/net/ethernet/dnet.c                                  |  6 +++---
 drivers/net/ethernet/emulex/benet/be_cmds.c                  |  2 +-
 drivers/net/ethernet/emulex/benet/be_cmds.h                  |  2 +-
 drivers/net/ethernet/emulex/benet/be_main.c                  |  2 +-
 drivers/net/ethernet/ethoc.c                                 |  2 +-
 drivers/net/ethernet/fealnx.c                                |  2 +-
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c               |  4 ++--
 drivers/net/ethernet/freescale/fman/fman_dtsec.c             |  8 ++++----
 drivers/net/ethernet/freescale/fman/fman_dtsec.h             |  2 +-
 drivers/net/ethernet/freescale/fman/fman_memac.c             |  8 ++++----
 drivers/net/ethernet/freescale/fman/fman_memac.h             |  2 +-
 drivers/net/ethernet/freescale/fman/fman_tgec.c              |  8 ++++----
 drivers/net/ethernet/freescale/fman/fman_tgec.h              |  2 +-
 drivers/net/ethernet/freescale/fman/mac.h                    |  2 +-
 drivers/net/ethernet/hisilicon/hisi_femac.c                  |  2 +-
 drivers/net/ethernet/hisilicon/hix5hd2_gmac.c                |  2 +-
 drivers/net/ethernet/hisilicon/hns/hnae.h                    |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c            |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c           |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c            |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h            |  5 +++--
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c          |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hnae3.h                  |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c      |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c    |  2 +-
 drivers/net/ethernet/i825xx/sun3_82586.c                     |  2 +-
 drivers/net/ethernet/intel/i40e/i40e.h                       |  2 +-
 drivers/net/ethernet/intel/ixgb/ixgb_hw.c                    |  2 +-
 drivers/net/ethernet/intel/ixgb/ixgb_hw.h                    |  2 +-
 drivers/net/ethernet/marvell/mv643xx_eth.c                   |  2 +-
 drivers/net/ethernet/marvell/mvneta.c                        |  4 ++--
 drivers/net/ethernet/marvell/pxa168_eth.c                    |  6 +++---
 drivers/net/ethernet/mediatek/mtk_star_emac.c                |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c              |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c        |  2 +-
 drivers/net/ethernet/micrel/ks8842.c                         |  2 +-
 drivers/net/ethernet/micrel/ksz884x.c                        |  4 ++--
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c             |  3 ++-
 drivers/net/ethernet/neterion/s2io.c                         |  2 +-
 drivers/net/ethernet/neterion/s2io.h                         |  2 +-
 drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c      |  6 +++---
 drivers/net/ethernet/nxp/lpc_eth.c                           |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_dev.c                    |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_dev_api.h                |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.c                     |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_main.c                   |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c                    |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.h                    |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_rdma.c                   |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_vf.c                     |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_vf.h                     |  4 ++--
 drivers/net/ethernet/qlogic/qede/qede_filter.c               |  2 +-
 drivers/net/ethernet/qualcomm/emac/emac-mac.c                |  2 +-
 drivers/net/ethernet/rdc/r6040.c                             | 12 ++++++------
 drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h            |  2 +-
 drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c              |  3 ++-
 drivers/net/ethernet/sfc/ef10.c                              |  4 ++--
 drivers/net/ethernet/sfc/ef10_sriov.c                        |  2 +-
 drivers/net/ethernet/sfc/ef10_sriov.h                        |  6 +++---
 drivers/net/ethernet/sfc/net_driver.h                        |  2 +-
 drivers/net/ethernet/sfc/siena_sriov.c                       |  2 +-
 drivers/net/ethernet/sfc/siena_sriov.h                       |  2 +-
 drivers/net/ethernet/sis/sis900.c                            |  2 +-
 drivers/net/ethernet/smsc/smsc911x.c                         |  2 +-
 drivers/net/ethernet/smsc/smsc9420.c                         |  2 +-
 drivers/net/ethernet/stmicro/stmmac/common.h                 |  4 ++--
 drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c            |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c         |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c          |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c            |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c             |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c              |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c          |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/hwif.h                   |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c       |  4 ++--
 drivers/net/ethernet/sun/sunbmac.c                           |  2 +-
 drivers/net/ethernet/sun/sunqe.c                             |  2 +-
 drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c                |  2 +-
 drivers/net/ethernet/synopsys/dwc-xlgmac.h                   |  2 +-
 drivers/net/ethernet/ti/tlan.c                               |  4 ++--
 drivers/net/ethernet/toshiba/tc35815.c                       |  3 ++-
 drivers/net/ethernet/xilinx/xilinx_emaclite.c                |  7 ++++---
 drivers/net/ethernet/xircom/xirc2ps_cs.c                     |  2 +-
 drivers/net/phy/mscc/mscc_main.c                             |  2 +-
 drivers/net/usb/aqc111.c                                     |  2 +-
 drivers/net/usb/ax88179_178a.c                               |  8 ++++----
 drivers/net/usb/catc.c                                       |  2 +-
 drivers/net/usb/dm9601.c                                     |  3 ++-
 drivers/net/usb/mcs7830.c                                    |  3 ++-
 drivers/net/usb/sr9700.c                                     |  3 ++-
 include/linux/qed/qed_eth_if.h                               |  2 +-
 include/linux/qed/qed_if.h                                   |  2 +-
 include/linux/qed/qed_rdma_if.h                              |  3 ++-
 134 files changed, 204 insertions(+), 189 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/actions/owl-emac.c b/drivers/net/ethernet/actions/owl-emac.c
index dce93acd1644..1cfdd01b4c2e 100644
--- a/drivers/net/ethernet/actions/owl-emac.c
+++ b/drivers/net/ethernet/actions/owl-emac.c
@@ -342,7 +342,7 @@ static u32 owl_emac_dma_cmd_stop(struct owl_emac_priv *priv)
 static void owl_emac_set_hw_mac_addr(struct net_device *netdev)
 {
 	struct owl_emac_priv *priv = netdev_priv(netdev);
-	u8 *mac_addr = netdev->dev_addr;
+	const u8 *mac_addr = netdev->dev_addr;
 	u32 addr_high, addr_low;
 
 	addr_high = mac_addr[0] << 8 | mac_addr[1];
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index e0f6cc910bd2..16b6b83f670b 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -955,7 +955,7 @@ static int netdev_open(struct net_device *dev)
 	writew(0, ioaddr + PerfFilterTable + 4);
 	writew(0, ioaddr + PerfFilterTable + 8);
 	for (i = 1; i < 16; i++) {
-		__be16 *eaddrs = (__be16 *)dev->dev_addr;
+		const __be16 *eaddrs = (const __be16 *)dev->dev_addr;
 		void __iomem *setup_frm = ioaddr + PerfFilterTable + i * 16;
 		writew(be16_to_cpu(eaddrs[2]), setup_frm); setup_frm += 4;
 		writew(be16_to_cpu(eaddrs[1]), setup_frm); setup_frm += 4;
@@ -1787,14 +1787,14 @@ static void set_rx_mode(struct net_device *dev)
 	} else if (netdev_mc_count(dev) <= 14) {
 		/* Use the 16 element perfect filter, skip first two entries. */
 		void __iomem *filter_addr = ioaddr + PerfFilterTable + 2 * 16;
-		__be16 *eaddrs;
+		const __be16 *eaddrs;
 		netdev_for_each_mc_addr(ha, dev) {
 			eaddrs = (__be16 *) ha->addr;
 			writew(be16_to_cpu(eaddrs[2]), filter_addr); filter_addr += 4;
 			writew(be16_to_cpu(eaddrs[1]), filter_addr); filter_addr += 4;
 			writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 8;
 		}
-		eaddrs = (__be16 *)dev->dev_addr;
+		eaddrs = (const __be16 *)dev->dev_addr;
 		i = netdev_mc_count(dev) + 2;
 		while (i++ < 16) {
 			writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 4;
@@ -1805,7 +1805,7 @@ static void set_rx_mode(struct net_device *dev)
 	} else {
 		/* Must use a multicast hash table. */
 		void __iomem *filter_addr;
-		__be16 *eaddrs;
+		const __be16 *eaddrs;
 		__le16 mc_filter[32] __attribute__ ((aligned(sizeof(long))));	/* Multicast hash filter */
 
 		memset(mc_filter, 0, sizeof(mc_filter));
@@ -1819,7 +1819,7 @@ static void set_rx_mode(struct net_device *dev)
 		}
 		/* Clear the perfect filter list, skip first two entries. */
 		filter_addr = ioaddr + PerfFilterTable + 2 * 16;
-		eaddrs = (__be16 *)dev->dev_addr;
+		eaddrs = (const __be16 *)dev->dev_addr;
 		for (i = 2; i < 16; i++) {
 			writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 4;
 			writew(be16_to_cpu(eaddrs[1]), filter_addr); filter_addr += 4;
diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
index 82f4f2608102..1fc9a1cd3ef8 100644
--- a/drivers/net/ethernet/alacritech/slicoss.c
+++ b/drivers/net/ethernet/alacritech/slicoss.c
@@ -1008,7 +1008,7 @@ static void slic_set_link_autoneg(struct slic_device *sdev)
 
 static void slic_set_mac_address(struct slic_device *sdev)
 {
-	u8 *addr = sdev->netdev->dev_addr;
+	const u8 *addr = sdev->netdev->dev_addr;
 	u32 val;
 
 	val = addr[5] | addr[4] << 8 | addr[3] << 16 | addr[2] << 24;
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 7aaef593b031..eeb86bd851f9 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -2712,7 +2712,7 @@ static int ace_set_mac_addr(struct net_device *dev, void *p)
 	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
 	struct sockaddr *addr=p;
-	u8 *da;
+	const u8 *da;
 	struct cmd cmd;
 
 	if(netif_running(dev))
@@ -2720,7 +2720,7 @@ static int ace_set_mac_addr(struct net_device *dev, void *p)
 
 	eth_hw_addr_set(dev, addr->sa_data);
 
-	da = (u8 *)dev->dev_addr;
+	da = (const u8 *)dev->dev_addr;
 
 	writel(da[0] << 8 | da[1], &regs->MacAddrHi);
 	writel((da[2] << 24) | (da[3] << 16) | (da[4] << 8) | da[5],
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 7b75b0cd7ac9..d75d95a97dd9 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -849,7 +849,7 @@ static int init_phy(struct net_device *dev)
 	return 0;
 }
 
-static void tse_update_mac_addr(struct altera_tse_private *priv, u8 *addr)
+static void tse_update_mac_addr(struct altera_tse_private *priv, const u8 *addr)
 {
 	u32 msb;
 	u32 lsb;
diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 2c07d15c8dfe..30ee5329bd7c 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c
@@ -529,7 +529,8 @@ static void mace_write(mace_private *lp, unsigned int ioaddr, int reg,
 mace_init
 	Resets the MACE chip.
 ---------------------------------------------------------------------------- */
-static int mace_init(mace_private *lp, unsigned int ioaddr, char *enet_addr)
+static int mace_init(mace_private *lp, unsigned int ioaddr,
+		     const char *enet_addr)
 {
   int i;
   int ct = 0;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index d5fd49dd25f3..3936543a74d8 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1080,7 +1080,7 @@ static int xgbe_add_mac_addresses(struct xgbe_prv_data *pdata)
 	return 0;
 }
 
-static int xgbe_set_mac_address(struct xgbe_prv_data *pdata, u8 *addr)
+static int xgbe_set_mac_address(struct xgbe_prv_data *pdata, const u8 *addr)
 {
 	unsigned int mac_addr_hi, mac_addr_lo;
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 3305979a9f7c..607a2c90513b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -729,7 +729,7 @@ struct xgbe_ext_stats {
 struct xgbe_hw_if {
 	int (*tx_complete)(struct xgbe_ring_desc *);
 
-	int (*set_mac_address)(struct xgbe_prv_data *, u8 *addr);
+	int (*set_mac_address)(struct xgbe_prv_data *, const u8 *addr);
 	int (*config_rx_mode)(struct xgbe_prv_data *);
 
 	int (*enable_rx_csum)(struct xgbe_prv_data *);
diff --git a/drivers/net/ethernet/apm/xgene-v2/mac.c b/drivers/net/ethernet/apm/xgene-v2/mac.c
index 2da979e4fad1..6423e22e05b2 100644
--- a/drivers/net/ethernet/apm/xgene-v2/mac.c
+++ b/drivers/net/ethernet/apm/xgene-v2/mac.c
@@ -65,7 +65,7 @@ void xge_mac_set_speed(struct xge_pdata *pdata)
 
 void xge_mac_set_station_addr(struct xge_pdata *pdata)
 {
-	u8 *dev_addr = pdata->ndev->dev_addr;
+	const u8 *dev_addr = pdata->ndev->dev_addr;
 	u32 addr0, addr1;
 
 	addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 5f657879134e..e641dbbea1e2 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -378,8 +378,8 @@ u32 xgene_enet_rd_stat(struct xgene_enet_pdata *pdata, u32 rd_addr)
 
 static void xgene_gmac_set_mac_addr(struct xgene_enet_pdata *pdata)
 {
+	const u8 *dev_addr = pdata->ndev->dev_addr;
 	u32 addr0, addr1;
-	u8 *dev_addr = pdata->ndev->dev_addr;
 
 	addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
 		(dev_addr[1] << 8) | dev_addr[0];
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index f482ced2cadd..72b5e8eb0ec7 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -165,8 +165,8 @@ static void xgene_sgmac_reset(struct xgene_enet_pdata *p)
 
 static void xgene_sgmac_set_mac_addr(struct xgene_enet_pdata *p)
 {
+	const u8 *dev_addr = p->ndev->dev_addr;
 	u32 addr0, addr1;
-	u8 *dev_addr = p->ndev->dev_addr;
 
 	addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
 		(dev_addr[1] << 8) | dev_addr[0];
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
index 304b5d43f236..86607b79c09f 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
@@ -207,8 +207,8 @@ static void xgene_pcs_reset(struct xgene_enet_pdata *pdata)
 
 static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata)
 {
+	const u8 *dev_addr = pdata->ndev->dev_addr;
 	u32 addr0, addr1;
-	u8 *dev_addr = pdata->ndev->dev_addr;
 
 	addr0 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
 		(dev_addr[1] << 8) | dev_addr[0];
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index a989d2df59ad..a63ec2005af3 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -308,7 +308,7 @@ bmac_init_registers(struct net_device *dev)
 {
 	struct bmac_data *bp = netdev_priv(dev);
 	volatile unsigned short regValue;
-	unsigned short *pWord16;
+	const unsigned short *pWord16;
 	int i;
 
 	/* XXDEBUG(("bmac: enter init_registers\n")); */
@@ -371,7 +371,7 @@ bmac_init_registers(struct net_device *dev)
 	bmwrite(dev, BHASH1, bp->hash_table_mask[2]); 	/* bits 47 - 32 */
 	bmwrite(dev, BHASH0, bp->hash_table_mask[3]); 	/* bits 63 - 48 */
 
-	pWord16 = (unsigned short *)dev->dev_addr;
+	pWord16 = (const unsigned short *)dev->dev_addr;
 	bmwrite(dev, MADD0, *pWord16++);
 	bmwrite(dev, MADD1, *pWord16++);
 	bmwrite(dev, MADD2, *pWord16);
@@ -522,7 +522,7 @@ static int bmac_set_address(struct net_device *dev, void *addr)
 {
 	struct bmac_data *bp = netdev_priv(dev);
 	unsigned char *p = addr;
-	unsigned short *pWord16;
+	const unsigned short *pWord16;
 	unsigned long flags;
 	int i;
 
@@ -533,7 +533,7 @@ static int bmac_set_address(struct net_device *dev, void *addr)
 		dev->dev_addr[i] = p[i];
 	}
 	/* load up the hardware address */
-	pWord16  = (unsigned short *)dev->dev_addr;
+	pWord16  = (const unsigned short *)dev->dev_addr;
 	bmwrite(dev, MADD0, *pWord16++);
 	bmwrite(dev, MADD1, *pWord16++);
 	bmwrite(dev, MADD2, *pWord16);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index bed481816ea3..062a300a566a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -217,7 +217,7 @@ struct aq_hw_ops {
 	int (*hw_ring_tx_head_update)(struct aq_hw_s *self,
 				      struct aq_ring_s *aq_ring);
 
-	int (*hw_set_mac_address)(struct aq_hw_s *self, u8 *mac_addr);
+	int (*hw_set_mac_address)(struct aq_hw_s *self, const u8 *mac_addr);
 
 	int (*hw_soft_reset)(struct aq_hw_s *self);
 
@@ -226,7 +226,7 @@ struct aq_hw_ops {
 
 	int (*hw_reset)(struct aq_hw_s *self);
 
-	int (*hw_init)(struct aq_hw_s *self, u8 *mac_addr);
+	int (*hw_init)(struct aq_hw_s *self, const u8 *mac_addr);
 
 	int (*hw_start)(struct aq_hw_s *self);
 
@@ -373,7 +373,7 @@ struct aq_fw_ops {
 	int (*set_phyloopback)(struct aq_hw_s *self, u32 mode, bool enable);
 
 	int (*set_power)(struct aq_hw_s *self, unsigned int power_state,
-			 u8 *mac);
+			 const u8 *mac);
 
 	int (*send_fw_request)(struct aq_hw_s *self,
 			       const struct hw_fw_request_iface *fw_req,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
index 4a6dfac857ca..02058fe79f52 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
@@ -35,7 +35,7 @@ static int aq_apply_macsec_cfg(struct aq_nic_s *nic);
 static int aq_apply_secy_cfg(struct aq_nic_s *nic,
 			     const struct macsec_secy *secy);
 
-static void aq_ether_addr_to_mac(u32 mac[2], unsigned char *emac)
+static void aq_ether_addr_to_mac(u32 mac[2], const unsigned char *emac)
 {
 	u32 tmp[2] = { 0 };
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 611875ef2cd1..4625ccb79499 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -322,7 +322,7 @@ static int hw_atl_a0_hw_init_rx_path(struct aq_hw_s *self)
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_a0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
+static int hw_atl_a0_hw_mac_addr_set(struct aq_hw_s *self, const u8 *mac_addr)
 {
 	unsigned int h = 0U;
 	unsigned int l = 0U;
@@ -348,7 +348,7 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_a0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
+static int hw_atl_a0_hw_init(struct aq_hw_s *self, const u8 *mac_addr)
 {
 	static u32 aq_hw_atl_igcr_table_[4][2] = {
 		[AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U },
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 9f1b15077e7d..d875ce3ec759 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -533,7 +533,7 @@ static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
 	return aq_hw_err_from_flags(self);
 }
 
-int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
+int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, const u8 *mac_addr)
 {
 	unsigned int h = 0U;
 	unsigned int l = 0U;
@@ -558,7 +558,7 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
+static int hw_atl_b0_hw_init(struct aq_hw_s *self, const u8 *mac_addr)
 {
 	static u32 aq_hw_atl_igcr_table_[4][2] = {
 		[AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U },
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index d8db972113ec..5298846dd9f7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -58,7 +58,7 @@ int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring);
 
 void hw_atl_b0_hw_init_rx_rss_ctrl1(struct aq_hw_s *self);
 
-int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr);
+int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, const u8 *mac_addr);
 
 int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc);
 int hw_atl_b0_set_loopback(struct aq_hw_s *self, u32 mode, bool enable);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 404cbf60d3f2..fc0e66006644 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -944,7 +944,7 @@ u32 hw_atl_utils_get_fw_version(struct aq_hw_s *self)
 }
 
 static int aq_fw1x_set_wake_magic(struct aq_hw_s *self, bool wol_enabled,
-				  u8 *mac)
+				  const u8 *mac)
 {
 	struct hw_atl_utils_fw_rpc *prpc = NULL;
 	unsigned int rpc_size = 0U;
@@ -987,7 +987,7 @@ err_exit:
 }
 
 static int aq_fw1x_set_power(struct aq_hw_s *self, unsigned int power_state,
-			     u8 *mac)
+			     const u8 *mac)
 {
 	struct hw_atl_utils_fw_rpc *prpc = NULL;
 	unsigned int rpc_size = 0U;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index ee0c22d04935..eac631c45c56 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -358,7 +358,7 @@ static int aq_fw2x_get_phy_temp(struct aq_hw_s *self, int *temp)
 	return 0;
 }
 
-static int aq_fw2x_set_wol(struct aq_hw_s *self, u8 *mac)
+static int aq_fw2x_set_wol(struct aq_hw_s *self, const u8 *mac)
 {
 	struct hw_atl_utils_fw_rpc *rpc = NULL;
 	struct offload_info *info = NULL;
@@ -404,7 +404,7 @@ err_exit:
 }
 
 static int aq_fw2x_set_power(struct aq_hw_s *self, unsigned int power_state,
-			     u8 *mac)
+			     const u8 *mac)
 {
 	int err = 0;
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 92f64048bf69..c98708bb044c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -516,7 +516,7 @@ static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self)
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr)
+static int hw_atl2_hw_init(struct aq_hw_s *self, const u8 *mac_addr)
 {
 	static u32 aq_hw_atl2_igcr_table_[4][2] = {
 		[AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U },
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 55c9e6fcb471..969591bbc066 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -218,7 +218,8 @@ static inline void __b44_cam_read(struct b44 *bp, unsigned char *data, int index
 	data[1] = (val >> 0) & 0xFF;
 }
 
-static inline void __b44_cam_write(struct b44 *bp, unsigned char *data, int index)
+static inline void __b44_cam_write(struct b44 *bp,
+				   const unsigned char *data, int index)
 {
 	u32 val;
 
@@ -1507,7 +1508,8 @@ static void bwfilter_table(struct b44 *bp, u8 *pp, u32 bytes, u32 table_offset)
 	}
 }
 
-static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset)
+static int b44_magic_pattern(const u8 *macaddr, u8 *ppattern, u8 *pmask,
+			     int offset)
 {
 	int magicsync = 6;
 	int k, j, len = offset;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index b813745e8de3..40933bf5a710 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1818,7 +1818,7 @@ static inline void umac_reset(struct bcm_sysport_priv *priv)
 }
 
 static void umac_set_hw_addr(struct bcm_sysport_priv *priv,
-			     unsigned char *addr)
+			     const unsigned char *addr)
 {
 	u32 mac0 = (addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) |
 		    addr[3];
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index d2c7834850cc..7b525c65bacb 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -768,7 +768,7 @@ static void bgmac_umac_cmd_maskset(struct bgmac *bgmac, u32 mask, u32 set,
 	udelay(2);
 }
 
-static void bgmac_write_mac_address(struct bgmac *bgmac, u8 *addr)
+static void bgmac_write_mac_address(struct bgmac *bgmac, const u8 *addr)
 {
 	u32 tmp;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 248b81249cb0..babc955ba64e 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2704,7 +2704,7 @@ bnx2_alloc_bad_rbuf(struct bnx2 *bp)
 }
 
 static void
-bnx2_set_mac_addr(struct bnx2 *bp, u8 *mac_addr, u32 pos)
+bnx2_set_mac_addr(struct bnx2 *bp, const u8 *mac_addr, u32 pos)
 {
 	u32 val;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index e789430f407c..2b06d78baa08 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1994,7 +1994,7 @@ int bnx2x_idle_chk(struct bnx2x *bp);
  * operation has been successfully scheduled and a negative - if a requested
  * operations has failed.
  */
-int bnx2x_set_mac_one(struct bnx2x *bp, u8 *mac,
+int bnx2x_set_mac_one(struct bnx2x *bp, const u8 *mac,
 		      struct bnx2x_vlan_mac_obj *obj, bool set,
 		      int mac_type, unsigned long *ramrod_flags);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index ef49e38ae027..27e712178f95 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -8417,7 +8417,7 @@ alloc_mem_err:
  * Init service functions
  */
 
-int bnx2x_set_mac_one(struct bnx2x *bp, u8 *mac,
+int bnx2x_set_mac_one(struct bnx2x *bp, const u8 *mac,
 		      struct bnx2x_vlan_mac_obj *obj, bool set,
 		      int mac_type, unsigned long *ramrod_flags)
 {
@@ -9146,7 +9146,7 @@ u32 bnx2x_send_unload_req(struct bnx2x *bp, int unload_mode)
 
 	else if (bp->wol) {
 		u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
-		u8 *mac_addr = bp->dev->dev_addr;
+		const u8 *mac_addr = bp->dev->dev_addr;
 		struct pci_dev *pdev = bp->pdev;
 		u32 val;
 		u16 pmc;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index 966d5722c5e2..8c2cf5519787 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -508,7 +508,8 @@ int bnx2x_vfpf_init(struct bnx2x *bp);
 void bnx2x_vfpf_close_vf(struct bnx2x *bp);
 int bnx2x_vfpf_setup_q(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 		       bool is_leading);
-int bnx2x_vfpf_config_mac(struct bnx2x *bp, u8 *addr, u8 vf_qid, bool set);
+int bnx2x_vfpf_config_mac(struct bnx2x *bp, const u8 *addr, u8 vf_qid,
+			  bool set);
 int bnx2x_vfpf_config_rss(struct bnx2x *bp,
 			  struct bnx2x_config_rss_params *params);
 int bnx2x_vfpf_set_mcast(struct net_device *dev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 7c96f943c6f3..c9129b9ba446 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -721,7 +721,7 @@ out:
 }
 
 /* request pf to add a mac for the vf */
-int bnx2x_vfpf_config_mac(struct bnx2x *bp, u8 *addr, u8 vf_qid, bool set)
+int bnx2x_vfpf_config_mac(struct bnx2x *bp, const u8 *addr, u8 vf_qid, bool set)
 {
 	struct vfpf_set_q_filters_tlv *req = &bp->vf2pf_mbox->req.set_q_filters;
 	struct pfvf_general_resp_tlv *resp = &bp->vf2pf_mbox->resp.general_resp;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 800020f4d79d..66263aa0d96b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -4869,7 +4869,7 @@ static int bnxt_hwrm_cfa_ntuple_filter_alloc(struct bnxt *bp,
 #endif
 
 static int bnxt_hwrm_set_vnic_filter(struct bnxt *bp, u16 vnic_id, u16 idx,
-				     u8 *mac_addr)
+				     const u8 *mac_addr)
 {
 	struct hwrm_cfa_l2_filter_alloc_output *resp;
 	struct hwrm_cfa_l2_filter_alloc_input *req;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index d4ebc5d710ba..1d177fed44a6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -1151,7 +1151,7 @@ void bnxt_hwrm_exec_fwd_req(struct bnxt *bp)
 	}
 }
 
-int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
+int bnxt_approve_mac(struct bnxt *bp, const u8 *mac, bool strict)
 {
 	struct hwrm_func_vf_cfg_input *req;
 	int rc = 0;
@@ -1246,7 +1246,7 @@ void bnxt_update_vf_mac(struct bnxt *bp)
 {
 }
 
-int bnxt_approve_mac(struct bnxt *bp, u8 *mac, bool strict)
+int bnxt_approve_mac(struct bnxt *bp, const u8 *mac, bool strict)
 {
 	return 0;
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
index 995535e4c11b..9a4bacba477b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h
@@ -41,5 +41,5 @@ int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset);
 void bnxt_sriov_disable(struct bnxt *);
 void bnxt_hwrm_exec_fwd_req(struct bnxt *);
 void bnxt_update_vf_mac(struct bnxt *);
-int bnxt_approve_mac(struct bnxt *, u8 *, bool);
+int bnxt_approve_mac(struct bnxt *, const u8 *, bool);
 #endif
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 83c55e7b099f..ed53859b6f7d 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3266,7 +3266,7 @@ static void bcmgenet_umac_reset(struct bcmgenet_priv *priv)
 }
 
 static void bcmgenet_set_hw_addr(struct bcmgenet_priv *priv,
-				 unsigned char *addr)
+				 const unsigned char *addr)
 {
 	bcmgenet_umac_writel(priv, get_unaligned_be32(&addr[0]), UMAC_MAC0);
 	bcmgenet_umac_writel(priv, get_unaligned_be16(&addr[4]), UMAC_MAC1);
@@ -3560,7 +3560,7 @@ static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue)
 #define MAX_MDF_FILTER	17
 
 static inline void bcmgenet_set_mdf_addr(struct bcmgenet_priv *priv,
-					 unsigned char *addr,
+					 const unsigned char *addr,
 					 int *i)
 {
 	bcmgenet_umac_writel(priv, addr[0] << 8 | addr[1],
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index dc51f36fdf2a..9ad89a53c3e6 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -607,7 +607,7 @@ static inline void xgmac_mac_disable(void __iomem *ioaddr)
 	writel(value, ioaddr + XGMAC_CONTROL);
 }
 
-static void xgmac_set_mac_addr(void __iomem *ioaddr, unsigned char *addr,
+static void xgmac_set_mac_addr(void __iomem *ioaddr, const unsigned char *addr,
 			       int num)
 {
 	u32 data;
diff --git a/drivers/net/ethernet/chelsio/cxgb/gmac.h b/drivers/net/ethernet/chelsio/cxgb/gmac.h
index dfa77491a910..5913eaf442b5 100644
--- a/drivers/net/ethernet/chelsio/cxgb/gmac.h
+++ b/drivers/net/ethernet/chelsio/cxgb/gmac.h
@@ -117,7 +117,7 @@ struct cmac_ops {
 	const struct cmac_statistics *(*statistics_update)(struct cmac *, int);
 
 	int (*macaddress_get)(struct cmac *, u8 mac_addr[6]);
-	int (*macaddress_set)(struct cmac *, u8 mac_addr[6]);
+	int (*macaddress_set)(struct cmac *, const u8 mac_addr[6]);
 };
 
 typedef struct _cmac_instance cmac_instance;
diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c
index c27908e66f5e..0bb37e4680c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c
+++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c
@@ -496,7 +496,7 @@ static int pm3393_macaddress_get(struct cmac *cmac, u8 mac_addr[6])
 	return 0;
 }
 
-static int pm3393_macaddress_set(struct cmac *cmac, u8 ma[6])
+static int pm3393_macaddress_set(struct cmac *cmac, const u8 ma[6])
 {
 	u32 val, lo, mid, hi, enabled = cmac->instance->enabled;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
index a19284bdb80e..2ad3efb550c2 100644
--- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
+++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
@@ -379,7 +379,7 @@ static int mac_intr_clear(struct cmac *mac)
 }
 
 /* Expect MAC address to be in network byte order. */
-static int mac_set_address(struct cmac* mac, u8 addr[6])
+static int mac_set_address(struct cmac* mac, const u8 addr[6])
 {
 	u32 val;
 	int port = mac->instance->index;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/common.h b/drivers/net/ethernet/chelsio/cxgb3/common.h
index b706f2fbe4f4..a309016f7f8c 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/common.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/common.h
@@ -710,7 +710,7 @@ int t3_mac_enable(struct cmac *mac, int which);
 int t3_mac_disable(struct cmac *mac, int which);
 int t3_mac_set_mtu(struct cmac *mac, unsigned int mtu);
 int t3_mac_set_rx_mode(struct cmac *mac, struct net_device *dev);
-int t3_mac_set_address(struct cmac *mac, unsigned int idx, u8 addr[6]);
+int t3_mac_set_address(struct cmac *mac, unsigned int idx, const u8 addr[6]);
 int t3_mac_set_num_ucast(struct cmac *mac, int n);
 const struct mac_stats *t3_mac_update_stats(struct cmac *mac);
 int t3_mac_set_speed_duplex_fc(struct cmac *mac, int speed, int duplex, int fc);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/xgmac.c b/drivers/net/ethernet/chelsio/cxgb3/xgmac.c
index 3af19a550372..1bdc6cad1e49 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/xgmac.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/xgmac.c
@@ -240,7 +240,7 @@ static void set_addr_filter(struct cmac *mac, int idx, const u8 * addr)
 }
 
 /* Set one of the station's unicast MAC addresses. */
-int t3_mac_set_address(struct cmac *mac, unsigned int idx, u8 addr[6])
+int t3_mac_set_address(struct cmac *mac, unsigned int idx, const u8 addr[6])
 {
 	if (idx >= mac->nucast)
 		return -EINVAL;
diff --git a/drivers/net/ethernet/cisco/enic/enic_pp.c b/drivers/net/ethernet/cisco/enic/enic_pp.c
index e6a83198c3dd..80f46dbd5117 100644
--- a/drivers/net/ethernet/cisco/enic/enic_pp.c
+++ b/drivers/net/ethernet/cisco/enic/enic_pp.c
@@ -73,9 +73,9 @@ static int enic_set_port_profile(struct enic *enic, int vf)
 	struct vic_provinfo *vp;
 	const u8 oui[3] = VIC_PROVINFO_CISCO_OUI;
 	const __be16 os_type = htons(VIC_GENERIC_PROV_OS_TYPE_LINUX);
+	const u8 *client_mac;
 	char uuid_str[38];
 	char client_mac_str[18];
-	u8 *client_mac;
 	int err;
 
 	ENIC_PP_BY_INDEX(enic, vf, pp, &err);
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 202ecb132053..993bba0ffb16 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -567,7 +567,7 @@ static void rio_hw_init(struct net_device *dev)
 	 */
 	for (i = 0; i < 3; i++)
 		dw16(StationAddr0 + 2 * i,
-		     cpu_to_le16(((u16 *)dev->dev_addr)[i]));
+		     cpu_to_le16(((const u16 *)dev->dev_addr)[i]));
 
 	set_multicast (dev);
 	if (np->coalesce) {
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 6c51cf991dad..3ed21ba4eb99 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -60,11 +60,11 @@ static void __dnet_set_hwaddr(struct dnet *bp)
 {
 	u16 tmp;
 
-	tmp = be16_to_cpup((__be16 *)bp->dev->dev_addr);
+	tmp = be16_to_cpup((const __be16 *)bp->dev->dev_addr);
 	dnet_writew_mac(bp, DNET_INTERNAL_MAC_ADDR_0_REG, tmp);
-	tmp = be16_to_cpup((__be16 *)(bp->dev->dev_addr + 2));
+	tmp = be16_to_cpup((const __be16 *)(bp->dev->dev_addr + 2));
 	dnet_writew_mac(bp, DNET_INTERNAL_MAC_ADDR_1_REG, tmp);
-	tmp = be16_to_cpup((__be16 *)(bp->dev->dev_addr + 4));
+	tmp = be16_to_cpup((const __be16 *)(bp->dev->dev_addr + 4));
 	dnet_writew_mac(bp, DNET_INTERNAL_MAC_ADDR_2_REG, tmp);
 }
 
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 649c5c429bd7..528eb0f223b1 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1080,7 +1080,7 @@ err:
 }
 
 /* Uses synchronous MCCQ */
-int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr,
+int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr,
 		    u32 if_id, u32 *pmac_id, u32 domain)
 {
 	struct be_mcc_wrb *wrb;
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index c30d6d6f0f3a..db1f3b908582 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -2385,7 +2385,7 @@ int be_pci_fnum_get(struct be_adapter *adapter);
 int be_fw_wait_ready(struct be_adapter *adapter);
 int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr,
 			  bool permanent, u32 if_handle, u32 pmac_id);
-int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, u32 if_id,
+int be_cmd_pmac_add(struct be_adapter *adapter, const u8 *mac_addr, u32 if_id,
 		    u32 *pmac_id, u32 domain);
 int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id,
 		    u32 domain);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 736a6ea86eb1..d51f24c9e1b8 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -272,7 +272,7 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
 	iowrite32(val, adapter->db + DB_CQ_OFFSET);
 }
 
-static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
+static int be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
 {
 	int i;
 
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index cd3a3b8f23b6..ed2ef167cdb2 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -802,8 +802,8 @@ static int ethoc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static void ethoc_do_set_mac_address(struct net_device *dev)
 {
+	const unsigned char *mac = dev->dev_addr;
 	struct ethoc *priv = netdev_priv(dev);
-	unsigned char *mac = dev->dev_addr;
 
 	ethoc_write(priv, MAC_ADDR0, (mac[2] << 24) | (mac[3] << 16) |
 				     (mac[4] <<  8) | (mac[5] <<  0));
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index 25c91b3c5fd3..63c935e7b625 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -827,7 +827,7 @@ static int netdev_open(struct net_device *dev)
 		return -EAGAIN;
 
 	for (i = 0; i < 3; i++)
-		iowrite16(((unsigned short*)dev->dev_addr)[i],
+		iowrite16(((const unsigned short *)dev->dev_addr)[i],
 				ioaddr + PAR0 + i*2);
 
 	init_ring(dev);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index cd8a7d94f60c..6b2927d863e2 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -272,7 +272,7 @@ static int dpaa_netdev_init(struct net_device *net_dev,
 	} else {
 		eth_hw_addr_random(net_dev);
 		err = priv->mac_dev->change_addr(priv->mac_dev->fman_mac,
-			(enet_addr_t *)net_dev->dev_addr);
+			(const enet_addr_t *)net_dev->dev_addr);
 		if (err) {
 			dev_err(dev, "Failed to set random MAC address\n");
 			return -EINVAL;
@@ -452,7 +452,7 @@ static int dpaa_set_mac_address(struct net_device *net_dev, void *addr)
 	mac_dev = priv->mac_dev;
 
 	err = mac_dev->change_addr(mac_dev->fman_mac,
-				   (enet_addr_t *)net_dev->dev_addr);
+				   (const enet_addr_t *)net_dev->dev_addr);
 	if (err < 0) {
 		netif_err(priv, drv, net_dev, "mac_dev->change_addr() = %d\n",
 			  err);
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.c b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
index bce3c9398887..1950a8936bc0 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.c
@@ -366,7 +366,7 @@ static void set_dflts(struct dtsec_cfg *cfg)
 	cfg->maximum_frame = DEFAULT_MAXIMUM_FRAME;
 }
 
-static void set_mac_address(struct dtsec_regs __iomem *regs, u8 *adr)
+static void set_mac_address(struct dtsec_regs __iomem *regs, const u8 *adr)
 {
 	u32 tmp;
 
@@ -516,7 +516,7 @@ static int init(struct dtsec_regs __iomem *regs, struct dtsec_cfg *cfg,
 
 	if (addr) {
 		MAKE_ENET_ADDR_FROM_UINT64(addr, eth_addr);
-		set_mac_address(regs, (u8 *)eth_addr);
+		set_mac_address(regs, (const u8 *)eth_addr);
 	}
 
 	/* HASH */
@@ -1022,7 +1022,7 @@ int dtsec_accept_rx_pause_frames(struct fman_mac *dtsec, bool en)
 	return 0;
 }
 
-int dtsec_modify_mac_address(struct fman_mac *dtsec, enet_addr_t *enet_addr)
+int dtsec_modify_mac_address(struct fman_mac *dtsec, const enet_addr_t *enet_addr)
 {
 	struct dtsec_regs __iomem *regs = dtsec->regs;
 	enum comm_mode mode = COMM_MODE_NONE;
@@ -1041,7 +1041,7 @@ int dtsec_modify_mac_address(struct fman_mac *dtsec, enet_addr_t *enet_addr)
 	 * Station address have to be swapped (big endian to little endian
 	 */
 	dtsec->addr = ENET_ADDR_TO_UINT64(*enet_addr);
-	set_mac_address(dtsec->regs, (u8 *)(*enet_addr));
+	set_mac_address(dtsec->regs, (const u8 *)(*enet_addr));
 
 	graceful_start(dtsec, mode);
 
diff --git a/drivers/net/ethernet/freescale/fman/fman_dtsec.h b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
index 5149d96ec2c1..68512c3bd6e5 100644
--- a/drivers/net/ethernet/freescale/fman/fman_dtsec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_dtsec.h
@@ -37,7 +37,7 @@
 
 struct fman_mac *dtsec_config(struct fman_mac_params *params);
 int dtsec_set_promiscuous(struct fman_mac *dtsec, bool new_val);
-int dtsec_modify_mac_address(struct fman_mac *dtsec, enet_addr_t *enet_addr);
+int dtsec_modify_mac_address(struct fman_mac *dtsec, const enet_addr_t *enet_addr);
 int dtsec_adjust_link(struct fman_mac *dtsec,
 		      u16 speed);
 int dtsec_restart_autoneg(struct fman_mac *dtsec);
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index 62f42921933d..2216b7f51d26 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -354,7 +354,7 @@ struct fman_mac {
 	bool allmulti_enabled;
 };
 
-static void add_addr_in_paddr(struct memac_regs __iomem *regs, u8 *adr,
+static void add_addr_in_paddr(struct memac_regs __iomem *regs, const u8 *adr,
 			      u8 paddr_num)
 {
 	u32 tmp0, tmp1;
@@ -897,12 +897,12 @@ int memac_accept_rx_pause_frames(struct fman_mac *memac, bool en)
 	return 0;
 }
 
-int memac_modify_mac_address(struct fman_mac *memac, enet_addr_t *enet_addr)
+int memac_modify_mac_address(struct fman_mac *memac, const enet_addr_t *enet_addr)
 {
 	if (!is_init_done(memac->memac_drv_param))
 		return -EINVAL;
 
-	add_addr_in_paddr(memac->regs, (u8 *)(*enet_addr), 0);
+	add_addr_in_paddr(memac->regs, (const u8 *)(*enet_addr), 0);
 
 	return 0;
 }
@@ -1058,7 +1058,7 @@ int memac_init(struct fman_mac *memac)
 	/* MAC Address */
 	if (memac->addr != 0) {
 		MAKE_ENET_ADDR_FROM_UINT64(memac->addr, eth_addr);
-		add_addr_in_paddr(memac->regs, (u8 *)eth_addr, 0);
+		add_addr_in_paddr(memac->regs, (const u8 *)eth_addr, 0);
 	}
 
 	fixed_link = memac_drv_param->fixed_link;
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.h b/drivers/net/ethernet/freescale/fman/fman_memac.h
index b2c671ec0ce7..3820f7a22983 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.h
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.h
@@ -40,7 +40,7 @@
 
 struct fman_mac *memac_config(struct fman_mac_params *params);
 int memac_set_promiscuous(struct fman_mac *memac, bool new_val);
-int memac_modify_mac_address(struct fman_mac *memac, enet_addr_t *enet_addr);
+int memac_modify_mac_address(struct fman_mac *memac, const enet_addr_t *enet_addr);
 int memac_adjust_link(struct fman_mac *memac, u16 speed);
 int memac_cfg_max_frame_len(struct fman_mac *memac, u16 new_val);
 int memac_cfg_reset_on_init(struct fman_mac *memac, bool enable);
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.c b/drivers/net/ethernet/freescale/fman/fman_tgec.c
index 41946b16f6c7..311c1906e044 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.c
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.c
@@ -221,7 +221,7 @@ struct fman_mac {
 	bool allmulti_enabled;
 };
 
-static void set_mac_address(struct tgec_regs __iomem *regs, u8 *adr)
+static void set_mac_address(struct tgec_regs __iomem *regs, const u8 *adr)
 {
 	u32 tmp0, tmp1;
 
@@ -514,13 +514,13 @@ int tgec_accept_rx_pause_frames(struct fman_mac *tgec, bool en)
 	return 0;
 }
 
-int tgec_modify_mac_address(struct fman_mac *tgec, enet_addr_t *p_enet_addr)
+int tgec_modify_mac_address(struct fman_mac *tgec, const enet_addr_t *p_enet_addr)
 {
 	if (!is_init_done(tgec->cfg))
 		return -EINVAL;
 
 	tgec->addr = ENET_ADDR_TO_UINT64(*p_enet_addr);
-	set_mac_address(tgec->regs, (u8 *)(*p_enet_addr));
+	set_mac_address(tgec->regs, (const u8 *)(*p_enet_addr));
 
 	return 0;
 }
@@ -704,7 +704,7 @@ int tgec_init(struct fman_mac *tgec)
 
 	if (tgec->addr) {
 		MAKE_ENET_ADDR_FROM_UINT64(tgec->addr, eth_addr);
-		set_mac_address(tgec->regs, (u8 *)eth_addr);
+		set_mac_address(tgec->regs, (const u8 *)eth_addr);
 	}
 
 	/* interrupts */
diff --git a/drivers/net/ethernet/freescale/fman/fman_tgec.h b/drivers/net/ethernet/freescale/fman/fman_tgec.h
index 3bfd1062b386..b28b20b26148 100644
--- a/drivers/net/ethernet/freescale/fman/fman_tgec.h
+++ b/drivers/net/ethernet/freescale/fman/fman_tgec.h
@@ -37,7 +37,7 @@
 
 struct fman_mac *tgec_config(struct fman_mac_params *params);
 int tgec_set_promiscuous(struct fman_mac *tgec, bool new_val);
-int tgec_modify_mac_address(struct fman_mac *tgec, enet_addr_t *enet_addr);
+int tgec_modify_mac_address(struct fman_mac *tgec, const enet_addr_t *enet_addr);
 int tgec_cfg_max_frame_len(struct fman_mac *tgec, u16 new_val);
 int tgec_enable(struct fman_mac *tgec, enum comm_mode mode);
 int tgec_disable(struct fman_mac *tgec, enum comm_mode mode);
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index 824a81a9f350..daa285a9b8b2 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -66,7 +66,7 @@ struct mac_device {
 	int (*stop)(struct mac_device *mac_dev);
 	void (*adjust_link)(struct mac_device *mac_dev);
 	int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
-	int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
+	int (*change_addr)(struct fman_mac *mac_dev, const enet_addr_t *enet_addr);
 	int (*set_allmulti)(struct fman_mac *mac_dev, bool enable);
 	int (*set_tstamp)(struct fman_mac *mac_dev, bool enable);
 	int (*set_multi)(struct net_device *net_dev,
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index 29190eb890c8..a6c18b6527f9 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -427,7 +427,7 @@ static void hisi_femac_free_skb_rings(struct hisi_femac_priv *priv)
 }
 
 static int hisi_femac_set_hw_mac_addr(struct hisi_femac_priv *priv,
-				      unsigned char *mac)
+				      const unsigned char *mac)
 {
 	u32 reg;
 
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 5f7ccdc834b7..d7e62eca050f 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -429,7 +429,7 @@ static void hix5hd2_port_disable(struct hix5hd2_priv *priv)
 static void hix5hd2_hw_set_mac_addr(struct net_device *dev)
 {
 	struct hix5hd2_priv *priv = netdev_priv(dev);
-	unsigned char *mac = dev->dev_addr;
+	const unsigned char *mac = dev->dev_addr;
 	u32 val;
 
 	val = mac[1] | (mac[0] << 8);
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index d46e8f999019..d72657444ef3 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -499,7 +499,7 @@ struct hnae_ae_ops {
 				   u32 *tx_usecs_high, u32 *rx_usecs_high);
 	void (*set_promisc_mode)(struct hnae_handle *handle, u32 en);
 	int (*get_mac_addr)(struct hnae_handle *handle, void **p);
-	int (*set_mac_addr)(struct hnae_handle *handle, void *p);
+	int (*set_mac_addr)(struct hnae_handle *handle, const void *p);
 	int (*add_uc_addr)(struct hnae_handle *handle,
 			   const unsigned char *addr);
 	int (*rm_uc_addr)(struct hnae_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index e81116ad9bdf..bc3e406f0139 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -206,7 +206,7 @@ static void hns_ae_fini_queue(struct hnae_queue *q)
 		hns_rcb_reset_ring_hw(q);
 }
 
-static int hns_ae_set_mac_address(struct hnae_handle *handle, void *p)
+static int hns_ae_set_mac_address(struct hnae_handle *handle, const void *p)
 {
 	int ret;
 	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index f387a859a201..8f391e2adcc0 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -450,7 +450,7 @@ static void hns_gmac_update_stats(void *mac_drv)
 		+= dsaf_read_dev(drv, GMAC_TX_PAUSE_FRAMES_REG);
 }
 
-static void hns_gmac_set_mac_addr(void *mac_drv, char *mac_addr)
+static void hns_gmac_set_mac_addr(void *mac_drv, const char *mac_addr)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index f41379de2186..7edf8569514c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -240,7 +240,7 @@ int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb, u8 vmid, u8 *port_num)
  *@addr:mac address
  */
 int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb,
-			   u32 vmid, char *addr)
+			   u32 vmid, const char *addr)
 {
 	int ret;
 	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index 8943ffab4418..e3bb05959ba9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -348,7 +348,7 @@ struct mac_driver {
 	/*disable mac when disable nic or dsaf*/
 	void (*mac_disable)(void *mac_drv, enum mac_commom_mode mode);
 	/* config mac address*/
-	void (*set_mac_addr)(void *mac_drv,	char *mac_addr);
+	void (*set_mac_addr)(void *mac_drv,	const char *mac_addr);
 	/*adjust mac mode of port,include speed and duplex*/
 	int (*adjust_link)(void *mac_drv, enum mac_speed speed,
 			   u32 full_duplex);
@@ -425,7 +425,8 @@ int hns_mac_init(struct dsaf_device *dsaf_dev);
 void mac_adjust_link(struct net_device *net_dev);
 bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex);
 void hns_mac_get_link_status(struct hns_mac_cb *mac_cb,	u32 *link_status);
-int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr);
+int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid,
+			   const char *addr);
 int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
 		      u32 port_num, char *addr, bool enable);
 int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vm, bool enable);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 401fef5f1d07..fc26ffaae620 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -255,7 +255,7 @@ static void hns_xgmac_pausefrm_cfg(void *mac_drv, u32 rx_en, u32 tx_en)
 	dsaf_write_dev(drv, XGMAC_MAC_PAUSE_CTRL_REG, origin);
 }
 
-static void hns_xgmac_set_pausefrm_mac_addr(void *mac_drv, char *mac_addr)
+static void hns_xgmac_set_pausefrm_mac_addr(void *mac_drv, const char *mac_addr)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 5d188573c433..98d63e804903 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -595,7 +595,7 @@ struct hnae3_ae_ops {
 				   u32 *tx_usecs_high, u32 *rx_usecs_high);
 
 	void (*get_mac_addr)(struct hnae3_handle *handle, u8 *p);
-	int (*set_mac_addr)(struct hnae3_handle *handle, void *p,
+	int (*set_mac_addr)(struct hnae3_handle *handle, const void *p,
 			    bool is_first);
 	int (*do_ioctl)(struct hnae3_handle *handle,
 			struct ifreq *ifr, int cmd);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index cd981b33d4ff..7bb34af3981e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9442,7 +9442,7 @@ int hclge_update_mac_node_for_dev_addr(struct hclge_vport *vport,
 	return 0;
 }
 
-static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
+static int hclge_set_mac_addr(struct hnae3_handle *handle, const void *p,
 			      bool is_first)
 {
 	const unsigned char *new_addr = (const unsigned char *)p;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 5fdac8685f95..2e6dcf75fba6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1349,7 +1349,7 @@ static void hclgevf_get_mac_addr(struct hnae3_handle *handle, u8 *p)
 		ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
-static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
+static int hclgevf_set_mac_addr(struct hnae3_handle *handle, const void *p,
 				bool is_first)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c
index 0696f723228a..18d32302c3c7 100644
--- a/drivers/net/ethernet/i825xx/sun3_82586.c
+++ b/drivers/net/ethernet/i825xx/sun3_82586.c
@@ -461,7 +461,7 @@ static int init586(struct net_device *dev)
 	ias_cmd->cmd_cmd	= swab16(CMD_IASETUP | CMD_LAST);
 	ias_cmd->cmd_link	= 0xffff;
 
-	memcpy((char *)&ias_cmd->iaddr,(char *) dev->dev_addr,ETH_ALEN);
+	memcpy((char *)&ias_cmd->iaddr,(const char *) dev->dev_addr,ETH_ALEN);
 
 	p->scb->cbl_offset = make16(ias_cmd);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 39fb3d57c057..3d528fba754b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -435,7 +435,7 @@ static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch)
 	return !!ch->fwd;
 }
 
-static inline u8 *i40e_channel_mac(struct i40e_channel *ch)
+static inline const u8 *i40e_channel_mac(struct i40e_channel *ch)
 {
 	if (i40e_is_channel_macvlan(ch))
 		return ch->fwd->netdev->dev_addr;
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
index a430871d1c27..c8d1e815ec6b 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
@@ -549,7 +549,7 @@ ixgb_mta_set(struct ixgb_hw *hw,
  *****************************************************************************/
 void
 ixgb_rar_set(struct ixgb_hw *hw,
-		  u8 *addr,
+		  const u8 *addr,
 		  u32 index)
 {
 	u32 rar_low, rar_high;
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
index 6064583095da..70bcff5fb3db 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
@@ -740,7 +740,7 @@ bool ixgb_adapter_start(struct ixgb_hw *hw);
 void ixgb_check_for_link(struct ixgb_hw *hw);
 bool ixgb_check_for_bad_link(struct ixgb_hw *hw);
 
-void ixgb_rar_set(struct ixgb_hw *hw, u8 *addr, u32 index);
+void ixgb_rar_set(struct ixgb_hw *hw, const u8 *addr, u32 index);
 
 /* Filters (multicast, vlan, receive) */
 void ixgb_mc_addr_list_update(struct ixgb_hw *hw, u8 *mc_addr_list,
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 654ec25e6705..a63d9a5c8059 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1770,7 +1770,7 @@ static void uc_addr_get(struct mv643xx_eth_private *mp, unsigned char *addr)
 	addr[5] = mac_l & 0xff;
 }
 
-static void uc_addr_set(struct mv643xx_eth_private *mp, unsigned char *addr)
+static void uc_addr_set(struct mv643xx_eth_private *mp, const u8 *addr)
 {
 	wrlp(mp, MAC_ADDR_HIGH,
 		(addr[0] << 24) | (addr[1] << 16) | (addr[2] << 8) | addr[3]);
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 761155af25d8..e2ce84ecb7a6 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1623,8 +1623,8 @@ static void mvneta_set_ucast_addr(struct mvneta_port *pp, u8 last_nibble,
 }
 
 /* Set mac address */
-static void mvneta_mac_addr_set(struct mvneta_port *pp, unsigned char *addr,
-				int queue)
+static void mvneta_mac_addr_set(struct mvneta_port *pp,
+				const unsigned char *addr, int queue)
 {
 	unsigned int mac_h;
 	unsigned int mac_l;
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 898b513f74e0..bb5341020803 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -389,7 +389,7 @@ static void inverse_every_nibble(unsigned char *mac_addr)
  * Outputs
  * return the calculated entry.
  */
-static u32 hash_function(unsigned char *mac_addr_orig)
+static u32 hash_function(const unsigned char *mac_addr_orig)
 {
 	u32 hash_result;
 	u32 addr0;
@@ -434,7 +434,7 @@ static u32 hash_function(unsigned char *mac_addr_orig)
  * -ENOSPC if table full
  */
 static int add_del_hash_entry(struct pxa168_eth_private *pep,
-			      unsigned char *mac_addr,
+			      const unsigned char *mac_addr,
 			      u32 rd, u32 skip, int del)
 {
 	struct addr_table_entry *entry, *start;
@@ -521,7 +521,7 @@ static int add_del_hash_entry(struct pxa168_eth_private *pep,
  */
 static void update_hash_table_mac_address(struct pxa168_eth_private *pep,
 					  unsigned char *oaddr,
-					  unsigned char *addr)
+					  const unsigned char *addr)
 {
 	/* Delete old entry */
 	if (oaddr)
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index e2ebfd8115a0..89ca7960b225 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -523,7 +523,7 @@ static void mtk_star_dma_resume_tx(struct mtk_star_priv *priv)
 static void mtk_star_set_mac_addr(struct net_device *ndev)
 {
 	struct mtk_star_priv *priv = netdev_priv(ndev);
-	u8 *mac_addr = ndev->dev_addr;
+	const u8 *mac_addr = ndev->dev_addr;
 	unsigned int high, low;
 
 	high = mac_addr[0] << 8 | mac_addr[1] << 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index c06b4b938ae7..73a377c8a2da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -71,12 +71,12 @@ struct mlx5e_l2_hash_node {
 	bool   mpfs;
 };
 
-static inline int mlx5e_hash_l2(u8 *addr)
+static inline int mlx5e_hash_l2(const u8 *addr)
 {
 	return addr[5];
 }
 
-static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr)
+static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr)
 {
 	struct mlx5e_l2_hash_node *hn;
 	int ix = mlx5e_hash_l2(addr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 269ebb53eda6..f7ebc1f9283f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -219,7 +219,7 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
 
 int mlx5i_create_underlay_qp(struct mlx5e_priv *priv)
 {
-	unsigned char *dev_addr = priv->netdev->dev_addr;
+	const unsigned char *dev_addr = priv->netdev->dev_addr;
 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {};
 	struct mlx5i_priv *ipriv = priv->ppriv;
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index 37ccf8c570b5..0f2cdcd4a4c0 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -380,7 +380,7 @@ static void ks8842_read_mac_addr(struct ks8842_adapter *adapter, u8 *dest)
 	}
 }
 
-static void ks8842_write_mac_addr(struct ks8842_adapter *adapter, u8 *mac)
+static void ks8842_write_mac_addr(struct ks8842_adapter *adapter, const u8 *mac)
 {
 	unsigned long flags;
 	unsigned i;
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index a1f7f45b9d08..03ad8bdc1f0d 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -4033,7 +4033,7 @@ static void hw_set_add_addr(struct ksz_hw *hw)
 	}
 }
 
-static int hw_add_addr(struct ksz_hw *hw, u8 *mac_addr)
+static int hw_add_addr(struct ksz_hw *hw, const u8 *mac_addr)
 {
 	int i;
 	int j = ADDITIONAL_ENTRIES;
@@ -4054,7 +4054,7 @@ static int hw_add_addr(struct ksz_hw *hw, u8 *mac_addr)
 	return -1;
 }
 
-static int hw_del_addr(struct ksz_hw *hw, u8 *mac_addr)
+static int hw_del_addr(struct ksz_hw *hw, const u8 *mac_addr)
 {
 	int i;
 
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 32760f87bf8a..5ae59b1e5b48 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -796,7 +796,8 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt)
 	return status;
 }
 
-static int myri10ge_update_mac_address(struct myri10ge_priv *mgp, u8 * addr)
+static int myri10ge_update_mac_address(struct myri10ge_priv *mgp,
+				       const u8 * addr)
 {
 	struct myri10ge_cmd cmd;
 	int status;
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 5454c1c2f8ad..ea0b2f3fd9c6 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -5217,7 +5217,7 @@ static int s2io_set_mac_addr(struct net_device *dev, void *p)
  *  as defined in errno.h file on failure.
  */
 
-static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr)
+static int do_s2io_prog_unicast(struct net_device *dev, const u8 *addr)
 {
 	struct s2io_nic *sp = netdev_priv(dev);
 	register u64 mac_addr = 0, perm_addr = 0;
diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h
index 5a6032212c19..a4266d1544ab 100644
--- a/drivers/net/ethernet/neterion/s2io.h
+++ b/drivers/net/ethernet/neterion/s2io.h
@@ -1073,7 +1073,7 @@ static void s2io_reset(struct s2io_nic * sp);
 static int s2io_poll_msix(struct napi_struct *napi, int budget);
 static int s2io_poll_inta(struct napi_struct *napi, int budget);
 static void s2io_init_pci(struct s2io_nic * sp);
-static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr);
+static int do_s2io_prog_unicast(struct net_device *dev, const u8 *addr);
 static void s2io_alarm_handle(struct timer_list *t);
 static irqreturn_t
 s2io_msix_ring_handle(int irq, void *dev_id);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index ab70179728f6..dfb4468fe287 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -837,7 +837,7 @@ nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry)
 }
 
 static int
-__nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del)
+__nfp_tunnel_offload_mac(struct nfp_app *app, const u8 *mac, u16 idx, bool del)
 {
 	struct nfp_tun_mac_addr_offload payload;
 
@@ -886,7 +886,7 @@ static bool nfp_tunnel_is_mac_idx_global(u16 nfp_mac_idx)
 }
 
 static struct nfp_tun_offloaded_mac *
-nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, u8 *mac)
+nfp_tunnel_lookup_offloaded_macs(struct nfp_app *app, const u8 *mac)
 {
 	struct nfp_flower_priv *priv = app->priv;
 
@@ -1005,7 +1005,7 @@ err_free_ida:
 
 static int
 nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev,
-			  u8 *mac, bool mod)
+			  const u8 *mac, bool mod)
 {
 	struct nfp_flower_priv *priv = app->priv;
 	struct nfp_flower_repr_priv *repr_priv;
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 43fd569aa5f1..fbfbf94e0377 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -419,7 +419,7 @@ struct netdata_local {
 /*
  * MAC support functions
  */
-static void __lpc_set_mac(struct netdata_local *pldat, u8 *mac)
+static void __lpc_set_mac(struct netdata_local *pldat, const u8 *mac)
 {
 	u32 tmp;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 6823016ab5a3..18f3bf7c4dfe 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -952,7 +952,7 @@ qed_llh_remove_filter(struct qed_hwfn *p_hwfn,
 }
 
 int qed_llh_add_mac_filter(struct qed_dev *cdev,
-			   u8 ppfid, u8 mac_addr[ETH_ALEN])
+			   u8 ppfid, const u8 mac_addr[ETH_ALEN])
 {
 	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index 6582bfc1b4a9..f8682356d0cf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -364,7 +364,7 @@ int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng);
  * Return: Int.
  */
 int qed_llh_add_mac_filter(struct qed_dev *cdev,
-			   u8 ppfid, u8 mac_addr[ETH_ALEN]);
+			   u8 ppfid, const u8 mac_addr[ETH_ALEN]);
 
 /**
  * qed_llh_remove_mac_filter(): Remove a LLH MAC filter from the given
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index a116fbc59725..2edd6bf64a3c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2848,7 +2848,7 @@ static int qed_fp_cqe_completion(struct qed_dev *dev,
 				      cqe);
 }
 
-static int qed_req_bulletin_update_mac(struct qed_dev *cdev, u8 *mac)
+static int qed_req_bulletin_update_mac(struct qed_dev *cdev, const u8 *mac)
 {
 	int i, ret;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 5e7242304ee2..29f9711bf438 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2887,7 +2887,7 @@ static int qed_update_drv_state(struct qed_dev *cdev, bool active)
 	return status;
 }
 
-static int qed_update_mac(struct qed_dev *cdev, u8 *mac)
+static int qed_update_mac(struct qed_dev *cdev, const u8 *mac)
 {
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_ptt *ptt;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 11a52a4a673b..64678a256f3b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -2851,7 +2851,7 @@ int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
 }
 
 int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
-			  struct qed_ptt *p_ptt, u8 *mac)
+			  struct qed_ptt *p_ptt, const u8 *mac)
 {
 	struct qed_mcp_mb_params mb_params;
 	u32 mfw_mac[2];
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 9e1de1191bd4..564723800d15 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -536,7 +536,7 @@ int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
  * Return: Int - 0 - Operation was successul.
  */
 int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
-			  struct qed_ptt *p_ptt, u8 *mac);
+			  struct qed_ptt *p_ptt, const u8 *mac);
 
 /**
  * qed_mcp_ov_update_wol(): Send WOL mode to MFW.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index fe0bb11d0e43..7f3e84b8622d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1965,7 +1965,7 @@ static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
 
 static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev,
 				       u8 *old_mac_address,
-				       u8 *new_mac_address)
+				       const u8 *new_mac_address)
 {
 	int rc = 0;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 220a95fa96e1..597cd9cd57b5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -1373,7 +1373,7 @@ exit:
 
 int
 qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
-			      u8 *p_mac)
+			      const u8 *p_mac)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct vfpf_bulletin_update_mac_tlv *p_req;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 8718760443be..306b5f4bc632 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -1070,7 +1070,7 @@ u32 qed_vf_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id);
  *
  * Return: Int.
  */
-int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, u8 *p_mac);
+int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn, const u8 *p_mac);
 
 #else
 static inline void qed_vf_get_link_params(struct qed_hwfn *p_hwfn,
@@ -1259,7 +1259,7 @@ static inline int qed_vf_pf_tunnel_param_update(struct qed_hwfn *p_hwfn,
 }
 
 static inline int qed_vf_pf_bulletin_update_mac(struct qed_hwfn *p_hwfn,
-						u8 *p_mac)
+						const u8 *p_mac)
 {
 	return -EINVAL;
 }
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index 03c51dd37e1f..3010833ddde3 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -617,7 +617,7 @@ void qede_fill_rss_params(struct qede_dev *edev,
 
 static int qede_set_ucast_rx_mac(struct qede_dev *edev,
 				 enum qed_filter_xcast_params_type opcode,
-				 unsigned char mac[ETH_ALEN])
+				 const unsigned char mac[ETH_ALEN])
 {
 	struct qed_filter_ucast_params ucast;
 
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 87b8c032195d..06104d2ff5b3 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -420,7 +420,7 @@ static void emac_mac_dma_config(struct emac_adapter *adpt)
 }
 
 /* set MAC address */
-static void emac_set_mac_address(struct emac_adapter *adpt, u8 *addr)
+static void emac_set_mac_address(struct emac_adapter *adpt, const u8 *addr)
 {
 	u32 sta;
 
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index 01ef5efd7bc2..a8f282c43a78 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -453,7 +453,7 @@ static void r6040_down(struct net_device *dev)
 {
 	struct r6040_private *lp = netdev_priv(dev);
 	void __iomem *ioaddr = lp->base;
-	u16 *adrp;
+	const u16 *adrp;
 
 	/* Stop MAC */
 	iowrite16(MSK_INT, ioaddr + MIER);	/* Mask Off Interrupt */
@@ -462,7 +462,7 @@ static void r6040_down(struct net_device *dev)
 	r6040_reset_mac(lp);
 
 	/* Restore MAC Address to MIDx */
-	adrp = (u16 *) dev->dev_addr;
+	adrp = (const u16 *) dev->dev_addr;
 	iowrite16(adrp[0], ioaddr + MID_0L);
 	iowrite16(adrp[1], ioaddr + MID_0M);
 	iowrite16(adrp[2], ioaddr + MID_0H);
@@ -731,13 +731,13 @@ static void r6040_mac_address(struct net_device *dev)
 {
 	struct r6040_private *lp = netdev_priv(dev);
 	void __iomem *ioaddr = lp->base;
-	u16 *adrp;
+	const u16 *adrp;
 
 	/* Reset MAC */
 	r6040_reset_mac(lp);
 
 	/* Restore MAC Address */
-	adrp = (u16 *) dev->dev_addr;
+	adrp = (const u16 *) dev->dev_addr;
 	iowrite16(adrp[0], ioaddr + MID_0L);
 	iowrite16(adrp[1], ioaddr + MID_0M);
 	iowrite16(adrp[2], ioaddr + MID_0H);
@@ -849,13 +849,13 @@ static void r6040_multicast_list(struct net_device *dev)
 	unsigned long flags;
 	struct netdev_hw_addr *ha;
 	int i;
-	u16 *adrp;
+	const u16 *adrp;
 	u16 hash_table[4] = { 0 };
 
 	spin_lock_irqsave(&lp->lock, flags);
 
 	/* Keep our MAC Address */
-	adrp = (u16 *)dev->dev_addr;
+	adrp = (const u16 *)dev->dev_addr;
 	iowrite16(adrp[0], ioaddr + MID_0L);
 	iowrite16(adrp[1], ioaddr + MID_0M);
 	iowrite16(adrp[2], ioaddr + MID_0H);
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
index 049dc6cf4611..0f45107db8dd 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
@@ -329,7 +329,7 @@ struct sxgbe_core_ops {
 	/* Set power management mode (e.g. magic frame) */
 	void (*pmt)(void __iomem *ioaddr, unsigned long mode);
 	/* Set/Get Unicast MAC addresses */
-	void (*set_umac_addr)(void __iomem *ioaddr, unsigned char *addr,
+	void (*set_umac_addr)(void __iomem *ioaddr, const unsigned char *addr,
 			      unsigned int reg_n);
 	void (*get_umac_addr)(void __iomem *ioaddr, unsigned char *addr,
 			      unsigned int reg_n);
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c
index e96e2bd295ef..7d9f257de92a 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_core.c
@@ -85,7 +85,8 @@ static void sxgbe_core_pmt(void __iomem *ioaddr, unsigned long mode)
 }
 
 /* Set/Get Unicast MAC addresses */
-static void sxgbe_core_set_umac_addr(void __iomem *ioaddr, unsigned char *addr,
+static void sxgbe_core_set_umac_addr(void __iomem *ioaddr,
+				     const unsigned char *addr,
 				     unsigned int reg_n)
 {
 	u32 high_word, low_word;
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index e7e2223aebbf..cf366ed2557c 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1038,7 +1038,7 @@ int efx_ef10_vadaptor_free(struct efx_nic *efx, unsigned int port_id)
 }
 
 int efx_ef10_vport_add_mac(struct efx_nic *efx,
-			   unsigned int port_id, u8 *mac)
+			   unsigned int port_id, const u8 *mac)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_ADD_MAC_ADDRESS_IN_LEN);
 
@@ -1050,7 +1050,7 @@ int efx_ef10_vport_add_mac(struct efx_nic *efx,
 }
 
 int efx_ef10_vport_del_mac(struct efx_nic *efx,
-			   unsigned int port_id, u8 *mac)
+			   unsigned int port_id, const u8 *mac)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_VPORT_DEL_MAC_ADDRESS_IN_LEN);
 
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 06d23c708a5f..7f5aa4a8c451 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -480,7 +480,7 @@ static int efx_ef10_vport_del_vf_mac(struct efx_nic *efx, unsigned int port_id,
 	return rc;
 }
 
-int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac)
+int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, const u8 *mac)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	struct ef10_vf *vf;
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.h b/drivers/net/ethernet/sfc/ef10_sriov.h
index cfe556d17313..3c703ca878b0 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.h
+++ b/drivers/net/ethernet/sfc/ef10_sriov.h
@@ -39,7 +39,7 @@ static inline void efx_ef10_sriov_reset(struct efx_nic *efx) {}
 void efx_ef10_sriov_fini(struct efx_nic *efx);
 static inline void efx_ef10_sriov_flr(struct efx_nic *efx, unsigned vf_i) {}
 
-int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf, u8 *mac);
+int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf, const u8 *mac);
 
 int efx_ef10_sriov_set_vf_vlan(struct efx_nic *efx, int vf_i,
 			       u16 vlan, u8 qos);
@@ -60,9 +60,9 @@ int efx_ef10_vswitching_restore_vf(struct efx_nic *efx);
 void efx_ef10_vswitching_remove_pf(struct efx_nic *efx);
 void efx_ef10_vswitching_remove_vf(struct efx_nic *efx);
 int efx_ef10_vport_add_mac(struct efx_nic *efx,
-			   unsigned int port_id, u8 *mac);
+			   unsigned int port_id, const u8 *mac);
 int efx_ef10_vport_del_mac(struct efx_nic *efx,
-			   unsigned int port_id, u8 *mac);
+			   unsigned int port_id, const u8 *mac);
 int efx_ef10_vadaptor_alloc(struct efx_nic *efx, unsigned int port_id);
 int efx_ef10_vadaptor_query(struct efx_nic *efx, unsigned int port_id,
 			    u32 *port_flags, u32 *vadaptor_flags,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index f6981810039d..cc15ee8812d9 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1440,7 +1440,7 @@ struct efx_nic_type {
 	bool (*sriov_wanted)(struct efx_nic *efx);
 	void (*sriov_reset)(struct efx_nic *efx);
 	void (*sriov_flr)(struct efx_nic *efx, unsigned vf_i);
-	int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, u8 *mac);
+	int (*sriov_set_vf_mac)(struct efx_nic *efx, int vf_i, const u8 *mac);
 	int (*sriov_set_vf_vlan)(struct efx_nic *efx, int vf_i, u16 vlan,
 				 u8 qos);
 	int (*sriov_set_vf_spoofchk)(struct efx_nic *efx, int vf_i,
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index 83dcfcae3d4b..e9095cf06368 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -1591,7 +1591,7 @@ void efx_fini_sriov(void)
 	destroy_workqueue(vfdi_workqueue);
 }
 
-int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac)
+int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, const u8 *mac)
 {
 	struct siena_nic_data *nic_data = efx->nic_data;
 	struct siena_vf *vf;
diff --git a/drivers/net/ethernet/sfc/siena_sriov.h b/drivers/net/ethernet/sfc/siena_sriov.h
index e441c89c25ce..e548c4daf189 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.h
+++ b/drivers/net/ethernet/sfc/siena_sriov.h
@@ -46,7 +46,7 @@ bool efx_siena_sriov_wanted(struct efx_nic *efx);
 void efx_siena_sriov_reset(struct efx_nic *efx);
 void efx_siena_sriov_flr(struct efx_nic *efx, unsigned flr);
 
-int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, u8 *mac);
+int efx_siena_sriov_set_vf_mac(struct efx_nic *efx, int vf, const u8 *mac);
 int efx_siena_sriov_set_vf_vlan(struct efx_nic *efx, int vf,
 				u16 vlan, u8 qos);
 int efx_siena_sriov_set_vf_spoofchk(struct efx_nic *efx, int vf,
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 60a0c0e9ded2..d105779ba3b2 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1098,7 +1098,7 @@ sis900_init_rxfilter (struct net_device * net_dev)
 
 	/* load MAC addr to filter data register */
 	for (i = 0 ; i < 3 ; i++) {
-		u32 w = (u32) *((u16 *)(net_dev->dev_addr)+i);
+		u32 w = (u32) *((const u16 *)(net_dev->dev_addr)+i);
 
 		sw32(rfcr, i << RFADDR_shift);
 		sw32(rfdr, w);
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index fa387510c189..73bcc6f2bb6e 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -1503,7 +1503,7 @@ static int smsc911x_soft_reset(struct smsc911x_data *pdata)
 
 /* Sets the device MAC address to dev_addr, called with mac_lock held */
 static void
-smsc911x_set_hw_mac_address(struct smsc911x_data *pdata, u8 dev_addr[6])
+smsc911x_set_hw_mac_address(struct smsc911x_data *pdata, const u8 dev_addr[6])
 {
 	u32 mac_high16 = (dev_addr[5] << 8) | dev_addr[4];
 	u32 mac_low32 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index 3d1176588f7d..d207c0b463ab 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -404,7 +404,7 @@ static const struct ethtool_ops smsc9420_ethtool_ops = {
 static void smsc9420_set_mac_address(struct net_device *dev)
 {
 	struct smsc9420_pdata *pd = netdev_priv(dev);
-	u8 *dev_addr = dev->dev_addr;
+	const u8 *dev_addr = dev->dev_addr;
 	u32 mac_high16 = (dev_addr[5] << 8) | dev_addr[4];
 	u32 mac_low32 = (dev_addr[3] << 24) | (dev_addr[2] << 16) |
 	    (dev_addr[1] << 8) | dev_addr[0];
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index b6d945ea903d..9160f9ed363a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -546,13 +546,13 @@ int dwmac4_setup(struct stmmac_priv *priv);
 int dwxgmac2_setup(struct stmmac_priv *priv);
 int dwxlgmac2_setup(struct stmmac_priv *priv);
 
-void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6],
+void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6],
 			 unsigned int high, unsigned int low);
 void stmmac_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
 			 unsigned int high, unsigned int low);
 void stmmac_set_mac(void __iomem *ioaddr, bool enable);
 
-void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, u8 addr[6],
+void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, const u8 addr[6],
 				unsigned int high, unsigned int low);
 void stmmac_dwmac4_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
 				unsigned int high, unsigned int low);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 4422baeed3d8..617d0e4c6495 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -634,7 +634,7 @@ static void sun8i_dwmac_set_mac(void __iomem *ioaddr, bool enable)
  * If addr is NULL, clear the slot
  */
 static void sun8i_dwmac_set_umac_addr(struct mac_device_info *hw,
-				      unsigned char *addr,
+				      const unsigned char *addr,
 				      unsigned int reg_n)
 {
 	void __iomem *ioaddr = hw->pcsr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index fc8759f146c7..76edb9b72675 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -104,7 +104,7 @@ static void dwmac1000_dump_regs(struct mac_device_info *hw, u32 *reg_space)
 }
 
 static void dwmac1000_set_umac_addr(struct mac_device_info *hw,
-				    unsigned char *addr,
+				    const unsigned char *addr,
 				    unsigned int reg_n)
 {
 	void __iomem *ioaddr = hw->pcsr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index ebcad8dd99db..75071a7d551a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -68,7 +68,7 @@ static int dwmac100_irq_status(struct mac_device_info *hw,
 }
 
 static void dwmac100_set_umac_addr(struct mac_device_info *hw,
-				   unsigned char *addr,
+				   const unsigned char *addr,
 				   unsigned int reg_n)
 {
 	void __iomem *ioaddr = hw->pcsr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index b21745368983..fd41db65fe1d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -322,7 +322,7 @@ static void dwmac4_pmt(struct mac_device_info *hw, unsigned long mode)
 }
 
 static void dwmac4_set_umac_addr(struct mac_device_info *hw,
-				 unsigned char *addr, unsigned int reg_n)
+				 const unsigned char *addr, unsigned int reg_n)
 {
 	void __iomem *ioaddr = hw->pcsr;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 9292a1fab7d3..d1c605777985 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -187,7 +187,7 @@ int dwmac4_dma_interrupt(void __iomem *ioaddr,
 	return ret;
 }
 
-void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, u8 addr[6],
+void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, const u8 addr[6],
 				unsigned int high, unsigned int low)
 {
 	unsigned long data;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index d1c31200bb91..caa4bfc4c1d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -239,7 +239,7 @@ void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr)
 	do {} while ((readl(ioaddr + DMA_CONTROL) & DMA_CONTROL_FTF));
 }
 
-void stmmac_set_mac_addr(void __iomem *ioaddr, u8 addr[6],
+void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6],
 			 unsigned int high, unsigned int low)
 {
 	unsigned long data;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index c4d78fa93663..c6c4d7948fe5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -335,7 +335,8 @@ static void dwxgmac2_pmt(struct mac_device_info *hw, unsigned long mode)
 }
 
 static void dwxgmac2_set_umac_addr(struct mac_device_info *hw,
-				   unsigned char *addr, unsigned int reg_n)
+				   const unsigned char *addr,
+				   unsigned int reg_n)
 {
 	void __iomem *ioaddr = hw->pcsr;
 	u32 value;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 6dc1c98ebec8..6cf2c2107197 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -330,7 +330,8 @@ struct stmmac_ops {
 	/* Set power management mode (e.g. magic frame) */
 	void (*pmt)(struct mac_device_info *hw, unsigned long mode);
 	/* Set/Get Unicast MAC addresses */
-	void (*set_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
+	void (*set_umac_addr)(struct mac_device_info *hw,
+			      const unsigned char *addr,
 			      unsigned int reg_n);
 	void (*get_umac_addr)(struct mac_device_info *hw, unsigned char *addr,
 			      unsigned int reg_n);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index e649a3e6a529..be3cb63675a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -36,7 +36,7 @@ struct stmmac_packet_attrs {
 	int vlan_id_in;
 	int vlan_id_out;
 	unsigned char *src;
-	unsigned char *dst;
+	const unsigned char *dst;
 	u32 ip_src;
 	u32 ip_dst;
 	int tcp;
@@ -249,8 +249,8 @@ static int stmmac_test_loopback_validate(struct sk_buff *skb,
 					 struct net_device *orig_ndev)
 {
 	struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+	const unsigned char *dst = tpriv->packet->dst;
 	unsigned char *src = tpriv->packet->src;
-	unsigned char *dst = tpriv->packet->dst;
 	struct stmmachdr *shdr;
 	struct ethhdr *ehdr;
 	struct udphdr *uhdr;
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index c646575e79d5..d70426670c37 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -623,7 +623,7 @@ static int bigmac_init_hw(struct bigmac *bp, bool non_blocking)
 	void __iomem *cregs        = bp->creg;
 	void __iomem *bregs        = bp->bregs;
 	__u32 bblk_dvma = (__u32)bp->bblock_dvma;
-	unsigned char *e = &bp->dev->dev_addr[0];
+	const unsigned char *e = &bp->dev->dev_addr[0];
 
 	/* Latch current counters into statistics. */
 	bigmac_get_counters(bp, bregs);
diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c
index 52b1053a0a77..efe0d33f6024 100644
--- a/drivers/net/ethernet/sun/sunqe.c
+++ b/drivers/net/ethernet/sun/sunqe.c
@@ -144,7 +144,7 @@ static int qe_init(struct sunqe *qep, int from_irq)
 	void __iomem *cregs = qep->qcregs;
 	void __iomem *mregs = qep->mregs;
 	void __iomem *gregs = qecp->gregs;
-	unsigned char *e = &qep->dev->dev_addr[0];
+	const unsigned char *e = &qep->dev->dev_addr[0];
 	__u32 qblk_dvma = (__u32)qep->qblock_dvma;
 	u32 tmp;
 	int i;
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c
index bf6c1c6779ff..76eb7db80f13 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-hw.c
@@ -57,7 +57,7 @@ static int xlgmac_enable_rx_csum(struct xlgmac_pdata *pdata)
 	return 0;
 }
 
-static int xlgmac_set_mac_address(struct xlgmac_pdata *pdata, u8 *addr)
+static int xlgmac_set_mac_address(struct xlgmac_pdata *pdata, const u8 *addr)
 {
 	unsigned int mac_addr_hi, mac_addr_lo;
 
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac.h b/drivers/net/ethernet/synopsys/dwc-xlgmac.h
index 8598aaf3ec99..98e3a271e017 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac.h
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac.h
@@ -410,7 +410,7 @@ struct xlgmac_hw_ops {
 	void (*dev_xmit)(struct xlgmac_channel *channel);
 	int (*dev_read)(struct xlgmac_channel *channel);
 
-	int (*set_mac_address)(struct xlgmac_pdata *pdata, u8 *addr);
+	int (*set_mac_address)(struct xlgmac_pdata *pdata, const u8 *addr);
 	int (*config_rx_mode)(struct xlgmac_pdata *pdata);
 	int (*enable_rx_csum)(struct xlgmac_pdata *pdata);
 	int (*disable_rx_csum)(struct xlgmac_pdata *pdata);
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index 77c448ad67ce..eab7d78d7c72 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -184,7 +184,7 @@ static void	tlan_print_list(struct tlan_list *, char *, int);
 static void	tlan_read_and_clear_stats(struct net_device *, int);
 static void	tlan_reset_adapter(struct net_device *);
 static void	tlan_finish_reset(struct net_device *);
-static void	tlan_set_mac(struct net_device *, int areg, char *mac);
+static void	tlan_set_mac(struct net_device *, int areg, const char *mac);
 
 static void	__tlan_phy_print(struct net_device *);
 static void	tlan_phy_print(struct net_device *);
@@ -2346,7 +2346,7 @@ tlan_finish_reset(struct net_device *dev)
  *
  **************************************************************/
 
-static void tlan_set_mac(struct net_device *dev, int areg, char *mac)
+static void tlan_set_mac(struct net_device *dev, int areg, const char *mac)
 {
 	int i;
 
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index 93453e5713b2..f8b9d10dc056 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -1859,7 +1859,8 @@ static struct net_device_stats *tc35815_get_stats(struct net_device *dev)
 	return &dev->stats;
 }
 
-static void tc35815_set_cam_entry(struct net_device *dev, int index, unsigned char *addr)
+static void tc35815_set_cam_entry(struct net_device *dev, int index,
+				  const unsigned char *addr)
 {
 	struct tc35815_local *lp = netdev_priv(dev);
 	struct tc35815_regs __iomem *tr =
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index b95aee8607a4..0815de581c7f 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -206,12 +206,13 @@ static void xemaclite_disable_interrupts(struct net_local *drvdata)
  * This function writes data from a 16-bit aligned buffer to a 32-bit aligned
  * address in the EmacLite device.
  */
-static void xemaclite_aligned_write(void *src_ptr, u32 *dest_ptr,
+static void xemaclite_aligned_write(const void *src_ptr, u32 *dest_ptr,
 				    unsigned length)
 {
+	const u16 *from_u16_ptr;
 	u32 align_buffer;
 	u32 *to_u32_ptr;
-	u16 *from_u16_ptr, *to_u16_ptr;
+	u16 *to_u16_ptr;
 
 	to_u32_ptr = dest_ptr;
 	from_u16_ptr = src_ptr;
@@ -470,7 +471,7 @@ static u16 xemaclite_recv_data(struct net_local *drvdata, u8 *data, int maxlen)
  * buffers (if configured).
  */
 static void xemaclite_update_address(struct net_local *drvdata,
-				     u8 *address_ptr)
+				     const u8 *address_ptr)
 {
 	void __iomem *addr;
 	u32 reg_data;
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index ae611e46da6a..ab513dcc3b22 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -1271,7 +1271,7 @@ struct set_address_info {
 	unsigned int ioaddr;
 };
 
-static void set_address(struct set_address_info *sa_info, char *addr)
+static void set_address(struct set_address_info *sa_info, const char *addr)
 {
 	unsigned int ioaddr = sa_info->ioaddr;
 	int i;
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 6e32da28e138..ebfeeb3c67c1 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -273,12 +273,12 @@ static int vsc85xx_downshift_set(struct phy_device *phydev, u8 count)
 static int vsc85xx_wol_set(struct phy_device *phydev,
 			   struct ethtool_wolinfo *wol)
 {
+	const u8 *mac_addr = phydev->attached_dev->dev_addr;
 	int rc;
 	u16 reg_val;
 	u8  i;
 	u16 pwd[3] = {0, 0, 0};
 	struct ethtool_wolinfo *wol_conf = wol;
-	u8 *mac_addr = phydev->attached_dev->dev_addr;
 
 	mutex_lock(&phydev->lock);
 	rc = phy_select_page(phydev, MSCC_PHY_PAGE_EXTENDED_2);
diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c
index 981ac1c33780..ea06d10e1c21 100644
--- a/drivers/net/usb/aqc111.c
+++ b/drivers/net/usb/aqc111.c
@@ -119,7 +119,7 @@ static int aqc111_write_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value,
 }
 
 static int aqc111_write_cmd(struct usbnet *dev, u8 cmd, u16 value,
-			    u16 index, u16 size, void *data)
+			    u16 index, u16 size, const void *data)
 {
 	int ret;
 
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 5ed59d9dd631..ea8aa8c33241 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -209,7 +209,7 @@ static int __ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 }
 
 static int __ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-			       u16 size, void *data, int in_pm)
+			       u16 size, const void *data, int in_pm)
 {
 	int ret;
 	int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
@@ -272,7 +272,7 @@ static int ax88179_read_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value,
 }
 
 static int ax88179_write_cmd_nopm(struct usbnet *dev, u8 cmd, u16 value,
-				  u16 index, u16 size, void *data)
+				  u16 index, u16 size, const void *data)
 {
 	int ret;
 
@@ -313,7 +313,7 @@ static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
 }
 
 static int ax88179_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index,
-			     u16 size, void *data)
+			     u16 size, const void *data)
 {
 	int ret;
 
@@ -463,7 +463,7 @@ static int ax88179_auto_detach(struct usbnet *dev, int in_pm)
 	u16 tmp16;
 	u8 tmp8;
 	int (*fnr)(struct usbnet *, u8, u16, u16, u16, void *);
-	int (*fnw)(struct usbnet *, u8, u16, u16, u16, void *);
+	int (*fnw)(struct usbnet *, u8, u16, u16, u16, const void *);
 
 	if (!in_pm) {
 		fnr = ax88179_read_cmd;
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 97ba67042d12..24db5768a3c0 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -615,7 +615,7 @@ static void catc_stats_timer(struct timer_list *t)
  * Receive modes. Broadcast, Multicast, Promisc.
  */
 
-static void catc_multicast(unsigned char *addr, u8 *multicast)
+static void catc_multicast(const unsigned char *addr, u8 *multicast)
 {
 	u32 crc;
 
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index dcdb46314685..48d7d278631e 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -93,7 +93,8 @@ static int dm_write_reg(struct usbnet *dev, u8 reg, u8 value)
 				value, reg, NULL, 0);
 }
 
-static void dm_write_async(struct usbnet *dev, u8 reg, u16 length, void *data)
+static void dm_write_async(struct usbnet *dev, u8 reg, u16 length,
+			   const void *data)
 {
 	usbnet_write_cmd_async(dev, DM_WRITE_REGS,
 			       USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index cead742da381..5f42db26d200 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -132,7 +132,8 @@ static int mcs7830_hif_get_mac_address(struct usbnet *dev, unsigned char *addr)
 	return 0;
 }
 
-static int mcs7830_hif_set_mac_address(struct usbnet *dev, unsigned char *addr)
+static int mcs7830_hif_set_mac_address(struct usbnet *dev,
+				       const unsigned char *addr)
 {
 	int ret = mcs7830_set_reg(dev, HIF_REG_ETHERNET_ADDR, ETH_ALEN, addr);
 
diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c
index 068f197f1786..15209de1849e 100644
--- a/drivers/net/usb/sr9700.c
+++ b/drivers/net/usb/sr9700.c
@@ -56,7 +56,8 @@ static int sr_write_reg(struct usbnet *dev, u8 reg, u8 value)
 				value, reg, NULL, 0);
 }
 
-static void sr_write_async(struct usbnet *dev, u8 reg, u16 length, void *data)
+static void sr_write_async(struct usbnet *dev, u8 reg, u16 length,
+			   const void *data)
 {
 	usbnet_write_cmd_async(dev, SR_WR_REGS, SR_REQ_WR_REG,
 			       0, reg, data, length);
diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h
index 4df0bf0a0864..e1bf3219b4e6 100644
--- a/include/linux/qed/qed_eth_if.h
+++ b/include/linux/qed/qed_eth_if.h
@@ -331,7 +331,7 @@ struct qed_eth_ops {
 	int (*configure_arfs_searcher)(struct qed_dev *cdev,
 				       enum qed_filter_config_mode mode);
 	int (*get_coalesce)(struct qed_dev *cdev, u16 *coal, void *handle);
-	int (*req_bulletin_update_mac)(struct qed_dev *cdev, u8 *mac);
+	int (*req_bulletin_update_mac)(struct qed_dev *cdev, const u8 *mac);
 };
 
 const struct qed_eth_ops *qed_get_eth_ops(void);
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index ad220d5da18f..0dae7fcc5ef2 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -1113,7 +1113,7 @@ struct qed_common_ops {
  *
  * Return: Int.
  */
-	int (*update_mac)(struct qed_dev *cdev, u8 *mac);
+	int (*update_mac)(struct qed_dev *cdev, const u8 *mac);
 
 /**
  * update_mtu(): API to inform the change in the mtu.
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index aeb242cefebf..3b76c07fbcf8 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -662,7 +662,8 @@ struct qed_rdma_ops {
 			     u8 connection_handle,
 			     struct qed_ll2_stats *p_stats);
 	int (*ll2_set_mac_filter)(struct qed_dev *cdev,
-				  u8 *old_mac_address, u8 *new_mac_address);
+				  u8 *old_mac_address,
+				  const u8 *new_mac_address);
 
 	int (*iwarp_set_engine_affin)(struct qed_dev *cdev, bool b_reset);
 
-- 
cgit v1.2.3


From 54f2d8d6ca99a3e24efc9c8b055bbcbe0b583227 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 13 Oct 2021 13:44:30 -0700
Subject: ethernet: make eth_hw_addr_random() use dev_addr_set()

Commit 406f42fa0d3c ("net-next: When a bond have a massive amount
of VLANs...") introduced a rbtree for faster Ethernet address look
up. To maintain netdev->dev_addr in this tree we need to make all
the writes to it got through appropriate helpers.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/etherdevice.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 3cf546d2ffd1..76f7ff684cbf 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -269,8 +269,11 @@ static inline void eth_zero_addr(u8 *addr)
  */
 static inline void eth_hw_addr_random(struct net_device *dev)
 {
+	u8 addr[ETH_ALEN];
+
+	eth_random_addr(addr);
+	__dev_addr_set(dev, addr, ETH_ALEN);
 	dev->addr_assign_type = NET_ADDR_RANDOM;
-	eth_random_addr(dev->dev_addr);
 }
 
 /**
-- 
cgit v1.2.3


From ba530fea8ca1b57ee71d4e62f287a5d7ed92f789 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 13 Oct 2021 13:54:50 -0700
Subject: ethernet: remove random_ether_addr()

random_ether_addr() was the original name of the helper which
was kept for backward compatibility (?) after the rename in
commit 0a4dd594982a ("etherdevice: Rename random_ether_addr
to eth_random_addr").

We have a single random_ether_addr() caller left in tree
while there are 70 callers of eth_random_addr().
Time to drop this define.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20211013205450.328092-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 2 +-
 include/linux/etherdevice.h              | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 6904bfaa5777..c092cb61416a 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -1918,7 +1918,7 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 							port->port_id,
 							port->slave.mac_addr);
 			if (!is_valid_ether_addr(port->slave.mac_addr)) {
-				random_ether_addr(port->slave.mac_addr);
+				eth_random_addr(port->slave.mac_addr);
 				dev_err(dev, "Use random MAC address\n");
 			}
 		}
diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 76f7ff684cbf..23681c3d3b8a 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -234,8 +234,6 @@ static inline void eth_random_addr(u8 *addr)
 	addr[0] |= 0x02;	/* set local assignment bit (IEEE802) */
 }
 
-#define random_ether_addr(addr) eth_random_addr(addr)
-
 /**
  * eth_broadcast_addr - Assign broadcast address
  * @addr: Pointer to a six-byte array containing the Ethernet address
-- 
cgit v1.2.3


From 7463acfbe52ae8b7e0ea6890c1886b3f8ba8bddd Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 8 Oct 2021 22:06:01 +0200
Subject: netfilter: Rename ingress hook include file

Prepare for addition of a netfilter egress hook by renaming
<linux/netfilter_ingress.h> to <linux/netfilter_netdev.h>.

The egress hook also necessitates a refactoring of the include file,
but that is done in a separate commit to ease reviewing.

No functional change intended.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ingress.h | 58 ---------------------------------------
 include/linux/netfilter_netdev.h  | 58 +++++++++++++++++++++++++++++++++++++++
 net/core/dev.c                    |  2 +-
 3 files changed, 59 insertions(+), 59 deletions(-)
 delete mode 100644 include/linux/netfilter_ingress.h
 create mode 100644 include/linux/netfilter_netdev.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
deleted file mode 100644
index a13774be2eb5..000000000000
--- a/include/linux/netfilter_ingress.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NETFILTER_INGRESS_H_
-#define _NETFILTER_INGRESS_H_
-
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-
-#ifdef CONFIG_NETFILTER_INGRESS
-static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
-{
-#ifdef CONFIG_JUMP_LABEL
-	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
-		return false;
-#endif
-	return rcu_access_pointer(skb->dev->nf_hooks_ingress);
-}
-
-/* caller must hold rcu_read_lock */
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
-	struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
-	struct nf_hook_state state;
-	int ret;
-
-	/* Must recheck the ingress hook head, in the event it became NULL
-	 * after the check in nf_hook_ingress_active evaluated to true.
-	 */
-	if (unlikely(!e))
-		return 0;
-
-	nf_hook_state_init(&state, NF_NETDEV_INGRESS,
-			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
-			   dev_net(skb->dev), NULL);
-	ret = nf_hook_slow(skb, &state, e, 0);
-	if (ret == 0)
-		return -1;
-
-	return ret;
-}
-
-static inline void nf_hook_ingress_init(struct net_device *dev)
-{
-	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
-}
-#else /* CONFIG_NETFILTER_INGRESS */
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
-{
-	return 0;
-}
-
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
-	return 0;
-}
-
-static inline void nf_hook_ingress_init(struct net_device *dev) {}
-#endif /* CONFIG_NETFILTER_INGRESS */
-#endif /* _NETFILTER_INGRESS_H_ */
diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
new file mode 100644
index 000000000000..a13774be2eb5
--- /dev/null
+++ b/include/linux/netfilter_netdev.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NETFILTER_INGRESS_H_
+#define _NETFILTER_INGRESS_H_
+
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NETFILTER_INGRESS
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
+{
+#ifdef CONFIG_JUMP_LABEL
+	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+		return false;
+#endif
+	return rcu_access_pointer(skb->dev->nf_hooks_ingress);
+}
+
+/* caller must hold rcu_read_lock */
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
+	struct nf_hook_state state;
+	int ret;
+
+	/* Must recheck the ingress hook head, in the event it became NULL
+	 * after the check in nf_hook_ingress_active evaluated to true.
+	 */
+	if (unlikely(!e))
+		return 0;
+
+	nf_hook_state_init(&state, NF_NETDEV_INGRESS,
+			   NFPROTO_NETDEV, skb->dev, NULL, NULL,
+			   dev_net(skb->dev), NULL);
+	ret = nf_hook_slow(skb, &state, e, 0);
+	if (ret == 0)
+		return -1;
+
+	return ret;
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev)
+{
+	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
+}
+#else /* CONFIG_NETFILTER_INGRESS */
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev) {}
+#endif /* CONFIG_NETFILTER_INGRESS */
+#endif /* _NETFILTER_INGRESS_H_ */
diff --git a/net/core/dev.c b/net/core/dev.c
index 16ab09b6a7f8..0fd3c6490e06 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -140,7 +140,7 @@
 #include <linux/if_macvlan.h>
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
-#include <linux/netfilter_ingress.h>
+#include <linux/netfilter_netdev.h>
 #include <linux/crash_dump.h>
 #include <linux/sctp.h>
 #include <net/udp_tunnel.h>
-- 
cgit v1.2.3


From 17d20784223d52bf1671f984c9e8d5d9b8ea171b Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 8 Oct 2021 22:06:02 +0200
Subject: netfilter: Generalize ingress hook include file

Prepare for addition of a netfilter egress hook by generalizing the
ingress hook include file.

No functional change intended.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_netdev.h | 20 +++++++++++---------
 net/core/dev.c                   |  2 +-
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
index a13774be2eb5..5812b0fb0278 100644
--- a/include/linux/netfilter_netdev.h
+++ b/include/linux/netfilter_netdev.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NETFILTER_INGRESS_H_
-#define _NETFILTER_INGRESS_H_
+#ifndef _NETFILTER_NETDEV_H_
+#define _NETFILTER_NETDEV_H_
 
 #include <linux/netfilter.h>
 #include <linux/netdevice.h>
@@ -38,10 +38,6 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 	return ret;
 }
 
-static inline void nf_hook_ingress_init(struct net_device *dev)
-{
-	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
-}
 #else /* CONFIG_NETFILTER_INGRESS */
 static inline int nf_hook_ingress_active(struct sk_buff *skb)
 {
@@ -52,7 +48,13 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 {
 	return 0;
 }
-
-static inline void nf_hook_ingress_init(struct net_device *dev) {}
 #endif /* CONFIG_NETFILTER_INGRESS */
-#endif /* _NETFILTER_INGRESS_H_ */
+
+static inline void nf_hook_netdev_init(struct net_device *dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
+#endif
+}
+
+#endif /* _NETFILTER_NETDEV_H_ */
diff --git a/net/core/dev.c b/net/core/dev.c
index 0fd3c6490e06..e4c683029c61 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10867,7 +10867,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	if (!dev->ethtool_ops)
 		dev->ethtool_ops = &default_ethtool_ops;
 
-	nf_hook_ingress_init(dev);
+	nf_hook_netdev_init(dev);
 
 	return dev;
 
-- 
cgit v1.2.3


From 42df6e1d221dddc0f2acf2be37e68d553ad65f96 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Fri, 8 Oct 2021 22:06:03 +0200
Subject: netfilter: Introduce egress hook
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Support classifying packets with netfilter on egress to satisfy user
requirements such as:
* outbound security policies for containers (Laura)
* filtering and mangling intra-node Direct Server Return (DSR) traffic
  on a load balancer (Laura)
* filtering locally generated traffic coming in through AF_PACKET,
  such as local ARP traffic generated for clustering purposes or DHCP
  (Laura; the AF_PACKET plumbing is contained in a follow-up commit)
* L2 filtering from ingress and egress for AVB (Audio Video Bridging)
  and gPTP with nftables (Pablo)
* in the future: in-kernel NAT64/NAT46 (Pablo)

The egress hook introduced herein complements the ingress hook added by
commit e687ad60af09 ("netfilter: add netfilter ingress hook after
handle_ing() under unique static key").  A patch for nftables to hook up
egress rules from user space has been submitted separately, so users may
immediately take advantage of the feature.

Alternatively or in addition to netfilter, packets can be classified
with traffic control (tc).  On ingress, packets are classified first by
tc, then by netfilter.  On egress, the order is reversed for symmetry.
Conceptually, tc and netfilter can be thought of as layers, with
netfilter layered above tc.

Traffic control is capable of redirecting packets to another interface
(man 8 tc-mirred).  E.g., an ingress packet may be redirected from the
host namespace to a container via a veth connection:
tc ingress (host) -> tc egress (veth host) -> tc ingress (veth container)

In this case, netfilter egress classifying is not performed when leaving
the host namespace!  That's because the packet is still on the tc layer.
If tc redirects the packet to a physical interface in the host namespace
such that it leaves the system, the packet is never subjected to
netfilter egress classifying.  That is only logical since it hasn't
passed through netfilter ingress classifying either.

Packets can alternatively be redirected at the netfilter layer using
nft fwd.  Such a packet *is* subjected to netfilter egress classifying
since it has reached the netfilter layer.

Internally, the skb->nf_skip_egress flag controls whether netfilter is
invoked on egress by __dev_queue_xmit().  Because __dev_queue_xmit() may
be called recursively by tunnel drivers such as vxlan, the flag is
reverted to false after sch_handle_egress().  This ensures that
netfilter is applied both on the overlay and underlying network.

Interaction between tc and netfilter is possible by setting and querying
skb->mark.

If netfilter egress classifying is not enabled on any interface, it is
patched out of the data path by way of a static_key and doesn't make a
performance difference that is discernible from noise:

Before:             1537 1538 1538 1537 1538 1537 Mb/sec
After:              1536 1534 1539 1539 1539 1540 Mb/sec
Before + tc accept: 1418 1418 1418 1419 1419 1418 Mb/sec
After  + tc accept: 1419 1424 1418 1419 1422 1420 Mb/sec
Before + tc drop:   1620 1619 1619 1619 1620 1620 Mb/sec
After  + tc drop:   1616 1624 1625 1624 1622 1619 Mb/sec

When netfilter egress classifying is enabled on at least one interface,
a minimal performance penalty is incurred for every egress packet, even
if the interface it's transmitted over doesn't have any netfilter egress
rules configured.  That is caused by checking dev->nf_hooks_egress
against NULL.

Measurements were performed on a Core i7-3615QM.  Commands to reproduce:
ip link add dev foo type dummy
ip link set dev foo up
modprobe pktgen
echo "add_device foo" > /proc/net/pktgen/kpktgend_3
samples/pktgen/pktgen_bench_xmit_mode_queue_xmit.sh -i foo -n 400000000 -m "11:11:11:11:11:11" -d 1.1.1.1

Accept all traffic with tc:
tc qdisc add dev foo clsact
tc filter add dev foo egress bpf da bytecode '1,6 0 0 0,'

Drop all traffic with tc:
tc qdisc add dev foo clsact
tc filter add dev foo egress bpf da bytecode '1,6 0 0 2,'

Apply this patch when measuring packet drops to avoid errors in dmesg:
https://lore.kernel.org/netdev/a73dda33-57f4-95d8-ea51-ed483abd6a7a@iogearbox.net/

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Cc: Laura García Liébana <nevola@gmail.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 drivers/net/ifb.c                |  3 ++
 include/linux/netdevice.h        |  4 ++
 include/linux/netfilter_netdev.h | 86 ++++++++++++++++++++++++++++++++++++++++
 include/linux/skbuff.h           |  4 ++
 include/uapi/linux/netfilter.h   |  1 +
 net/core/dev.c                   | 15 ++++++-
 net/netfilter/Kconfig            | 11 +++++
 net/netfilter/core.c             | 34 ++++++++++++++--
 net/netfilter/nfnetlink_hook.c   | 16 ++++++--
 net/netfilter/nft_chain_filter.c |  4 +-
 10 files changed, 168 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index e9258a9f3702..2c319dd27f29 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -31,6 +31,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/netfilter_netdev.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
 
@@ -75,8 +76,10 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
 	}
 
 	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
+		/* Skip tc and netfilter to prevent redirection loop. */
 		skb->redirected = 0;
 		skb->tc_skip_classify = 1;
+		nf_skip_egress(skb, true);
 
 		u64_stats_update_begin(&txp->tsync);
 		txp->tx_packets++;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d79163208dfd..e9a48068f306 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1861,6 +1861,7 @@ enum netdev_ml_priv_type {
  *	@xps_maps:	XXX: need comments on this one
  *	@miniq_egress:		clsact qdisc specific data for
  *				egress processing
+ *	@nf_hooks_egress:	netfilter hooks executed for egress packets
  *	@qdisc_hash:		qdisc hash table
  *	@watchdog_timeo:	Represents the timeout that is used by
  *				the watchdog (see dev_watchdog())
@@ -2161,6 +2162,9 @@ struct net_device {
 #ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc __rcu	*miniq_egress;
 #endif
+#ifdef CONFIG_NETFILTER_EGRESS
+	struct nf_hook_entries __rcu *nf_hooks_egress;
+#endif
 
 #ifdef CONFIG_NET_SCHED
 	DECLARE_HASHTABLE	(qdisc_hash, 4);
diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
index 5812b0fb0278..b71b57a83bb4 100644
--- a/include/linux/netfilter_netdev.h
+++ b/include/linux/netfilter_netdev.h
@@ -50,11 +50,97 @@ static inline int nf_hook_ingress(struct sk_buff *skb)
 }
 #endif /* CONFIG_NETFILTER_INGRESS */
 
+#ifdef CONFIG_NETFILTER_EGRESS
+static inline bool nf_hook_egress_active(void)
+{
+#ifdef CONFIG_JUMP_LABEL
+	if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS]))
+		return false;
+#endif
+	return true;
+}
+
+/**
+ * nf_hook_egress - classify packets before transmission
+ * @skb: packet to be classified
+ * @rc: result code which shall be returned by __dev_queue_xmit() on failure
+ * @dev: netdev whose egress hooks shall be applied to @skb
+ *
+ * Returns @skb on success or %NULL if the packet was consumed or filtered.
+ * Caller must hold rcu_read_lock.
+ *
+ * On ingress, packets are classified first by tc, then by netfilter.
+ * On egress, the order is reversed for symmetry.  Conceptually, tc and
+ * netfilter can be thought of as layers, with netfilter layered above tc:
+ * When tc redirects a packet to another interface, netfilter is not applied
+ * because the packet is on the tc layer.
+ *
+ * The nf_skip_egress flag controls whether netfilter is applied on egress.
+ * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the
+ * packet passes through tc and netfilter.  Because __dev_queue_xmit() may be
+ * called recursively by tunnel drivers such as vxlan, the flag is reverted to
+ * false after sch_handle_egress().  This ensures that netfilter is applied
+ * both on the overlay and underlying network.
+ */
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+					     struct net_device *dev)
+{
+	struct nf_hook_entries *e;
+	struct nf_hook_state state;
+	int ret;
+
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+	if (skb->nf_skip_egress)
+		return skb;
+#endif
+
+	e = rcu_dereference(dev->nf_hooks_egress);
+	if (!e)
+		return skb;
+
+	nf_hook_state_init(&state, NF_NETDEV_EGRESS,
+			   NFPROTO_NETDEV, dev, NULL, NULL,
+			   dev_net(dev), NULL);
+	ret = nf_hook_slow(skb, &state, e, 0);
+
+	if (ret == 1) {
+		return skb;
+	} else if (ret < 0) {
+		*rc = NET_XMIT_DROP;
+		return NULL;
+	} else { /* ret == 0 */
+		*rc = NET_XMIT_SUCCESS;
+		return NULL;
+	}
+}
+#else /* CONFIG_NETFILTER_EGRESS */
+static inline bool nf_hook_egress_active(void)
+{
+	return false;
+}
+
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+					     struct net_device *dev)
+{
+	return skb;
+}
+#endif /* CONFIG_NETFILTER_EGRESS */
+
+static inline void nf_skip_egress(struct sk_buff *skb, bool skip)
+{
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+	skb->nf_skip_egress = skip;
+#endif
+}
+
 static inline void nf_hook_netdev_init(struct net_device *dev)
 {
 #ifdef CONFIG_NETFILTER_INGRESS
 	RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
 #endif
+#ifdef CONFIG_NETFILTER_EGRESS
+	RCU_INIT_POINTER(dev->nf_hooks_egress, NULL);
+#endif
 }
 
 #endif /* _NETFILTER_NETDEV_H_ */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 841e2f0f5240..cb96f1e6460c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -652,6 +652,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@tc_at_ingress: used within tc_classify to distinguish in/egress
  *	@redirected: packet was redirected by packet classifier
  *	@from_ingress: packet was redirected from the ingress path
+ *	@nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
  *	@peeked: this packet has been seen already, so stats have been
  *		done for it, don't do them again
  *	@nf_trace: netfilter packet trace flag
@@ -868,6 +869,9 @@ struct sk_buff {
 #ifdef CONFIG_NET_REDIRECT
 	__u8			from_ingress:1;
 #endif
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+	__u8			nf_skip_egress:1;
+#endif
 #ifdef CONFIG_TLS_DEVICE
 	__u8			decrypted:1;
 #endif
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index ef9a44286e23..53411ccc69db 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -51,6 +51,7 @@ enum nf_inet_hooks {
 
 enum nf_dev_hooks {
 	NF_NETDEV_INGRESS,
+	NF_NETDEV_EGRESS,
 	NF_NETDEV_NUMHOOKS
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index e4c683029c61..09d74798b440 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3920,6 +3920,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
 static struct sk_buff *
 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
+#ifdef CONFIG_NET_CLS_ACT
 	struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
 	struct tcf_result cl_res;
 
@@ -3955,6 +3956,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 	default:
 		break;
 	}
+#endif /* CONFIG_NET_CLS_ACT */
 
 	return skb;
 }
@@ -4148,13 +4150,20 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	qdisc_pkt_len_init(skb);
 #ifdef CONFIG_NET_CLS_ACT
 	skb->tc_at_ingress = 0;
-# ifdef CONFIG_NET_EGRESS
+#endif
+#ifdef CONFIG_NET_EGRESS
 	if (static_branch_unlikely(&egress_needed_key)) {
+		if (nf_hook_egress_active()) {
+			skb = nf_hook_egress(skb, &rc, dev);
+			if (!skb)
+				goto out;
+		}
+		nf_skip_egress(skb, true);
 		skb = sch_handle_egress(skb, &rc, dev);
 		if (!skb)
 			goto out;
+		nf_skip_egress(skb, false);
 	}
-# endif
 #endif
 	/* If device/qdisc don't need skb->dst, release it right now while
 	 * its hot in this cpu cache.
@@ -5296,6 +5305,7 @@ skip_taps:
 	if (static_branch_unlikely(&ingress_needed_key)) {
 		bool another = false;
 
+		nf_skip_egress(skb, true);
 		skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
 					 &another);
 		if (another)
@@ -5303,6 +5313,7 @@ skip_taps:
 		if (!skb)
 			goto out;
 
+		nf_skip_egress(skb, false);
 		if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
 			goto out;
 	}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54395266339d..49c9fae9c62c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -10,6 +10,17 @@ config NETFILTER_INGRESS
 	  This allows you to classify packets from ingress using the Netfilter
 	  infrastructure.
 
+config NETFILTER_EGRESS
+	bool "Netfilter egress support"
+	default y
+	select NET_EGRESS
+	help
+	  This allows you to classify packets before transmission using the
+	  Netfilter infrastructure.
+
+config NETFILTER_SKIP_EGRESS
+	def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
+
 config NETFILTER_NETLINK
 	tristate
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 63d032191e62..3a32a813fcde 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -316,6 +316,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
 		if (dev && dev_net(dev) == net)
 			return &dev->nf_hooks_ingress;
 	}
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+	if (hooknum == NF_NETDEV_EGRESS) {
+		if (dev && dev_net(dev) == net)
+			return &dev->nf_hooks_egress;
+	}
 #endif
 	WARN_ON_ONCE(1);
 	return NULL;
@@ -344,6 +350,11 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
 	return false;
 }
 
+static inline bool nf_egress_hook(const struct nf_hook_ops *reg, int pf)
+{
+	return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS;
+}
+
 static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
 {
 #ifdef CONFIG_JUMP_LABEL
@@ -383,9 +394,18 @@ static int __nf_register_net_hook(struct net *net, int pf,
 
 	switch (pf) {
 	case NFPROTO_NETDEV:
-		err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS);
-		if (err < 0)
-			return err;
+#ifndef CONFIG_NETFILTER_INGRESS
+		if (reg->hooknum == NF_NETDEV_INGRESS)
+			return -EOPNOTSUPP;
+#endif
+#ifndef CONFIG_NETFILTER_EGRESS
+		if (reg->hooknum == NF_NETDEV_EGRESS)
+			return -EOPNOTSUPP;
+#endif
+		if ((reg->hooknum != NF_NETDEV_INGRESS &&
+		     reg->hooknum != NF_NETDEV_EGRESS) ||
+		    !reg->dev || dev_net(reg->dev) != net)
+			return -EINVAL;
 		break;
 	case NFPROTO_INET:
 		if (reg->hooknum != NF_INET_INGRESS)
@@ -417,6 +437,10 @@ static int __nf_register_net_hook(struct net *net, int pf,
 #ifdef CONFIG_NETFILTER_INGRESS
 	if (nf_ingress_hook(reg, pf))
 		net_inc_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+	if (nf_egress_hook(reg, pf))
+		net_inc_egress_queue();
 #endif
 	nf_static_key_inc(reg, pf);
 
@@ -474,6 +498,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
 #ifdef CONFIG_NETFILTER_INGRESS
 		if (nf_ingress_hook(reg, pf))
 			net_dec_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+		if (nf_egress_hook(reg, pf))
+			net_dec_egress_queue();
 #endif
 		nf_static_key_dec(reg, pf);
 	} else {
diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c
index f554e2ea32ee..d5c719c9e36c 100644
--- a/net/netfilter/nfnetlink_hook.c
+++ b/net/netfilter/nfnetlink_hook.c
@@ -185,7 +185,7 @@ static const struct nf_hook_entries *
 nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
 {
 	const struct nf_hook_entries *hook_head = NULL;
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
 	struct net_device *netdev;
 #endif
 
@@ -221,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
 		hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
 		break;
 #endif
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
 	case NFPROTO_NETDEV:
-		if (hook != NF_NETDEV_INGRESS)
+		if (hook >= NF_NETDEV_NUMHOOKS)
 			return ERR_PTR(-EOPNOTSUPP);
 
 		if (!dev)
@@ -233,7 +233,15 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
 		if (!netdev)
 			return ERR_PTR(-ENODEV);
 
-		return rcu_dereference(netdev->nf_hooks_ingress);
+#ifdef CONFIG_NETFILTER_INGRESS
+		if (hook == NF_NETDEV_INGRESS)
+			return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+		if (hook == NF_NETDEV_EGRESS)
+			return rcu_dereference(netdev->nf_hooks_egress);
+#endif
+		fallthrough;
 #endif
 	default:
 		return ERR_PTR(-EPROTONOSUPPORT);
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 5b02408a920b..680fe557686e 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -310,9 +310,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = {
 	.name		= "filter",
 	.type		= NFT_CHAIN_T_DEFAULT,
 	.family		= NFPROTO_NETDEV,
-	.hook_mask	= (1 << NF_NETDEV_INGRESS),
+	.hook_mask	= (1 << NF_NETDEV_INGRESS) |
+			  (1 << NF_NETDEV_EGRESS),
 	.hooks		= {
 		[NF_NETDEV_INGRESS]	= nft_do_chain_netdev,
+		[NF_NETDEV_EGRESS]	= nft_do_chain_netdev,
 	},
 };
 
-- 
cgit v1.2.3


From 8844e01062ddd8196c4550df9803cc1835d123c2 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Oct 2021 17:15:10 +0200
Subject: netfilter: iptables: allow use of ipt_do_table as hookfn

This is possible now that the xt_table structure is passed in via *priv.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv4/ip_tables.h |  6 +++---
 net/ipv4/netfilter/ip_tables.c           |  7 ++++---
 net/ipv4/netfilter/iptable_filter.c      |  9 +--------
 net/ipv4/netfilter/iptable_mangle.c      |  8 ++++----
 net/ipv4/netfilter/iptable_nat.c         | 15 ++++-----------
 net/ipv4/netfilter/iptable_raw.c         | 10 +---------
 net/ipv4/netfilter/iptable_security.c    |  9 +--------
 7 files changed, 18 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h
index 8d09bfe850dc..132b0e4a6d4d 100644
--- a/include/linux/netfilter_ipv4/ip_tables.h
+++ b/include/linux/netfilter_ipv4/ip_tables.h
@@ -63,9 +63,9 @@ struct ipt_error {
 }
 
 extern void *ipt_alloc_initial_table(const struct xt_table *);
-extern unsigned int ipt_do_table(struct sk_buff *skb,
-				 const struct nf_hook_state *state,
-				 struct xt_table *table);
+extern unsigned int ipt_do_table(void *priv,
+				 struct sk_buff *skb,
+				 const struct nf_hook_state *state);
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 13acb687c19a..2ed7c58b471a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -222,10 +222,11 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ipt_do_table(struct sk_buff *skb,
-	     const struct nf_hook_state *state,
-	     struct xt_table *table)
+ipt_do_table(void *priv,
+	     struct sk_buff *skb,
+	     const struct nf_hook_state *state)
 {
+	const struct xt_table *table = priv;
 	unsigned int hook = state->hook;
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 0eb0e2ab9bfc..b9062f4552ac 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -28,13 +28,6 @@ static const struct xt_table packet_filter = {
 	.priority	= NF_IP_PRI_FILTER,
 };
 
-static unsigned int
-iptable_filter_hook(void *priv, struct sk_buff *skb,
-		    const struct nf_hook_state *state)
-{
-	return ipt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
@@ -90,7 +83,7 @@ static int __init iptable_filter_init(void)
 	if (ret < 0)
 		return ret;
 
-	filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
+	filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
 	if (IS_ERR(filter_ops)) {
 		xt_unregister_template(&packet_filter);
 		return PTR_ERR(filter_ops);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 40417a3f930b..3abb430af9e6 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -34,7 +34,7 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
 {
 	unsigned int ret;
 	const struct iphdr *iph;
@@ -50,7 +50,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pri
 	daddr = iph->daddr;
 	tos = iph->tos;
 
-	ret = ipt_do_table(skb, state, priv);
+	ret = ipt_do_table(priv, skb, state);
 	/* Reroute for ANY change. */
 	if (ret != NF_DROP && ret != NF_STOLEN) {
 		iph = ip_hdr(skb);
@@ -75,8 +75,8 @@ iptable_mangle_hook(void *priv,
 		     const struct nf_hook_state *state)
 {
 	if (state->hook == NF_INET_LOCAL_OUT)
-		return ipt_mangle_out(skb, state, priv);
-	return ipt_do_table(skb, state, priv);
+		return ipt_mangle_out(priv, skb, state);
+	return ipt_do_table(priv, skb, state);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 45d7e072e6a5..56f6ecc43451 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -29,34 +29,27 @@ static const struct xt_table nf_nat_ipv4_table = {
 	.af		= NFPROTO_IPV4,
 };
 
-static unsigned int iptable_nat_do_chain(void *priv,
-					 struct sk_buff *skb,
-					 const struct nf_hook_state *state)
-{
-	return ipt_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
 	{
-		.hook		= iptable_nat_do_chain,
+		.hook		= ipt_do_table,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP_PRI_NAT_DST,
 	},
 	{
-		.hook		= iptable_nat_do_chain,
+		.hook		= ipt_do_table,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP_PRI_NAT_SRC,
 	},
 	{
-		.hook		= iptable_nat_do_chain,
+		.hook		= ipt_do_table,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP_PRI_NAT_DST,
 	},
 	{
-		.hook		= iptable_nat_do_chain,
+		.hook		= ipt_do_table,
 		.pf		= NFPROTO_IPV4,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP_PRI_NAT_SRC,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 8265c6765705..ca5e5b21587c 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -32,14 +32,6 @@ static const struct xt_table packet_raw_before_defrag = {
 	.priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
 };
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-iptable_raw_hook(void *priv, struct sk_buff *skb,
-		 const struct nf_hook_state *state)
-{
-	return ipt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int iptable_raw_table_init(struct net *net)
@@ -90,7 +82,7 @@ static int __init iptable_raw_init(void)
 	if (ret < 0)
 		return ret;
 
-	rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
+	rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table);
 	if (IS_ERR(rawtable_ops)) {
 		xt_unregister_template(table);
 		return PTR_ERR(rawtable_ops);
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index f519162a2fa5..d885443cb267 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -33,13 +33,6 @@ static const struct xt_table security_table = {
 	.priority	= NF_IP_PRI_SECURITY,
 };
 
-static unsigned int
-iptable_security_hook(void *priv, struct sk_buff *skb,
-		      const struct nf_hook_state *state)
-{
-	return ipt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int iptable_security_table_init(struct net *net)
@@ -78,7 +71,7 @@ static int __init iptable_security_init(void)
 	if (ret < 0)
 		return ret;
 
-	sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
+	sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table);
 	if (IS_ERR(sectbl_ops)) {
 		xt_unregister_template(&security_table);
 		return PTR_ERR(sectbl_ops);
-- 
cgit v1.2.3


From e8d225b6002673366abc2e40e30c991bdc8d62ca Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Oct 2021 17:15:12 +0200
Subject: netfilter: arp_tables: allow use of arpt_do_table as hookfn

This is possible now that the xt_table structure is passed in via *priv.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_arp/arp_tables.h |  5 ++---
 net/ipv4/netfilter/arp_tables.c          |  7 ++++---
 net/ipv4/netfilter/arptable_filter.c     | 10 +---------
 3 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h
index 4f9a4b3c5892..a40aaf645fa4 100644
--- a/include/linux/netfilter_arp/arp_tables.h
+++ b/include/linux/netfilter_arp/arp_tables.h
@@ -54,9 +54,8 @@ int arpt_register_table(struct net *net, const struct xt_table *table,
 			const struct nf_hook_ops *ops);
 void arpt_unregister_table(struct net *net, const char *name);
 void arpt_unregister_table_pre_exit(struct net *net, const char *name);
-extern unsigned int arpt_do_table(struct sk_buff *skb,
-				  const struct nf_hook_state *state,
-				  struct xt_table *table);
+extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb,
+				  const struct nf_hook_state *state);
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c53f14b94356..ffc0cab7cf18 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -179,10 +179,11 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
 	return (void *)entry + entry->next_offset;
 }
 
-unsigned int arpt_do_table(struct sk_buff *skb,
-			   const struct nf_hook_state *state,
-			   struct xt_table *table)
+unsigned int arpt_do_table(void *priv,
+			   struct sk_buff *skb,
+			   const struct nf_hook_state *state)
 {
+	const struct xt_table *table = priv;
 	unsigned int hook = state->hook;
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	unsigned int verdict = NF_DROP;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 3de78416ec76..78cd5ee24448 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -26,14 +26,6 @@ static const struct xt_table packet_filter = {
 	.priority	= NF_IP_PRI_FILTER,
 };
 
-/* The work comes in here from netfilter.c */
-static unsigned int
-arptable_filter_hook(void *priv, struct sk_buff *skb,
-		     const struct nf_hook_state *state)
-{
-	return arpt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *arpfilter_ops __read_mostly;
 
 static int arptable_filter_table_init(struct net *net)
@@ -72,7 +64,7 @@ static int __init arptable_filter_init(void)
 	if (ret < 0)
 		return ret;
 
-	arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
+	arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table);
 	if (IS_ERR(arpfilter_ops)) {
 		xt_unregister_template(&packet_filter);
 		return PTR_ERR(arpfilter_ops);
-- 
cgit v1.2.3


From 44b5990e7b463240e4c116c9e8670c67dad960cc Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Oct 2021 17:15:13 +0200
Subject: netfilter: ip6tables: allow use of ip6t_do_table as hookfn

This is possible now that the xt_table structure is passed via *priv.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_ipv6/ip6_tables.h |  5 ++---
 net/ipv6/netfilter/ip6_tables.c           |  6 +++---
 net/ipv6/netfilter/ip6table_filter.c      | 10 +---------
 net/ipv6/netfilter/ip6table_mangle.c      |  8 ++++----
 net/ipv6/netfilter/ip6table_nat.c         | 15 ++++-----------
 net/ipv6/netfilter/ip6table_raw.c         | 10 +---------
 net/ipv6/netfilter/ip6table_security.c    |  9 +--------
 7 files changed, 16 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 79e73fd7d965..8b8885a73c76 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -29,9 +29,8 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
 			const struct nf_hook_ops *ops);
 void ip6t_unregister_table_pre_exit(struct net *net, const char *name);
 void ip6t_unregister_table_exit(struct net *net, const char *name);
-extern unsigned int ip6t_do_table(struct sk_buff *skb,
-				  const struct nf_hook_state *state,
-				  struct xt_table *table);
+extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb,
+				  const struct nf_hook_state *state);
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index a579ea14a69b..2d816277f2c5 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -247,10 +247,10 @@ ip6t_next_entry(const struct ip6t_entry *entry)
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ip6t_do_table(struct sk_buff *skb,
-	      const struct nf_hook_state *state,
-	      struct xt_table *table)
+ip6t_do_table(void *priv, struct sk_buff *skb,
+	      const struct nf_hook_state *state)
 {
+	const struct xt_table *table = priv;
 	unsigned int hook = state->hook;
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 727ee8097012..df785ebda0ca 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -27,14 +27,6 @@ static const struct xt_table packet_filter = {
 	.priority	= NF_IP6_PRI_FILTER,
 };
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_filter_hook(void *priv, struct sk_buff *skb,
-		     const struct nf_hook_state *state)
-{
-	return ip6t_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
@@ -90,7 +82,7 @@ static int __init ip6table_filter_init(void)
 	if (ret < 0)
 		return ret;
 
-	filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
+	filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table);
 	if (IS_ERR(filter_ops)) {
 		xt_unregister_template(&packet_filter);
 		return PTR_ERR(filter_ops);
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 9b518ce37d6a..a88b2ce4a3cb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -29,7 +29,7 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
 {
 	unsigned int ret;
 	struct in6_addr saddr, daddr;
@@ -46,7 +46,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pr
 	/* flowlabel and prio (includes version, which shouldn't change either */
 	flowlabel = *((u_int32_t *)ipv6_hdr(skb));
 
-	ret = ip6t_do_table(skb, state, priv);
+	ret = ip6t_do_table(priv, skb, state);
 
 	if (ret != NF_DROP && ret != NF_STOLEN &&
 	    (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -68,8 +68,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
 		     const struct nf_hook_state *state)
 {
 	if (state->hook == NF_INET_LOCAL_OUT)
-		return ip6t_mangle_out(skb, state, priv);
-	return ip6t_do_table(skb, state, priv);
+		return ip6t_mangle_out(priv, skb, state);
+	return ip6t_do_table(priv, skb, state);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 921c1723a01e..bf3cb3a13600 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -31,34 +31,27 @@ static const struct xt_table nf_nat_ipv6_table = {
 	.af		= NFPROTO_IPV6,
 };
 
-static unsigned int ip6table_nat_do_chain(void *priv,
-					  struct sk_buff *skb,
-					  const struct nf_hook_state *state)
-{
-	return ip6t_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
 	{
-		.hook		= ip6table_nat_do_chain,
+		.hook		= ip6t_do_table,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_PRE_ROUTING,
 		.priority	= NF_IP6_PRI_NAT_DST,
 	},
 	{
-		.hook		= ip6table_nat_do_chain,
+		.hook		= ip6t_do_table,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_POST_ROUTING,
 		.priority	= NF_IP6_PRI_NAT_SRC,
 	},
 	{
-		.hook		= ip6table_nat_do_chain,
+		.hook		= ip6t_do_table,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_OUT,
 		.priority	= NF_IP6_PRI_NAT_DST,
 	},
 	{
-		.hook		= ip6table_nat_do_chain,
+		.hook		= ip6t_do_table,
 		.pf		= NFPROTO_IPV6,
 		.hooknum	= NF_INET_LOCAL_IN,
 		.priority	= NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 4f2a04af71d3..08861d5d1f4d 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -31,14 +31,6 @@ static const struct xt_table packet_raw_before_defrag = {
 	.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
 };
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_raw_hook(void *priv, struct sk_buff *skb,
-		  const struct nf_hook_state *state)
-{
-	return ip6t_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int ip6table_raw_table_init(struct net *net)
@@ -88,7 +80,7 @@ static int __init ip6table_raw_init(void)
 		return ret;
 
 	/* Register hooks */
-	rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
+	rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table);
 	if (IS_ERR(rawtable_ops)) {
 		xt_unregister_template(table);
 		return PTR_ERR(rawtable_ops);
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 931674034d8b..4df14a9bae78 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -32,13 +32,6 @@ static const struct xt_table security_table = {
 	.priority	= NF_IP6_PRI_SECURITY,
 };
 
-static unsigned int
-ip6table_security_hook(void *priv, struct sk_buff *skb,
-		       const struct nf_hook_state *state)
-{
-	return ip6t_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int ip6table_security_table_init(struct net *net)
@@ -77,7 +70,7 @@ static int __init ip6table_security_init(void)
 	if (ret < 0)
 		return ret;
 
-	sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
+	sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table);
 	if (IS_ERR(sectbl_ops)) {
 		xt_unregister_template(&security_table);
 		return PTR_ERR(sectbl_ops);
-- 
cgit v1.2.3


From f0d6764f7ddbf6d57302155b0f83eadb25ab0f0c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 Oct 2021 17:15:14 +0200
Subject: netfilter: ebtables: allow use of ebt_do_table as hookfn

This is possible now that the xt_table structure is passed via *priv.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h |  5 ++---
 net/bridge/netfilter/ebtable_broute.c     |  2 +-
 net/bridge/netfilter/ebtable_filter.c     | 13 +++----------
 net/bridge/netfilter/ebtable_nat.c        | 12 +++---------
 net/bridge/netfilter/ebtables.c           |  6 +++---
 5 files changed, 12 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 10a01978bc0d..a13296d6c7ce 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -112,9 +112,8 @@ extern int ebt_register_table(struct net *net,
 			      const struct nf_hook_ops *ops);
 extern void ebt_unregister_table(struct net *net, const char *tablename);
 void ebt_unregister_table_pre_exit(struct net *net, const char *tablename);
-extern unsigned int ebt_do_table(struct sk_buff *skb,
-				 const struct nf_hook_state *state,
-				 struct ebt_table *table);
+extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
+				 const struct nf_hook_state *state);
 
 /* True if the hook mask denotes that the rule is in a base chain,
  * used in the check() functions */
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index a7af4eaff17d..1a11064f9990 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -66,7 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
 			   NFPROTO_BRIDGE, s->in, NULL, NULL,
 			   s->net, NULL);
 
-	ret = ebt_do_table(skb, &state, priv);
+	ret = ebt_do_table(priv, skb, &state);
 	if (ret != NF_DROP)
 		return ret;
 
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index c0b121df4a9a..cb949436bc0e 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -58,28 +58,21 @@ static const struct ebt_table frame_filter = {
 	.me		= THIS_MODULE,
 };
 
-static unsigned int
-ebt_filter_hook(void *priv, struct sk_buff *skb,
-		const struct nf_hook_state *state)
-{
-	return ebt_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops ebt_ops_filter[] = {
 	{
-		.hook		= ebt_filter_hook,
+		.hook		= ebt_do_table,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_IN,
 		.priority	= NF_BR_PRI_FILTER_BRIDGED,
 	},
 	{
-		.hook		= ebt_filter_hook,
+		.hook		= ebt_do_table,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_FORWARD,
 		.priority	= NF_BR_PRI_FILTER_BRIDGED,
 	},
 	{
-		.hook		= ebt_filter_hook,
+		.hook		= ebt_do_table,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_OUT,
 		.priority	= NF_BR_PRI_FILTER_OTHER,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 4078151c224f..5ee0531ae506 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -58,27 +58,21 @@ static const struct ebt_table frame_nat = {
 	.me		= THIS_MODULE,
 };
 
-static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb,
-				 const struct nf_hook_state *state)
-{
-	return ebt_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops ebt_ops_nat[] = {
 	{
-		.hook		= ebt_nat_hook,
+		.hook		= ebt_do_table,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_LOCAL_OUT,
 		.priority	= NF_BR_PRI_NAT_DST_OTHER,
 	},
 	{
-		.hook		= ebt_nat_hook,
+		.hook		= ebt_do_table,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_POST_ROUTING,
 		.priority	= NF_BR_PRI_NAT_SRC,
 	},
 	{
-		.hook		= ebt_nat_hook,
+		.hook		= ebt_do_table,
 		.pf		= NFPROTO_BRIDGE,
 		.hooknum	= NF_BR_PRE_ROUTING,
 		.priority	= NF_BR_PRI_NAT_DST_BRIDGED,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 83d1798dfbb4..4a1508a1c566 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -189,10 +189,10 @@ ebt_get_target_c(const struct ebt_entry *e)
 }
 
 /* Do some firewalling */
-unsigned int ebt_do_table(struct sk_buff *skb,
-			  const struct nf_hook_state *state,
-			  struct ebt_table *table)
+unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
+			  const struct nf_hook_state *state)
 {
+	struct ebt_table *table = priv;
 	unsigned int hook = state->hook;
 	int i, nentries;
 	struct ebt_entry *point;
-- 
cgit v1.2.3


From 8b017fbe0bbb98dd71fb4850f6b9cc0e136a26b8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 14 Oct 2021 11:00:37 +0200
Subject: net: of: fix stub of_net helpers for CONFIG_NET=n

Moving the of_net code from drivers/of/ to net/core means we
no longer stub out the helpers when networking is disabled,
which leads to a randconfig build failure with at least one
ARM platform that calls this from non-networking code:

arm-linux-gnueabi-ld: arch/arm/mach-mvebu/kirkwood.o: in function `kirkwood_dt_eth_fixup':
kirkwood.c:(.init.text+0x54): undefined reference to `of_get_mac_address'

Restore the way this worked before by changing that #ifdef
check back to testing for both CONFIG_OF and CONFIG_NET.

Fixes: e330fb14590c ("of: net: move of_net under net/")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20211014090055.2058949-1-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/of_net.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of_net.h b/include/linux/of_net.h
index 0797e2edb8c2..0484b613ca64 100644
--- a/include/linux/of_net.h
+++ b/include/linux/of_net.h
@@ -8,7 +8,7 @@
 
 #include <linux/phy.h>
 
-#ifdef CONFIG_OF
+#if defined(CONFIG_OF) && defined(CONFIG_NET)
 #include <linux/of.h>
 
 struct net_device;
-- 
cgit v1.2.3


From 52f88693378a58094c538662ba652aff0253c4fe Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos@google.com>
Date: Tue, 12 Oct 2021 09:56:13 -0700
Subject: binder: use cred instead of task for selinux checks

Since binder was integrated with selinux, it has passed
'struct task_struct' associated with the binder_proc
to represent the source and target of transactions.
The conversion of task to SID was then done in the hook
implementations. It turns out that there are race conditions
which can result in an incorrect security context being used.

Fix by using the 'struct cred' saved during binder_open and pass
it to the selinux subsystem.

Cc: stable@vger.kernel.org # 5.14 (need backport for earlier stables)
Fixes: 79af73079d75 ("Add security hooks to binder and implement the hooks for SELinux.")
Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Todd Kjos <tkjos@google.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 drivers/android/binder.c      | 12 +++++------
 include/linux/lsm_hook_defs.h | 14 ++++++-------
 include/linux/lsm_hooks.h     | 14 ++++++-------
 include/linux/security.h      | 28 ++++++++++++-------------
 security/security.c           | 14 ++++++-------
 security/selinux/hooks.c      | 48 ++++++++++++-------------------------------
 6 files changed, 54 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 231cff9b3b75..1571e01cfa52 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2047,7 +2047,7 @@ static int binder_translate_binder(struct flat_binder_object *fp,
 		ret = -EINVAL;
 		goto done;
 	}
-	if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
+	if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
 		ret = -EPERM;
 		goto done;
 	}
@@ -2093,7 +2093,7 @@ static int binder_translate_handle(struct flat_binder_object *fp,
 				  proc->pid, thread->pid, fp->handle);
 		return -EINVAL;
 	}
-	if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) {
+	if (security_binder_transfer_binder(proc->cred, target_proc->cred)) {
 		ret = -EPERM;
 		goto done;
 	}
@@ -2181,7 +2181,7 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset,
 		ret = -EBADF;
 		goto err_fget;
 	}
-	ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file);
+	ret = security_binder_transfer_file(proc->cred, target_proc->cred, file);
 	if (ret < 0) {
 		ret = -EPERM;
 		goto err_security;
@@ -2586,8 +2586,8 @@ static void binder_transaction(struct binder_proc *proc,
 			return_error_line = __LINE__;
 			goto err_invalid_target_handle;
 		}
-		if (security_binder_transaction(proc->tsk,
-						target_proc->tsk) < 0) {
+		if (security_binder_transaction(proc->cred,
+						target_proc->cred) < 0) {
 			return_error = BR_FAILED_REPLY;
 			return_error_param = -EPERM;
 			return_error_line = __LINE__;
@@ -4555,7 +4555,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp,
 		ret = -EBUSY;
 		goto out;
 	}
-	ret = security_binder_set_context_mgr(proc->tsk);
+	ret = security_binder_set_context_mgr(proc->cred);
 	if (ret < 0)
 		goto out;
 	if (uid_valid(context->binder_context_mgr_uid)) {
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index b3c525353769..4c7ed0268ce3 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -26,13 +26,13 @@
  *   #undef LSM_HOOK
  * };
  */
-LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr)
-LSM_HOOK(int, 0, binder_transaction, struct task_struct *from,
-	 struct task_struct *to)
-LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from,
-	 struct task_struct *to)
-LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from,
-	 struct task_struct *to, struct file *file)
+LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr)
+LSM_HOOK(int, 0, binder_transaction, const struct cred *from,
+	 const struct cred *to)
+LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from,
+	 const struct cred *to)
+LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from,
+	 const struct cred *to, struct file *file)
 LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child,
 	 unsigned int mode)
 LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent)
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 0eb0ae95c4c4..528554e9b90c 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1313,22 +1313,22 @@
  *
  * @binder_set_context_mgr:
  *	Check whether @mgr is allowed to be the binder context manager.
- *	@mgr contains the task_struct for the task being registered.
+ *	@mgr contains the struct cred for the current binder process.
  *	Return 0 if permission is granted.
  * @binder_transaction:
  *	Check whether @from is allowed to invoke a binder transaction call
  *	to @to.
- *	@from contains the task_struct for the sending task.
- *	@to contains the task_struct for the receiving task.
+ *	@from contains the struct cred for the sending process.
+ *	@to contains the struct cred for the receiving process.
  * @binder_transfer_binder:
  *	Check whether @from is allowed to transfer a binder reference to @to.
- *	@from contains the task_struct for the sending task.
- *	@to contains the task_struct for the receiving task.
+ *	@from contains the struct cred for the sending process.
+ *	@to contains the struct cred for the receiving process.
  * @binder_transfer_file:
  *	Check whether @from is allowed to transfer @file to @to.
- *	@from contains the task_struct for the sending task.
+ *	@from contains the struct cred for the sending process.
  *	@file contains the struct file being transferred.
- *	@to contains the task_struct for the receiving task.
+ *	@to contains the struct cred for the receiving process.
  *
  * @ptrace_access_check:
  *	Check permission before allowing the current process to trace the
diff --git a/include/linux/security.h b/include/linux/security.h
index 7979b9629a42..9be72166e859 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -258,13 +258,13 @@ extern int security_init(void);
 extern int early_security_init(void);
 
 /* Security operations */
-int security_binder_set_context_mgr(struct task_struct *mgr);
-int security_binder_transaction(struct task_struct *from,
-				struct task_struct *to);
-int security_binder_transfer_binder(struct task_struct *from,
-				    struct task_struct *to);
-int security_binder_transfer_file(struct task_struct *from,
-				  struct task_struct *to, struct file *file);
+int security_binder_set_context_mgr(const struct cred *mgr);
+int security_binder_transaction(const struct cred *from,
+				const struct cred *to);
+int security_binder_transfer_binder(const struct cred *from,
+				    const struct cred *to);
+int security_binder_transfer_file(const struct cred *from,
+				  const struct cred *to, struct file *file);
 int security_ptrace_access_check(struct task_struct *child, unsigned int mode);
 int security_ptrace_traceme(struct task_struct *parent);
 int security_capget(struct task_struct *target,
@@ -508,25 +508,25 @@ static inline int early_security_init(void)
 	return 0;
 }
 
-static inline int security_binder_set_context_mgr(struct task_struct *mgr)
+static inline int security_binder_set_context_mgr(const struct cred *mgr)
 {
 	return 0;
 }
 
-static inline int security_binder_transaction(struct task_struct *from,
-					      struct task_struct *to)
+static inline int security_binder_transaction(const struct cred *from,
+					      const struct cred *to)
 {
 	return 0;
 }
 
-static inline int security_binder_transfer_binder(struct task_struct *from,
-						  struct task_struct *to)
+static inline int security_binder_transfer_binder(const struct cred *from,
+						  const struct cred *to)
 {
 	return 0;
 }
 
-static inline int security_binder_transfer_file(struct task_struct *from,
-						struct task_struct *to,
+static inline int security_binder_transfer_file(const struct cred *from,
+						const struct cred *to,
 						struct file *file)
 {
 	return 0;
diff --git a/security/security.c b/security/security.c
index 40518c340571..d9d53c1e466a 100644
--- a/security/security.c
+++ b/security/security.c
@@ -747,25 +747,25 @@ static int lsm_superblock_alloc(struct super_block *sb)
 
 /* Security operations */
 
-int security_binder_set_context_mgr(struct task_struct *mgr)
+int security_binder_set_context_mgr(const struct cred *mgr)
 {
 	return call_int_hook(binder_set_context_mgr, 0, mgr);
 }
 
-int security_binder_transaction(struct task_struct *from,
-				struct task_struct *to)
+int security_binder_transaction(const struct cred *from,
+				const struct cred *to)
 {
 	return call_int_hook(binder_transaction, 0, from, to);
 }
 
-int security_binder_transfer_binder(struct task_struct *from,
-				    struct task_struct *to)
+int security_binder_transfer_binder(const struct cred *from,
+				    const struct cred *to)
 {
 	return call_int_hook(binder_transfer_binder, 0, from, to);
 }
 
-int security_binder_transfer_file(struct task_struct *from,
-				  struct task_struct *to, struct file *file)
+int security_binder_transfer_file(const struct cred *from,
+				  const struct cred *to, struct file *file)
 {
 	return call_int_hook(binder_transfer_file, 0, from, to, file);
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 49d52d7a7714..b4a1bde20261 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -255,29 +255,6 @@ static inline u32 task_sid_obj(const struct task_struct *task)
 	return sid;
 }
 
-/*
- * get the security ID of a task for use with binder
- */
-static inline u32 task_sid_binder(const struct task_struct *task)
-{
-	/*
-	 * In many case where this function is used we should be using the
-	 * task's subjective SID, but we can't reliably access the subjective
-	 * creds of a task other than our own so we must use the objective
-	 * creds/SID, which are safe to access.  The downside is that if a task
-	 * is temporarily overriding it's creds it will not be reflected here;
-	 * however, it isn't clear that binder would handle that case well
-	 * anyway.
-	 *
-	 * If this ever changes and we can safely reference the subjective
-	 * creds/SID of another task, this function will make it easier to
-	 * identify the various places where we make use of the task SIDs in
-	 * the binder code.  It is also likely that we will need to adjust
-	 * the main drivers/android binder code as well.
-	 */
-	return task_sid_obj(task);
-}
-
 static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dentry);
 
 /*
@@ -2067,18 +2044,19 @@ static inline u32 open_file_to_av(struct file *file)
 
 /* Hook functions begin here. */
 
-static int selinux_binder_set_context_mgr(struct task_struct *mgr)
+static int selinux_binder_set_context_mgr(const struct cred *mgr)
 {
 	return avc_has_perm(&selinux_state,
-			    current_sid(), task_sid_binder(mgr), SECCLASS_BINDER,
+			    current_sid(), cred_sid(mgr), SECCLASS_BINDER,
 			    BINDER__SET_CONTEXT_MGR, NULL);
 }
 
-static int selinux_binder_transaction(struct task_struct *from,
-				      struct task_struct *to)
+static int selinux_binder_transaction(const struct cred *from,
+				      const struct cred *to)
 {
 	u32 mysid = current_sid();
-	u32 fromsid = task_sid_binder(from);
+	u32 fromsid = cred_sid(from);
+	u32 tosid = cred_sid(to);
 	int rc;
 
 	if (mysid != fromsid) {
@@ -2089,24 +2067,24 @@ static int selinux_binder_transaction(struct task_struct *from,
 			return rc;
 	}
 
-	return avc_has_perm(&selinux_state, fromsid, task_sid_binder(to),
+	return avc_has_perm(&selinux_state, fromsid, tosid,
 			    SECCLASS_BINDER, BINDER__CALL, NULL);
 }
 
-static int selinux_binder_transfer_binder(struct task_struct *from,
-					  struct task_struct *to)
+static int selinux_binder_transfer_binder(const struct cred *from,
+					  const struct cred *to)
 {
 	return avc_has_perm(&selinux_state,
-			    task_sid_binder(from), task_sid_binder(to),
+			    cred_sid(from), cred_sid(to),
 			    SECCLASS_BINDER, BINDER__TRANSFER,
 			    NULL);
 }
 
-static int selinux_binder_transfer_file(struct task_struct *from,
-					struct task_struct *to,
+static int selinux_binder_transfer_file(const struct cred *from,
+					const struct cred *to,
 					struct file *file)
 {
-	u32 sid = task_sid_binder(to);
+	u32 sid = cred_sid(to);
 	struct file_security_struct *fsec = selinux_file(file);
 	struct dentry *dentry = file->f_path.dentry;
 	struct inode_security_struct *isec;
-- 
cgit v1.2.3


From 4d5b5539742d2554591751b4248b0204d20dcc9d Mon Sep 17 00:00:00 2001
From: Todd Kjos <tkjos@google.com>
Date: Tue, 12 Oct 2021 09:56:14 -0700
Subject: binder: use cred instead of task for getsecid

Use the 'struct cred' saved at binder_open() to lookup
the security ID via security_cred_getsecid(). This
ensures that the security context that opened binder
is the one used to generate the secctx.

Cc: stable@vger.kernel.org # 5.4+
Fixes: ec74136ded79 ("binder: create node flag to request sender's security context")
Signed-off-by: Todd Kjos <tkjos@google.com>
Suggested-by: Stephen Smalley <stephen.smalley.work@gmail.com>
Reported-by: kernel test robot <lkp@intel.com>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 drivers/android/binder.c | 11 +----------
 include/linux/security.h |  5 +++++
 2 files changed, 6 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 1571e01cfa52..49b08c04fa09 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -2713,16 +2713,7 @@ static void binder_transaction(struct binder_proc *proc,
 		u32 secid;
 		size_t added_size;
 
-		/*
-		 * Arguably this should be the task's subjective LSM secid but
-		 * we can't reliably access the subjective creds of a task
-		 * other than our own so we must use the objective creds, which
-		 * are safe to access.  The downside is that if a task is
-		 * temporarily overriding it's creds it will not be reflected
-		 * here; however, it isn't clear that binder would handle that
-		 * case well anyway.
-		 */
-		security_task_getsecid_obj(proc->tsk, &secid);
+		security_cred_getsecid(proc->cred, &secid);
 		ret = security_secid_to_secctx(secid, &secctx, &secctx_sz);
 		if (ret) {
 			return_error = BR_FAILED_REPLY;
diff --git a/include/linux/security.h b/include/linux/security.h
index 9be72166e859..cc6d39358336 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1041,6 +1041,11 @@ static inline void security_transfer_creds(struct cred *new,
 {
 }
 
+static inline void security_cred_getsecid(const struct cred *c, u32 *secid)
+{
+	*secid = 0;
+}
+
 static inline int security_kernel_act_as(struct cred *cred, u32 secid)
 {
 	return 0;
-- 
cgit v1.2.3


From c072c4ef7ef09e1d6470c48cf52570487589b76a Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Thu, 14 Oct 2021 16:35:06 +0300
Subject: soc: samsung: exynos-chipid: Pass revision reg offsets

Old Exynos SoCs have both Product ID and Revision ID in one single
register, while new SoCs tend to have two separate registers for those
IDs. Implement handling of both cases by passing Revision ID register
offsets in driver data.

Previously existing macros for Exynos4210 (removed in this patch) were
incorrect:

    #define EXYNOS_SUBREV_MASK         (0xf << 4)
    #define EXYNOS_MAINREV_MASK        (0xf << 0)

Actual format of PRO_ID register in Exynos4210 (offset 0x0):

    [31:12] Product ID
      [9:8] Package information
      [7:4] Main Revision Number
      [3:0] Sub Revision Number

This patch doesn't change the behavior on existing platforms, so
'/sys/devices/soc0/revision' will show the same string as before.

Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Tested-by: Henrik Grimler <henrik@grimler.se>
Link: https://lore.kernel.org/r/20211014133508.1210-1-semen.protsenko@linaro.org
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
---
 drivers/soc/samsung/exynos-chipid.c       | 69 ++++++++++++++++++++++++++-----
 include/linux/soc/samsung/exynos-chipid.h |  6 +--
 2 files changed, 60 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/samsung/exynos-chipid.c b/drivers/soc/samsung/exynos-chipid.c
index b2627a3a127a..986978e83661 100644
--- a/drivers/soc/samsung/exynos-chipid.c
+++ b/drivers/soc/samsung/exynos-chipid.c
@@ -17,6 +17,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
@@ -25,6 +26,17 @@
 
 #include "exynos-asv.h"
 
+struct exynos_chipid_variant {
+	unsigned int rev_reg;		/* revision register offset */
+	unsigned int main_rev_shift;	/* main revision offset in rev_reg */
+	unsigned int sub_rev_shift;	/* sub revision offset in rev_reg */
+};
+
+struct exynos_chipid_info {
+	u32 product_id;
+	u32 revision;
+};
+
 static const struct exynos_soc_id {
 	const char *name;
 	unsigned int id;
@@ -50,31 +62,57 @@ static const char *product_id_to_soc_id(unsigned int product_id)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(soc_ids); i++)
-		if ((product_id & EXYNOS_MASK) == soc_ids[i].id)
+		if (product_id == soc_ids[i].id)
 			return soc_ids[i].name;
 	return NULL;
 }
 
+static int exynos_chipid_get_chipid_info(struct regmap *regmap,
+		const struct exynos_chipid_variant *data,
+		struct exynos_chipid_info *soc_info)
+{
+	int ret;
+	unsigned int val, main_rev, sub_rev;
+
+	ret = regmap_read(regmap, EXYNOS_CHIPID_REG_PRO_ID, &val);
+	if (ret < 0)
+		return ret;
+	soc_info->product_id = val & EXYNOS_MASK;
+
+	if (data->rev_reg != EXYNOS_CHIPID_REG_PRO_ID) {
+		ret = regmap_read(regmap, data->rev_reg, &val);
+		if (ret < 0)
+			return ret;
+	}
+	main_rev = (val >> data->main_rev_shift) & EXYNOS_REV_PART_MASK;
+	sub_rev = (val >> data->sub_rev_shift) & EXYNOS_REV_PART_MASK;
+	soc_info->revision = (main_rev << EXYNOS_REV_PART_SHIFT) | sub_rev;
+
+	return 0;
+}
+
 static int exynos_chipid_probe(struct platform_device *pdev)
 {
+	const struct exynos_chipid_variant *drv_data;
+	struct exynos_chipid_info soc_info;
 	struct soc_device_attribute *soc_dev_attr;
 	struct soc_device *soc_dev;
 	struct device_node *root;
 	struct regmap *regmap;
-	u32 product_id;
-	u32 revision;
 	int ret;
 
+	drv_data = of_device_get_match_data(&pdev->dev);
+	if (!drv_data)
+		return -EINVAL;
+
 	regmap = device_node_to_regmap(pdev->dev.of_node);
 	if (IS_ERR(regmap))
 		return PTR_ERR(regmap);
 
-	ret = regmap_read(regmap, EXYNOS_CHIPID_REG_PRO_ID, &product_id);
+	ret = exynos_chipid_get_chipid_info(regmap, drv_data, &soc_info);
 	if (ret < 0)
 		return ret;
 
-	revision = product_id & EXYNOS_REV_MASK;
-
 	soc_dev_attr = devm_kzalloc(&pdev->dev, sizeof(*soc_dev_attr),
 				    GFP_KERNEL);
 	if (!soc_dev_attr)
@@ -87,8 +125,8 @@ static int exynos_chipid_probe(struct platform_device *pdev)
 	of_node_put(root);
 
 	soc_dev_attr->revision = devm_kasprintf(&pdev->dev, GFP_KERNEL,
-						"%x", revision);
-	soc_dev_attr->soc_id = product_id_to_soc_id(product_id);
+						"%x", soc_info.revision);
+	soc_dev_attr->soc_id = product_id_to_soc_id(soc_info.product_id);
 	if (!soc_dev_attr->soc_id) {
 		pr_err("Unknown SoC\n");
 		return -ENODEV;
@@ -106,7 +144,7 @@ static int exynos_chipid_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, soc_dev);
 
 	dev_info(&pdev->dev, "Exynos: CPU[%s] PRO_ID[0x%x] REV[0x%x] Detected\n",
-		 soc_dev_attr->soc_id, product_id, revision);
+		 soc_dev_attr->soc_id, soc_info.product_id, soc_info.revision);
 
 	return 0;
 
@@ -125,9 +163,18 @@ static int exynos_chipid_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct exynos_chipid_variant exynos4210_chipid_drv_data = {
+	.rev_reg	= 0x0,
+	.main_rev_shift	= 4,
+	.sub_rev_shift	= 0,
+};
+
 static const struct of_device_id exynos_chipid_of_device_ids[] = {
-	{ .compatible = "samsung,exynos4210-chipid" },
-	{}
+	{
+		.compatible	= "samsung,exynos4210-chipid",
+		.data		= &exynos4210_chipid_drv_data,
+	},
+	{ }
 };
 MODULE_DEVICE_TABLE(of, exynos_chipid_of_device_ids);
 
diff --git a/include/linux/soc/samsung/exynos-chipid.h b/include/linux/soc/samsung/exynos-chipid.h
index 8bca6763f99c..62f0e2531068 100644
--- a/include/linux/soc/samsung/exynos-chipid.h
+++ b/include/linux/soc/samsung/exynos-chipid.h
@@ -9,10 +9,8 @@
 #define __LINUX_SOC_EXYNOS_CHIPID_H
 
 #define EXYNOS_CHIPID_REG_PRO_ID	0x00
-#define EXYNOS_SUBREV_MASK		(0xf << 4)
-#define EXYNOS_MAINREV_MASK		(0xf << 0)
-#define EXYNOS_REV_MASK			(EXYNOS_SUBREV_MASK | \
-					 EXYNOS_MAINREV_MASK)
+#define EXYNOS_REV_PART_MASK		0xf
+#define EXYNOS_REV_PART_SHIFT		4
 #define EXYNOS_MASK			0xfffff000
 
 #define EXYNOS_CHIPID_REG_PKG_ID	0x04
-- 
cgit v1.2.3


From 42a20f86dc19f9282d974df0ba4d226c865ab9dd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 29 Sep 2021 15:02:14 -0700
Subject: sched: Add wrapper for get_wchan() to keep task blocked

Having a stable wchan means the process must be blocked and for it to
stay that way while performing stack unwinding.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk> [arm]
Tested-by: Mark Rutland <mark.rutland@arm.com> [arm64]
Link: https://lkml.kernel.org/r/20211008111626.332092234@infradead.org
---
 arch/alpha/include/asm/processor.h      |  2 +-
 arch/alpha/kernel/process.c             |  5 ++---
 arch/arc/include/asm/processor.h        |  2 +-
 arch/arc/kernel/stacktrace.c            |  4 ++--
 arch/arm/include/asm/processor.h        |  2 +-
 arch/arm/kernel/process.c               |  4 +---
 arch/arm64/include/asm/processor.h      |  2 +-
 arch/arm64/kernel/process.c             |  4 +---
 arch/csky/include/asm/processor.h       |  2 +-
 arch/csky/kernel/stacktrace.c           |  5 ++---
 arch/h8300/include/asm/processor.h      |  2 +-
 arch/h8300/kernel/process.c             |  5 +----
 arch/hexagon/include/asm/processor.h    |  2 +-
 arch/hexagon/kernel/process.c           |  4 +---
 arch/ia64/include/asm/processor.h       |  2 +-
 arch/ia64/kernel/process.c              |  5 +----
 arch/m68k/include/asm/processor.h       |  2 +-
 arch/m68k/kernel/process.c              |  4 +---
 arch/microblaze/include/asm/processor.h |  2 +-
 arch/microblaze/kernel/process.c        |  2 +-
 arch/mips/include/asm/processor.h       |  2 +-
 arch/mips/kernel/process.c              |  8 +++-----
 arch/nds32/include/asm/processor.h      |  2 +-
 arch/nds32/kernel/process.c             |  7 +------
 arch/nios2/include/asm/processor.h      |  2 +-
 arch/nios2/kernel/process.c             |  5 +----
 arch/openrisc/include/asm/processor.h   |  2 +-
 arch/openrisc/kernel/process.c          |  2 +-
 arch/parisc/include/asm/processor.h     |  2 +-
 arch/parisc/kernel/process.c            |  5 +----
 arch/powerpc/include/asm/processor.h    |  2 +-
 arch/powerpc/kernel/process.c           |  9 +++------
 arch/riscv/include/asm/processor.h      |  2 +-
 arch/riscv/kernel/stacktrace.c          | 12 +++++-------
 arch/s390/include/asm/processor.h       |  2 +-
 arch/s390/kernel/process.c              |  4 ++--
 arch/sh/include/asm/processor_32.h      |  2 +-
 arch/sh/kernel/process_32.c             |  5 +----
 arch/sparc/include/asm/processor_32.h   |  2 +-
 arch/sparc/include/asm/processor_64.h   |  2 +-
 arch/sparc/kernel/process_32.c          |  5 +----
 arch/sparc/kernel/process_64.c          |  5 +----
 arch/um/include/asm/processor-generic.h |  2 +-
 arch/um/kernel/process.c                |  5 +----
 arch/x86/include/asm/processor.h        |  2 +-
 arch/x86/kernel/process.c               |  5 +----
 arch/xtensa/include/asm/processor.h     |  2 +-
 arch/xtensa/kernel/process.c            |  5 +----
 include/linux/sched.h                   |  1 +
 kernel/sched/core.c                     | 19 +++++++++++++++++++
 50 files changed, 80 insertions(+), 112 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 6100431da07a..090499c99c1c 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -42,7 +42,7 @@ extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
 struct task_struct;
 extern void release_thread(struct task_struct *);
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
 
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index a5123ea426ce..5f8527081da9 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -376,12 +376,11 @@ thread_saved_pc(struct task_struct *t)
 }
 
 unsigned long
-get_wchan(struct task_struct *p)
+__get_wchan(struct task_struct *p)
 {
 	unsigned long schedule_frame;
 	unsigned long pc;
-	if (!p || p == current || task_is_running(p))
-		return 0;
+
 	/*
 	 * This one depends on the frame size of schedule().  Do a
 	 * "disass schedule" in gdb to find the frame size.  Also, the
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index f28afcf5c6d1..54db9d7bb562 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -70,7 +70,7 @@ struct task_struct;
 extern void start_thread(struct pt_regs * regs, unsigned long pc,
 			 unsigned long usp);
 
-extern unsigned int get_wchan(struct task_struct *p);
+extern unsigned int __get_wchan(struct task_struct *p);
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index c376ff3147e7..5372dc04e784 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -15,7 +15,7 @@
  *      = specifics of data structs where trace is saved(CONFIG_STACKTRACE etc)
  *
  *  vineetg: March 2009
- *  -Implemented correct versions of thread_saved_pc() and get_wchan()
+ *  -Implemented correct versions of thread_saved_pc() and __get_wchan()
  *
  *  rajeshwarr: 2008
  *  -Initial implementation
@@ -248,7 +248,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
  * Of course just returning schedule( ) would be pointless so unwind until
  * the function is not in schedular code
  */
-unsigned int get_wchan(struct task_struct *tsk)
+unsigned int __get_wchan(struct task_struct *tsk)
 {
 	return arc_unwind_core(tsk, NULL, __get_first_nonsched, NULL);
 }
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 9e6b97286307..6af68edfa53a 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -84,7 +84,7 @@ struct task_struct;
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 0e2d3051741e..96f577e59595 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -276,13 +276,11 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 	return 0;
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	struct stackframe frame;
 	unsigned long stack_page;
 	int count = 0;
-	if (!p || p == current || task_is_running(p))
-		return 0;
 
 	frame.fp = thread_saved_fp(p);
 	frame.sp = thread_saved_sp(p);
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index ee2bdc1b9f5b..55ca034238ea 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -257,7 +257,7 @@ struct task_struct;
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 void update_sctlr_el1(u64 sctlr);
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 40adb8cdbf5a..aacf2f5559a8 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -528,13 +528,11 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	return last;
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	struct stackframe frame;
 	unsigned long stack_page, ret = 0;
 	int count = 0;
-	if (!p || p == current || task_is_running(p))
-		return 0;
 
 	stack_page = (unsigned long)try_get_task_stack(p);
 	if (!stack_page)
diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h
index 9e933021fe8e..817dd60ff152 100644
--- a/arch/csky/include/asm/processor.h
+++ b/arch/csky/include/asm/processor.h
@@ -81,7 +81,7 @@ static inline void release_thread(struct task_struct *dead_task)
 
 extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
 #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->usp)
diff --git a/arch/csky/kernel/stacktrace.c b/arch/csky/kernel/stacktrace.c
index 1b280ef08004..9f78f5d21511 100644
--- a/arch/csky/kernel/stacktrace.c
+++ b/arch/csky/kernel/stacktrace.c
@@ -111,12 +111,11 @@ static bool save_wchan(unsigned long pc, void *arg)
 	return false;
 }
 
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
 {
 	unsigned long pc = 0;
 
-	if (likely(task && task != current && !task_is_running(task)))
-		walk_stackframe(task, NULL, save_wchan, &pc);
+	walk_stackframe(task, NULL, save_wchan, &pc);
 	return pc;
 }
 
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index a060b41b2d31..141a23eb62b7 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -105,7 +105,7 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
 	({			 \
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 2ac27e4248a4..8833fa4f5d51 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -128,15 +128,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 	return 0;
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
 	unsigned long stack_page;
 	int count = 0;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	stack_page = (unsigned long)p;
 	fp = ((struct pt_regs *)p->thread.ksp)->er6;
 	do {
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index 9f0cc99420be..615f7e49968e 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -64,7 +64,7 @@ struct thread_struct {
 extern void release_thread(struct task_struct *dead_task);
 
 /* Get wait channel for task P.  */
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 /*  The following stuff is pretty HEXAGON specific.  */
 
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 6a6835fb4242..232dfd8956aa 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -130,13 +130,11 @@ void flush_thread(void)
  * is an identification of the point at which the scheduler
  * was invoked by a blocked thread.
  */
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
 	unsigned long stack_page;
 	int count = 0;
-	if (!p || p == current || task_is_running(p))
-		return 0;
 
 	stack_page = (unsigned long)task_stack_page(p);
 	fp = ((struct hexagon_switch_stack *)p->thread.switch_sp)->fp;
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 2d8bcdc27d7f..45365c2ef598 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -330,7 +330,7 @@ struct task_struct;
 #define release_thread(dead_task)
 
 /* Get wait channel for task P.  */
-extern unsigned long get_wchan (struct task_struct *p);
+extern unsigned long __get_wchan (struct task_struct *p);
 
 /* Return instruction pointer of blocked task TSK.  */
 #define KSTK_EIP(tsk)					\
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index e56d63f4abf9..834df24a88f1 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -523,15 +523,12 @@ exit_thread (struct task_struct *tsk)
 }
 
 unsigned long
-get_wchan (struct task_struct *p)
+__get_wchan (struct task_struct *p)
 {
 	struct unw_frame_info info;
 	unsigned long ip;
 	int count = 0;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	/*
 	 * Note: p may not be a blocked task (it could be current or
 	 * another process running on some other CPU.  Rather than
diff --git a/arch/m68k/include/asm/processor.h b/arch/m68k/include/asm/processor.h
index f4d82c619a5c..ffeda9aa526a 100644
--- a/arch/m68k/include/asm/processor.h
+++ b/arch/m68k/include/asm/processor.h
@@ -150,7 +150,7 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define	KSTK_EIP(tsk)	\
     ({			\
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 1ab692b952cd..a6030dbaa089 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -263,13 +263,11 @@ int dump_fpu (struct pt_regs *regs, struct user_m68kfp_struct *fpu)
 }
 EXPORT_SYMBOL(dump_fpu);
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
 	unsigned long stack_page;
 	int count = 0;
-	if (!p || p == current || task_is_running(p))
-		return 0;
 
 	stack_page = (unsigned long)task_stack_page(p);
 	fp = ((struct switch_stack *)p->thread.ksp)->a6;
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index 06c6e493590a..7e9e92670df3 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -68,7 +68,7 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 /* The size allocated for kernel stacks. This _must_ be a power of two! */
 # define KERNEL_STACK_SIZE	0x2000
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 62aa237180b6..5e2b91c1e8ce 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 	return 0;
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 /* TBD (used by procfs) */
 	return 0;
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 0c3550c82b72..252ed38ce8c5 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -369,7 +369,7 @@ static inline void flush_thread(void)
 {
 }
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define __KSTK_TOS(tsk) ((unsigned long)task_stack_page(tsk) + \
 			 THREAD_SIZE - 32 - sizeof(struct pt_regs))
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index 95aa86fa6077..cbff1b974f88 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -511,7 +511,7 @@ static int __init frame_info_init(void)
 
 	/*
 	 * Without schedule() frame info, result given by
-	 * thread_saved_pc() and get_wchan() are not reliable.
+	 * thread_saved_pc() and __get_wchan() are not reliable.
 	 */
 	if (schedule_mfi.pc_offset < 0)
 		printk("Can't analyze schedule() prologue at %p\n", schedule);
@@ -652,9 +652,9 @@ unsigned long unwind_stack(struct task_struct *task, unsigned long *sp,
 #endif
 
 /*
- * get_wchan - a maintenance nightmare^W^Wpain in the ass ...
+ * __get_wchan - a maintenance nightmare^W^Wpain in the ass ...
  */
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
 {
 	unsigned long pc = 0;
 #ifdef CONFIG_KALLSYMS
@@ -662,8 +662,6 @@ unsigned long get_wchan(struct task_struct *task)
 	unsigned long ra = 0;
 #endif
 
-	if (!task || task == current || task_is_running(task))
-		goto out;
 	if (!task_stack_page(task))
 		goto out;
 
diff --git a/arch/nds32/include/asm/processor.h b/arch/nds32/include/asm/processor.h
index b82369c7659d..e6bfc74972bb 100644
--- a/arch/nds32/include/asm/processor.h
+++ b/arch/nds32/include/asm/processor.h
@@ -83,7 +83,7 @@ extern struct task_struct *last_task_used_math;
 /* Prepare to copy thread state - unlazy all lazy status */
 #define prepare_to_copy(tsk)	do { } while (0)
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define cpu_relax()			barrier()
 
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index 391895b54d13..49fab9e39cbf 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -233,15 +233,12 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
 
 EXPORT_SYMBOL(dump_fpu);
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long fp, lr;
 	unsigned long stack_start, stack_end;
 	int count = 0;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	if (IS_ENABLED(CONFIG_FRAME_POINTER)) {
 		stack_start = (unsigned long)end_of_stack(p);
 		stack_end = (unsigned long)task_stack_page(p) + THREAD_SIZE;
@@ -258,5 +255,3 @@ unsigned long get_wchan(struct task_struct *p)
 	}
 	return 0;
 }
-
-EXPORT_SYMBOL(get_wchan);
diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 94bcb86f679f..b8125dfbcad2 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -69,7 +69,7 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 #define task_pt_regs(p) \
 	((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 9ff37ba2bb60..f8ea522a1588 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -217,15 +217,12 @@ void dump(struct pt_regs *fp)
 	pr_emerg("\n\n");
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
 	unsigned long stack_page;
 	int count = 0;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	stack_page = (unsigned long)p;
 	fp = ((struct switch_stack *)p->thread.ksp)->fp;	/* ;dgt2 */
 	do {
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index ad53b3184885..aa1699c18add 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -73,7 +73,7 @@ struct thread_struct {
 
 void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 void release_thread(struct task_struct *);
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define cpu_relax()     barrier()
 
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index b0698d9ce14f..3c0c91bcdcba 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -263,7 +263,7 @@ void dump_elf_thread(elf_greg_t *dest, struct pt_regs* regs)
 	dest[35] = 0;
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	/* TODO */
 
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index eeb7da064289..85a2dbfe5278 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -273,7 +273,7 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)	((tsk)->thread.regs.iaoq[0])
 #define KSTK_ESP(tsk)	((tsk)->thread.regs.gr[30])
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 38ec4ae81239..4f562dee65a2 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -240,15 +240,12 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
 }
 
 unsigned long
-get_wchan(struct task_struct *p)
+__get_wchan(struct task_struct *p)
 {
 	struct unwind_frame_info info;
 	unsigned long ip;
 	int count = 0;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	/*
 	 * These bracket the sleeping functions..
 	 */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index f348e564f7dd..e39bd0ff69f3 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -300,7 +300,7 @@ struct thread_struct {
 
 #define task_pt_regs(tsk)	((tsk)->thread.regs)
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)  ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
 #define KSTK_ESP(tsk)  ((tsk)->thread.regs? (tsk)->thread.regs->gpr[1]: 0)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 50436b52c213..406d7ee9e322 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2111,14 +2111,11 @@ int validate_sp(unsigned long sp, struct task_struct *p,
 
 EXPORT_SYMBOL(validate_sp);
 
-static unsigned long __get_wchan(struct task_struct *p)
+static unsigned long ___get_wchan(struct task_struct *p)
 {
 	unsigned long ip, sp;
 	int count = 0;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	sp = p->thread.ksp;
 	if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
 		return 0;
@@ -2137,14 +2134,14 @@ static unsigned long __get_wchan(struct task_struct *p)
 	return 0;
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long ret;
 
 	if (!try_get_task_stack(p))
 		return 0;
 
-	ret = __get_wchan(p);
+	ret = ___get_wchan(p);
 
 	put_task_stack(p);
 
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 46b492c78cbb..0749924d9e55 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -66,7 +66,7 @@ static inline void release_thread(struct task_struct *dead_task)
 {
 }
 
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 
 static inline void wait_for_interrupt(void)
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 315db3d0229b..0fcdc0233fac 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -128,16 +128,14 @@ static bool save_wchan(void *arg, unsigned long pc)
 	return true;
 }
 
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
 {
 	unsigned long pc = 0;
 
-	if (likely(task && task != current && !task_is_running(task))) {
-		if (!try_get_task_stack(task))
-			return 0;
-		walk_stackframe(task, NULL, save_wchan, &pc);
-		put_task_stack(task);
-	}
+	if (!try_get_task_stack(task))
+		return 0;
+	walk_stackframe(task, NULL, save_wchan, &pc);
+	put_task_stack(task);
 	return pc;
 }
 
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 879b8e3f609c..f54c152bf2bf 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -192,7 +192,7 @@ static inline void release_thread(struct task_struct *tsk) { }
 void guarded_storage_release(struct task_struct *tsk);
 void gs_load_bc_cb(struct pt_regs *regs);
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 #define task_pt_regs(tsk) ((struct pt_regs *) \
         (task_stack_page(tsk) + THREAD_SIZE) - 1)
 #define KSTK_EIP(tsk)	(task_pt_regs(tsk)->psw.addr)
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 350e94d0cac2..e5dd46b1bff8 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -181,12 +181,12 @@ void execve_tail(void)
 	asm volatile("sfpc %0" : : "d" (0));
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	struct unwind_state state;
 	unsigned long ip = 0;
 
-	if (!p || p == current || task_is_running(p) || !task_stack_page(p))
+	if (!task_stack_page(p))
 		return 0;
 
 	if (!try_get_task_stack(p))
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index aa92cc933889..45240ec6b85a 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -180,7 +180,7 @@ static inline void show_code(struct pt_regs *regs)
 }
 #endif
 
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->pc)
 #define KSTK_ESP(tsk)  (task_pt_regs(tsk)->regs[15])
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 717de05c81f4..1c28e3cddb60 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -182,13 +182,10 @@ __switch_to(struct task_struct *prev, struct task_struct *next)
 	return prev;
 }
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long pc;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	/*
 	 * The same comment as on the Alpha applies here, too ...
 	 */
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index b6242f7771e9..647bf0ac7beb 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -89,7 +89,7 @@ static inline void start_thread(struct pt_regs * regs, unsigned long pc,
 /* Free all resources held by a thread. */
 #define release_thread(tsk)		do { } while(0)
 
-unsigned long get_wchan(struct task_struct *);
+unsigned long __get_wchan(struct task_struct *);
 
 #define task_pt_regs(tsk) ((tsk)->thread.kregs)
 #define KSTK_EIP(tsk)  ((tsk)->thread.kregs->pc)
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 5cf145f18f36..ae851e8fce4c 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -183,7 +183,7 @@ do { \
 /* Free all resources held by a thread. */
 #define release_thread(tsk)		do { } while (0)
 
-unsigned long get_wchan(struct task_struct *task);
+unsigned long __get_wchan(struct task_struct *task);
 
 #define task_pt_regs(tsk) (task_thread_info(tsk)->kregs)
 #define KSTK_EIP(tsk)  (task_pt_regs(tsk)->tpc)
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index bbbe0cfef746..2dc0bf9fe62e 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -365,7 +365,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 	return 0;
 }
 
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
 	unsigned long task_base = (unsigned long) task;
@@ -373,9 +373,6 @@ unsigned long get_wchan(struct task_struct *task)
 	struct reg_window32 *rw;
 	int count = 0;
 
-	if (!task || task == current || task_is_running(task))
-		goto out;
-
 	fp = task_thread_info(task)->ksp + bias;
 	do {
 		/* Bogus frame pointer? */
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index d1cc410d2f64..f5b2cac8669f 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -663,7 +663,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
-unsigned long get_wchan(struct task_struct *task)
+unsigned long __get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
 	struct thread_info *tp;
@@ -671,9 +671,6 @@ unsigned long get_wchan(struct task_struct *task)
         unsigned long ret = 0;
 	int count = 0; 
 
-	if (!task || task == current || task_is_running(task))
-		goto out;
-
 	tp = task_thread_info(task);
 	bias = STACK_BIAS;
 	fp = task_thread_info(task)->ksp + bias;
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index b5cf0ed116d9..579692a40a55 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -106,6 +106,6 @@ extern struct cpuinfo_um boot_cpu_data;
 #define cache_line_size()	(boot_cpu_data.cache_alignment)
 
 #define KSTK_REG(tsk, reg) get_thread_reg(reg, &tsk->thread.switch_buf)
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 #endif
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 457a38db368b..82107373ac7e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -364,14 +364,11 @@ unsigned long arch_align_stack(unsigned long sp)
 }
 #endif
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long stack_page, sp, ip;
 	bool seen_sched = 0;
 
-	if ((p == NULL) || (p == current) || task_is_running(p))
-		return 0;
-
 	stack_page = (unsigned long) task_stack_page(p);
 	/* Bail if the process has no kernel stack for some reason */
 	if (stack_page == 0)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9ad2acaaae9b..e81177edc681 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -589,7 +589,7 @@ static inline void load_sp0(unsigned long sp0)
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
 
 /*
  * Generic CPUID function
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e645925f9f02..a69885a9d711 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -942,13 +942,10 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
  * because the task might wake up and we might look at a stack
  * changing under us.
  */
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long entry = 0;
 
-	if (p == current || task_is_running(p))
-		return 0;
-
 	stack_trace_save_tsk(p, &entry, 1, 0);
 	return entry;
 }
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 7f63aca6a0d3..ad15fbc57283 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -215,7 +215,7 @@ struct mm_struct;
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
 
-extern unsigned long get_wchan(struct task_struct *p);
+extern unsigned long __get_wchan(struct task_struct *p);
 
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->pc)
 #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->areg[1])
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 060165340612..47f933fed870 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -298,15 +298,12 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
  * These bracket the sleeping functions..
  */
 
-unsigned long get_wchan(struct task_struct *p)
+unsigned long __get_wchan(struct task_struct *p)
 {
 	unsigned long sp, pc;
 	unsigned long stack_page = (unsigned long) task_stack_page(p);
 	int count = 0;
 
-	if (!p || p == current || task_is_running(p))
-		return 0;
-
 	sp = p->thread.sp;
 	pc = MAKE_PC_FROM_RA(p->thread.ra, p->thread.sp);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 343603f77f8b..5f8db54226af 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2139,6 +2139,7 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
 #endif /* CONFIG_SMP */
 
 extern bool sched_task_on_rq(struct task_struct *p);
+extern unsigned long get_wchan(struct task_struct *p);
 
 /*
  * In order to reduce various lock holder preemption latencies provide an
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 935c2da00339..f2611b9cf503 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1966,6 +1966,25 @@ bool sched_task_on_rq(struct task_struct *p)
 	return task_on_rq_queued(p);
 }
 
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long ip = 0;
+	unsigned int state;
+
+	if (!p || p == current)
+		return 0;
+
+	/* Only get wchan if task is blocked and we can keep it that way. */
+	raw_spin_lock_irq(&p->pi_lock);
+	state = READ_ONCE(p->__state);
+	smp_rmb(); /* see try_to_wake_up() */
+	if (state != TASK_RUNNING && state != TASK_WAKING && !p->on_rq)
+		ip = __get_wchan(p);
+	raw_spin_unlock_irq(&p->pi_lock);
+
+	return ip;
+}
+
 static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (!(flags & ENQUEUE_NOCLOCK))
-- 
cgit v1.2.3


From c5e22feffdd736cb02b98b0f5b375c8ebc858dd4 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Fri, 24 Sep 2021 20:51:02 +1200
Subject: topology: Represent clusters of CPUs within a die

Both ACPI and DT provide the ability to describe additional layers of
topology between that of individual cores and higher level constructs
such as the level at which the last level cache is shared.
In ACPI this can be represented in PPTT as a Processor Hierarchy
Node Structure [1] that is the parent of the CPU cores and in turn
has a parent Processor Hierarchy Nodes Structure representing
a higher level of topology.

For example Kunpeng 920 has 6 or 8 clusters in each NUMA node, and each
cluster has 4 cpus. All clusters share L3 cache data, but each cluster
has local L3 tag. On the other hand, each clusters will share some
internal system bus.

+-----------------------------------+                          +---------+
|  +------+    +------+             +--------------------------+         |
|  | CPU0 |    | cpu1 |             |    +-----------+         |         |
|  +------+    +------+             |    |           |         |         |
|                                   +----+    L3     |         |         |
|  +------+    +------+   cluster   |    |    tag    |         |         |
|  | CPU2 |    | CPU3 |             |    |           |         |         |
|  +------+    +------+             |    +-----------+         |         |
|                                   |                          |         |
+-----------------------------------+                          |         |
+-----------------------------------+                          |         |
|  +------+    +------+             +--------------------------+         |
|  |      |    |      |             |    +-----------+         |         |
|  +------+    +------+             |    |           |         |         |
|                                   |    |    L3     |         |         |
|  +------+    +------+             +----+    tag    |         |         |
|  |      |    |      |             |    |           |         |         |
|  +------+    +------+             |    +-----------+         |         |
|                                   |                          |         |
+-----------------------------------+                          |   L3    |
                                                               |   data  |
+-----------------------------------+                          |         |
|  +------+    +------+             |    +-----------+         |         |
|  |      |    |      |             |    |           |         |         |
|  +------+    +------+             +----+    L3     |         |         |
|                                   |    |    tag    |         |         |
|  +------+    +------+             |    |           |         |         |
|  |      |    |      |             |    +-----------+         |         |
|  +------+    +------+             +--------------------------+         |
+-----------------------------------|                          |         |
+-----------------------------------|                          |         |
|  +------+    +------+             +--------------------------+         |
|  |      |    |      |             |    +-----------+         |         |
|  +------+    +------+             |    |           |         |         |
|                                   +----+    L3     |         |         |
|  +------+    +------+             |    |    tag    |         |         |
|  |      |    |      |             |    |           |         |         |
|  +------+    +------+             |    +-----------+         |         |
|                                   |                          |         |
+-----------------------------------+                          |         |
+-----------------------------------+                          |         |
|  +------+    +------+             +--------------------------+         |
|  |      |    |      |             |   +-----------+          |         |
|  +------+    +------+             |   |           |          |         |
|                                   |   |    L3     |          |         |
|  +------+    +------+             +---+    tag    |          |         |
|  |      |    |      |             |   |           |          |         |
|  +------+    +------+             |   +-----------+          |         |
|                                   |                          |         |
+-----------------------------------+                          |         |
+-----------------------------------+                          |         |
|  +------+    +------+             +--------------------------+         |
|  |      |    |      |             |  +-----------+           |         |
|  +------+    +------+             |  |           |           |         |
|                                   |  |    L3     |           |         |
|  +------+    +------+             +--+    tag    |           |         |
|  |      |    |      |             |  |           |           |         |
|  +------+    +------+             |  +-----------+           |         |
|                                   |                          +---------+
+-----------------------------------+

That means spreading tasks among clusters will bring more bandwidth
while packing tasks within one cluster will lead to smaller cache
synchronization latency. So both kernel and userspace will have
a chance to leverage this topology to deploy tasks accordingly to
achieve either smaller cache latency within one cluster or an even
distribution of load among clusters for higher throughput.

This patch exposes cluster topology to both kernel and userspace.
Libraried like hwloc will know cluster by cluster_cpus and related
sysfs attributes. PoC of HWLOC support at [2].

Note this patch only handle the ACPI case.

Special consideration is needed for SMT processors, where it is
necessary to move 2 levels up the hierarchy from the leaf nodes
(thus skipping the processor core level).

Note that arm64 / ACPI does not provide any means of identifying
a die level in the topology but that may be unrelate to the cluster
level.

[1] ACPI Specification 6.3 - section 5.2.29.1 processor hierarchy node
    structure (Type 0)
[2] https://github.com/hisilicon/hwloc/tree/linux-cluster

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210924085104.44806-2-21cnbao@gmail.com
---
 Documentation/ABI/stable/sysfs-devices-system-cpu | 15 +++++
 Documentation/admin-guide/cputopology.rst         | 12 ++--
 arch/arm64/kernel/topology.c                      |  2 +
 drivers/acpi/pptt.c                               | 67 +++++++++++++++++++++++
 drivers/base/arch_topology.c                      | 15 +++++
 drivers/base/topology.c                           | 10 ++++
 include/linux/acpi.h                              |  5 ++
 include/linux/arch_topology.h                     |  5 ++
 include/linux/topology.h                          |  6 ++
 9 files changed, 133 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
index 516dafea03eb..3965ce504484 100644
--- a/Documentation/ABI/stable/sysfs-devices-system-cpu
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -42,6 +42,12 @@ Description:    the CPU core ID of cpuX. Typically it is the hardware platform's
                 architecture and platform dependent.
 Values:         integer
 
+What:           /sys/devices/system/cpu/cpuX/topology/cluster_id
+Description:    the cluster ID of cpuX.  Typically it is the hardware platform's
+                identifier (rather than the kernel's). The actual value is
+                architecture and platform dependent.
+Values:         integer
+
 What:           /sys/devices/system/cpu/cpuX/topology/book_id
 Description:    the book ID of cpuX. Typically it is the hardware platform's
                 identifier (rather than the kernel's). The actual value is
@@ -85,6 +91,15 @@ Description:    human-readable list of CPUs within the same die.
                 The format is like 0-3, 8-11, 14,17.
 Values:         decimal list.
 
+What:           /sys/devices/system/cpu/cpuX/topology/cluster_cpus
+Description:    internal kernel map of CPUs within the same cluster.
+Values:         hexadecimal bitmask.
+
+What:           /sys/devices/system/cpu/cpuX/topology/cluster_cpus_list
+Description:    human-readable list of CPUs within the same cluster.
+                The format is like 0-3, 8-11, 14,17.
+Values:         decimal list.
+
 What:           /sys/devices/system/cpu/cpuX/topology/book_siblings
 Description:    internal kernel map of cpuX's hardware threads within the same
                 book_id. it's only used on s390.
diff --git a/Documentation/admin-guide/cputopology.rst b/Documentation/admin-guide/cputopology.rst
index b085dbac60a5..6b62e182baf4 100644
--- a/Documentation/admin-guide/cputopology.rst
+++ b/Documentation/admin-guide/cputopology.rst
@@ -19,11 +19,13 @@ these macros in include/asm-XXX/topology.h::
 
 	#define topology_physical_package_id(cpu)
 	#define topology_die_id(cpu)
+	#define topology_cluster_id(cpu)
 	#define topology_core_id(cpu)
 	#define topology_book_id(cpu)
 	#define topology_drawer_id(cpu)
 	#define topology_sibling_cpumask(cpu)
 	#define topology_core_cpumask(cpu)
+	#define topology_cluster_cpumask(cpu)
 	#define topology_die_cpumask(cpu)
 	#define topology_book_cpumask(cpu)
 	#define topology_drawer_cpumask(cpu)
@@ -39,10 +41,12 @@ not defined by include/asm-XXX/topology.h:
 
 1) topology_physical_package_id: -1
 2) topology_die_id: -1
-3) topology_core_id: 0
-4) topology_sibling_cpumask: just the given CPU
-5) topology_core_cpumask: just the given CPU
-6) topology_die_cpumask: just the given CPU
+3) topology_cluster_id: -1
+4) topology_core_id: 0
+5) topology_sibling_cpumask: just the given CPU
+6) topology_core_cpumask: just the given CPU
+7) topology_cluster_cpumask: just the given CPU
+8) topology_die_cpumask: just the given CPU
 
 For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
 default definitions for topology_book_id() and topology_book_cpumask().
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 4dd14a6620c1..9ab78ad826e2 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -103,6 +103,8 @@ int __init parse_acpi_topology(void)
 			cpu_topology[cpu].thread_id  = -1;
 			cpu_topology[cpu].core_id    = topology_id;
 		}
+		topology_id = find_acpi_cpu_topology_cluster(cpu);
+		cpu_topology[cpu].cluster_id = topology_id;
 		topology_id = find_acpi_cpu_topology_package(cpu);
 		cpu_topology[cpu].package_id = topology_id;
 
diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index fe69dc518f31..701f61c01359 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -746,6 +746,73 @@ int find_acpi_cpu_topology_package(unsigned int cpu)
 					  ACPI_PPTT_PHYSICAL_PACKAGE);
 }
 
+/**
+ * find_acpi_cpu_topology_cluster() - Determine a unique CPU cluster value
+ * @cpu: Kernel logical CPU number
+ *
+ * Determine a topology unique cluster ID for the given CPU/thread.
+ * This ID can then be used to group peers, which will have matching ids.
+ *
+ * The cluster, if present is the level of topology above CPUs. In a
+ * multi-thread CPU, it will be the level above the CPU, not the thread.
+ * It may not exist in single CPU systems. In simple multi-CPU systems,
+ * it may be equal to the package topology level.
+ *
+ * Return: -ENOENT if the PPTT doesn't exist, the CPU cannot be found
+ * or there is no toplogy level above the CPU..
+ * Otherwise returns a value which represents the package for this CPU.
+ */
+
+int find_acpi_cpu_topology_cluster(unsigned int cpu)
+{
+	struct acpi_table_header *table;
+	acpi_status status;
+	struct acpi_pptt_processor *cpu_node, *cluster_node;
+	u32 acpi_cpu_id;
+	int retval;
+	int is_thread;
+
+	status = acpi_get_table(ACPI_SIG_PPTT, 0, &table);
+	if (ACPI_FAILURE(status)) {
+		acpi_pptt_warn_missing();
+		return -ENOENT;
+	}
+
+	acpi_cpu_id = get_acpi_id_for_cpu(cpu);
+	cpu_node = acpi_find_processor_node(table, acpi_cpu_id);
+	if (cpu_node == NULL || !cpu_node->parent) {
+		retval = -ENOENT;
+		goto put_table;
+	}
+
+	is_thread = cpu_node->flags & ACPI_PPTT_ACPI_PROCESSOR_IS_THREAD;
+	cluster_node = fetch_pptt_node(table, cpu_node->parent);
+	if (cluster_node == NULL) {
+		retval = -ENOENT;
+		goto put_table;
+	}
+	if (is_thread) {
+		if (!cluster_node->parent) {
+			retval = -ENOENT;
+			goto put_table;
+		}
+		cluster_node = fetch_pptt_node(table, cluster_node->parent);
+		if (cluster_node == NULL) {
+			retval = -ENOENT;
+			goto put_table;
+		}
+	}
+	if (cluster_node->flags & ACPI_PPTT_ACPI_PROCESSOR_ID_VALID)
+		retval = cluster_node->acpi_processor_id;
+	else
+		retval = ACPI_PTR_DIFF(cluster_node, table);
+
+put_table:
+	acpi_put_table(table);
+
+	return retval;
+}
+
 /**
  * find_acpi_cpu_topology_hetero_id() - Get a core architecture tag
  * @cpu: Kernel logical CPU number
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 43407665918f..fc0836f460fb 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -600,6 +600,11 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	return core_mask;
 }
 
+const struct cpumask *cpu_clustergroup_mask(int cpu)
+{
+	return &cpu_topology[cpu].cluster_sibling;
+}
+
 void update_siblings_masks(unsigned int cpuid)
 {
 	struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid];
@@ -617,6 +622,12 @@ void update_siblings_masks(unsigned int cpuid)
 		if (cpuid_topo->package_id != cpu_topo->package_id)
 			continue;
 
+		if (cpuid_topo->cluster_id == cpu_topo->cluster_id &&
+		    cpuid_topo->cluster_id != -1) {
+			cpumask_set_cpu(cpu, &cpuid_topo->cluster_sibling);
+			cpumask_set_cpu(cpuid, &cpu_topo->cluster_sibling);
+		}
+
 		cpumask_set_cpu(cpuid, &cpu_topo->core_sibling);
 		cpumask_set_cpu(cpu, &cpuid_topo->core_sibling);
 
@@ -635,6 +646,9 @@ static void clear_cpu_topology(int cpu)
 	cpumask_clear(&cpu_topo->llc_sibling);
 	cpumask_set_cpu(cpu, &cpu_topo->llc_sibling);
 
+	cpumask_clear(&cpu_topo->cluster_sibling);
+	cpumask_set_cpu(cpu, &cpu_topo->cluster_sibling);
+
 	cpumask_clear(&cpu_topo->core_sibling);
 	cpumask_set_cpu(cpu, &cpu_topo->core_sibling);
 	cpumask_clear(&cpu_topo->thread_sibling);
@@ -650,6 +664,7 @@ void __init reset_cpu_topology(void)
 
 		cpu_topo->thread_id = -1;
 		cpu_topo->core_id = -1;
+		cpu_topo->cluster_id = -1;
 		cpu_topo->package_id = -1;
 		cpu_topo->llc_id = -1;
 
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 43c0940643f5..8f2b641d0b8c 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -48,6 +48,9 @@ static DEVICE_ATTR_RO(physical_package_id);
 define_id_show_func(die_id);
 static DEVICE_ATTR_RO(die_id);
 
+define_id_show_func(cluster_id);
+static DEVICE_ATTR_RO(cluster_id);
+
 define_id_show_func(core_id);
 static DEVICE_ATTR_RO(core_id);
 
@@ -63,6 +66,10 @@ define_siblings_read_func(core_siblings, core_cpumask);
 static BIN_ATTR_RO(core_siblings, 0);
 static BIN_ATTR_RO(core_siblings_list, 0);
 
+define_siblings_read_func(cluster_cpus, cluster_cpumask);
+static BIN_ATTR_RO(cluster_cpus, 0);
+static BIN_ATTR_RO(cluster_cpus_list, 0);
+
 define_siblings_read_func(die_cpus, die_cpumask);
 static BIN_ATTR_RO(die_cpus, 0);
 static BIN_ATTR_RO(die_cpus_list, 0);
@@ -94,6 +101,8 @@ static struct bin_attribute *bin_attrs[] = {
 	&bin_attr_thread_siblings_list,
 	&bin_attr_core_siblings,
 	&bin_attr_core_siblings_list,
+	&bin_attr_cluster_cpus,
+	&bin_attr_cluster_cpus_list,
 	&bin_attr_die_cpus,
 	&bin_attr_die_cpus_list,
 	&bin_attr_package_cpus,
@@ -112,6 +121,7 @@ static struct bin_attribute *bin_attrs[] = {
 static struct attribute *default_attrs[] = {
 	&dev_attr_physical_package_id.attr,
 	&dev_attr_die_id.attr,
+	&dev_attr_cluster_id.attr,
 	&dev_attr_core_id.attr,
 #ifdef CONFIG_SCHED_BOOK
 	&dev_attr_book_id.attr,
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 974d497a897d..fbc2146050a4 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1353,6 +1353,7 @@ static inline int lpit_read_residency_count_address(u64 *address)
 #ifdef CONFIG_ACPI_PPTT
 int acpi_pptt_cpu_is_thread(unsigned int cpu);
 int find_acpi_cpu_topology(unsigned int cpu, int level);
+int find_acpi_cpu_topology_cluster(unsigned int cpu);
 int find_acpi_cpu_topology_package(unsigned int cpu);
 int find_acpi_cpu_topology_hetero_id(unsigned int cpu);
 int find_acpi_cpu_cache_topology(unsigned int cpu, int level);
@@ -1365,6 +1366,10 @@ static inline int find_acpi_cpu_topology(unsigned int cpu, int level)
 {
 	return -EINVAL;
 }
+static inline int find_acpi_cpu_topology_cluster(unsigned int cpu)
+{
+	return -EINVAL;
+}
 static inline int find_acpi_cpu_topology_package(unsigned int cpu)
 {
 	return -EINVAL;
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index f180240dc95f..b97cea83b25e 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -62,10 +62,12 @@ void topology_set_thermal_pressure(const struct cpumask *cpus,
 struct cpu_topology {
 	int thread_id;
 	int core_id;
+	int cluster_id;
 	int package_id;
 	int llc_id;
 	cpumask_t thread_sibling;
 	cpumask_t core_sibling;
+	cpumask_t cluster_sibling;
 	cpumask_t llc_sibling;
 };
 
@@ -73,13 +75,16 @@ struct cpu_topology {
 extern struct cpu_topology cpu_topology[NR_CPUS];
 
 #define topology_physical_package_id(cpu)	(cpu_topology[cpu].package_id)
+#define topology_cluster_id(cpu)	(cpu_topology[cpu].cluster_id)
 #define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
 #define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
 #define topology_sibling_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
+#define topology_cluster_cpumask(cpu)	(&cpu_topology[cpu].cluster_sibling)
 #define topology_llc_cpumask(cpu)	(&cpu_topology[cpu].llc_sibling)
 void init_cpu_topology(void);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
+const struct cpumask *cpu_clustergroup_mask(int cpu);
 void update_siblings_masks(unsigned int cpu);
 void remove_cpu_topology(unsigned int cpuid);
 void reset_cpu_topology(void);
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 7634cd737061..80d27d717631 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -186,6 +186,9 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_die_id
 #define topology_die_id(cpu)			((void)(cpu), -1)
 #endif
+#ifndef topology_cluster_id
+#define topology_cluster_id(cpu)		((void)(cpu), -1)
+#endif
 #ifndef topology_core_id
 #define topology_core_id(cpu)			((void)(cpu), 0)
 #endif
@@ -195,6 +198,9 @@ static inline int cpu_to_mem(int cpu)
 #ifndef topology_core_cpumask
 #define topology_core_cpumask(cpu)		cpumask_of(cpu)
 #endif
+#ifndef topology_cluster_cpumask
+#define topology_cluster_cpumask(cpu)		cpumask_of(cpu)
+#endif
 #ifndef topology_die_cpumask
 #define topology_die_cpumask(cpu)		cpumask_of(cpu)
 #endif
-- 
cgit v1.2.3


From 778c558f49a2cb3dc7b18a80ff515e82aa813627 Mon Sep 17 00:00:00 2001
From: Barry Song <song.bao.hua@hisilicon.com>
Date: Fri, 24 Sep 2021 20:51:03 +1200
Subject: sched: Add cluster scheduler level in core and related Kconfig for
 ARM64

This patch adds scheduler level for clusters and automatically enables
the load balance among clusters. It will directly benefit a lot of
workload which loves more resources such as memory bandwidth, caches.

Testing has widely been done in two different hardware configurations of
Kunpeng920:

 24 cores in one NUMA(6 clusters in each NUMA node);
 32 cores in one NUMA(8 clusters in each NUMA node)

Workload is running on either one NUMA node or four NUMA nodes, thus,
this can estimate the effect of cluster spreading w/ and w/o NUMA load
balance.

* Stream benchmark:

4threads stream (on 1NUMA * 24cores = 24cores)
                stream                 stream
                w/o patch              w/ patch
MB/sec copy     29929.64 (   0.00%)    32932.68 (  10.03%)
MB/sec scale    29861.10 (   0.00%)    32710.58 (   9.54%)
MB/sec add      27034.42 (   0.00%)    32400.68 (  19.85%)
MB/sec triad    27225.26 (   0.00%)    31965.36 (  17.41%)

6threads stream (on 1NUMA * 24cores = 24cores)
                stream                 stream
                w/o patch              w/ patch
MB/sec copy     40330.24 (   0.00%)    42377.68 (   5.08%)
MB/sec scale    40196.42 (   0.00%)    42197.90 (   4.98%)
MB/sec add      37427.00 (   0.00%)    41960.78 (  12.11%)
MB/sec triad    37841.36 (   0.00%)    42513.64 (  12.35%)

12threads stream (on 1NUMA * 24cores = 24cores)
                stream                 stream
                w/o patch              w/ patch
MB/sec copy     52639.82 (   0.00%)    53818.04 (   2.24%)
MB/sec scale    52350.30 (   0.00%)    53253.38 (   1.73%)
MB/sec add      53607.68 (   0.00%)    55198.82 (   2.97%)
MB/sec triad    54776.66 (   0.00%)    56360.40 (   2.89%)

Thus, it could help memory-bound workload especially under medium load.
Similar improvement is also seen in lkp-pbzip2:

* lkp-pbzip2 benchmark

2-96 threads (on 4NUMA * 24cores = 96cores)
                  lkp-pbzip2              lkp-pbzip2
                  w/o patch               w/ patch
Hmean     tput-2   11062841.57 (   0.00%)  11341817.51 *   2.52%*
Hmean     tput-5   26815503.70 (   0.00%)  27412872.65 *   2.23%*
Hmean     tput-8   41873782.21 (   0.00%)  43326212.92 *   3.47%*
Hmean     tput-12  61875980.48 (   0.00%)  64578337.51 *   4.37%*
Hmean     tput-21 105814963.07 (   0.00%) 111381851.01 *   5.26%*
Hmean     tput-30 150349470.98 (   0.00%) 156507070.73 *   4.10%*
Hmean     tput-48 237195937.69 (   0.00%) 242353597.17 *   2.17%*
Hmean     tput-79 360252509.37 (   0.00%) 362635169.23 *   0.66%*
Hmean     tput-96 394571737.90 (   0.00%) 400952978.48 *   1.62%*

2-24 threads (on 1NUMA * 24cores = 24cores)
                 lkp-pbzip2               lkp-pbzip2
                 w/o patch                w/ patch
Hmean     tput-2   11071705.49 (   0.00%)  11296869.10 *   2.03%*
Hmean     tput-4   20782165.19 (   0.00%)  21949232.15 *   5.62%*
Hmean     tput-6   30489565.14 (   0.00%)  33023026.96 *   8.31%*
Hmean     tput-8   40376495.80 (   0.00%)  42779286.27 *   5.95%*
Hmean     tput-12  61264033.85 (   0.00%)  62995632.78 *   2.83%*
Hmean     tput-18  86697139.39 (   0.00%)  86461545.74 (  -0.27%)
Hmean     tput-24 104854637.04 (   0.00%) 104522649.46 *  -0.32%*

In the case of 6 threads and 8 threads, we see the greatest performance
improvement.

Similar improvement can be seen on lkp-pixz though the improvement is
smaller:

* lkp-pixz benchmark

2-24 threads lkp-pixz (on 1NUMA * 24cores = 24cores)
                  lkp-pixz               lkp-pixz
                  w/o patch              w/ patch
Hmean     tput-2   6486981.16 (   0.00%)  6561515.98 *   1.15%*
Hmean     tput-4  11645766.38 (   0.00%) 11614628.43 (  -0.27%)
Hmean     tput-6  15429943.96 (   0.00%) 15957350.76 *   3.42%*
Hmean     tput-8  19974087.63 (   0.00%) 20413746.98 *   2.20%*
Hmean     tput-12 28172068.18 (   0.00%) 28751997.06 *   2.06%*
Hmean     tput-18 39413409.54 (   0.00%) 39896830.55 *   1.23%*
Hmean     tput-24 49101815.85 (   0.00%) 49418141.47 *   0.64%*

* SPECrate benchmark

4,8,16 copies mcf_r(on 1NUMA * 32cores = 32cores)
		Base     	 	Base
		Run Time   	 	Rate
		-------  	 	---------
4 Copies	w/o 580 (w/ 570)       	w/o 11.1 (w/ 11.3)
8 Copies	w/o 647 (w/ 605)       	w/o 20.0 (w/ 21.4, +7%)
16 Copies	w/o 844 (w/ 844)       	w/o 30.6 (w/ 30.6)

32 Copies(on 4NUMA * 32 cores = 128cores)
[w/o patch]
                 Base     Base        Base
Benchmarks       Copies  Run Time     Rate
--------------- -------  ---------  ---------
500.perlbench_r      32        584       87.2  *
502.gcc_r            32        503       90.2  *
505.mcf_r            32        745       69.4  *
520.omnetpp_r        32       1031       40.7  *
523.xalancbmk_r      32        597       56.6  *
525.x264_r            1         --            CE
531.deepsjeng_r      32        336      109    *
541.leela_r          32        556       95.4  *
548.exchange2_r      32        513      163    *
557.xz_r             32        530       65.2  *
 Est. SPECrate2017_int_base              80.3

[w/ patch]
                  Base     Base        Base
Benchmarks       Copies  Run Time     Rate
--------------- -------  ---------  ---------
500.perlbench_r      32        580      87.8 (+0.688%)  *
502.gcc_r            32        477      95.1 (+5.432%)  *
505.mcf_r            32        644      80.3 (+13.574%) *
520.omnetpp_r        32        942      44.6 (+9.58%)   *
523.xalancbmk_r      32        560      60.4 (+6.714%%) *
525.x264_r            1         --           CE
531.deepsjeng_r      32        337      109  (+0.000%) *
541.leela_r          32        554      95.6 (+0.210%) *
548.exchange2_r      32        515      163  (+0.000%) *
557.xz_r             32        524      66.0 (+1.227%) *
 Est. SPECrate2017_int_base              83.7 (+4.062%)

On the other hand, it is slightly helpful to CPU-bound tasks like
kernbench:

* 24-96 threads kernbench (on 4NUMA * 24cores = 96cores)
                     kernbench              kernbench
                     w/o cluster            w/ cluster
Min       user-24    12054.67 (   0.00%)    12024.19 (   0.25%)
Min       syst-24     1751.51 (   0.00%)     1731.68 (   1.13%)
Min       elsp-24      600.46 (   0.00%)      598.64 (   0.30%)
Min       user-48    12361.93 (   0.00%)    12315.32 (   0.38%)
Min       syst-48     1917.66 (   0.00%)     1892.73 (   1.30%)
Min       elsp-48      333.96 (   0.00%)      332.57 (   0.42%)
Min       user-96    12922.40 (   0.00%)    12921.17 (   0.01%)
Min       syst-96     2143.94 (   0.00%)     2110.39 (   1.56%)
Min       elsp-96      211.22 (   0.00%)      210.47 (   0.36%)
Amean     user-24    12063.99 (   0.00%)    12030.78 *   0.28%*
Amean     syst-24     1755.20 (   0.00%)     1735.53 *   1.12%*
Amean     elsp-24      601.60 (   0.00%)      600.19 (   0.23%)
Amean     user-48    12362.62 (   0.00%)    12315.56 *   0.38%*
Amean     syst-48     1921.59 (   0.00%)     1894.95 *   1.39%*
Amean     elsp-48      334.10 (   0.00%)      332.82 *   0.38%*
Amean     user-96    12925.27 (   0.00%)    12922.63 (   0.02%)
Amean     syst-96     2146.66 (   0.00%)     2122.20 *   1.14%*
Amean     elsp-96      211.96 (   0.00%)      211.79 (   0.08%)

Note this patch isn't an universal win, it might hurt those workload
which can benefit from packing. Though tasks which want to take
advantages of lower communication latency of one cluster won't
necessarily been packed in one cluster while kernel is not aware of
clusters, they have some chance to be randomly packed. But this
patch will make them more likely spread.

Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Tested-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/arm64/Kconfig             | 9 +++++++++
 include/linux/sched/topology.h | 7 +++++++
 include/linux/topology.h       | 7 +++++++
 kernel/sched/topology.c        | 5 +++++
 4 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5c7ae4c3954b..d13677f4731d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -989,6 +989,15 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
+config SCHED_CLUSTER
+	bool "Cluster scheduler support"
+	help
+	  Cluster scheduler support improves the CPU scheduler's decision
+	  making when dealing with machines that have clusters of CPUs.
+	  Cluster usually means a couple of CPUs which are placed closely
+	  by sharing mid-level caches, last-level cache tags or internal
+	  busses.
+
 config SCHED_SMT
 	bool "SMT scheduler support"
 	help
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 8f0f778b7c91..2f9166f6dec8 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -42,6 +42,13 @@ static inline int cpu_smt_flags(void)
 }
 #endif
 
+#ifdef CONFIG_SCHED_CLUSTER
+static inline int cpu_cluster_flags(void)
+{
+	return SD_SHARE_PKG_RESOURCES;
+}
+#endif
+
 #ifdef CONFIG_SCHED_MC
 static inline int cpu_core_flags(void)
 {
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 80d27d717631..0b3704ad13c8 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -212,6 +212,13 @@ static inline const struct cpumask *cpu_smt_mask(int cpu)
 }
 #endif
 
+#if defined(CONFIG_SCHED_CLUSTER) && !defined(cpu_cluster_mask)
+static inline const struct cpumask *cpu_cluster_mask(int cpu)
+{
+	return topology_cluster_cpumask(cpu);
+}
+#endif
+
 static inline const struct cpumask *cpu_cpu_mask(int cpu)
 {
 	return cpumask_of_node(cpu_to_node(cpu));
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 5af3edd34d6d..c1729f9a715f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1638,6 +1638,11 @@ static struct sched_domain_topology_level default_topology[] = {
 #ifdef CONFIG_SCHED_SMT
 	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
 #endif
+
+#ifdef CONFIG_SCHED_CLUSTER
+	{ cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
+
 #ifdef CONFIG_SCHED_MC
 	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
 #endif
-- 
cgit v1.2.3


From 810979682ccc98dbd83f341c18a2e556c30a7164 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 6 Oct 2021 13:18:50 +0200
Subject: irq_work: Allow irq_work_sync() to sleep if irq_work() no IRQ
 support.

irq_work() triggers instantly an interrupt if supported by the
architecture. Otherwise the work will be processed on the next timer
tick. In worst case irq_work_sync() could spin up to a jiffy.

irq_work_sync() is usually used in tear down context which is fully
preemptible. Based on review irq_work_sync() is invoked from preemptible
context and there is one waiter at a time. This qualifies it to use
rcuwait for synchronisation.

Let irq_work_sync() synchronize with rcuwait if the architecture
processes irqwork via the timer tick.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20211006111852.1514359-3-bigeasy@linutronix.de
---
 include/linux/irq_work.h |  3 +++
 kernel/irq_work.c        | 10 ++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index ec2a47a81e42..b48955e9c920 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,7 @@
 #define _LINUX_IRQ_WORK_H
 
 #include <linux/smp_types.h>
+#include <linux/rcuwait.h>
 
 /*
  * An entry can be in one of four states:
@@ -16,11 +17,13 @@
 struct irq_work {
 	struct __call_single_node node;
 	void (*func)(struct irq_work *);
+	struct rcuwait irqwait;
 };
 
 #define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){	\
 	.node = { .u_flags = (_flags), },			\
 	.func = (_func),					\
+	.irqwait = __RCUWAIT_INITIALIZER(irqwait),		\
 }
 
 #define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index db8c248ebc8c..e789beda8297 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -160,6 +160,9 @@ void irq_work_single(void *arg)
 	 * else claimed it meanwhile.
 	 */
 	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
+
+	if (!arch_irq_work_has_interrupt())
+		rcuwait_wake_up(&work->irqwait);
 }
 
 static void irq_work_run_list(struct llist_head *list)
@@ -204,6 +207,13 @@ void irq_work_tick(void)
 void irq_work_sync(struct irq_work *work)
 {
 	lockdep_assert_irqs_enabled();
+	might_sleep();
+
+	if (!arch_irq_work_has_interrupt()) {
+		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
+				   TASK_UNINTERRUPTIBLE);
+		return;
+	}
 
 	while (irq_work_is_busy(work))
 		cpu_relax();
-- 
cgit v1.2.3


From 09089db79859cbccccd8df95b034f36f7027efa6 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 6 Oct 2021 13:18:52 +0200
Subject: irq_work: Also rcuwait for !IRQ_WORK_HARD_IRQ on PREEMPT_RT

On PREEMPT_RT most items are processed as LAZY via softirq context.
Avoid to spin-wait for them because irq_work_sync() could have higher
priority and not allow the irq-work to be completed.

Wait additionally for !IRQ_WORK_HARD_IRQ irq_work items on PREEMPT_RT.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20211006111852.1514359-5-bigeasy@linutronix.de
---
 include/linux/irq_work.h | 5 +++++
 kernel/irq_work.c        | 6 ++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index b48955e9c920..8cd11a223260 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -49,6 +49,11 @@ static inline bool irq_work_is_busy(struct irq_work *work)
 	return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
 }
 
+static inline bool irq_work_is_hard(struct irq_work *work)
+{
+	return atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ;
+}
+
 bool irq_work_queue(struct irq_work *work);
 bool irq_work_queue_on(struct irq_work *work, int cpu);
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index 90b6b56f92e9..f7df715ec28e 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -217,7 +217,8 @@ void irq_work_single(void *arg)
 	 */
 	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
 
-	if (!arch_irq_work_has_interrupt())
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt())
 		rcuwait_wake_up(&work->irqwait);
 }
 
@@ -277,7 +278,8 @@ void irq_work_sync(struct irq_work *work)
 	lockdep_assert_irqs_enabled();
 	might_sleep();
 
-	if (!arch_irq_work_has_interrupt()) {
+	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
+	    !arch_irq_work_has_interrupt()) {
 		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
 				   TASK_UNINTERRUPTIBLE);
 		return;
-- 
cgit v1.2.3


From 8b8ff8cc3b8155c18162e8b1f70e1230db176862 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 7 Sep 2021 19:39:01 +0300
Subject: perf/x86: Add new event for AUX output counter index

PEBS-via-PT records contain a mask of applicable counters. To identify
which event belongs to which counter, a side-band event is needed. Until
now, there has been no side-band event, and consequently users were limited
to using a single event.

Add such a side-band event. Note the event is optimised to output only
when the counter index changes for an event. That works only so long as
all PEBS-via-PT events are scheduled together, which they are for a
recording session because they are in a single group.

Also no attribute bit is used to select the new event, so a new
kernel is not compatible with older perf tools.  The assumption
being that PEBS-via-PT is sufficiently esoteric that users will not
be troubled by this.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210907163903.11820-2-adrian.hunter@intel.com
---
 arch/x86/events/core.c          |  6 ++++++
 arch/x86/events/intel/core.c    | 16 ++++++++++++++++
 arch/x86/events/perf_event.h    |  1 +
 include/linux/perf_event.h      |  1 +
 include/uapi/linux/perf_event.h | 15 +++++++++++++++
 kernel/events/core.c            | 30 ++++++++++++++++++++++++++++++
 6 files changed, 69 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 2a57dbed4894..be33423e9762 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -66,6 +66,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_enable_all,  *x86_pmu.enable_all);
 DEFINE_STATIC_CALL_NULL(x86_pmu_enable,	     *x86_pmu.enable);
 DEFINE_STATIC_CALL_NULL(x86_pmu_disable,     *x86_pmu.disable);
 
+DEFINE_STATIC_CALL_NULL(x86_pmu_assign, *x86_pmu.assign);
+
 DEFINE_STATIC_CALL_NULL(x86_pmu_add,  *x86_pmu.add);
 DEFINE_STATIC_CALL_NULL(x86_pmu_del,  *x86_pmu.del);
 DEFINE_STATIC_CALL_NULL(x86_pmu_read, *x86_pmu.read);
@@ -1215,6 +1217,8 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 	hwc->last_cpu = smp_processor_id();
 	hwc->last_tag = ++cpuc->tags[i];
 
+	static_call_cond(x86_pmu_assign)(event, idx);
+
 	switch (hwc->idx) {
 	case INTEL_PMC_IDX_FIXED_BTS:
 	case INTEL_PMC_IDX_FIXED_VLBR:
@@ -2005,6 +2009,8 @@ static void x86_pmu_static_call_update(void)
 	static_call_update(x86_pmu_enable, x86_pmu.enable);
 	static_call_update(x86_pmu_disable, x86_pmu.disable);
 
+	static_call_update(x86_pmu_assign, x86_pmu.assign);
+
 	static_call_update(x86_pmu_add, x86_pmu.add);
 	static_call_update(x86_pmu_del, x86_pmu.del);
 	static_call_update(x86_pmu_read, x86_pmu.read);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7011e87be6d0..a555e7c2dce9 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2402,6 +2402,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
 		intel_pmu_pebs_disable(event);
 }
 
+static void intel_pmu_assign_event(struct perf_event *event, int idx)
+{
+	if (is_pebs_pt(event))
+		perf_report_aux_output_id(event, idx);
+}
+
 static void intel_pmu_del_event(struct perf_event *event)
 {
 	if (needs_branch_stack(event))
@@ -4494,8 +4500,16 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
 	return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
 }
 
+static void intel_aux_output_init(void)
+{
+	/* Refer also intel_pmu_aux_output_match() */
+	if (x86_pmu.intel_cap.pebs_output_pt_available)
+		x86_pmu.assign = intel_pmu_assign_event;
+}
+
 static int intel_pmu_aux_output_match(struct perf_event *event)
 {
+	/* intel_pmu_assign_event() is needed, refer intel_aux_output_init() */
 	if (!x86_pmu.intel_cap.pebs_output_pt_available)
 		return 0;
 
@@ -6301,6 +6315,8 @@ __init int intel_pmu_init(void)
 	if (is_hybrid())
 		intel_pmu_check_hybrid_pmus((u64)fixed_mask);
 
+	intel_aux_output_init();
+
 	return 0;
 }
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e3ac05c97b5e..76436a55d9ba 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -726,6 +726,7 @@ struct x86_pmu {
 	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	void		(*assign)(struct perf_event *event, int idx);
 	void		(*add)(struct perf_event *);
 	void		(*del)(struct perf_event *);
 	void		(*read)(struct perf_event *event);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2d510ad750ed..126b3a314f3a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1397,6 +1397,7 @@ perf_event_addr_filters(struct perf_event *event)
 }
 
 extern void perf_event_addr_filters_sync(struct perf_event *event);
+extern void perf_report_aux_output_id(struct perf_event *event, u64 hw_id);
 
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_sample_data *data,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index f92880a15645..c89535de1ec8 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1141,6 +1141,21 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_TEXT_POKE			= 20,
 
+	/*
+	 * Data written to the AUX area by hardware due to aux_output, may need
+	 * to be matched to the event by an architecture-specific hardware ID.
+	 * This records the hardware ID, but requires sample_id to provide the
+	 * event ID. e.g. Intel PT uses this record to disambiguate PEBS-via-PT
+	 * records from multiple events.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				hw_id;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_AUX_OUTPUT_HW_ID		= 21,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1cb1f9b8392e..0e90a5084f18 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9062,6 +9062,36 @@ static void perf_log_itrace_start(struct perf_event *event)
 	perf_output_end(&handle);
 }
 
+void perf_report_aux_output_id(struct perf_event *event, u64 hw_id)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct perf_aux_event {
+		struct perf_event_header        header;
+		u64				hw_id;
+	} rec;
+	int ret;
+
+	if (event->parent)
+		event = event->parent;
+
+	rec.header.type	= PERF_RECORD_AUX_OUTPUT_HW_ID;
+	rec.header.misc	= 0;
+	rec.header.size	= sizeof(rec);
+	rec.hw_id	= hw_id;
+
+	perf_event_header__init_id(&rec.header, &sample, event);
+	ret = perf_output_begin(&handle, &sample, event, rec.header.size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, rec);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
 static int
 __perf_event_account_interrupt(struct perf_event *event, int throttle)
 {
-- 
cgit v1.2.3


From d00e60ee54b12de945b8493cf18c1ada9e422514 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 13 Oct 2021 17:19:20 +0800
Subject: page_pool: disable dma mapping support for 32-bit arch with 64-bit
 DMA

As the 32-bit arch with 64-bit DMA seems to rare those days,
and page pool might carry a lot of code and complexity for
systems that possibly.

So disable dma mapping support for such systems, if drivers
really want to work on such systems, they have to implement
their own DMA-mapping fallback tracking outside page_pool.

Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mm_types.h | 13 +------------
 include/net/page_pool.h  | 12 +-----------
 net/core/page_pool.c     | 10 ++++++----
 3 files changed, 8 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..436e0946d691 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -104,18 +104,7 @@ struct page {
 			struct page_pool *pp;
 			unsigned long _pp_mapping_pad;
 			unsigned long dma_addr;
-			union {
-				/**
-				 * dma_addr_upper: might require a 64-bit
-				 * value on 32-bit architectures.
-				 */
-				unsigned long dma_addr_upper;
-				/**
-				 * For frag page support, not supported in
-				 * 32-bit architectures with 64-bit DMA.
-				 */
-				atomic_long_t pp_frag_count;
-			};
+			atomic_long_t pp_frag_count;
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index a4082406a003..3855f069627f 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -216,24 +216,14 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 	page_pool_put_full_page(pool, page, true);
 }
 
-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT	\
-		(sizeof(dma_addr_t) > sizeof(unsigned long))
-
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	dma_addr_t ret = page->dma_addr;
-
-	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
-		ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
-
-	return ret;
+	return page->dma_addr;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
 	page->dma_addr = addr;
-	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
-		page->dma_addr_upper = upper_32_bits(addr);
 }
 
 static inline void page_pool_set_frag_count(struct page *page, long nr)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 1a6978427d6c..9b60e4301a44 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -49,6 +49,12 @@ static int page_pool_init(struct page_pool *pool,
 	 * which is the XDP_TX use-case.
 	 */
 	if (pool->p.flags & PP_FLAG_DMA_MAP) {
+		/* DMA-mapping is not supported on 32-bit systems with
+		 * 64-bit DMA mapping.
+		 */
+		if (sizeof(dma_addr_t) > sizeof(unsigned long))
+			return -EOPNOTSUPP;
+
 		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
 		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
 			return -EINVAL;
@@ -69,10 +75,6 @@ static int page_pool_init(struct page_pool *pool,
 		 */
 	}
 
-	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
-	    pool->p.flags & PP_FLAG_PAGE_FRAG)
-		return -EINVAL;
-
 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 9028b2463c1ea96f51c3ba53e2479346019ff6ad Mon Sep 17 00:00:00 2001
From: Jens Wiklander <jens.wiklander@linaro.org>
Date: Thu, 25 Mar 2021 15:08:44 +0100
Subject: tee: add sec_world_id to struct tee_shm

Adds sec_world_id to struct tee_shm which describes a shared memory
object. sec_world_id can be used by a driver to store an id assigned by
secure world.

Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 include/linux/tee_drv.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index 3ebfea0781f1..a1f03461369b 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -197,7 +197,11 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method,
  * @num_pages:	number of locked pages
  * @dmabuf:	dmabuf used to for exporting to user space
  * @flags:	defined by TEE_SHM_* in tee_drv.h
- * @id:		unique id of a shared memory object on this device
+ * @id:		unique id of a shared memory object on this device, shared
+ *		with user space
+ * @sec_world_id:
+ *		secure world assigned id of this shared memory object, not
+ *		used by all drivers
  *
  * This pool is only supposed to be accessed directly from the TEE
  * subsystem and from drivers that implements their own shm pool manager.
@@ -213,6 +217,7 @@ struct tee_shm {
 	struct dma_buf *dmabuf;
 	u32 flags;
 	int id;
+	u64 sec_world_id;
 };
 
 /**
-- 
cgit v1.2.3


From 9955548919c47a6987b40d90a30fd56bbc043e7b Mon Sep 17 00:00:00 2001
From: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Date: Fri, 1 Oct 2021 12:12:30 +0200
Subject: remoteproc: Remove vdev_to_rvdev and vdev_to_rproc from remoteproc
 API

These both functions are only used by the remoteproc_virtio.
There is no reason to expose them in the API.
Move the functions in remoteproc_virtio.c

Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20211001101234.4247-4-arnaud.pouliquen@foss.st.com
---
 drivers/remoteproc/remoteproc_virtio.c | 12 ++++++++++++
 include/linux/remoteproc.h             | 12 ------------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index cf4d54e98e6a..70ab496d0431 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -23,6 +23,18 @@
 
 #include "remoteproc_internal.h"
 
+static struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
+{
+	return container_of(vdev->dev.parent, struct rproc_vdev, dev);
+}
+
+static  struct rproc *vdev_to_rproc(struct virtio_device *vdev)
+{
+	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
+
+	return rvdev->rproc;
+}
+
 /* kick the remote processor, and let it know which virtqueue to poke at */
 static bool rproc_virtio_notify(struct virtqueue *vq)
 {
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 83c09ac36b13..e0600e1e5c17 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -684,18 +684,6 @@ int rproc_coredump_add_custom_segment(struct rproc *rproc,
 				      void *priv);
 int rproc_coredump_set_elf_info(struct rproc *rproc, u8 class, u16 machine);
 
-static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
-{
-	return container_of(vdev->dev.parent, struct rproc_vdev, dev);
-}
-
-static inline struct rproc *vdev_to_rproc(struct virtio_device *vdev)
-{
-	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
-
-	return rvdev->rproc;
-}
-
 void rproc_add_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
 
 void rproc_remove_subdev(struct rproc *rproc, struct rproc_subdev *subdev);
-- 
cgit v1.2.3


From 789c1093f02c436b320d78a739f9610c8271cb73 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 11 Oct 2021 21:21:14 +0800
Subject: rtc: class: don't call cdev_device_del() when cdev_device_add()
 failed

I got a null-ptr-deref report when doing fault injection test:

general protection fault, probably for non-canonical address 0xdffffc0000000022: 0000 [#1] SMP KASAN PTI
KASAN: null-ptr-deref in range [0x0000000000000110-0x0000000000000117]
RIP: 0010:device_del+0x132/0xdc0
Call Trace:
 cdev_device_del+0x1a/0x80
 devm_rtc_unregister_device+0x37/0x80
 release_nodes+0xc3/0x3b0

If cdev_device_add() fails, 'dev->p' is not set, it causes
null-ptr-deref when calling cdev_device_del(). Registering
character device is optional, we don't return error code
here, so introduce a new flag 'RTC_NO_CDEV' to indicate
if it has character device, cdev_device_del() is called
when this bit is not set.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211011132114.3663509-1-yangyingliang@huawei.com
---
 drivers/rtc/class.c | 9 ++++++---
 include/linux/rtc.h | 1 +
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index f77bc089eb6b..654e921244bf 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -334,7 +334,8 @@ static void devm_rtc_unregister_device(void *data)
 	 * letting any rtc_class_open() users access it again
 	 */
 	rtc_proc_del_device(rtc);
-	cdev_device_del(&rtc->char_dev, &rtc->dev);
+	if (!test_bit(RTC_NO_CDEV, &rtc->flags))
+		cdev_device_del(&rtc->char_dev, &rtc->dev);
 	rtc->ops = NULL;
 	mutex_unlock(&rtc->ops_lock);
 }
@@ -397,12 +398,14 @@ int __devm_rtc_register_device(struct module *owner, struct rtc_device *rtc)
 	rtc_dev_prepare(rtc);
 
 	err = cdev_device_add(&rtc->char_dev, &rtc->dev);
-	if (err)
+	if (err) {
+		set_bit(RTC_NO_CDEV, &rtc->flags);
 		dev_warn(rtc->dev.parent, "failed to add char device %d:%d\n",
 			 MAJOR(rtc->dev.devt), rtc->id);
-	else
+	} else {
 		dev_dbg(rtc->dev.parent, "char device (%d:%d)\n",
 			MAJOR(rtc->dev.devt), rtc->id);
+	}
 
 	rtc_proc_add_device(rtc);
 
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index bd611e26291d..354e0843ab17 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -80,6 +80,7 @@ struct rtc_timer {
 
 /* flags */
 #define RTC_DEV_BUSY 0
+#define RTC_NO_CDEV  1
 
 struct rtc_device {
 	struct device dev;
-- 
cgit v1.2.3


From 4b2c5fa9c9902ce34ecea6711558d9af96351b31 Mon Sep 17 00:00:00 2001
From: Amir Tzin <amirtz@nvidia.com>
Date: Wed, 21 Jul 2021 16:14:12 +0300
Subject: net/mlx5: Add layout to support default timeouts register

Add needed structures and defines for DTOR (default timeouts register).
This will be used to get timeouts values from FW instead of hard coded
values in the driver code thus enabling support for slower devices which
need longer timeouts.

Signed-off-by: Amir Tzin <amirtz@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/device.h   |  4 +++-
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 37 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 40 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 66eaf0aa7f69..109cc8106d16 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -577,7 +577,9 @@ struct mlx5_init_seg {
 	__be32			rsvd1[120];
 	__be32			initializing;
 	struct health_buffer	health;
-	__be32			rsvd2[880];
+	__be32			rsvd2[878];
+	__be32			cmd_exec_to;
+	__be32			cmd_q_init_to;
 	__be32			internal_timer_h;
 	__be32			internal_timer_l;
 	__be32			rsvd3[2];
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 0ca719c00824..ccbd87fbd3bf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -156,6 +156,7 @@ enum {
 	MLX5_REG_MIRC		 = 0x9162,
 	MLX5_REG_SBCAM		 = 0xB01F,
 	MLX5_REG_RESOURCE_DUMP   = 0xC000,
+	MLX5_REG_DTOR            = 0xC00E,
 };
 
 enum mlx5_qpts_trust_state {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 993204a6c1a1..b8bff5109656 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1306,7 +1306,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         vhca_resource_manager[0x1];
 
 	u8         hca_cap_2[0x1];
-	u8         reserved_at_21[0x2];
+	u8         reserved_at_21[0x1];
+	u8         dtor[0x1];
 	u8         event_on_vhca_state_teardown_request[0x1];
 	u8         event_on_vhca_state_in_use[0x1];
 	u8         event_on_vhca_state_active[0x1];
@@ -2807,6 +2808,40 @@ struct mlx5_ifc_dropped_packet_logged_bits {
 	u8         reserved_at_0[0xe0];
 };
 
+struct mlx5_ifc_default_timeout_bits {
+	u8         to_multiplier[0x3];
+	u8         reserved_at_3[0x9];
+	u8         to_value[0x14];
+};
+
+struct mlx5_ifc_dtor_reg_bits {
+	u8         reserved_at_0[0x20];
+
+	struct mlx5_ifc_default_timeout_bits pcie_toggle_to;
+
+	u8         reserved_at_40[0x60];
+
+	struct mlx5_ifc_default_timeout_bits health_poll_to;
+
+	struct mlx5_ifc_default_timeout_bits full_crdump_to;
+
+	struct mlx5_ifc_default_timeout_bits fw_reset_to;
+
+	struct mlx5_ifc_default_timeout_bits flush_on_err_to;
+
+	struct mlx5_ifc_default_timeout_bits pci_sync_update_to;
+
+	struct mlx5_ifc_default_timeout_bits tear_down_to;
+
+	struct mlx5_ifc_default_timeout_bits fsm_reactivate_to;
+
+	struct mlx5_ifc_default_timeout_bits reclaim_pages_to;
+
+	struct mlx5_ifc_default_timeout_bits reclaim_vfs_pages_to;
+
+	u8         reserved_at_1c0[0x40];
+};
+
 enum {
 	MLX5_CQ_ERROR_SYNDROME_CQ_OVERRUN                 = 0x1,
 	MLX5_CQ_ERROR_SYNDROME_CQ_ACCESS_VIOLATION_ERROR  = 0x2,
-- 
cgit v1.2.3


From 5945e1adeab527ec96c75a786213c146d4d482a4 Mon Sep 17 00:00:00 2001
From: Amir Tzin <amirtz@mellanox.com>
Date: Thu, 7 Oct 2021 18:00:27 +0300
Subject: net/mlx5: Read timeout values from init segment

Replace hard coded timeouts with values stored in firmware's init
segment. Timeouts are read from init segment during driver load. If init
segment timeouts are not supported then fallback to hard coded defaults
instead. Also move pre initialization timeouts which cannot be read from
firmware to the new mechanism.

Signed-off-by: Amir Tzin <amirtz@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      | 18 ++--
 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c | 96 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h | 28 +++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c     | 38 +++++----
 include/linux/mlx5/driver.h                        |  5 +-
 6 files changed, 161 insertions(+), 26 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 63032cd6efb1..a151575be51f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -17,7 +17,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
 		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
-		fw_reset.o qos.o
+		fw_reset.o qos.o lib/tout.o
 
 #
 # Netdev basic
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 4dc3a822907a..f71ec4d9d68e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -45,6 +45,7 @@
 
 #include "mlx5_core.h"
 #include "lib/eq.h"
+#include "lib/tout.h"
 
 enum {
 	CMD_IF_REV = 5,
@@ -225,9 +226,13 @@ static void set_signature(struct mlx5_cmd_work_ent *ent, int csum)
 
 static void poll_timeout(struct mlx5_cmd_work_ent *ent)
 {
-	unsigned long poll_end = jiffies + msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC + 1000);
+	struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd);
+	u64 cmd_to_ms = mlx5_tout_ms(dev, CMD);
+	unsigned long poll_end;
 	u8 own;
 
+	poll_end = jiffies + msecs_to_jiffies(cmd_to_ms + 1000);
+
 	do {
 		own = READ_ONCE(ent->lay->status_own);
 		if (!(own & CMD_OWNER_HW)) {
@@ -925,15 +930,18 @@ static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
 	struct mlx5_cmd *cmd = ent->cmd;
-	struct mlx5_core_dev *dev = container_of(cmd, struct mlx5_core_dev, cmd);
-	unsigned long cb_timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+	bool poll_cmd = ent->polling;
 	struct mlx5_cmd_layout *lay;
+	struct mlx5_core_dev *dev;
+	unsigned long cb_timeout;
 	struct semaphore *sem;
 	unsigned long flags;
-	bool poll_cmd = ent->polling;
 	int alloc_ret;
 	int cmd_mode;
 
+	dev = container_of(cmd, struct mlx5_core_dev, cmd);
+	cb_timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
+
 	complete(&ent->handling);
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
@@ -1073,7 +1081,7 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
 
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 {
-	unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
+	unsigned long timeout = msecs_to_jiffies(mlx5_tout_ms(dev, CMD));
 	struct mlx5_cmd *cmd = &dev->cmd;
 	int err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
new file mode 100644
index 000000000000..ee266e0d122a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/mlx5/driver.h>
+#include "lib/tout.h"
+
+struct mlx5_timeouts {
+	u64 to[MAX_TIMEOUT_TYPES];
+};
+
+static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
+	[MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000,
+	[MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
+	[MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
+	[MLX5_TO_FW_INIT_MS] = 2000,
+	[MLX5_TO_CMD_MS] = 60000
+};
+
+static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_types type)
+{
+	dev->timeouts->to[type] = val;
+}
+
+static void tout_set_def_val(struct mlx5_core_dev *dev)
+{
+	int i;
+
+	for (i = MLX5_TO_FW_PRE_INIT_TIMEOUT_MS; i < MAX_TIMEOUT_TYPES; i++)
+		tout_set(dev, tout_def_sw_val[i], i);
+}
+
+int mlx5_tout_init(struct mlx5_core_dev *dev)
+{
+	dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL);
+	if (!dev->timeouts)
+		return -ENOMEM;
+
+	tout_set_def_val(dev);
+	return 0;
+}
+
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev)
+{
+	kfree(dev->timeouts);
+}
+
+/* Time register consists of two fields to_multiplier(time out multiplier)
+ * and to_value(time out value). to_value is the quantity of the time units and
+ * to_multiplier is the type and should be one off these four values.
+ * 0x0: millisecond
+ * 0x1: seconds
+ * 0x2: minutes
+ * 0x3: hours
+ * this function converts the time stored in the two register fields into
+ * millisecond.
+ */
+static u64 tout_convert_reg_field_to_ms(u32 to_mul, u32 to_val)
+{
+	u64 msec = to_val;
+
+	to_mul &= 0x3;
+	/* convert hours/minutes/seconds to miliseconds */
+	if (to_mul)
+		msec *= 1000 * int_pow(60, to_mul - 1);
+
+	return msec;
+}
+
+static u64 tout_convert_iseg_to_ms(u32 iseg_to)
+{
+	return tout_convert_reg_field_to_ms(iseg_to >> 29, iseg_to & 0xfffff);
+}
+
+static bool tout_is_supported(struct mlx5_core_dev *dev)
+{
+	return !!ioread32be(&dev->iseg->cmd_q_init_to);
+}
+
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev)
+{
+	u32 to;
+
+	if (!tout_is_supported(dev))
+		return;
+
+	to = ioread32be(&dev->iseg->cmd_q_init_to);
+	tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_FW_INIT_MS);
+
+	to = ioread32be(&dev->iseg->cmd_exec_to);
+	tout_set(dev, tout_convert_iseg_to_ms(to), MLX5_TO_CMD_MS);
+}
+
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type)
+{
+	return dev->timeouts->to[type];
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
new file mode 100644
index 000000000000..7e6fc61c5b45
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef MLX5_TIMEOUTS_H
+#define MLX5_TIMEOUTS_H
+
+enum mlx5_timeouts_types {
+	/* pre init timeouts (not read from FW) */
+	MLX5_TO_FW_PRE_INIT_TIMEOUT_MS,
+	MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS,
+	MLX5_TO_FW_PRE_INIT_WAIT_MS,
+
+	/* init segment timeouts */
+	MLX5_TO_FW_INIT_MS,
+	MLX5_TO_CMD_MS,
+
+	MAX_TIMEOUT_TYPES
+};
+
+struct mlx5_core_dev;
+int mlx5_tout_init(struct mlx5_core_dev *dev);
+void mlx5_tout_cleanup(struct mlx5_core_dev *dev);
+void mlx5_tout_query_iseg(struct mlx5_core_dev *dev);
+u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type);
+
+#define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS)
+
+# endif /* MLX5_TIMEOUTS_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 65313448a47c..b4893eac6ed6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -60,6 +60,7 @@
 #include "devlink.h"
 #include "fw_reset.h"
 #include "lib/mlx5.h"
+#include "lib/tout.h"
 #include "fpga/core.h"
 #include "fpga/ipsec.h"
 #include "accel/ipsec.h"
@@ -176,11 +177,6 @@ static struct mlx5_profile profile[] = {
 	},
 };
 
-#define FW_INIT_TIMEOUT_MILI		2000
-#define FW_INIT_WAIT_MS			2
-#define FW_PRE_INIT_TIMEOUT_MILI	120000
-#define FW_INIT_WARN_MESSAGE_INTERVAL	20000
-
 static int fw_initializing(struct mlx5_core_dev *dev)
 {
 	return ioread32be(&dev->iseg->initializing) >> 31;
@@ -193,8 +189,6 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
 	unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili);
 	int err = 0;
 
-	BUILD_BUG_ON(FW_PRE_INIT_TIMEOUT_MILI < FW_INIT_WARN_MESSAGE_INTERVAL);
-
 	while (fw_initializing(dev)) {
 		if (time_after(jiffies, end)) {
 			err = -EBUSY;
@@ -205,7 +199,7 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
 				       jiffies_to_msecs(end - warn) / 1000);
 			warn = jiffies + msecs_to_jiffies(warn_time_mili);
 		}
-		msleep(FW_INIT_WAIT_MS);
+		msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT));
 	}
 
 	return err;
@@ -975,25 +969,34 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
 	if (mlx5_core_is_pf(dev))
 		pcie_print_link_status(dev->pdev);
 
+	err = mlx5_tout_init(dev);
+	if (err) {
+		mlx5_core_err(dev, "Failed initializing timeouts, aborting\n");
+		return err;
+	}
+
 	/* wait for firmware to accept initialization segments configurations
 	 */
-	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI, FW_INIT_WARN_MESSAGE_INTERVAL);
+	err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT),
+			   mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
 	if (err) {
-		mlx5_core_err(dev, "Firmware over %d MS in pre-initializing state, aborting\n",
-			      FW_PRE_INIT_TIMEOUT_MILI);
-		return err;
+		mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
+			      mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
+		goto err_tout_cleanup;
 	}
 
 	err = mlx5_cmd_init(dev);
 	if (err) {
 		mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
-		return err;
+		goto err_tout_cleanup;
 	}
 
-	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
+	mlx5_tout_query_iseg(dev);
+
+	err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0);
 	if (err) {
-		mlx5_core_err(dev, "Firmware over %d MS in initializing state, aborting\n",
-			      FW_INIT_TIMEOUT_MILI);
+		mlx5_core_err(dev, "Firmware over %llu MS in initializing state, aborting\n",
+			      mlx5_tout_ms(dev, FW_INIT));
 		goto err_cmd_cleanup;
 	}
 
@@ -1062,6 +1065,8 @@ err_disable_hca:
 err_cmd_cleanup:
 	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
 	mlx5_cmd_cleanup(dev);
+err_tout_cleanup:
+	mlx5_tout_cleanup(dev);
 
 	return err;
 }
@@ -1080,6 +1085,7 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
 	mlx5_core_disable_hca(dev, 0);
 	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
 	mlx5_cmd_cleanup(dev);
+	mlx5_tout_cleanup(dev);
 
 	return 0;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ccbd87fbd3bf..fb06e8870aee 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -66,10 +66,6 @@ enum {
 };
 
 enum {
-	/* one minute for the sake of bringup. Generally, commands must always
-	 * complete and we may need to increase this timeout value
-	 */
-	MLX5_CMD_TIMEOUT_MSEC	= 60 * 1000,
 	MLX5_CMD_WQ_MAX_NAME	= 32,
 };
 
@@ -755,6 +751,7 @@ struct mlx5_core_dev {
 		u32 qcam[MLX5_ST_SZ_DW(qcam_reg)];
 		u8  embedded_cpu;
 	} caps;
+	struct mlx5_timeouts	*timeouts;
 	u64			sys_image_guid;
 	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
-- 
cgit v1.2.3


From fbfa97b4d79f26042f188b84959065213e9d3e99 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Wed, 18 Aug 2021 13:21:30 +0300
Subject: net/mlx5: Disable roce at HCA level

Currently, when a user disables roce via the devlink param, this change
isn't passed down to the device.
If device allows disabling RoCE at device level, make use of it. This
instructs the device to skip memory allocations related to RoCE
functionality which otherwise is done by the device.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c    | 25 ++++++++++++++++++++++-
 include/linux/mlx5/driver.h                       |  9 ++++----
 include/linux/mlx5/mlx5_ifc.h                     |  3 ++-
 4 files changed, 33 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index a85341a41cd0..1c98652b244a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -454,7 +454,8 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
 	struct mlx5_core_dev *dev = devlink_priv(devlink);
 	bool new_state = val.vbool;
 
-	if (new_state && !MLX5_CAP_GEN(dev, roce)) {
+	if (new_state && !MLX5_CAP_GEN(dev, roce) &&
+	    !MLX5_CAP_GEN(dev, roce_rw_supported)) {
 		NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 75d284272119..47d92fb459ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -558,15 +558,38 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 		MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix,
 			 MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
 
+	if (MLX5_CAP_GEN(dev, roce_rw_supported))
+		MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
+
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 }
 
+/* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the
+ * boot process.
+ * In case RoCE cap is writable in FW and user/devlink requested to change the
+ * cap, we are yet to query the final state of the above cap.
+ * Hence, the need for this function.
+ *
+ * Returns
+ * True:
+ * 1) RoCE cap is read only in FW and already disabled
+ * OR:
+ * 2) RoCE cap is writable in FW and user/devlink requested it off.
+ *
+ * In any other case, return False.
+ */
+static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
+{
+	return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) ||
+		(!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
+}
+
 static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
 {
 	void *set_hca_cap;
 	int err;
 
-	if (!MLX5_CAP_GEN(dev, roce))
+	if (is_roce_fw_disabled(dev))
 		return 0;
 
 	err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index fb06e8870aee..7c8b5f06c2cd 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1251,11 +1251,12 @@ static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev)
 {
 	struct devlink *devlink = priv_to_devlink(dev);
 	union devlink_param_value val;
+	int err;
 
-	devlink_param_driverinit_value_get(devlink,
-					   DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
-					   &val);
-	return val.vbool;
+	err = devlink_param_driverinit_value_get(devlink,
+						 DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+						 &val);
+	return err ? MLX5_CAP_GEN(dev, roce) : val.vbool;
 }
 
 #endif /* MLX5_DRIVER_H */
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index b8bff5109656..c614ad1da44d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1588,7 +1588,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_tis_per_sq[0x5];
 
 	u8         ext_stride_num_range[0x1];
-	u8         reserved_at_3a1[0x2];
+	u8         roce_rw_supported[0x1];
+	u8         reserved_at_3a2[0x1];
 	u8         log_max_stride_sz_rq[0x5];
 	u8         reserved_at_3a8[0x3];
 	u8         log_min_stride_sz_rq[0x5];
-- 
cgit v1.2.3


From 2ec16ddde1fa31a83aee04320b248e94348d9152 Mon Sep 17 00:00:00 2001
From: Rongwei Liu <rongweil@nvidia.com>
Date: Thu, 16 Sep 2021 10:46:17 +0300
Subject: net/mlx5: Introduce new device index wrapper

Downstream patches.

Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c       | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/lag.c              | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c       | 2 +-
 include/linux/mlx5/driver.h                                | 5 +++++
 6 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
index 86e079310ac3..ae52e7f38306 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c
@@ -24,7 +24,7 @@ int mlx5e_devlink_port_register(struct mlx5e_priv *priv)
 
 	if (mlx5_core_is_pf(priv->mdev)) {
 		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
-		attrs.phys.port_number = PCI_FUNC(priv->mdev->pdev->devfn);
+		attrs.phys.port_number = mlx5_get_dev_index(priv->mdev);
 		if (MLX5_ESWITCH_MANAGER(priv->mdev)) {
 			mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid);
 			memcpy(attrs.switch_id.id, ppid.id, ppid.id_len);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 20af557ae30c..7f9b96d9537e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -36,7 +36,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16
 		return NULL;
 
 	mlx5_esw_get_port_parent_id(dev, &ppid);
-	pfnum = PCI_FUNC(dev->pdev->devfn);
+	pfnum = mlx5_get_dev_index(dev);
 	external = mlx5_core_is_ecpf_esw_manager(dev);
 	if (external)
 		controller_num = dev->priv.eswitch->offloads.host_number + 1;
@@ -149,7 +149,7 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
 	if (IS_ERR(vport))
 		return PTR_ERR(vport);
 
-	pfnum = PCI_FUNC(dev->pdev->devfn);
+	pfnum = mlx5_get_dev_index(dev);
 	mlx5_esw_get_port_parent_id(dev, &ppid);
 	memcpy(dl_port->attrs.switch_id.id, &ppid.id[0], ppid.id_len);
 	dl_port->attrs.switch_id.id_len = ppid.id_len;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index ca7e31a1a431..3e5a7d74020b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2798,7 +2798,7 @@ u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
 	int id;
 
 	/* Only 4 bits of pf_num */
-	pf_num = PCI_FUNC(esw->dev->pdev->devfn);
+	pf_num = mlx5_get_dev_index(esw->dev);
 	if (pf_num > max_pf_num)
 		return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index ca5690b0a7ab..f35c8ba48aac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -688,7 +688,7 @@ static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
 				 struct mlx5_core_dev *dev,
 				 struct net_device *netdev)
 {
-	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
+	unsigned int fn = mlx5_get_dev_index(dev);
 
 	if (fn >= MLX5_MAX_PORTS)
 		return;
@@ -718,7 +718,7 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
 			       struct mlx5_core_dev *dev)
 {
-	unsigned int fn = PCI_FUNC(dev->pdev->devfn);
+	unsigned int fn = mlx5_get_dev_index(dev);
 
 	if (fn >= MLX5_MAX_PORTS)
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
index 13891fdc607e..e1bb3acf45e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
@@ -323,7 +323,7 @@ mlx5_sf_new_check_attr(struct mlx5_core_dev *dev, const struct devlink_port_new_
 		NL_SET_ERR_MSG_MOD(extack, "External controller is unsupported");
 		return -EOPNOTSUPP;
 	}
-	if (new_attr->pfnum != PCI_FUNC(dev->pdev->devfn)) {
+	if (new_attr->pfnum != mlx5_get_dev_index(dev)) {
 		NL_SET_ERR_MSG_MOD(extack, "Invalid pfnum supplied");
 		return -EOPNOTSUPP;
 	}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7c8b5f06c2cd..aecc38b90de5 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1243,6 +1243,11 @@ static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev)
 	return MLX5_CAP_GEN(dev, native_port_num);
 }
 
+static inline int mlx5_get_dev_index(struct mlx5_core_dev *dev)
+{
+	return PCI_FUNC(dev->pdev->devfn);
+}
+
 enum {
 	MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
 };
-- 
cgit v1.2.3


From 1021d0645d593ea86193c5fc371e33e5b208e14d Mon Sep 17 00:00:00 2001
From: Rongwei Liu <rongweil@nvidia.com>
Date: Tue, 12 Oct 2021 10:40:52 +0300
Subject: net/mlx5: Use native_port_num as 1st option of device index

Using "native_port_num" can support more NICs.

Fallback to PCIe IDs if "native_port_num" query fails.

Signed-off-by: Rongwei Liu <rongweil@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/mlx5/driver.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index aecc38b90de5..cf508685abca 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1245,7 +1245,12 @@ static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev)
 
 static inline int mlx5_get_dev_index(struct mlx5_core_dev *dev)
 {
-	return PCI_FUNC(dev->pdev->devfn);
+	int idx = MLX5_CAP_GEN(dev, native_port_num);
+
+	if (idx >= 1 && idx <= MLX5_MAX_PORTS)
+		return idx - 1;
+	else
+		return PCI_FUNC(dev->pdev->devfn);
 }
 
 enum {
-- 
cgit v1.2.3


From 8e141f9eb803e209714a80aa6ec073893f94c526 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 29 Sep 2021 09:12:40 +0200
Subject: block: drain file system I/O on del_gendisk

Instead of delaying draining of file system I/O related items like the
blk-qos queues, the integrity read workqueue and timeouts only when the
request_queue is removed, do that when del_gendisk is called.  This is
important for SCSI where the upper level drivers that control the gendisk
are separate entities, and the disk can be freed much earlier than the
request_queue, or can even be unbound without tearing down the queue.

Fixes: edb0872f44ec ("block: move the bdi from the request_queue to the gendisk")
Reported-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Darrick J. Wong <djwong@kernel.org>
Link: https://lore.kernel.org/r/20210929071241.934472-5-hch@lst.de
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c      | 27 ++++++++++++---------------
 block/blk.h           |  1 +
 block/genhd.c         | 21 +++++++++++++++++++++
 include/linux/genhd.h |  1 +
 4 files changed, 35 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 43f5da707d8e..4d8f5fe91588 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -49,7 +49,6 @@
 #include "blk-mq.h"
 #include "blk-mq-sched.h"
 #include "blk-pm.h"
-#include "blk-rq-qos.h"
 
 struct dentry *blk_debugfs_root;
 
@@ -337,23 +336,25 @@ void blk_put_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_put_queue);
 
-void blk_set_queue_dying(struct request_queue *q)
+void blk_queue_start_drain(struct request_queue *q)
 {
-	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
-
 	/*
 	 * When queue DYING flag is set, we need to block new req
 	 * entering queue, so we call blk_freeze_queue_start() to
 	 * prevent I/O from crossing blk_queue_enter().
 	 */
 	blk_freeze_queue_start(q);
-
 	if (queue_is_mq(q))
 		blk_mq_wake_waiters(q);
-
 	/* Make blk_queue_enter() reexamine the DYING flag. */
 	wake_up_all(&q->mq_freeze_wq);
 }
+
+void blk_set_queue_dying(struct request_queue *q)
+{
+	blk_queue_flag_set(QUEUE_FLAG_DYING, q);
+	blk_queue_start_drain(q);
+}
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
 /**
@@ -385,13 +386,8 @@ void blk_cleanup_queue(struct request_queue *q)
 	 */
 	blk_freeze_queue(q);
 
-	rq_qos_exit(q);
-
 	blk_queue_flag_set(QUEUE_FLAG_DEAD, q);
 
-	/* for synchronous bio-based driver finish in-flight integrity i/o */
-	blk_flush_integrity();
-
 	blk_sync_queue(q);
 	if (queue_is_mq(q))
 		blk_mq_exit_queue(q);
@@ -474,11 +470,12 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 
 static inline int bio_queue_enter(struct bio *bio)
 {
-	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	struct request_queue *q = disk->queue;
 
 	while (!blk_try_enter_queue(q, false)) {
 		if (bio->bi_opf & REQ_NOWAIT) {
-			if (blk_queue_dying(q))
+			if (test_bit(GD_DEAD, &disk->state))
 				goto dead;
 			bio_wouldblock_error(bio);
 			return -EBUSY;
@@ -495,8 +492,8 @@ static inline int bio_queue_enter(struct bio *bio)
 		wait_event(q->mq_freeze_wq,
 			   (!q->mq_freeze_depth &&
 			    blk_pm_resume_queue(false, q)) ||
-			   blk_queue_dying(q));
-		if (blk_queue_dying(q))
+			   test_bit(GD_DEAD, &disk->state));
+		if (test_bit(GD_DEAD, &disk->state))
 			goto dead;
 	}
 
diff --git a/block/blk.h b/block/blk.h
index 7d2a0ba7ed21..e2ed2257709a 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,6 +51,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
 void blk_free_flush_queue(struct blk_flush_queue *q);
 
 void blk_freeze_queue(struct request_queue *q);
+void blk_queue_start_drain(struct request_queue *q);
 
 #define BIO_INLINE_VECS 4
 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
diff --git a/block/genhd.c b/block/genhd.c
index 496e8458c357..1fe816be9bcd 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -26,6 +26,7 @@
 #include <linux/badblocks.h>
 
 #include "blk.h"
+#include "blk-rq-qos.h"
 
 static struct kobject *block_depr;
 
@@ -559,6 +560,8 @@ EXPORT_SYMBOL(device_add_disk);
  */
 void del_gendisk(struct gendisk *disk)
 {
+	struct request_queue *q = disk->queue;
+
 	might_sleep();
 
 	if (WARN_ON_ONCE(!disk_live(disk) && !(disk->flags & GENHD_FL_HIDDEN)))
@@ -575,8 +578,26 @@ void del_gendisk(struct gendisk *disk)
 	fsync_bdev(disk->part0);
 	__invalidate_device(disk->part0, true);
 
+	/*
+	 * Fail any new I/O.
+	 */
+	set_bit(GD_DEAD, &disk->state);
 	set_capacity(disk, 0);
 
+	/*
+	 * Prevent new I/O from crossing bio_queue_enter().
+	 */
+	blk_queue_start_drain(q);
+	blk_mq_freeze_queue_wait(q);
+
+	rq_qos_exit(q);
+	blk_sync_queue(q);
+	blk_flush_integrity();
+	/*
+	 * Allow using passthrough request again after the queue is torn down.
+	 */
+	blk_mq_unfreeze_queue(q);
+
 	if (!(disk->flags & GENHD_FL_HIDDEN)) {
 		sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c68d83c87f83..0f5315c2b5a3 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -149,6 +149,7 @@ struct gendisk {
 	unsigned long state;
 #define GD_NEED_PART_SCAN		0
 #define GD_READ_ONLY			1
+#define GD_DEAD				2
 
 	struct mutex open_mutex;	/* open/close mutex */
 	unsigned open_partitions;	/* number of open partitions */
-- 
cgit v1.2.3


From 72bf80cf09c4693780ad93a31b48fa5a4e17a946 Mon Sep 17 00:00:00 2001
From: Maíra Canal <maira.canal@usp.br>
Date: Fri, 15 Oct 2021 12:14:35 -0300
Subject: regulator: lp872x: replacing legacy gpio interface for gpiod
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removing all linux/gpio.h and linux/of_gpio.h dependencies and replacing
them with the gpiod interface

Signed-off-by: Maíra Canal <maira.canal@usp.br>
Message-Id: <YWma2yTyuwS5XwhY@fedora>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/lp872x.c       | 38 +++++++++++++++-----------------------
 include/linux/regulator/lp872x.h | 14 +++++++-------
 2 files changed, 22 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/lp872x.c b/drivers/regulator/lp872x.c
index e84be29533f4..1dba5dbcd461 100644
--- a/drivers/regulator/lp872x.c
+++ b/drivers/regulator/lp872x.c
@@ -10,13 +10,12 @@
 #include <linux/i2c.h>
 #include <linux/regmap.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/delay.h>
 #include <linux/regulator/lp872x.h>
 #include <linux/regulator/driver.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/regulator/of_regulator.h>
 
 /* Registers : LP8720/8725 shared */
@@ -250,12 +249,12 @@ static int lp872x_regulator_enable_time(struct regulator_dev *rdev)
 }
 
 static void lp872x_set_dvs(struct lp872x *lp, enum lp872x_dvs_sel dvs_sel,
-			int gpio)
+			struct gpio_desc *gpio)
 {
 	enum lp872x_dvs_state state;
 
 	state = dvs_sel == SEL_V1 ? DVS_HIGH : DVS_LOW;
-	gpio_set_value(gpio, state);
+	gpiod_set_value(gpio, state);
 	lp->dvs_pin = state;
 }
 
@@ -321,7 +320,7 @@ static int lp872x_buck_set_voltage_sel(struct regulator_dev *rdev,
 	u8 addr, mask = LP872X_VOUT_M;
 	struct lp872x_dvs *dvs = lp->pdata ? lp->pdata->dvs : NULL;
 
-	if (dvs && gpio_is_valid(dvs->gpio))
+	if (dvs && dvs->gpio)
 		lp872x_set_dvs(lp, dvs->vsel, dvs->gpio);
 
 	addr = lp872x_select_buck_vout_addr(lp, buck);
@@ -675,7 +674,6 @@ static const struct regulator_desc lp8725_regulator_desc[] = {
 
 static int lp872x_init_dvs(struct lp872x *lp)
 {
-	int ret, gpio;
 	struct lp872x_dvs *dvs = lp->pdata ? lp->pdata->dvs : NULL;
 	enum lp872x_dvs_state pinstate;
 	u8 mask[] = { LP8720_EXT_DVS_M, LP8725_DVS1_M | LP8725_DVS2_M };
@@ -684,15 +682,15 @@ static int lp872x_init_dvs(struct lp872x *lp)
 	if (!dvs)
 		goto set_default_dvs_mode;
 
-	gpio = dvs->gpio;
-	if (!gpio_is_valid(gpio))
+	if (!dvs->gpio)
 		goto set_default_dvs_mode;
 
 	pinstate = dvs->init_state;
-	ret = devm_gpio_request_one(lp->dev, gpio, pinstate, "LP872X DVS");
-	if (ret) {
-		dev_err(lp->dev, "gpio request err: %d\n", ret);
-		return ret;
+	dvs->gpio = devm_gpiod_get_optional(lp->dev, "ti,dvs", pinstate);
+
+	if (IS_ERR(dvs->gpio)) {
+		dev_err(lp->dev, "gpio request err: %ld\n", PTR_ERR(dvs->gpio));
+		return PTR_ERR(dvs->gpio);
 	}
 
 	lp->dvs_pin = pinstate;
@@ -706,20 +704,17 @@ set_default_dvs_mode:
 
 static int lp872x_hw_enable(struct lp872x *lp)
 {
-	int ret, gpio;
-
 	if (!lp->pdata)
 		return -EINVAL;
 
-	gpio = lp->pdata->enable_gpio;
-	if (!gpio_is_valid(gpio))
+	if (!lp->pdata->enable_gpio)
 		return 0;
 
 	/* Always set enable GPIO high. */
-	ret = devm_gpio_request_one(lp->dev, gpio, GPIOF_OUT_INIT_HIGH, "LP872X EN");
-	if (ret) {
-		dev_err(lp->dev, "gpio request err: %d\n", ret);
-		return ret;
+	lp->pdata->enable_gpio = devm_gpiod_get_optional(lp->dev, "enable", GPIOD_OUT_HIGH);
+	if (IS_ERR(lp->pdata->enable_gpio)) {
+		dev_err(lp->dev, "gpio request err: %ld\n", PTR_ERR(lp->pdata->enable_gpio));
+		return PTR_ERR(lp->pdata->enable_gpio);
 	}
 
 	/* Each chip has a different enable delay. */
@@ -844,13 +839,10 @@ static struct lp872x_platform_data
 	if (!pdata->dvs)
 		return ERR_PTR(-ENOMEM);
 
-	pdata->dvs->gpio = of_get_named_gpio(np, "ti,dvs-gpio", 0);
 	of_property_read_u8(np, "ti,dvs-vsel", (u8 *)&pdata->dvs->vsel);
 	of_property_read_u8(np, "ti,dvs-state", &dvs_state);
 	pdata->dvs->init_state = dvs_state ? DVS_HIGH : DVS_LOW;
 
-	pdata->enable_gpio = of_get_named_gpio(np, "enable-gpios", 0);
-
 	if (of_get_child_count(np) == 0)
 		goto out;
 
diff --git a/include/linux/regulator/lp872x.h b/include/linux/regulator/lp872x.h
index d780dbb8b423..8e7e0343c6e1 100644
--- a/include/linux/regulator/lp872x.h
+++ b/include/linux/regulator/lp872x.h
@@ -10,7 +10,7 @@
 
 #include <linux/regulator/machine.h>
 #include <linux/platform_device.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 
 #define LP872X_MAX_REGULATORS		9
 
@@ -41,8 +41,8 @@ enum lp872x_regulator_id {
 };
 
 enum lp872x_dvs_state {
-	DVS_LOW  = GPIOF_OUT_INIT_LOW,
-	DVS_HIGH = GPIOF_OUT_INIT_HIGH,
+	DVS_LOW  = GPIOD_OUT_LOW,
+	DVS_HIGH = GPIOD_OUT_HIGH,
 };
 
 enum lp872x_dvs_sel {
@@ -52,12 +52,12 @@ enum lp872x_dvs_sel {
 
 /**
  * lp872x_dvs
- * @gpio       : gpio pin number for dvs control
+ * @gpio       : gpio descriptor for dvs control
  * @vsel       : dvs selector for buck v1 or buck v2 register
  * @init_state : initial dvs pin state
  */
 struct lp872x_dvs {
-	int gpio;
+	struct gpio_desc *gpio;
 	enum lp872x_dvs_sel vsel;
 	enum lp872x_dvs_state init_state;
 };
@@ -78,14 +78,14 @@ struct lp872x_regulator_data {
  * @update_config     : if LP872X_GENERAL_CFG register is updated, set true
  * @regulator_data    : platform regulator id and init data
  * @dvs               : dvs data for buck voltage control
- * @enable_gpio       : gpio pin number for enable control
+ * @enable_gpio       : gpio descriptor for enable control
  */
 struct lp872x_platform_data {
 	u8 general_config;
 	bool update_config;
 	struct lp872x_regulator_data regulator_data[LP872X_MAX_REGULATORS];
 	struct lp872x_dvs *dvs;
-	int enable_gpio;
+	struct gpio_desc *enable_gpio;
 };
 
 #endif
-- 
cgit v1.2.3


From c3f69c7f629ff53c75f335754ea41426b8e095de Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 12 Oct 2021 16:35:14 -0700
Subject: scsi: ata: Switch to attribute groups

struct device supports attribute groups directly but does not support
struct device_attribute directly. Hence switch to attribute groups.

Link: https://lore.kernel.org/r/20211012233558.4066756-3-bvanassche@acm.org
Acked-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ata/ahci.h        |  8 ++++----
 drivers/ata/ata_piix.c    |  8 +++++---
 drivers/ata/libahci.c     | 52 +++++++++++++++++++++++++++++++----------------
 drivers/ata/libata-sata.c | 19 ++++++++++++-----
 drivers/ata/libata-scsi.c | 15 +++++++++++---
 drivers/ata/pata_macio.c  |  2 +-
 drivers/ata/sata_mv.c     |  2 +-
 drivers/ata/sata_nv.c     |  4 ++--
 drivers/ata/sata_sil24.c  |  2 +-
 include/linux/libata.h    |  8 ++++----
 10 files changed, 79 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h
index 2e89499bd9c3..eeac5482f1d1 100644
--- a/drivers/ata/ahci.h
+++ b/drivers/ata/ahci.h
@@ -376,8 +376,8 @@ struct ahci_host_priv {
 
 extern int ahci_ignore_sss;
 
-extern struct device_attribute *ahci_shost_attrs[];
-extern struct device_attribute *ahci_sdev_attrs[];
+extern const struct attribute_group *ahci_shost_groups[];
+extern const struct attribute_group *ahci_sdev_groups[];
 
 /*
  * This must be instantiated by the edge drivers.  Read the comments
@@ -388,8 +388,8 @@ extern struct device_attribute *ahci_sdev_attrs[];
 	.can_queue		= AHCI_MAX_CMDS,			\
 	.sg_tablesize		= AHCI_MAX_SG,				\
 	.dma_boundary		= AHCI_DMA_BOUNDARY,			\
-	.shost_attrs		= ahci_shost_attrs,			\
-	.sdev_attrs		= ahci_sdev_attrs,			\
+	.shost_groups		= ahci_shost_groups,			\
+	.sdev_groups		= ahci_sdev_groups,			\
 	.change_queue_depth     = ata_scsi_change_queue_depth,		\
 	.tag_alloc_policy       = BLK_TAG_ALLOC_RR,             	\
 	.slave_configure        = ata_scsi_slave_config
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index 3ca7720e7d8f..0b2fcf0d1d6c 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -1085,14 +1085,16 @@ static struct ata_port_operations ich_pata_ops = {
 	.set_dmamode		= ich_set_dmamode,
 };
 
-static struct device_attribute *piix_sidpr_shost_attrs[] = {
-	&dev_attr_link_power_management_policy,
+static struct attribute *piix_sidpr_shost_attrs[] = {
+	&dev_attr_link_power_management_policy.attr,
 	NULL
 };
 
+ATTRIBUTE_GROUPS(piix_sidpr_shost);
+
 static struct scsi_host_template piix_sidpr_sht = {
 	ATA_BMDMA_SHT(DRV_NAME),
-	.shost_attrs		= piix_sidpr_shost_attrs,
+	.shost_groups		= piix_sidpr_shost_groups,
 };
 
 static struct ata_port_operations piix_sidpr_sata_ops = {
diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
index 5b3fa2cbe722..28430c093a7f 100644
--- a/drivers/ata/libahci.c
+++ b/drivers/ata/libahci.c
@@ -108,28 +108,46 @@ static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO,
 		   ahci_read_em_buffer, ahci_store_em_buffer);
 static DEVICE_ATTR(em_message_supported, S_IRUGO, ahci_show_em_supported, NULL);
 
-struct device_attribute *ahci_shost_attrs[] = {
-	&dev_attr_link_power_management_policy,
-	&dev_attr_em_message_type,
-	&dev_attr_em_message,
-	&dev_attr_ahci_host_caps,
-	&dev_attr_ahci_host_cap2,
-	&dev_attr_ahci_host_version,
-	&dev_attr_ahci_port_cmd,
-	&dev_attr_em_buffer,
-	&dev_attr_em_message_supported,
+static struct attribute *ahci_shost_attrs[] = {
+	&dev_attr_link_power_management_policy.attr,
+	&dev_attr_em_message_type.attr,
+	&dev_attr_em_message.attr,
+	&dev_attr_ahci_host_caps.attr,
+	&dev_attr_ahci_host_cap2.attr,
+	&dev_attr_ahci_host_version.attr,
+	&dev_attr_ahci_port_cmd.attr,
+	&dev_attr_em_buffer.attr,
+	&dev_attr_em_message_supported.attr,
 	NULL
 };
-EXPORT_SYMBOL_GPL(ahci_shost_attrs);
 
-struct device_attribute *ahci_sdev_attrs[] = {
-	&dev_attr_sw_activity,
-	&dev_attr_unload_heads,
-	&dev_attr_ncq_prio_supported,
-	&dev_attr_ncq_prio_enable,
+static const struct attribute_group ahci_shost_attr_group = {
+	.attrs = ahci_shost_attrs
+};
+
+const struct attribute_group *ahci_shost_groups[] = {
+	&ahci_shost_attr_group,
+	NULL
+};
+EXPORT_SYMBOL_GPL(ahci_shost_groups);
+
+struct attribute *ahci_sdev_attrs[] = {
+	&dev_attr_sw_activity.attr,
+	&dev_attr_unload_heads.attr,
+	&dev_attr_ncq_prio_supported.attr,
+	&dev_attr_ncq_prio_enable.attr,
+	NULL
+};
+
+static const struct attribute_group ahci_sdev_attr_group = {
+	.attrs = ahci_sdev_attrs
+};
+
+const struct attribute_group *ahci_sdev_groups[] = {
+	&ahci_sdev_attr_group,
 	NULL
 };
-EXPORT_SYMBOL_GPL(ahci_sdev_attrs);
+EXPORT_SYMBOL_GPL(ahci_sdev_groups);
 
 struct ata_port_operations ahci_ops = {
 	.inherits		= &sata_pmp_port_ops,
diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c
index 60418d872c12..4e88597aa9df 100644
--- a/drivers/ata/libata-sata.c
+++ b/drivers/ata/libata-sata.c
@@ -922,13 +922,22 @@ DEVICE_ATTR(ncq_prio_enable, S_IRUGO | S_IWUSR,
 	    ata_ncq_prio_enable_show, ata_ncq_prio_enable_store);
 EXPORT_SYMBOL_GPL(dev_attr_ncq_prio_enable);
 
-struct device_attribute *ata_ncq_sdev_attrs[] = {
-	&dev_attr_unload_heads,
-	&dev_attr_ncq_prio_enable,
-	&dev_attr_ncq_prio_supported,
+struct attribute *ata_ncq_sdev_attrs[] = {
+	&dev_attr_unload_heads.attr,
+	&dev_attr_ncq_prio_enable.attr,
+	&dev_attr_ncq_prio_supported.attr,
 	NULL
 };
-EXPORT_SYMBOL_GPL(ata_ncq_sdev_attrs);
+
+static const struct attribute_group ata_ncq_sdev_attr_group = {
+	.attrs = ata_ncq_sdev_attrs
+};
+
+const struct attribute_group *ata_ncq_sdev_groups[] = {
+	&ata_ncq_sdev_attr_group,
+	NULL
+};
+EXPORT_SYMBOL_GPL(ata_ncq_sdev_groups);
 
 static ssize_t
 ata_scsi_em_message_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 4afe1abc4709..29eb7cca17a4 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -234,11 +234,20 @@ static void ata_scsi_set_invalid_parameter(struct ata_device *dev,
 				     field, 0xff, 0);
 }
 
-struct device_attribute *ata_common_sdev_attrs[] = {
-	&dev_attr_unload_heads,
+static struct attribute *ata_common_sdev_attrs[] = {
+	&dev_attr_unload_heads.attr,
 	NULL
 };
-EXPORT_SYMBOL_GPL(ata_common_sdev_attrs);
+
+static const struct attribute_group ata_common_sdev_attr_group = {
+	.attrs = ata_common_sdev_attrs
+};
+
+const struct attribute_group *ata_common_sdev_groups[] = {
+	&ata_common_sdev_attr_group,
+	NULL
+};
+EXPORT_SYMBOL_GPL(ata_common_sdev_groups);
 
 /**
  *	ata_std_bios_param - generic bios head/sector/cylinder calculator used by sd.
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index be0ca8d5b345..16e8aa184a75 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -923,7 +923,7 @@ static struct scsi_host_template pata_macio_sht = {
 	 */
 	.max_segment_size	= MAX_DBDMA_SEG,
 	.slave_configure	= pata_macio_slave_config,
-	.sdev_attrs		= ata_common_sdev_attrs,
+	.sdev_groups		= ata_common_sdev_groups,
 	.can_queue		= ATA_DEF_QUEUE,
 	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
 };
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 9d86203e1e7a..24130e551b26 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -670,7 +670,7 @@ static struct scsi_host_template mv6_sht = {
 	.can_queue		= MV_MAX_Q_DEPTH - 1,
 	.sg_tablesize		= MV_MAX_SG_CT / 2,
 	.dma_boundary		= MV_DMA_BOUNDARY,
-	.sdev_attrs             = ata_ncq_sdev_attrs,
+	.sdev_groups		= ata_ncq_sdev_groups,
 	.change_queue_depth	= ata_scsi_change_queue_depth,
 	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
 	.slave_configure	= ata_scsi_slave_config
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index c385d18ce87b..16272c111208 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -380,7 +380,7 @@ static struct scsi_host_template nv_adma_sht = {
 	.sg_tablesize		= NV_ADMA_SGTBL_TOTAL_LEN,
 	.dma_boundary		= NV_ADMA_DMA_BOUNDARY,
 	.slave_configure	= nv_adma_slave_config,
-	.sdev_attrs             = ata_ncq_sdev_attrs,
+	.sdev_groups		= ata_ncq_sdev_groups,
 	.change_queue_depth     = ata_scsi_change_queue_depth,
 	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
 };
@@ -391,7 +391,7 @@ static struct scsi_host_template nv_swncq_sht = {
 	.sg_tablesize		= LIBATA_MAX_PRD,
 	.dma_boundary		= ATA_DMA_BOUNDARY,
 	.slave_configure	= nv_swncq_slave_config,
-	.sdev_attrs             = ata_ncq_sdev_attrs,
+	.sdev_groups		= ata_ncq_sdev_groups,
 	.change_queue_depth     = ata_scsi_change_queue_depth,
 	.tag_alloc_policy	= BLK_TAG_ALLOC_RR,
 };
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 06a1e27c4f84..f99ec6f7d7c0 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -379,7 +379,7 @@ static struct scsi_host_template sil24_sht = {
 	.sg_tablesize		= SIL24_MAX_SGE,
 	.dma_boundary		= ATA_DMA_BOUNDARY,
 	.tag_alloc_policy	= BLK_TAG_ALLOC_FIFO,
-	.sdev_attrs		= ata_ncq_sdev_attrs,
+	.sdev_groups		= ata_ncq_sdev_groups,
 	.change_queue_depth	= ata_scsi_change_queue_depth,
 	.slave_configure	= ata_scsi_slave_config
 };
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c0c64f03e107..bd1b782d1bbf 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1388,7 +1388,7 @@ extern int ata_link_nr_enabled(struct ata_link *link);
  */
 extern const struct ata_port_operations ata_base_port_ops;
 extern const struct ata_port_operations sata_port_ops;
-extern struct device_attribute *ata_common_sdev_attrs[];
+extern const struct attribute_group *ata_common_sdev_groups[];
 
 /*
  * All sht initializers (BASE, PIO, BMDMA, NCQ) must be instantiated
@@ -1418,14 +1418,14 @@ extern struct device_attribute *ata_common_sdev_attrs[];
 
 #define ATA_BASE_SHT(drv_name)					\
 	ATA_SUBBASE_SHT(drv_name),				\
-	.sdev_attrs		= ata_common_sdev_attrs
+	.sdev_groups		= ata_common_sdev_groups
 
 #ifdef CONFIG_SATA_HOST
-extern struct device_attribute *ata_ncq_sdev_attrs[];
+extern const struct attribute_group *ata_ncq_sdev_groups[];
 
 #define ATA_NCQ_SHT(drv_name)					\
 	ATA_SUBBASE_SHT(drv_name),				\
-	.sdev_attrs		= ata_ncq_sdev_attrs,		\
+	.sdev_groups		= ata_ncq_sdev_groups,		\
 	.change_queue_depth	= ata_scsi_change_queue_depth
 #endif
 
-- 
cgit v1.2.3


From 05593a3fd1037b5fee85d3c8c28112f19e7baa06 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Fri, 27 Aug 2021 12:47:45 +0900
Subject: counter: stm32-lptimer-cnt: Provide defines for clock polarities

The STM32 low-power timer permits configuration of the clock polarity
via the LPTIMX_CFGR register CKPOL bits. This patch provides
preprocessor defines for the supported clock polarities.

Cc: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/a111c8905c467805ca530728f88189b59430f27e.1630031207.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/counter/stm32-lptimer-cnt.c | 6 +++---
 include/linux/mfd/stm32-lptimer.h   | 5 +++++
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c
index 13656957c45f..7367f46c6f91 100644
--- a/drivers/counter/stm32-lptimer-cnt.c
+++ b/drivers/counter/stm32-lptimer-cnt.c
@@ -140,9 +140,9 @@ static const enum counter_function stm32_lptim_cnt_functions[] = {
 };
 
 enum stm32_lptim_synapse_action {
-	STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE,
-	STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE,
-	STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES,
+	STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE = STM32_LPTIM_CKPOL_RISING_EDGE,
+	STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE = STM32_LPTIM_CKPOL_FALLING_EDGE,
+	STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES = STM32_LPTIM_CKPOL_BOTH_EDGES,
 	STM32_LPTIM_SYNAPSE_ACTION_NONE,
 };
 
diff --git a/include/linux/mfd/stm32-lptimer.h b/include/linux/mfd/stm32-lptimer.h
index 90b20550c1c8..06d3f11dc3c9 100644
--- a/include/linux/mfd/stm32-lptimer.h
+++ b/include/linux/mfd/stm32-lptimer.h
@@ -45,6 +45,11 @@
 #define STM32_LPTIM_PRESC	GENMASK(11, 9)
 #define STM32_LPTIM_CKPOL	GENMASK(2, 1)
 
+/* STM32_LPTIM_CKPOL */
+#define STM32_LPTIM_CKPOL_RISING_EDGE	0
+#define STM32_LPTIM_CKPOL_FALLING_EDGE	1
+#define STM32_LPTIM_CKPOL_BOTH_EDGES	2
+
 /* STM32_LPTIM_ARR */
 #define STM32_LPTIM_MAX_ARR	0xFFFF
 
-- 
cgit v1.2.3


From ea434ff82649111de4fcabd76187270f8abdb63a Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Fri, 27 Aug 2021 12:47:46 +0900
Subject: counter: stm32-timer-cnt: Provide defines for slave mode selection

The STM32 timer permits configuration of the counter encoder mode via
the slave mode control register (SMCR) slave mode selection (SMS) bits.
This patch provides preprocessor defines for the supported encoder
modes.

Cc: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/ad3d9cd7af580d586316d368f74964cbc394f981.1630031207.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/counter/stm32-timer-cnt.c | 16 ++++++++--------
 include/linux/mfd/stm32-timers.h  |  4 ++++
 2 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c
index 3fb0debd7425..1fbc46f4ee66 100644
--- a/drivers/counter/stm32-timer-cnt.c
+++ b/drivers/counter/stm32-timer-cnt.c
@@ -93,16 +93,16 @@ static int stm32_count_function_get(struct counter_device *counter,
 	regmap_read(priv->regmap, TIM_SMCR, &smcr);
 
 	switch (smcr & TIM_SMCR_SMS) {
-	case 0:
+	case TIM_SMCR_SMS_SLAVE_MODE_DISABLED:
 		*function = STM32_COUNT_SLAVE_MODE_DISABLED;
 		return 0;
-	case 1:
+	case TIM_SMCR_SMS_ENCODER_MODE_1:
 		*function = STM32_COUNT_ENCODER_MODE_1;
 		return 0;
-	case 2:
+	case TIM_SMCR_SMS_ENCODER_MODE_2:
 		*function = STM32_COUNT_ENCODER_MODE_2;
 		return 0;
-	case 3:
+	case TIM_SMCR_SMS_ENCODER_MODE_3:
 		*function = STM32_COUNT_ENCODER_MODE_3;
 		return 0;
 	default:
@@ -119,16 +119,16 @@ static int stm32_count_function_set(struct counter_device *counter,
 
 	switch (function) {
 	case STM32_COUNT_SLAVE_MODE_DISABLED:
-		sms = 0;
+		sms = TIM_SMCR_SMS_SLAVE_MODE_DISABLED;
 		break;
 	case STM32_COUNT_ENCODER_MODE_1:
-		sms = 1;
+		sms = TIM_SMCR_SMS_ENCODER_MODE_1;
 		break;
 	case STM32_COUNT_ENCODER_MODE_2:
-		sms = 2;
+		sms = TIM_SMCR_SMS_ENCODER_MODE_2;
 		break;
 	case STM32_COUNT_ENCODER_MODE_3:
-		sms = 3;
+		sms = TIM_SMCR_SMS_ENCODER_MODE_3;
 		break;
 	default:
 		return -EINVAL;
diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h
index f8db83aedb2b..5f5c43fd69dd 100644
--- a/include/linux/mfd/stm32-timers.h
+++ b/include/linux/mfd/stm32-timers.h
@@ -82,6 +82,10 @@
 #define MAX_TIM_ICPSC		0x3
 #define TIM_CR2_MMS_SHIFT	4
 #define TIM_CR2_MMS2_SHIFT	20
+#define TIM_SMCR_SMS_SLAVE_MODE_DISABLED	0 /* counts on internal clock when CEN=1 */
+#define TIM_SMCR_SMS_ENCODER_MODE_1		1 /* counts TI1FP1 edges, depending on TI2FP2 level */
+#define TIM_SMCR_SMS_ENCODER_MODE_2		2 /* counts TI2FP2 edges, depending on TI1FP1 level */
+#define TIM_SMCR_SMS_ENCODER_MODE_3		3 /* counts on both TI1FP1 and TI2FP2 edges */
 #define TIM_SMCR_TS_SHIFT	4
 #define TIM_BDTR_BKF_MASK	0xF
 #define TIM_BDTR_BKF_SHIFT(x)	(16 + (x) * 4)
-- 
cgit v1.2.3


From aaec1a0f76ec25f46bbb17b81487c4b0e1c318c5 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Fri, 27 Aug 2021 12:47:47 +0900
Subject: counter: Internalize sysfs interface code

This is a reimplementation of the Generic Counter driver interface.
There are no modifications to the Counter subsystem userspace interface,
so existing userspace applications should continue to run seamlessly.

The purpose of this patch is to internalize the sysfs interface code
among the various counter drivers into a shared module. Counter drivers
pass and take data natively (i.e. u8, u64, etc.) and the shared counter
module handles the translation between the sysfs interface and the
device drivers. This guarantees a standard userspace interface for all
counter drivers, and helps generalize the Generic Counter driver ABI in
order to support the Generic Counter chrdev interface (introduced in a
subsequent patch) without significant changes to the existing counter
drivers.

Note, Counter device registration is the same as before: drivers
populate a struct counter_device with components and callbacks, then
pass the structure to the devm_counter_register function. However,
what's different now is how the Counter subsystem code handles this
registration internally.

Whereas before callbacks would interact directly with sysfs data, this
interaction is now abstracted and instead callbacks interact with native
C data types. The counter_comp structure forms the basis for Counter
extensions.

The counter-sysfs.c file contains the code to parse through the
counter_device structure and register the requested components and
extensions. Attributes are created and populated based on type, with
respective translation functions to handle the mapping between sysfs and
the counter driver callbacks.

The translation performed for each attribute is straightforward: the
attribute type and data is parsed from the counter_attribute structure,
the respective counter driver read/write callback is called, and sysfs
I/O is handled before or after the driver read/write function is called.

Cc: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Cc: Patrick Havelange <patrick.havelange@essensium.com>
Cc: Kamel Bouhara <kamel.bouhara@bootlin.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Syed Nayyar Waris <syednwaris@gmail.com>
Reviewed-by: David Lechner <david@lechnology.com>
Tested-by: David Lechner <david@lechnology.com>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com> # for stm32
Link: https://lore.kernel.org/r/c68b4a1ffb195c1a2f65e8dd5ad7b7c14e79c6ef.1630031207.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 MAINTAINERS                             |    1 -
 drivers/counter/104-quad-8.c            |  449 ++++------
 drivers/counter/Makefile                |    1 +
 drivers/counter/counter-core.c          |  142 +++
 drivers/counter/counter-sysfs.c         |  849 ++++++++++++++++++
 drivers/counter/counter-sysfs.h         |   13 +
 drivers/counter/counter.c               | 1496 -------------------------------
 drivers/counter/ftm-quaddec.c           |   60 +-
 drivers/counter/intel-qep.c             |  146 ++-
 drivers/counter/interrupt-cnt.c         |   62 +-
 drivers/counter/microchip-tcb-capture.c |   91 +-
 drivers/counter/stm32-lptimer-cnt.c     |  212 +++--
 drivers/counter/stm32-timer-cnt.c       |  179 ++--
 drivers/counter/ti-eqep.c               |  180 ++--
 include/linux/counter.h                 |  658 +++++++-------
 include/linux/counter_enum.h            |   45 -
 16 files changed, 1950 insertions(+), 2634 deletions(-)
 create mode 100644 drivers/counter/counter-core.c
 create mode 100644 drivers/counter/counter-sysfs.c
 create mode 100644 drivers/counter/counter-sysfs.h
 delete mode 100644 drivers/counter/counter.c
 delete mode 100644 include/linux/counter_enum.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index eeb4c70b3d5b..b98a34fcec4b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4804,7 +4804,6 @@ F:	Documentation/ABI/testing/sysfs-bus-counter
 F:	Documentation/driver-api/generic-counter.rst
 F:	drivers/counter/
 F:	include/linux/counter.h
-F:	include/linux/counter_enum.h
 
 CP2615 I2C DRIVER
 M:	Bence Csókás <bence98@sch.bme.hu>
diff --git a/drivers/counter/104-quad-8.c b/drivers/counter/104-quad-8.c
index 0caa60537b14..c587f295d720 100644
--- a/drivers/counter/104-quad-8.c
+++ b/drivers/counter/104-quad-8.c
@@ -117,7 +117,7 @@ static int quad8_signal_read(struct counter_device *counter,
 }
 
 static int quad8_count_read(struct counter_device *counter,
-	struct counter_count *count, unsigned long *val)
+			    struct counter_count *count, u64 *val)
 {
 	struct quad8 *const priv = counter->priv;
 	const int base_offset = priv->base + 2 * count->id;
@@ -148,7 +148,7 @@ static int quad8_count_read(struct counter_device *counter,
 }
 
 static int quad8_count_write(struct counter_device *counter,
-	struct counter_count *count, unsigned long val)
+			     struct counter_count *count, u64 val)
 {
 	struct quad8 *const priv = counter->priv;
 	const int base_offset = priv->base + 2 * count->id;
@@ -188,22 +188,16 @@ static int quad8_count_write(struct counter_device *counter,
 	return 0;
 }
 
-enum quad8_count_function {
-	QUAD8_COUNT_FUNCTION_PULSE_DIRECTION = 0,
-	QUAD8_COUNT_FUNCTION_QUADRATURE_X1,
-	QUAD8_COUNT_FUNCTION_QUADRATURE_X2,
-	QUAD8_COUNT_FUNCTION_QUADRATURE_X4
-};
-
 static const enum counter_function quad8_count_functions_list[] = {
-	[QUAD8_COUNT_FUNCTION_PULSE_DIRECTION] = COUNTER_FUNCTION_PULSE_DIRECTION,
-	[QUAD8_COUNT_FUNCTION_QUADRATURE_X1] = COUNTER_FUNCTION_QUADRATURE_X1_A,
-	[QUAD8_COUNT_FUNCTION_QUADRATURE_X2] = COUNTER_FUNCTION_QUADRATURE_X2_A,
-	[QUAD8_COUNT_FUNCTION_QUADRATURE_X4] = COUNTER_FUNCTION_QUADRATURE_X4
+	COUNTER_FUNCTION_PULSE_DIRECTION,
+	COUNTER_FUNCTION_QUADRATURE_X1_A,
+	COUNTER_FUNCTION_QUADRATURE_X2_A,
+	COUNTER_FUNCTION_QUADRATURE_X4,
 };
 
-static int quad8_function_get(struct counter_device *counter,
-	struct counter_count *count, size_t *function)
+static int quad8_function_read(struct counter_device *counter,
+			       struct counter_count *count,
+			       enum counter_function *function)
 {
 	struct quad8 *const priv = counter->priv;
 	const int id = count->id;
@@ -213,25 +207,26 @@ static int quad8_function_get(struct counter_device *counter,
 	if (priv->quadrature_mode[id])
 		switch (priv->quadrature_scale[id]) {
 		case 0:
-			*function = QUAD8_COUNT_FUNCTION_QUADRATURE_X1;
+			*function = COUNTER_FUNCTION_QUADRATURE_X1_A;
 			break;
 		case 1:
-			*function = QUAD8_COUNT_FUNCTION_QUADRATURE_X2;
+			*function = COUNTER_FUNCTION_QUADRATURE_X2_A;
 			break;
 		case 2:
-			*function = QUAD8_COUNT_FUNCTION_QUADRATURE_X4;
+			*function = COUNTER_FUNCTION_QUADRATURE_X4;
 			break;
 		}
 	else
-		*function = QUAD8_COUNT_FUNCTION_PULSE_DIRECTION;
+		*function = COUNTER_FUNCTION_PULSE_DIRECTION;
 
 	mutex_unlock(&priv->lock);
 
 	return 0;
 }
 
-static int quad8_function_set(struct counter_device *counter,
-	struct counter_count *count, size_t function)
+static int quad8_function_write(struct counter_device *counter,
+				struct counter_count *count,
+				enum counter_function function)
 {
 	struct quad8 *const priv = counter->priv;
 	const int id = count->id;
@@ -247,7 +242,7 @@ static int quad8_function_set(struct counter_device *counter,
 	mode_cfg = priv->count_mode[id] << 1;
 	idr_cfg = priv->index_polarity[id] << 1;
 
-	if (function == QUAD8_COUNT_FUNCTION_PULSE_DIRECTION) {
+	if (function == COUNTER_FUNCTION_PULSE_DIRECTION) {
 		*quadrature_mode = 0;
 
 		/* Quadrature scaling only available in quadrature mode */
@@ -263,15 +258,15 @@ static int quad8_function_set(struct counter_device *counter,
 		*quadrature_mode = 1;
 
 		switch (function) {
-		case QUAD8_COUNT_FUNCTION_QUADRATURE_X1:
+		case COUNTER_FUNCTION_QUADRATURE_X1_A:
 			*scale = 0;
 			mode_cfg |= QUAD8_CMR_QUADRATURE_X1;
 			break;
-		case QUAD8_COUNT_FUNCTION_QUADRATURE_X2:
+		case COUNTER_FUNCTION_QUADRATURE_X2_A:
 			*scale = 1;
 			mode_cfg |= QUAD8_CMR_QUADRATURE_X2;
 			break;
-		case QUAD8_COUNT_FUNCTION_QUADRATURE_X4:
+		case COUNTER_FUNCTION_QUADRATURE_X4:
 			*scale = 2;
 			mode_cfg |= QUAD8_CMR_QUADRATURE_X4;
 			break;
@@ -290,8 +285,9 @@ static int quad8_function_set(struct counter_device *counter,
 	return 0;
 }
 
-static void quad8_direction_get(struct counter_device *counter,
-	struct counter_count *count, enum counter_count_direction *direction)
+static int quad8_direction_read(struct counter_device *counter,
+				struct counter_count *count,
+				enum counter_count_direction *direction)
 {
 	const struct quad8 *const priv = counter->priv;
 	unsigned int ud_flag;
@@ -302,76 +298,74 @@ static void quad8_direction_get(struct counter_device *counter,
 
 	*direction = (ud_flag) ? COUNTER_COUNT_DIRECTION_FORWARD :
 		COUNTER_COUNT_DIRECTION_BACKWARD;
-}
 
-enum quad8_synapse_action {
-	QUAD8_SYNAPSE_ACTION_NONE = 0,
-	QUAD8_SYNAPSE_ACTION_RISING_EDGE,
-	QUAD8_SYNAPSE_ACTION_FALLING_EDGE,
-	QUAD8_SYNAPSE_ACTION_BOTH_EDGES
-};
+	return 0;
+}
 
 static const enum counter_synapse_action quad8_index_actions_list[] = {
-	[QUAD8_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
-	[QUAD8_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE
+	COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
 };
 
 static const enum counter_synapse_action quad8_synapse_actions_list[] = {
-	[QUAD8_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
-	[QUAD8_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE,
-	[QUAD8_SYNAPSE_ACTION_FALLING_EDGE] = COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
-	[QUAD8_SYNAPSE_ACTION_BOTH_EDGES] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES
+	COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
 };
 
-static int quad8_action_get(struct counter_device *counter,
-	struct counter_count *count, struct counter_synapse *synapse,
-	size_t *action)
+static int quad8_action_read(struct counter_device *counter,
+			     struct counter_count *count,
+			     struct counter_synapse *synapse,
+			     enum counter_synapse_action *action)
 {
 	struct quad8 *const priv = counter->priv;
 	int err;
-	size_t function = 0;
+	enum counter_function function;
 	const size_t signal_a_id = count->synapses[0].signal->id;
 	enum counter_count_direction direction;
 
 	/* Handle Index signals */
 	if (synapse->signal->id >= 16) {
 		if (priv->preset_enable[count->id])
-			*action = QUAD8_SYNAPSE_ACTION_RISING_EDGE;
+			*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
 		else
-			*action = QUAD8_SYNAPSE_ACTION_NONE;
+			*action = COUNTER_SYNAPSE_ACTION_NONE;
 
 		return 0;
 	}
 
-	err = quad8_function_get(counter, count, &function);
+	err = quad8_function_read(counter, count, &function);
 	if (err)
 		return err;
 
 	/* Default action mode */
-	*action = QUAD8_SYNAPSE_ACTION_NONE;
+	*action = COUNTER_SYNAPSE_ACTION_NONE;
 
 	/* Determine action mode based on current count function mode */
 	switch (function) {
-	case QUAD8_COUNT_FUNCTION_PULSE_DIRECTION:
+	case COUNTER_FUNCTION_PULSE_DIRECTION:
 		if (synapse->signal->id == signal_a_id)
-			*action = QUAD8_SYNAPSE_ACTION_RISING_EDGE;
+			*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
 		return 0;
-	case QUAD8_COUNT_FUNCTION_QUADRATURE_X1:
+	case COUNTER_FUNCTION_QUADRATURE_X1_A:
 		if (synapse->signal->id == signal_a_id) {
-			quad8_direction_get(counter, count, &direction);
+			err = quad8_direction_read(counter, count, &direction);
+			if (err)
+				return err;
 
 			if (direction == COUNTER_COUNT_DIRECTION_FORWARD)
-				*action = QUAD8_SYNAPSE_ACTION_RISING_EDGE;
+				*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
 			else
-				*action = QUAD8_SYNAPSE_ACTION_FALLING_EDGE;
+				*action = COUNTER_SYNAPSE_ACTION_FALLING_EDGE;
 		}
 		return 0;
-	case QUAD8_COUNT_FUNCTION_QUADRATURE_X2:
+	case COUNTER_FUNCTION_QUADRATURE_X2_A:
 		if (synapse->signal->id == signal_a_id)
-			*action = QUAD8_SYNAPSE_ACTION_BOTH_EDGES;
+			*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		return 0;
-	case QUAD8_COUNT_FUNCTION_QUADRATURE_X4:
-		*action = QUAD8_SYNAPSE_ACTION_BOTH_EDGES;
+	case COUNTER_FUNCTION_QUADRATURE_X4:
+		*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		return 0;
 	default:
 		/* should never reach this path */
@@ -383,9 +377,9 @@ static const struct counter_ops quad8_ops = {
 	.signal_read = quad8_signal_read,
 	.count_read = quad8_count_read,
 	.count_write = quad8_count_write,
-	.function_get = quad8_function_get,
-	.function_set = quad8_function_set,
-	.action_get = quad8_action_get
+	.function_read = quad8_function_read,
+	.function_write = quad8_function_write,
+	.action_read = quad8_action_read
 };
 
 static const char *const quad8_index_polarity_modes[] = {
@@ -394,7 +388,8 @@ static const char *const quad8_index_polarity_modes[] = {
 };
 
 static int quad8_index_polarity_get(struct counter_device *counter,
-	struct counter_signal *signal, size_t *index_polarity)
+				    struct counter_signal *signal,
+				    u32 *index_polarity)
 {
 	const struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id - 16;
@@ -405,7 +400,8 @@ static int quad8_index_polarity_get(struct counter_device *counter,
 }
 
 static int quad8_index_polarity_set(struct counter_device *counter,
-	struct counter_signal *signal, size_t index_polarity)
+				    struct counter_signal *signal,
+				    u32 index_polarity)
 {
 	struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id - 16;
@@ -426,20 +422,14 @@ static int quad8_index_polarity_set(struct counter_device *counter,
 	return 0;
 }
 
-static struct counter_signal_enum_ext quad8_index_pol_enum = {
-	.items = quad8_index_polarity_modes,
-	.num_items = ARRAY_SIZE(quad8_index_polarity_modes),
-	.get = quad8_index_polarity_get,
-	.set = quad8_index_polarity_set
-};
-
 static const char *const quad8_synchronous_modes[] = {
 	"non-synchronous",
 	"synchronous"
 };
 
 static int quad8_synchronous_mode_get(struct counter_device *counter,
-	struct counter_signal *signal, size_t *synchronous_mode)
+				      struct counter_signal *signal,
+				      u32 *synchronous_mode)
 {
 	const struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id - 16;
@@ -450,7 +440,8 @@ static int quad8_synchronous_mode_get(struct counter_device *counter,
 }
 
 static int quad8_synchronous_mode_set(struct counter_device *counter,
-	struct counter_signal *signal, size_t synchronous_mode)
+				      struct counter_signal *signal,
+				      u32 synchronous_mode)
 {
 	struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id - 16;
@@ -477,22 +468,18 @@ static int quad8_synchronous_mode_set(struct counter_device *counter,
 	return 0;
 }
 
-static struct counter_signal_enum_ext quad8_syn_mode_enum = {
-	.items = quad8_synchronous_modes,
-	.num_items = ARRAY_SIZE(quad8_synchronous_modes),
-	.get = quad8_synchronous_mode_get,
-	.set = quad8_synchronous_mode_set
-};
-
-static ssize_t quad8_count_floor_read(struct counter_device *counter,
-	struct counter_count *count, void *private, char *buf)
+static int quad8_count_floor_read(struct counter_device *counter,
+				  struct counter_count *count, u64 *floor)
 {
 	/* Only a floor of 0 is supported */
-	return sprintf(buf, "0\n");
+	*floor = 0;
+
+	return 0;
 }
 
-static int quad8_count_mode_get(struct counter_device *counter,
-	struct counter_count *count, size_t *cnt_mode)
+static int quad8_count_mode_read(struct counter_device *counter,
+				 struct counter_count *count,
+				 enum counter_count_mode *cnt_mode)
 {
 	const struct quad8 *const priv = counter->priv;
 
@@ -515,26 +502,28 @@ static int quad8_count_mode_get(struct counter_device *counter,
 	return 0;
 }
 
-static int quad8_count_mode_set(struct counter_device *counter,
-	struct counter_count *count, size_t cnt_mode)
+static int quad8_count_mode_write(struct counter_device *counter,
+				  struct counter_count *count,
+				  enum counter_count_mode cnt_mode)
 {
 	struct quad8 *const priv = counter->priv;
+	unsigned int count_mode;
 	unsigned int mode_cfg;
 	const int base_offset = priv->base + 2 * count->id + 1;
 
 	/* Map Generic Counter count mode to 104-QUAD-8 count mode */
 	switch (cnt_mode) {
 	case COUNTER_COUNT_MODE_NORMAL:
-		cnt_mode = 0;
+		count_mode = 0;
 		break;
 	case COUNTER_COUNT_MODE_RANGE_LIMIT:
-		cnt_mode = 1;
+		count_mode = 1;
 		break;
 	case COUNTER_COUNT_MODE_NON_RECYCLE:
-		cnt_mode = 2;
+		count_mode = 2;
 		break;
 	case COUNTER_COUNT_MODE_MODULO_N:
-		cnt_mode = 3;
+		count_mode = 3;
 		break;
 	default:
 		/* should never reach this path */
@@ -543,10 +532,10 @@ static int quad8_count_mode_set(struct counter_device *counter,
 
 	mutex_lock(&priv->lock);
 
-	priv->count_mode[count->id] = cnt_mode;
+	priv->count_mode[count->id] = count_mode;
 
 	/* Set count mode configuration value */
-	mode_cfg = cnt_mode << 1;
+	mode_cfg = count_mode << 1;
 
 	/* Add quadrature mode configuration */
 	if (priv->quadrature_mode[count->id])
@@ -560,56 +549,35 @@ static int quad8_count_mode_set(struct counter_device *counter,
 	return 0;
 }
 
-static struct counter_count_enum_ext quad8_cnt_mode_enum = {
-	.items = counter_count_mode_str,
-	.num_items = ARRAY_SIZE(counter_count_mode_str),
-	.get = quad8_count_mode_get,
-	.set = quad8_count_mode_set
-};
-
-static ssize_t quad8_count_direction_read(struct counter_device *counter,
-	struct counter_count *count, void *priv, char *buf)
-{
-	enum counter_count_direction dir;
-
-	quad8_direction_get(counter, count, &dir);
-
-	return sprintf(buf, "%s\n", counter_count_direction_str[dir]);
-}
-
-static ssize_t quad8_count_enable_read(struct counter_device *counter,
-	struct counter_count *count, void *private, char *buf)
+static int quad8_count_enable_read(struct counter_device *counter,
+				   struct counter_count *count, u8 *enable)
 {
 	const struct quad8 *const priv = counter->priv;
 
-	return sprintf(buf, "%u\n", priv->ab_enable[count->id]);
+	*enable = priv->ab_enable[count->id];
+
+	return 0;
 }
 
-static ssize_t quad8_count_enable_write(struct counter_device *counter,
-	struct counter_count *count, void *private, const char *buf, size_t len)
+static int quad8_count_enable_write(struct counter_device *counter,
+				    struct counter_count *count, u8 enable)
 {
 	struct quad8 *const priv = counter->priv;
 	const int base_offset = priv->base + 2 * count->id;
-	int err;
-	bool ab_enable;
 	unsigned int ior_cfg;
 
-	err = kstrtobool(buf, &ab_enable);
-	if (err)
-		return err;
-
 	mutex_lock(&priv->lock);
 
-	priv->ab_enable[count->id] = ab_enable;
+	priv->ab_enable[count->id] = enable;
 
-	ior_cfg = ab_enable | priv->preset_enable[count->id] << 1;
+	ior_cfg = enable | priv->preset_enable[count->id] << 1;
 
 	/* Load I/O control configuration */
 	outb(QUAD8_CTR_IOR | ior_cfg, base_offset + 1);
 
 	mutex_unlock(&priv->lock);
 
-	return len;
+	return 0;
 }
 
 static const char *const quad8_noise_error_states[] = {
@@ -618,7 +586,7 @@ static const char *const quad8_noise_error_states[] = {
 };
 
 static int quad8_error_noise_get(struct counter_device *counter,
-	struct counter_count *count, size_t *noise_error)
+				 struct counter_count *count, u32 *noise_error)
 {
 	const struct quad8 *const priv = counter->priv;
 	const int base_offset = priv->base + 2 * count->id + 1;
@@ -628,18 +596,14 @@ static int quad8_error_noise_get(struct counter_device *counter,
 	return 0;
 }
 
-static struct counter_count_enum_ext quad8_error_noise_enum = {
-	.items = quad8_noise_error_states,
-	.num_items = ARRAY_SIZE(quad8_noise_error_states),
-	.get = quad8_error_noise_get
-};
-
-static ssize_t quad8_count_preset_read(struct counter_device *counter,
-	struct counter_count *count, void *private, char *buf)
+static int quad8_count_preset_read(struct counter_device *counter,
+				   struct counter_count *count, u64 *preset)
 {
 	const struct quad8 *const priv = counter->priv;
 
-	return sprintf(buf, "%u\n", priv->preset[count->id]);
+	*preset = priv->preset[count->id];
+
+	return 0;
 }
 
 static void quad8_preset_register_set(struct quad8 *const priv, const int id,
@@ -658,16 +622,10 @@ static void quad8_preset_register_set(struct quad8 *const priv, const int id,
 		outb(preset >> (8 * i), base_offset);
 }
 
-static ssize_t quad8_count_preset_write(struct counter_device *counter,
-	struct counter_count *count, void *private, const char *buf, size_t len)
+static int quad8_count_preset_write(struct counter_device *counter,
+				    struct counter_count *count, u64 preset)
 {
 	struct quad8 *const priv = counter->priv;
-	unsigned int preset;
-	int ret;
-
-	ret = kstrtouint(buf, 0, &preset);
-	if (ret)
-		return ret;
 
 	/* Only 24-bit values are supported */
 	if (preset > 0xFFFFFF)
@@ -679,11 +637,11 @@ static ssize_t quad8_count_preset_write(struct counter_device *counter,
 
 	mutex_unlock(&priv->lock);
 
-	return len;
+	return 0;
 }
 
-static ssize_t quad8_count_ceiling_read(struct counter_device *counter,
-	struct counter_count *count, void *private, char *buf)
+static int quad8_count_ceiling_read(struct counter_device *counter,
+				    struct counter_count *count, u64 *ceiling)
 {
 	struct quad8 *const priv = counter->priv;
 
@@ -693,26 +651,23 @@ static ssize_t quad8_count_ceiling_read(struct counter_device *counter,
 	switch (priv->count_mode[count->id]) {
 	case 1:
 	case 3:
-		mutex_unlock(&priv->lock);
-		return sprintf(buf, "%u\n", priv->preset[count->id]);
+		*ceiling = priv->preset[count->id];
+		break;
+	default:
+		/* By default 0x1FFFFFF (25 bits unsigned) is maximum count */
+		*ceiling = 0x1FFFFFF;
+		break;
 	}
 
 	mutex_unlock(&priv->lock);
 
-	/* By default 0x1FFFFFF (25 bits unsigned) is maximum count */
-	return sprintf(buf, "33554431\n");
+	return 0;
 }
 
-static ssize_t quad8_count_ceiling_write(struct counter_device *counter,
-	struct counter_count *count, void *private, const char *buf, size_t len)
+static int quad8_count_ceiling_write(struct counter_device *counter,
+				     struct counter_count *count, u64 ceiling)
 {
 	struct quad8 *const priv = counter->priv;
-	unsigned int ceiling;
-	int ret;
-
-	ret = kstrtouint(buf, 0, &ceiling);
-	if (ret)
-		return ret;
 
 	/* Only 24-bit values are supported */
 	if (ceiling > 0xFFFFFF)
@@ -726,7 +681,7 @@ static ssize_t quad8_count_ceiling_write(struct counter_device *counter,
 	case 3:
 		quad8_preset_register_set(priv, count->id, ceiling);
 		mutex_unlock(&priv->lock);
-		return len;
+		return 0;
 	}
 
 	mutex_unlock(&priv->lock);
@@ -734,27 +689,25 @@ static ssize_t quad8_count_ceiling_write(struct counter_device *counter,
 	return -EINVAL;
 }
 
-static ssize_t quad8_count_preset_enable_read(struct counter_device *counter,
-	struct counter_count *count, void *private, char *buf)
+static int quad8_count_preset_enable_read(struct counter_device *counter,
+					  struct counter_count *count,
+					  u8 *preset_enable)
 {
 	const struct quad8 *const priv = counter->priv;
 
-	return sprintf(buf, "%u\n", !priv->preset_enable[count->id]);
+	*preset_enable = !priv->preset_enable[count->id];
+
+	return 0;
 }
 
-static ssize_t quad8_count_preset_enable_write(struct counter_device *counter,
-	struct counter_count *count, void *private, const char *buf, size_t len)
+static int quad8_count_preset_enable_write(struct counter_device *counter,
+					   struct counter_count *count,
+					   u8 preset_enable)
 {
 	struct quad8 *const priv = counter->priv;
 	const int base_offset = priv->base + 2 * count->id + 1;
-	bool preset_enable;
-	int ret;
 	unsigned int ior_cfg;
 
-	ret = kstrtobool(buf, &preset_enable);
-	if (ret)
-		return ret;
-
 	/* Preset enable is active low in Input/Output Control register */
 	preset_enable = !preset_enable;
 
@@ -762,25 +715,24 @@ static ssize_t quad8_count_preset_enable_write(struct counter_device *counter,
 
 	priv->preset_enable[count->id] = preset_enable;
 
-	ior_cfg = priv->ab_enable[count->id] | (unsigned int)preset_enable << 1;
+	ior_cfg = priv->ab_enable[count->id] | preset_enable << 1;
 
 	/* Load I/O control configuration to Input / Output Control Register */
 	outb(QUAD8_CTR_IOR | ior_cfg, base_offset);
 
 	mutex_unlock(&priv->lock);
 
-	return len;
+	return 0;
 }
 
-static ssize_t quad8_signal_cable_fault_read(struct counter_device *counter,
-					     struct counter_signal *signal,
-					     void *private, char *buf)
+static int quad8_signal_cable_fault_read(struct counter_device *counter,
+					 struct counter_signal *signal,
+					 u8 *cable_fault)
 {
 	struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id / 2;
 	bool disabled;
 	unsigned int status;
-	unsigned int fault;
 
 	mutex_lock(&priv->lock);
 
@@ -797,36 +749,31 @@ static ssize_t quad8_signal_cable_fault_read(struct counter_device *counter,
 	mutex_unlock(&priv->lock);
 
 	/* Mask respective channel and invert logic */
-	fault = !(status & BIT(channel_id));
+	*cable_fault = !(status & BIT(channel_id));
 
-	return sprintf(buf, "%u\n", fault);
+	return 0;
 }
 
-static ssize_t quad8_signal_cable_fault_enable_read(
-	struct counter_device *counter, struct counter_signal *signal,
-	void *private, char *buf)
+static int quad8_signal_cable_fault_enable_read(struct counter_device *counter,
+						struct counter_signal *signal,
+						u8 *enable)
 {
 	const struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id / 2;
-	const unsigned int enb = !!(priv->cable_fault_enable & BIT(channel_id));
 
-	return sprintf(buf, "%u\n", enb);
+	*enable = !!(priv->cable_fault_enable & BIT(channel_id));
+
+	return 0;
 }
 
-static ssize_t quad8_signal_cable_fault_enable_write(
-	struct counter_device *counter, struct counter_signal *signal,
-	void *private, const char *buf, size_t len)
+static int quad8_signal_cable_fault_enable_write(struct counter_device *counter,
+						 struct counter_signal *signal,
+						 u8 enable)
 {
 	struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id / 2;
-	bool enable;
-	int ret;
 	unsigned int cable_fault_enable;
 
-	ret = kstrtobool(buf, &enable);
-	if (ret)
-		return ret;
-
 	mutex_lock(&priv->lock);
 
 	if (enable)
@@ -841,31 +788,27 @@ static ssize_t quad8_signal_cable_fault_enable_write(
 
 	mutex_unlock(&priv->lock);
 
-	return len;
+	return 0;
 }
 
-static ssize_t quad8_signal_fck_prescaler_read(struct counter_device *counter,
-	struct counter_signal *signal, void *private, char *buf)
+static int quad8_signal_fck_prescaler_read(struct counter_device *counter,
+					   struct counter_signal *signal,
+					   u8 *prescaler)
 {
 	const struct quad8 *const priv = counter->priv;
-	const size_t channel_id = signal->id / 2;
 
-	return sprintf(buf, "%u\n", priv->fck_prescaler[channel_id]);
+	*prescaler = priv->fck_prescaler[signal->id / 2];
+
+	return 0;
 }
 
-static ssize_t quad8_signal_fck_prescaler_write(struct counter_device *counter,
-	struct counter_signal *signal, void *private, const char *buf,
-	size_t len)
+static int quad8_signal_fck_prescaler_write(struct counter_device *counter,
+					    struct counter_signal *signal,
+					    u8 prescaler)
 {
 	struct quad8 *const priv = counter->priv;
 	const size_t channel_id = signal->id / 2;
 	const int base_offset = priv->base + 2 * channel_id;
-	u8 prescaler;
-	int ret;
-
-	ret = kstrtou8(buf, 0, &prescaler);
-	if (ret)
-		return ret;
 
 	mutex_lock(&priv->lock);
 
@@ -881,31 +824,30 @@ static ssize_t quad8_signal_fck_prescaler_write(struct counter_device *counter,
 
 	mutex_unlock(&priv->lock);
 
-	return len;
+	return 0;
 }
 
-static const struct counter_signal_ext quad8_signal_ext[] = {
-	{
-		.name = "cable_fault",
-		.read = quad8_signal_cable_fault_read
-	},
-	{
-		.name = "cable_fault_enable",
-		.read = quad8_signal_cable_fault_enable_read,
-		.write = quad8_signal_cable_fault_enable_write
-	},
-	{
-		.name = "filter_clock_prescaler",
-		.read = quad8_signal_fck_prescaler_read,
-		.write = quad8_signal_fck_prescaler_write
-	}
+static struct counter_comp quad8_signal_ext[] = {
+	COUNTER_COMP_SIGNAL_BOOL("cable_fault", quad8_signal_cable_fault_read,
+				 NULL),
+	COUNTER_COMP_SIGNAL_BOOL("cable_fault_enable",
+				 quad8_signal_cable_fault_enable_read,
+				 quad8_signal_cable_fault_enable_write),
+	COUNTER_COMP_SIGNAL_U8("filter_clock_prescaler",
+			       quad8_signal_fck_prescaler_read,
+			       quad8_signal_fck_prescaler_write)
 };
 
-static const struct counter_signal_ext quad8_index_ext[] = {
-	COUNTER_SIGNAL_ENUM("index_polarity", &quad8_index_pol_enum),
-	COUNTER_SIGNAL_ENUM_AVAILABLE("index_polarity",	&quad8_index_pol_enum),
-	COUNTER_SIGNAL_ENUM("synchronous_mode", &quad8_syn_mode_enum),
-	COUNTER_SIGNAL_ENUM_AVAILABLE("synchronous_mode", &quad8_syn_mode_enum)
+static DEFINE_COUNTER_ENUM(quad8_index_pol_enum, quad8_index_polarity_modes);
+static DEFINE_COUNTER_ENUM(quad8_synch_mode_enum, quad8_synchronous_modes);
+
+static struct counter_comp quad8_index_ext[] = {
+	COUNTER_COMP_SIGNAL_ENUM("index_polarity", quad8_index_polarity_get,
+				 quad8_index_polarity_set,
+				 quad8_index_pol_enum),
+	COUNTER_COMP_SIGNAL_ENUM("synchronous_mode", quad8_synchronous_mode_get,
+				 quad8_synchronous_mode_set,
+				 quad8_synch_mode_enum),
 };
 
 #define QUAD8_QUAD_SIGNAL(_id, _name) {		\
@@ -974,39 +916,30 @@ static struct counter_synapse quad8_count_synapses[][3] = {
 	QUAD8_COUNT_SYNAPSES(6), QUAD8_COUNT_SYNAPSES(7)
 };
 
-static const struct counter_count_ext quad8_count_ext[] = {
-	{
-		.name = "ceiling",
-		.read = quad8_count_ceiling_read,
-		.write = quad8_count_ceiling_write
-	},
-	{
-		.name = "floor",
-		.read = quad8_count_floor_read
-	},
-	COUNTER_COUNT_ENUM("count_mode", &quad8_cnt_mode_enum),
-	COUNTER_COUNT_ENUM_AVAILABLE("count_mode", &quad8_cnt_mode_enum),
-	{
-		.name = "direction",
-		.read = quad8_count_direction_read
-	},
-	{
-		.name = "enable",
-		.read = quad8_count_enable_read,
-		.write = quad8_count_enable_write
-	},
-	COUNTER_COUNT_ENUM("error_noise", &quad8_error_noise_enum),
-	COUNTER_COUNT_ENUM_AVAILABLE("error_noise", &quad8_error_noise_enum),
-	{
-		.name = "preset",
-		.read = quad8_count_preset_read,
-		.write = quad8_count_preset_write
-	},
-	{
-		.name = "preset_enable",
-		.read = quad8_count_preset_enable_read,
-		.write = quad8_count_preset_enable_write
-	}
+static const enum counter_count_mode quad8_cnt_modes[] = {
+	COUNTER_COUNT_MODE_NORMAL,
+	COUNTER_COUNT_MODE_RANGE_LIMIT,
+	COUNTER_COUNT_MODE_NON_RECYCLE,
+	COUNTER_COUNT_MODE_MODULO_N,
+};
+
+static DEFINE_COUNTER_AVAILABLE(quad8_count_mode_available, quad8_cnt_modes);
+
+static DEFINE_COUNTER_ENUM(quad8_error_noise_enum, quad8_noise_error_states);
+
+static struct counter_comp quad8_count_ext[] = {
+	COUNTER_COMP_CEILING(quad8_count_ceiling_read,
+			     quad8_count_ceiling_write),
+	COUNTER_COMP_FLOOR(quad8_count_floor_read, NULL),
+	COUNTER_COMP_COUNT_MODE(quad8_count_mode_read, quad8_count_mode_write,
+				quad8_count_mode_available),
+	COUNTER_COMP_DIRECTION(quad8_direction_read),
+	COUNTER_COMP_ENABLE(quad8_count_enable_read, quad8_count_enable_write),
+	COUNTER_COMP_COUNT_ENUM("error_noise", quad8_error_noise_get, NULL,
+				quad8_error_noise_enum),
+	COUNTER_COMP_PRESET(quad8_count_preset_read, quad8_count_preset_write),
+	COUNTER_COMP_PRESET_ENABLE(quad8_count_preset_enable_read,
+				   quad8_count_preset_enable_write),
 };
 
 #define QUAD8_COUNT(_id, _cntname) {					\
diff --git a/drivers/counter/Makefile b/drivers/counter/Makefile
index 19742e6f5e3e..1ab7e087fdc2 100644
--- a/drivers/counter/Makefile
+++ b/drivers/counter/Makefile
@@ -4,6 +4,7 @@
 #
 
 obj-$(CONFIG_COUNTER) += counter.o
+counter-y := counter-core.o counter-sysfs.o
 
 obj-$(CONFIG_104_QUAD_8)	+= 104-quad-8.o
 obj-$(CONFIG_INTERRUPT_CNT)		+= interrupt-cnt.o
diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
new file mode 100644
index 000000000000..c533a6ff12cf
--- /dev/null
+++ b/drivers/counter/counter-core.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic Counter interface
+ * Copyright (C) 2020 William Breathitt Gray
+ */
+#include <linux/counter.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include "counter-sysfs.h"
+
+/* Provides a unique ID for each counter device */
+static DEFINE_IDA(counter_ida);
+
+static void counter_device_release(struct device *dev)
+{
+	ida_free(&counter_ida, dev->id);
+}
+
+static struct device_type counter_device_type = {
+	.name = "counter_device",
+	.release = counter_device_release,
+};
+
+static struct bus_type counter_bus_type = {
+	.name = "counter",
+	.dev_name = "counter",
+};
+
+/**
+ * counter_register - register Counter to the system
+ * @counter:	pointer to Counter to register
+ *
+ * This function registers a Counter to the system. A sysfs "counter" directory
+ * will be created and populated with sysfs attributes correlating with the
+ * Counter Signals, Synapses, and Counts respectively.
+ */
+int counter_register(struct counter_device *const counter)
+{
+	struct device *const dev = &counter->dev;
+	int id;
+	int err;
+
+	/* Acquire unique ID */
+	id = ida_alloc(&counter_ida, GFP_KERNEL);
+	if (id < 0)
+		return id;
+
+	/* Configure device structure for Counter */
+	dev->id = id;
+	dev->type = &counter_device_type;
+	dev->bus = &counter_bus_type;
+	if (counter->parent) {
+		dev->parent = counter->parent;
+		dev->of_node = counter->parent->of_node;
+	}
+	device_initialize(dev);
+	dev_set_drvdata(dev, counter);
+
+	/* Add Counter sysfs attributes */
+	err = counter_sysfs_add(counter);
+	if (err < 0)
+		goto err_free_id;
+
+	/* Add device to system */
+	err = device_add(dev);
+	if (err < 0)
+		goto err_free_id;
+
+	return 0;
+
+err_free_id:
+	put_device(dev);
+	return err;
+}
+EXPORT_SYMBOL_GPL(counter_register);
+
+/**
+ * counter_unregister - unregister Counter from the system
+ * @counter:	pointer to Counter to unregister
+ *
+ * The Counter is unregistered from the system.
+ */
+void counter_unregister(struct counter_device *const counter)
+{
+	if (!counter)
+		return;
+
+	device_unregister(&counter->dev);
+}
+EXPORT_SYMBOL_GPL(counter_unregister);
+
+static void devm_counter_release(void *counter)
+{
+	counter_unregister(counter);
+}
+
+/**
+ * devm_counter_register - Resource-managed counter_register
+ * @dev:	device to allocate counter_device for
+ * @counter:	pointer to Counter to register
+ *
+ * Managed counter_register. The Counter registered with this function is
+ * automatically unregistered on driver detach. This function calls
+ * counter_register internally. Refer to that function for more information.
+ *
+ * RETURNS:
+ * 0 on success, negative error number on failure.
+ */
+int devm_counter_register(struct device *dev,
+			  struct counter_device *const counter)
+{
+	int err;
+
+	err = counter_register(counter);
+	if (err < 0)
+		return err;
+
+	return devm_add_action_or_reset(dev, devm_counter_release, counter);
+}
+EXPORT_SYMBOL_GPL(devm_counter_register);
+
+static int __init counter_init(void)
+{
+	return bus_register(&counter_bus_type);
+}
+
+static void __exit counter_exit(void)
+{
+	bus_unregister(&counter_bus_type);
+}
+
+subsys_initcall(counter_init);
+module_exit(counter_exit);
+
+MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
+MODULE_DESCRIPTION("Generic Counter interface");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c
new file mode 100644
index 000000000000..108cbd838eb9
--- /dev/null
+++ b/drivers/counter/counter-sysfs.c
@@ -0,0 +1,849 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic Counter sysfs interface
+ * Copyright (C) 2020 William Breathitt Gray
+ */
+#include <linux/counter.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/kstrtox.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include "counter-sysfs.h"
+
+/**
+ * struct counter_attribute - Counter sysfs attribute
+ * @dev_attr:	device attribute for sysfs
+ * @l:		node to add Counter attribute to attribute group list
+ * @comp:	Counter component callbacks and data
+ * @scope:	Counter scope of the attribute
+ * @parent:	pointer to the parent component
+ */
+struct counter_attribute {
+	struct device_attribute dev_attr;
+	struct list_head l;
+
+	struct counter_comp comp;
+	enum counter_scope scope;
+	void *parent;
+};
+
+#define to_counter_attribute(_dev_attr) \
+	container_of(_dev_attr, struct counter_attribute, dev_attr)
+
+/**
+ * struct counter_attribute_group - container for attribute group
+ * @name:	name of the attribute group
+ * @attr_list:	list to keep track of created attributes
+ * @num_attr:	number of attributes
+ */
+struct counter_attribute_group {
+	const char *name;
+	struct list_head attr_list;
+	size_t num_attr;
+};
+
+static const char *const counter_function_str[] = {
+	[COUNTER_FUNCTION_INCREASE] = "increase",
+	[COUNTER_FUNCTION_DECREASE] = "decrease",
+	[COUNTER_FUNCTION_PULSE_DIRECTION] = "pulse-direction",
+	[COUNTER_FUNCTION_QUADRATURE_X1_A] = "quadrature x1 a",
+	[COUNTER_FUNCTION_QUADRATURE_X1_B] = "quadrature x1 b",
+	[COUNTER_FUNCTION_QUADRATURE_X2_A] = "quadrature x2 a",
+	[COUNTER_FUNCTION_QUADRATURE_X2_B] = "quadrature x2 b",
+	[COUNTER_FUNCTION_QUADRATURE_X4] = "quadrature x4"
+};
+
+static const char *const counter_signal_value_str[] = {
+	[COUNTER_SIGNAL_LEVEL_LOW] = "low",
+	[COUNTER_SIGNAL_LEVEL_HIGH] = "high"
+};
+
+static const char *const counter_synapse_action_str[] = {
+	[COUNTER_SYNAPSE_ACTION_NONE] = "none",
+	[COUNTER_SYNAPSE_ACTION_RISING_EDGE] = "rising edge",
+	[COUNTER_SYNAPSE_ACTION_FALLING_EDGE] = "falling edge",
+	[COUNTER_SYNAPSE_ACTION_BOTH_EDGES] = "both edges"
+};
+
+static const char *const counter_count_direction_str[] = {
+	[COUNTER_COUNT_DIRECTION_FORWARD] = "forward",
+	[COUNTER_COUNT_DIRECTION_BACKWARD] = "backward"
+};
+
+static const char *const counter_count_mode_str[] = {
+	[COUNTER_COUNT_MODE_NORMAL] = "normal",
+	[COUNTER_COUNT_MODE_RANGE_LIMIT] = "range limit",
+	[COUNTER_COUNT_MODE_NON_RECYCLE] = "non-recycle",
+	[COUNTER_COUNT_MODE_MODULO_N] = "modulo-n"
+};
+
+static ssize_t counter_comp_u8_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	const struct counter_attribute *const a = to_counter_attribute(attr);
+	struct counter_device *const counter = dev_get_drvdata(dev);
+	int err;
+	u8 data = 0;
+
+	switch (a->scope) {
+	case COUNTER_SCOPE_DEVICE:
+		err = a->comp.device_u8_read(counter, &data);
+		break;
+	case COUNTER_SCOPE_SIGNAL:
+		err = a->comp.signal_u8_read(counter, a->parent, &data);
+		break;
+	case COUNTER_SCOPE_COUNT:
+		err = a->comp.count_u8_read(counter, a->parent, &data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	if (a->comp.type == COUNTER_COMP_BOOL)
+		/* data should already be boolean but ensure just to be safe */
+		data = !!data;
+
+	return sprintf(buf, "%u\n", (unsigned int)data);
+}
+
+static ssize_t counter_comp_u8_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t len)
+{
+	const struct counter_attribute *const a = to_counter_attribute(attr);
+	struct counter_device *const counter = dev_get_drvdata(dev);
+	int err;
+	bool bool_data = 0;
+	u8 data = 0;
+
+	if (a->comp.type == COUNTER_COMP_BOOL) {
+		err = kstrtobool(buf, &bool_data);
+		data = bool_data;
+	} else
+		err = kstrtou8(buf, 0, &data);
+	if (err < 0)
+		return err;
+
+	switch (a->scope) {
+	case COUNTER_SCOPE_DEVICE:
+		err = a->comp.device_u8_write(counter, data);
+		break;
+	case COUNTER_SCOPE_SIGNAL:
+		err = a->comp.signal_u8_write(counter, a->parent, data);
+		break;
+	case COUNTER_SCOPE_COUNT:
+		err = a->comp.count_u8_write(counter, a->parent, data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	return len;
+}
+
+static ssize_t counter_comp_u32_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	const struct counter_attribute *const a = to_counter_attribute(attr);
+	struct counter_device *const counter = dev_get_drvdata(dev);
+	const struct counter_available *const avail = a->comp.priv;
+	int err;
+	u32 data = 0;
+
+	switch (a->scope) {
+	case COUNTER_SCOPE_DEVICE:
+		err = a->comp.device_u32_read(counter, &data);
+		break;
+	case COUNTER_SCOPE_SIGNAL:
+		err = a->comp.signal_u32_read(counter, a->parent, &data);
+		break;
+	case COUNTER_SCOPE_COUNT:
+		if (a->comp.type == COUNTER_COMP_SYNAPSE_ACTION)
+			err = a->comp.action_read(counter, a->parent,
+						  a->comp.priv, &data);
+		else
+			err = a->comp.count_u32_read(counter, a->parent, &data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	switch (a->comp.type) {
+	case COUNTER_COMP_FUNCTION:
+		return sysfs_emit(buf, "%s\n", counter_function_str[data]);
+	case COUNTER_COMP_SIGNAL_LEVEL:
+		return sysfs_emit(buf, "%s\n", counter_signal_value_str[data]);
+	case COUNTER_COMP_SYNAPSE_ACTION:
+		return sysfs_emit(buf, "%s\n", counter_synapse_action_str[data]);
+	case COUNTER_COMP_ENUM:
+		return sysfs_emit(buf, "%s\n", avail->strs[data]);
+	case COUNTER_COMP_COUNT_DIRECTION:
+		return sysfs_emit(buf, "%s\n", counter_count_direction_str[data]);
+	case COUNTER_COMP_COUNT_MODE:
+		return sysfs_emit(buf, "%s\n", counter_count_mode_str[data]);
+	default:
+		return sprintf(buf, "%u\n", (unsigned int)data);
+	}
+}
+
+static int counter_find_enum(u32 *const enum_item, const u32 *const enums,
+			     const size_t num_enums, const char *const buf,
+			     const char *const string_array[])
+{
+	size_t index;
+
+	for (index = 0; index < num_enums; index++) {
+		*enum_item = enums[index];
+		if (sysfs_streq(buf, string_array[*enum_item]))
+			return 0;
+	}
+
+	return -EINVAL;
+}
+
+static ssize_t counter_comp_u32_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t len)
+{
+	const struct counter_attribute *const a = to_counter_attribute(attr);
+	struct counter_device *const counter = dev_get_drvdata(dev);
+	struct counter_count *const count = a->parent;
+	struct counter_synapse *const synapse = a->comp.priv;
+	const struct counter_available *const avail = a->comp.priv;
+	int err;
+	u32 data = 0;
+
+	switch (a->comp.type) {
+	case COUNTER_COMP_FUNCTION:
+		err = counter_find_enum(&data, count->functions_list,
+					count->num_functions, buf,
+					counter_function_str);
+		break;
+	case COUNTER_COMP_SYNAPSE_ACTION:
+		err = counter_find_enum(&data, synapse->actions_list,
+					synapse->num_actions, buf,
+					counter_synapse_action_str);
+		break;
+	case COUNTER_COMP_ENUM:
+		err = __sysfs_match_string(avail->strs, avail->num_items, buf);
+		data = err;
+		break;
+	case COUNTER_COMP_COUNT_MODE:
+		err = counter_find_enum(&data, avail->enums, avail->num_items,
+					buf, counter_count_mode_str);
+		break;
+	default:
+		err = kstrtou32(buf, 0, &data);
+		break;
+	}
+	if (err < 0)
+		return err;
+
+	switch (a->scope) {
+	case COUNTER_SCOPE_DEVICE:
+		err = a->comp.device_u32_write(counter, data);
+		break;
+	case COUNTER_SCOPE_SIGNAL:
+		err = a->comp.signal_u32_write(counter, a->parent, data);
+		break;
+	case COUNTER_SCOPE_COUNT:
+		if (a->comp.type == COUNTER_COMP_SYNAPSE_ACTION)
+			err = a->comp.action_write(counter, count, synapse,
+						   data);
+		else
+			err = a->comp.count_u32_write(counter, count, data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	return len;
+}
+
+static ssize_t counter_comp_u64_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	const struct counter_attribute *const a = to_counter_attribute(attr);
+	struct counter_device *const counter = dev_get_drvdata(dev);
+	int err;
+	u64 data = 0;
+
+	switch (a->scope) {
+	case COUNTER_SCOPE_DEVICE:
+		err = a->comp.device_u64_read(counter, &data);
+		break;
+	case COUNTER_SCOPE_SIGNAL:
+		err = a->comp.signal_u64_read(counter, a->parent, &data);
+		break;
+	case COUNTER_SCOPE_COUNT:
+		err = a->comp.count_u64_read(counter, a->parent, &data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	return sprintf(buf, "%llu\n", (unsigned long long)data);
+}
+
+static ssize_t counter_comp_u64_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t len)
+{
+	const struct counter_attribute *const a = to_counter_attribute(attr);
+	struct counter_device *const counter = dev_get_drvdata(dev);
+	int err;
+	u64 data = 0;
+
+	err = kstrtou64(buf, 0, &data);
+	if (err < 0)
+		return err;
+
+	switch (a->scope) {
+	case COUNTER_SCOPE_DEVICE:
+		err = a->comp.device_u64_write(counter, data);
+		break;
+	case COUNTER_SCOPE_SIGNAL:
+		err = a->comp.signal_u64_write(counter, a->parent, data);
+		break;
+	case COUNTER_SCOPE_COUNT:
+		err = a->comp.count_u64_write(counter, a->parent, data);
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (err < 0)
+		return err;
+
+	return len;
+}
+
+static ssize_t enums_available_show(const u32 *const enums,
+				    const size_t num_enums,
+				    const char *const strs[], char *buf)
+{
+	size_t len = 0;
+	size_t index;
+
+	for (index = 0; index < num_enums; index++)
+		len += sysfs_emit_at(buf, len, "%s\n", strs[enums[index]]);
+
+	return len;
+}
+
+static ssize_t strs_available_show(const struct counter_available *const avail,
+				   char *buf)
+{
+	size_t len = 0;
+	size_t index;
+
+	for (index = 0; index < avail->num_items; index++)
+		len += sysfs_emit_at(buf, len, "%s\n", avail->strs[index]);
+
+	return len;
+}
+
+static ssize_t counter_comp_available_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	const struct counter_attribute *const a = to_counter_attribute(attr);
+	const struct counter_count *const count = a->parent;
+	const struct counter_synapse *const synapse = a->comp.priv;
+	const struct counter_available *const avail = a->comp.priv;
+
+	switch (a->comp.type) {
+	case COUNTER_COMP_FUNCTION:
+		return enums_available_show(count->functions_list,
+					    count->num_functions,
+					    counter_function_str, buf);
+	case COUNTER_COMP_SYNAPSE_ACTION:
+		return enums_available_show(synapse->actions_list,
+					    synapse->num_actions,
+					    counter_synapse_action_str, buf);
+	case COUNTER_COMP_ENUM:
+		return strs_available_show(avail, buf);
+	case COUNTER_COMP_COUNT_MODE:
+		return enums_available_show(avail->enums, avail->num_items,
+					    counter_count_mode_str, buf);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int counter_avail_attr_create(struct device *const dev,
+	struct counter_attribute_group *const group,
+	const struct counter_comp *const comp, void *const parent)
+{
+	struct counter_attribute *counter_attr;
+	struct device_attribute *dev_attr;
+
+	counter_attr = devm_kzalloc(dev, sizeof(*counter_attr), GFP_KERNEL);
+	if (!counter_attr)
+		return -ENOMEM;
+
+	/* Configure Counter attribute */
+	counter_attr->comp.type = comp->type;
+	counter_attr->comp.priv = comp->priv;
+	counter_attr->parent = parent;
+
+	/* Initialize sysfs attribute */
+	dev_attr = &counter_attr->dev_attr;
+	sysfs_attr_init(&dev_attr->attr);
+
+	/* Configure device attribute */
+	dev_attr->attr.name = devm_kasprintf(dev, GFP_KERNEL, "%s_available",
+					     comp->name);
+	if (!dev_attr->attr.name)
+		return -ENOMEM;
+	dev_attr->attr.mode = 0444;
+	dev_attr->show = counter_comp_available_show;
+
+	/* Store list node */
+	list_add(&counter_attr->l, &group->attr_list);
+	group->num_attr++;
+
+	return 0;
+}
+
+static int counter_attr_create(struct device *const dev,
+			       struct counter_attribute_group *const group,
+			       const struct counter_comp *const comp,
+			       const enum counter_scope scope,
+			       void *const parent)
+{
+	struct counter_attribute *counter_attr;
+	struct device_attribute *dev_attr;
+
+	counter_attr = devm_kzalloc(dev, sizeof(*counter_attr), GFP_KERNEL);
+	if (!counter_attr)
+		return -ENOMEM;
+
+	/* Configure Counter attribute */
+	counter_attr->comp = *comp;
+	counter_attr->scope = scope;
+	counter_attr->parent = parent;
+
+	/* Configure device attribute */
+	dev_attr = &counter_attr->dev_attr;
+	sysfs_attr_init(&dev_attr->attr);
+	dev_attr->attr.name = comp->name;
+	switch (comp->type) {
+	case COUNTER_COMP_U8:
+	case COUNTER_COMP_BOOL:
+		if (comp->device_u8_read) {
+			dev_attr->attr.mode |= 0444;
+			dev_attr->show = counter_comp_u8_show;
+		}
+		if (comp->device_u8_write) {
+			dev_attr->attr.mode |= 0200;
+			dev_attr->store = counter_comp_u8_store;
+		}
+		break;
+	case COUNTER_COMP_SIGNAL_LEVEL:
+	case COUNTER_COMP_FUNCTION:
+	case COUNTER_COMP_SYNAPSE_ACTION:
+	case COUNTER_COMP_ENUM:
+	case COUNTER_COMP_COUNT_DIRECTION:
+	case COUNTER_COMP_COUNT_MODE:
+		if (comp->device_u32_read) {
+			dev_attr->attr.mode |= 0444;
+			dev_attr->show = counter_comp_u32_show;
+		}
+		if (comp->device_u32_write) {
+			dev_attr->attr.mode |= 0200;
+			dev_attr->store = counter_comp_u32_store;
+		}
+		break;
+	case COUNTER_COMP_U64:
+		if (comp->device_u64_read) {
+			dev_attr->attr.mode |= 0444;
+			dev_attr->show = counter_comp_u64_show;
+		}
+		if (comp->device_u64_write) {
+			dev_attr->attr.mode |= 0200;
+			dev_attr->store = counter_comp_u64_store;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Store list node */
+	list_add(&counter_attr->l, &group->attr_list);
+	group->num_attr++;
+
+	/* Create "*_available" attribute if needed */
+	switch (comp->type) {
+	case COUNTER_COMP_FUNCTION:
+	case COUNTER_COMP_SYNAPSE_ACTION:
+	case COUNTER_COMP_ENUM:
+	case COUNTER_COMP_COUNT_MODE:
+		return counter_avail_attr_create(dev, group, comp, parent);
+	default:
+		return 0;
+	}
+}
+
+static ssize_t counter_comp_name_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n", to_counter_attribute(attr)->comp.name);
+}
+
+static int counter_name_attr_create(struct device *const dev,
+				    struct counter_attribute_group *const group,
+				    const char *const name)
+{
+	struct counter_attribute *counter_attr;
+
+	counter_attr = devm_kzalloc(dev, sizeof(*counter_attr), GFP_KERNEL);
+	if (!counter_attr)
+		return -ENOMEM;
+
+	/* Configure Counter attribute */
+	counter_attr->comp.name = name;
+
+	/* Configure device attribute */
+	sysfs_attr_init(&counter_attr->dev_attr.attr);
+	counter_attr->dev_attr.attr.name = "name";
+	counter_attr->dev_attr.attr.mode = 0444;
+	counter_attr->dev_attr.show = counter_comp_name_show;
+
+	/* Store list node */
+	list_add(&counter_attr->l, &group->attr_list);
+	group->num_attr++;
+
+	return 0;
+}
+
+static struct counter_comp counter_signal_comp = {
+	.type = COUNTER_COMP_SIGNAL_LEVEL,
+	.name = "signal",
+};
+
+static int counter_signal_attrs_create(struct counter_device *const counter,
+	struct counter_attribute_group *const cattr_group,
+	struct counter_signal *const signal)
+{
+	const enum counter_scope scope = COUNTER_SCOPE_SIGNAL;
+	struct device *const dev = &counter->dev;
+	int err;
+	struct counter_comp comp;
+	size_t i;
+
+	/* Create main Signal attribute */
+	comp = counter_signal_comp;
+	comp.signal_u32_read = counter->ops->signal_read;
+	err = counter_attr_create(dev, cattr_group, &comp, scope, signal);
+	if (err < 0)
+		return err;
+
+	/* Create Signal name attribute */
+	err = counter_name_attr_create(dev, cattr_group, signal->name);
+	if (err < 0)
+		return err;
+
+	/* Create an attribute for each extension */
+	for (i = 0; i < signal->num_ext; i++) {
+		err = counter_attr_create(dev, cattr_group, signal->ext + i,
+					  scope, signal);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int counter_sysfs_signals_add(struct counter_device *const counter,
+	struct counter_attribute_group *const groups)
+{
+	size_t i;
+	int err;
+
+	/* Add each Signal */
+	for (i = 0; i < counter->num_signals; i++) {
+		/* Generate Signal attribute directory name */
+		groups[i].name = devm_kasprintf(&counter->dev, GFP_KERNEL,
+						"signal%zu", i);
+		if (!groups[i].name)
+			return -ENOMEM;
+
+		/* Create all attributes associated with Signal */
+		err = counter_signal_attrs_create(counter, groups + i,
+						  counter->signals + i);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int counter_sysfs_synapses_add(struct counter_device *const counter,
+	struct counter_attribute_group *const group,
+	struct counter_count *const count)
+{
+	size_t i;
+
+	/* Add each Synapse */
+	for (i = 0; i < count->num_synapses; i++) {
+		struct device *const dev = &counter->dev;
+		struct counter_synapse *synapse;
+		size_t id;
+		struct counter_comp comp;
+		int err;
+
+		synapse = count->synapses + i;
+
+		/* Generate Synapse action name */
+		id = synapse->signal - counter->signals;
+		comp.name = devm_kasprintf(dev, GFP_KERNEL, "signal%zu_action",
+					   id);
+		if (!comp.name)
+			return -ENOMEM;
+
+		/* Create action attribute */
+		comp.type = COUNTER_COMP_SYNAPSE_ACTION;
+		comp.action_read = counter->ops->action_read;
+		comp.action_write = counter->ops->action_write;
+		comp.priv = synapse;
+		err = counter_attr_create(dev, group, &comp,
+					  COUNTER_SCOPE_COUNT, count);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static struct counter_comp counter_count_comp =
+	COUNTER_COMP_COUNT_U64("count", NULL, NULL);
+
+static struct counter_comp counter_function_comp = {
+	.type = COUNTER_COMP_FUNCTION,
+	.name = "function",
+};
+
+static int counter_count_attrs_create(struct counter_device *const counter,
+	struct counter_attribute_group *const cattr_group,
+	struct counter_count *const count)
+{
+	const enum counter_scope scope = COUNTER_SCOPE_COUNT;
+	struct device *const dev = &counter->dev;
+	int err;
+	struct counter_comp comp;
+	size_t i;
+
+	/* Create main Count attribute */
+	comp = counter_count_comp;
+	comp.count_u64_read = counter->ops->count_read;
+	comp.count_u64_write = counter->ops->count_write;
+	err = counter_attr_create(dev, cattr_group, &comp, scope, count);
+	if (err < 0)
+		return err;
+
+	/* Create Count name attribute */
+	err = counter_name_attr_create(dev, cattr_group, count->name);
+	if (err < 0)
+		return err;
+
+	/* Create Count function attribute */
+	comp = counter_function_comp;
+	comp.count_u32_read = counter->ops->function_read;
+	comp.count_u32_write = counter->ops->function_write;
+	err = counter_attr_create(dev, cattr_group, &comp, scope, count);
+	if (err < 0)
+		return err;
+
+	/* Create an attribute for each extension */
+	for (i = 0; i < count->num_ext; i++) {
+		err = counter_attr_create(dev, cattr_group, count->ext + i,
+					  scope, count);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int counter_sysfs_counts_add(struct counter_device *const counter,
+	struct counter_attribute_group *const groups)
+{
+	size_t i;
+	struct counter_count *count;
+	int err;
+
+	/* Add each Count */
+	for (i = 0; i < counter->num_counts; i++) {
+		count = counter->counts + i;
+
+		/* Generate Count attribute directory name */
+		groups[i].name = devm_kasprintf(&counter->dev, GFP_KERNEL,
+						"count%zu", i);
+		if (!groups[i].name)
+			return -ENOMEM;
+
+		/* Add sysfs attributes of the Synapses */
+		err = counter_sysfs_synapses_add(counter, groups + i, count);
+		if (err < 0)
+			return err;
+
+		/* Create all attributes associated with Count */
+		err = counter_count_attrs_create(counter, groups + i, count);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+static int counter_num_signals_read(struct counter_device *counter, u8 *val)
+{
+	*val = counter->num_signals;
+	return 0;
+}
+
+static int counter_num_counts_read(struct counter_device *counter, u8 *val)
+{
+	*val = counter->num_counts;
+	return 0;
+}
+
+static struct counter_comp counter_num_signals_comp =
+	COUNTER_COMP_DEVICE_U8("num_signals", counter_num_signals_read, NULL);
+
+static struct counter_comp counter_num_counts_comp =
+	COUNTER_COMP_DEVICE_U8("num_counts", counter_num_counts_read, NULL);
+
+static int counter_sysfs_attr_add(struct counter_device *const counter,
+				  struct counter_attribute_group *cattr_group)
+{
+	const enum counter_scope scope = COUNTER_SCOPE_DEVICE;
+	struct device *const dev = &counter->dev;
+	int err;
+	size_t i;
+
+	/* Add Signals sysfs attributes */
+	err = counter_sysfs_signals_add(counter, cattr_group);
+	if (err < 0)
+		return err;
+	cattr_group += counter->num_signals;
+
+	/* Add Counts sysfs attributes */
+	err = counter_sysfs_counts_add(counter, cattr_group);
+	if (err < 0)
+		return err;
+	cattr_group += counter->num_counts;
+
+	/* Create name attribute */
+	err = counter_name_attr_create(dev, cattr_group, counter->name);
+	if (err < 0)
+		return err;
+
+	/* Create num_signals attribute */
+	err = counter_attr_create(dev, cattr_group, &counter_num_signals_comp,
+				  scope, NULL);
+	if (err < 0)
+		return err;
+
+	/* Create num_counts attribute */
+	err = counter_attr_create(dev, cattr_group, &counter_num_counts_comp,
+				  scope, NULL);
+	if (err < 0)
+		return err;
+
+	/* Create an attribute for each extension */
+	for (i = 0; i < counter->num_ext; i++) {
+		err = counter_attr_create(dev, cattr_group, counter->ext + i,
+					  scope, NULL);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * counter_sysfs_add - Adds Counter sysfs attributes to the device structure
+ * @counter:	Pointer to the Counter device structure
+ *
+ * Counter sysfs attributes are created and added to the respective device
+ * structure for later registration to the system. Resource-managed memory
+ * allocation is performed by this function, and this memory should be freed
+ * when no longer needed (automatically by a device_unregister call, or
+ * manually by a devres_release_all call).
+ */
+int counter_sysfs_add(struct counter_device *const counter)
+{
+	struct device *const dev = &counter->dev;
+	const size_t num_groups = counter->num_signals + counter->num_counts + 1;
+	struct counter_attribute_group *cattr_groups;
+	size_t i, j;
+	int err;
+	struct attribute_group *groups;
+	struct counter_attribute *p;
+
+	/* Allocate space for attribute groups (signals, counts, and ext) */
+	cattr_groups = devm_kcalloc(dev, num_groups, sizeof(*cattr_groups),
+				    GFP_KERNEL);
+	if (!cattr_groups)
+		return -ENOMEM;
+
+	/* Initialize attribute lists */
+	for (i = 0; i < num_groups; i++)
+		INIT_LIST_HEAD(&cattr_groups[i].attr_list);
+
+	/* Add Counter device sysfs attributes */
+	err = counter_sysfs_attr_add(counter, cattr_groups);
+	if (err < 0)
+		return err;
+
+	/* Allocate attribute group pointers for association with device */
+	dev->groups = devm_kcalloc(dev, num_groups + 1, sizeof(*dev->groups),
+				   GFP_KERNEL);
+	if (!dev->groups)
+		return -ENOMEM;
+
+	/* Allocate space for attribute groups */
+	groups = devm_kcalloc(dev, num_groups, sizeof(*groups), GFP_KERNEL);
+	if (!groups)
+		return -ENOMEM;
+
+	/* Prepare each group of attributes for association */
+	for (i = 0; i < num_groups; i++) {
+		groups[i].name = cattr_groups[i].name;
+
+		/* Allocate space for attribute pointers */
+		groups[i].attrs = devm_kcalloc(dev,
+					       cattr_groups[i].num_attr + 1,
+					       sizeof(*groups[i].attrs),
+					       GFP_KERNEL);
+		if (!groups[i].attrs)
+			return -ENOMEM;
+
+		/* Add attribute pointers to attribute group */
+		j = 0;
+		list_for_each_entry(p, &cattr_groups[i].attr_list, l)
+			groups[i].attrs[j++] = &p->dev_attr.attr;
+
+		/* Associate attribute group */
+		dev->groups[i] = &groups[i];
+	}
+
+	return 0;
+}
diff --git a/drivers/counter/counter-sysfs.h b/drivers/counter/counter-sysfs.h
new file mode 100644
index 000000000000..14fe566aca0e
--- /dev/null
+++ b/drivers/counter/counter-sysfs.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Counter sysfs interface
+ * Copyright (C) 2020 William Breathitt Gray
+ */
+#ifndef _COUNTER_SYSFS_H_
+#define _COUNTER_SYSFS_H_
+
+#include <linux/counter.h>
+
+int counter_sysfs_add(struct counter_device *const counter);
+
+#endif /* _COUNTER_SYSFS_H_ */
diff --git a/drivers/counter/counter.c b/drivers/counter/counter.c
deleted file mode 100644
index de921e8a3f72..000000000000
--- a/drivers/counter/counter.c
+++ /dev/null
@@ -1,1496 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Generic Counter interface
- * Copyright (C) 2018 William Breathitt Gray
- */
-#include <linux/counter.h>
-#include <linux/device.h>
-#include <linux/err.h>
-#include <linux/export.h>
-#include <linux/fs.h>
-#include <linux/gfp.h>
-#include <linux/idr.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/module.h>
-#include <linux/printk.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/sysfs.h>
-#include <linux/types.h>
-
-const char *const counter_count_direction_str[2] = {
-	[COUNTER_COUNT_DIRECTION_FORWARD] = "forward",
-	[COUNTER_COUNT_DIRECTION_BACKWARD] = "backward"
-};
-EXPORT_SYMBOL_GPL(counter_count_direction_str);
-
-const char *const counter_count_mode_str[4] = {
-	[COUNTER_COUNT_MODE_NORMAL] = "normal",
-	[COUNTER_COUNT_MODE_RANGE_LIMIT] = "range limit",
-	[COUNTER_COUNT_MODE_NON_RECYCLE] = "non-recycle",
-	[COUNTER_COUNT_MODE_MODULO_N] = "modulo-n"
-};
-EXPORT_SYMBOL_GPL(counter_count_mode_str);
-
-ssize_t counter_signal_enum_read(struct counter_device *counter,
-				 struct counter_signal *signal, void *priv,
-				 char *buf)
-{
-	const struct counter_signal_enum_ext *const e = priv;
-	int err;
-	size_t index;
-
-	if (!e->get)
-		return -EINVAL;
-
-	err = e->get(counter, signal, &index);
-	if (err)
-		return err;
-
-	if (index >= e->num_items)
-		return -EINVAL;
-
-	return sprintf(buf, "%s\n", e->items[index]);
-}
-EXPORT_SYMBOL_GPL(counter_signal_enum_read);
-
-ssize_t counter_signal_enum_write(struct counter_device *counter,
-				  struct counter_signal *signal, void *priv,
-				  const char *buf, size_t len)
-{
-	const struct counter_signal_enum_ext *const e = priv;
-	ssize_t index;
-	int err;
-
-	if (!e->set)
-		return -EINVAL;
-
-	index = __sysfs_match_string(e->items, e->num_items, buf);
-	if (index < 0)
-		return index;
-
-	err = e->set(counter, signal, index);
-	if (err)
-		return err;
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(counter_signal_enum_write);
-
-ssize_t counter_signal_enum_available_read(struct counter_device *counter,
-					   struct counter_signal *signal,
-					   void *priv, char *buf)
-{
-	const struct counter_signal_enum_ext *const e = priv;
-	size_t i;
-	size_t len = 0;
-
-	if (!e->num_items)
-		return 0;
-
-	for (i = 0; i < e->num_items; i++)
-		len += sprintf(buf + len, "%s\n", e->items[i]);
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(counter_signal_enum_available_read);
-
-ssize_t counter_count_enum_read(struct counter_device *counter,
-				struct counter_count *count, void *priv,
-				char *buf)
-{
-	const struct counter_count_enum_ext *const e = priv;
-	int err;
-	size_t index;
-
-	if (!e->get)
-		return -EINVAL;
-
-	err = e->get(counter, count, &index);
-	if (err)
-		return err;
-
-	if (index >= e->num_items)
-		return -EINVAL;
-
-	return sprintf(buf, "%s\n", e->items[index]);
-}
-EXPORT_SYMBOL_GPL(counter_count_enum_read);
-
-ssize_t counter_count_enum_write(struct counter_device *counter,
-				 struct counter_count *count, void *priv,
-				 const char *buf, size_t len)
-{
-	const struct counter_count_enum_ext *const e = priv;
-	ssize_t index;
-	int err;
-
-	if (!e->set)
-		return -EINVAL;
-
-	index = __sysfs_match_string(e->items, e->num_items, buf);
-	if (index < 0)
-		return index;
-
-	err = e->set(counter, count, index);
-	if (err)
-		return err;
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(counter_count_enum_write);
-
-ssize_t counter_count_enum_available_read(struct counter_device *counter,
-					  struct counter_count *count,
-					  void *priv, char *buf)
-{
-	const struct counter_count_enum_ext *const e = priv;
-	size_t i;
-	size_t len = 0;
-
-	if (!e->num_items)
-		return 0;
-
-	for (i = 0; i < e->num_items; i++)
-		len += sprintf(buf + len, "%s\n", e->items[i]);
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(counter_count_enum_available_read);
-
-ssize_t counter_device_enum_read(struct counter_device *counter, void *priv,
-				 char *buf)
-{
-	const struct counter_device_enum_ext *const e = priv;
-	int err;
-	size_t index;
-
-	if (!e->get)
-		return -EINVAL;
-
-	err = e->get(counter, &index);
-	if (err)
-		return err;
-
-	if (index >= e->num_items)
-		return -EINVAL;
-
-	return sprintf(buf, "%s\n", e->items[index]);
-}
-EXPORT_SYMBOL_GPL(counter_device_enum_read);
-
-ssize_t counter_device_enum_write(struct counter_device *counter, void *priv,
-				  const char *buf, size_t len)
-{
-	const struct counter_device_enum_ext *const e = priv;
-	ssize_t index;
-	int err;
-
-	if (!e->set)
-		return -EINVAL;
-
-	index = __sysfs_match_string(e->items, e->num_items, buf);
-	if (index < 0)
-		return index;
-
-	err = e->set(counter, index);
-	if (err)
-		return err;
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(counter_device_enum_write);
-
-ssize_t counter_device_enum_available_read(struct counter_device *counter,
-					   void *priv, char *buf)
-{
-	const struct counter_device_enum_ext *const e = priv;
-	size_t i;
-	size_t len = 0;
-
-	if (!e->num_items)
-		return 0;
-
-	for (i = 0; i < e->num_items; i++)
-		len += sprintf(buf + len, "%s\n", e->items[i]);
-
-	return len;
-}
-EXPORT_SYMBOL_GPL(counter_device_enum_available_read);
-
-struct counter_attr_parm {
-	struct counter_device_attr_group *group;
-	const char *prefix;
-	const char *name;
-	ssize_t (*show)(struct device *dev, struct device_attribute *attr,
-			char *buf);
-	ssize_t (*store)(struct device *dev, struct device_attribute *attr,
-			 const char *buf, size_t len);
-	void *component;
-};
-
-struct counter_device_attr {
-	struct device_attribute dev_attr;
-	struct list_head l;
-	void *component;
-};
-
-static int counter_attribute_create(const struct counter_attr_parm *const parm)
-{
-	struct counter_device_attr *counter_attr;
-	struct device_attribute *dev_attr;
-	int err;
-	struct list_head *const attr_list = &parm->group->attr_list;
-
-	/* Allocate a Counter device attribute */
-	counter_attr = kzalloc(sizeof(*counter_attr), GFP_KERNEL);
-	if (!counter_attr)
-		return -ENOMEM;
-	dev_attr = &counter_attr->dev_attr;
-
-	sysfs_attr_init(&dev_attr->attr);
-
-	/* Configure device attribute */
-	dev_attr->attr.name = kasprintf(GFP_KERNEL, "%s%s", parm->prefix,
-					parm->name);
-	if (!dev_attr->attr.name) {
-		err = -ENOMEM;
-		goto err_free_counter_attr;
-	}
-	if (parm->show) {
-		dev_attr->attr.mode |= 0444;
-		dev_attr->show = parm->show;
-	}
-	if (parm->store) {
-		dev_attr->attr.mode |= 0200;
-		dev_attr->store = parm->store;
-	}
-
-	/* Store associated Counter component with attribute */
-	counter_attr->component = parm->component;
-
-	/* Keep track of the attribute for later cleanup */
-	list_add(&counter_attr->l, attr_list);
-	parm->group->num_attr++;
-
-	return 0;
-
-err_free_counter_attr:
-	kfree(counter_attr);
-	return err;
-}
-
-#define to_counter_attr(_dev_attr) \
-	container_of(_dev_attr, struct counter_device_attr, dev_attr)
-
-struct counter_signal_unit {
-	struct counter_signal *signal;
-};
-
-static const char *const counter_signal_level_str[] = {
-	[COUNTER_SIGNAL_LEVEL_LOW] = "low",
-	[COUNTER_SIGNAL_LEVEL_HIGH] = "high"
-};
-
-static ssize_t counter_signal_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	struct counter_device *const counter = dev_get_drvdata(dev);
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_signal_unit *const component = devattr->component;
-	struct counter_signal *const signal = component->signal;
-	int err;
-	enum counter_signal_level level;
-
-	err = counter->ops->signal_read(counter, signal, &level);
-	if (err)
-		return err;
-
-	return sprintf(buf, "%s\n", counter_signal_level_str[level]);
-}
-
-struct counter_name_unit {
-	const char *name;
-};
-
-static ssize_t counter_device_attr_name_show(struct device *dev,
-					     struct device_attribute *attr,
-					     char *buf)
-{
-	const struct counter_name_unit *const comp = to_counter_attr(attr)->component;
-
-	return sprintf(buf, "%s\n", comp->name);
-}
-
-static int counter_name_attribute_create(
-	struct counter_device_attr_group *const group,
-	const char *const name)
-{
-	struct counter_name_unit *name_comp;
-	struct counter_attr_parm parm;
-	int err;
-
-	/* Skip if no name */
-	if (!name)
-		return 0;
-
-	/* Allocate name attribute component */
-	name_comp = kmalloc(sizeof(*name_comp), GFP_KERNEL);
-	if (!name_comp)
-		return -ENOMEM;
-	name_comp->name = name;
-
-	/* Allocate Signal name attribute */
-	parm.group = group;
-	parm.prefix = "";
-	parm.name = "name";
-	parm.show = counter_device_attr_name_show;
-	parm.store = NULL;
-	parm.component = name_comp;
-	err = counter_attribute_create(&parm);
-	if (err)
-		goto err_free_name_comp;
-
-	return 0;
-
-err_free_name_comp:
-	kfree(name_comp);
-	return err;
-}
-
-struct counter_signal_ext_unit {
-	struct counter_signal *signal;
-	const struct counter_signal_ext *ext;
-};
-
-static ssize_t counter_signal_ext_show(struct device *dev,
-				       struct device_attribute *attr, char *buf)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_signal_ext_unit *const comp = devattr->component;
-	const struct counter_signal_ext *const ext = comp->ext;
-
-	return ext->read(dev_get_drvdata(dev), comp->signal, ext->priv, buf);
-}
-
-static ssize_t counter_signal_ext_store(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t len)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_signal_ext_unit *const comp = devattr->component;
-	const struct counter_signal_ext *const ext = comp->ext;
-
-	return ext->write(dev_get_drvdata(dev), comp->signal, ext->priv, buf,
-		len);
-}
-
-static void counter_device_attr_list_free(struct list_head *attr_list)
-{
-	struct counter_device_attr *p, *n;
-
-	list_for_each_entry_safe(p, n, attr_list, l) {
-		/* free attribute name and associated component memory */
-		kfree(p->dev_attr.attr.name);
-		kfree(p->component);
-		list_del(&p->l);
-		kfree(p);
-	}
-}
-
-static int counter_signal_ext_register(
-	struct counter_device_attr_group *const group,
-	struct counter_signal *const signal)
-{
-	const size_t num_ext = signal->num_ext;
-	size_t i;
-	const struct counter_signal_ext *ext;
-	struct counter_signal_ext_unit *signal_ext_comp;
-	struct counter_attr_parm parm;
-	int err;
-
-	/* Create an attribute for each extension */
-	for (i = 0 ; i < num_ext; i++) {
-		ext = signal->ext + i;
-
-		/* Allocate signal_ext attribute component */
-		signal_ext_comp = kmalloc(sizeof(*signal_ext_comp), GFP_KERNEL);
-		if (!signal_ext_comp) {
-			err = -ENOMEM;
-			goto err_free_attr_list;
-		}
-		signal_ext_comp->signal = signal;
-		signal_ext_comp->ext = ext;
-
-		/* Allocate a Counter device attribute */
-		parm.group = group;
-		parm.prefix = "";
-		parm.name = ext->name;
-		parm.show = (ext->read) ? counter_signal_ext_show : NULL;
-		parm.store = (ext->write) ? counter_signal_ext_store : NULL;
-		parm.component = signal_ext_comp;
-		err = counter_attribute_create(&parm);
-		if (err) {
-			kfree(signal_ext_comp);
-			goto err_free_attr_list;
-		}
-	}
-
-	return 0;
-
-err_free_attr_list:
-	counter_device_attr_list_free(&group->attr_list);
-	return err;
-}
-
-static int counter_signal_attributes_create(
-	struct counter_device_attr_group *const group,
-	const struct counter_device *const counter,
-	struct counter_signal *const signal)
-{
-	struct counter_signal_unit *signal_comp;
-	struct counter_attr_parm parm;
-	int err;
-
-	/* Allocate Signal attribute component */
-	signal_comp = kmalloc(sizeof(*signal_comp), GFP_KERNEL);
-	if (!signal_comp)
-		return -ENOMEM;
-	signal_comp->signal = signal;
-
-	/* Create main Signal attribute */
-	parm.group = group;
-	parm.prefix = "";
-	parm.name = "signal";
-	parm.show = (counter->ops->signal_read) ? counter_signal_show : NULL;
-	parm.store = NULL;
-	parm.component = signal_comp;
-	err = counter_attribute_create(&parm);
-	if (err) {
-		kfree(signal_comp);
-		return err;
-	}
-
-	/* Create Signal name attribute */
-	err = counter_name_attribute_create(group, signal->name);
-	if (err)
-		goto err_free_attr_list;
-
-	/* Register Signal extension attributes */
-	err = counter_signal_ext_register(group, signal);
-	if (err)
-		goto err_free_attr_list;
-
-	return 0;
-
-err_free_attr_list:
-	counter_device_attr_list_free(&group->attr_list);
-	return err;
-}
-
-static int counter_signals_register(
-	struct counter_device_attr_group *const groups_list,
-	const struct counter_device *const counter)
-{
-	const size_t num_signals = counter->num_signals;
-	size_t i;
-	struct counter_signal *signal;
-	const char *name;
-	int err;
-
-	/* Register each Signal */
-	for (i = 0; i < num_signals; i++) {
-		signal = counter->signals + i;
-
-		/* Generate Signal attribute directory name */
-		name = kasprintf(GFP_KERNEL, "signal%d", signal->id);
-		if (!name) {
-			err = -ENOMEM;
-			goto err_free_attr_groups;
-		}
-		groups_list[i].attr_group.name = name;
-
-		/* Create all attributes associated with Signal */
-		err = counter_signal_attributes_create(groups_list + i, counter,
-						       signal);
-		if (err)
-			goto err_free_attr_groups;
-	}
-
-	return 0;
-
-err_free_attr_groups:
-	do {
-		kfree(groups_list[i].attr_group.name);
-		counter_device_attr_list_free(&groups_list[i].attr_list);
-	} while (i--);
-	return err;
-}
-
-static const char *const counter_synapse_action_str[] = {
-	[COUNTER_SYNAPSE_ACTION_NONE] = "none",
-	[COUNTER_SYNAPSE_ACTION_RISING_EDGE] = "rising edge",
-	[COUNTER_SYNAPSE_ACTION_FALLING_EDGE] = "falling edge",
-	[COUNTER_SYNAPSE_ACTION_BOTH_EDGES] = "both edges"
-};
-
-struct counter_action_unit {
-	struct counter_synapse *synapse;
-	struct counter_count *count;
-};
-
-static ssize_t counter_action_show(struct device *dev,
-				   struct device_attribute *attr, char *buf)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	int err;
-	struct counter_device *const counter = dev_get_drvdata(dev);
-	const struct counter_action_unit *const component = devattr->component;
-	struct counter_count *const count = component->count;
-	struct counter_synapse *const synapse = component->synapse;
-	size_t action_index;
-	enum counter_synapse_action action;
-
-	err = counter->ops->action_get(counter, count, synapse, &action_index);
-	if (err)
-		return err;
-
-	synapse->action = action_index;
-
-	action = synapse->actions_list[action_index];
-	return sprintf(buf, "%s\n", counter_synapse_action_str[action]);
-}
-
-static ssize_t counter_action_store(struct device *dev,
-				    struct device_attribute *attr,
-				    const char *buf, size_t len)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_action_unit *const component = devattr->component;
-	struct counter_synapse *const synapse = component->synapse;
-	size_t action_index;
-	const size_t num_actions = synapse->num_actions;
-	enum counter_synapse_action action;
-	int err;
-	struct counter_device *const counter = dev_get_drvdata(dev);
-	struct counter_count *const count = component->count;
-
-	/* Find requested action mode */
-	for (action_index = 0; action_index < num_actions; action_index++) {
-		action = synapse->actions_list[action_index];
-		if (sysfs_streq(buf, counter_synapse_action_str[action]))
-			break;
-	}
-	/* If requested action mode not found */
-	if (action_index >= num_actions)
-		return -EINVAL;
-
-	err = counter->ops->action_set(counter, count, synapse, action_index);
-	if (err)
-		return err;
-
-	synapse->action = action_index;
-
-	return len;
-}
-
-struct counter_action_avail_unit {
-	const enum counter_synapse_action *actions_list;
-	size_t num_actions;
-};
-
-static ssize_t counter_synapse_action_available_show(struct device *dev,
-	struct device_attribute *attr, char *buf)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_action_avail_unit *const component = devattr->component;
-	size_t i;
-	enum counter_synapse_action action;
-	ssize_t len = 0;
-
-	for (i = 0; i < component->num_actions; i++) {
-		action = component->actions_list[i];
-		len += sprintf(buf + len, "%s\n",
-			       counter_synapse_action_str[action]);
-	}
-
-	return len;
-}
-
-static int counter_synapses_register(
-	struct counter_device_attr_group *const group,
-	const struct counter_device *const counter,
-	struct counter_count *const count, const char *const count_attr_name)
-{
-	size_t i;
-	struct counter_synapse *synapse;
-	const char *prefix;
-	struct counter_action_unit *action_comp;
-	struct counter_attr_parm parm;
-	int err;
-	struct counter_action_avail_unit *avail_comp;
-
-	/* Register each Synapse */
-	for (i = 0; i < count->num_synapses; i++) {
-		synapse = count->synapses + i;
-
-		/* Generate attribute prefix */
-		prefix = kasprintf(GFP_KERNEL, "signal%d_",
-				   synapse->signal->id);
-		if (!prefix) {
-			err = -ENOMEM;
-			goto err_free_attr_list;
-		}
-
-		/* Allocate action attribute component */
-		action_comp = kmalloc(sizeof(*action_comp), GFP_KERNEL);
-		if (!action_comp) {
-			err = -ENOMEM;
-			goto err_free_prefix;
-		}
-		action_comp->synapse = synapse;
-		action_comp->count = count;
-
-		/* Create action attribute */
-		parm.group = group;
-		parm.prefix = prefix;
-		parm.name = "action";
-		parm.show = (counter->ops->action_get) ? counter_action_show : NULL;
-		parm.store = (counter->ops->action_set) ? counter_action_store : NULL;
-		parm.component = action_comp;
-		err = counter_attribute_create(&parm);
-		if (err) {
-			kfree(action_comp);
-			goto err_free_prefix;
-		}
-
-		/* Allocate action available attribute component */
-		avail_comp = kmalloc(sizeof(*avail_comp), GFP_KERNEL);
-		if (!avail_comp) {
-			err = -ENOMEM;
-			goto err_free_prefix;
-		}
-		avail_comp->actions_list = synapse->actions_list;
-		avail_comp->num_actions = synapse->num_actions;
-
-		/* Create action_available attribute */
-		parm.group = group;
-		parm.prefix = prefix;
-		parm.name = "action_available";
-		parm.show = counter_synapse_action_available_show;
-		parm.store = NULL;
-		parm.component = avail_comp;
-		err = counter_attribute_create(&parm);
-		if (err) {
-			kfree(avail_comp);
-			goto err_free_prefix;
-		}
-
-		kfree(prefix);
-	}
-
-	return 0;
-
-err_free_prefix:
-	kfree(prefix);
-err_free_attr_list:
-	counter_device_attr_list_free(&group->attr_list);
-	return err;
-}
-
-struct counter_count_unit {
-	struct counter_count *count;
-};
-
-static ssize_t counter_count_show(struct device *dev,
-				  struct device_attribute *attr,
-				  char *buf)
-{
-	struct counter_device *const counter = dev_get_drvdata(dev);
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_count_unit *const component = devattr->component;
-	struct counter_count *const count = component->count;
-	int err;
-	unsigned long val;
-
-	err = counter->ops->count_read(counter, count, &val);
-	if (err)
-		return err;
-
-	return sprintf(buf, "%lu\n", val);
-}
-
-static ssize_t counter_count_store(struct device *dev,
-				   struct device_attribute *attr,
-				   const char *buf, size_t len)
-{
-	struct counter_device *const counter = dev_get_drvdata(dev);
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_count_unit *const component = devattr->component;
-	struct counter_count *const count = component->count;
-	int err;
-	unsigned long val;
-
-	err = kstrtoul(buf, 0, &val);
-	if (err)
-		return err;
-
-	err = counter->ops->count_write(counter, count, val);
-	if (err)
-		return err;
-
-	return len;
-}
-
-static const char *const counter_function_str[] = {
-	[COUNTER_FUNCTION_INCREASE] = "increase",
-	[COUNTER_FUNCTION_DECREASE] = "decrease",
-	[COUNTER_FUNCTION_PULSE_DIRECTION] = "pulse-direction",
-	[COUNTER_FUNCTION_QUADRATURE_X1_A] = "quadrature x1 a",
-	[COUNTER_FUNCTION_QUADRATURE_X1_B] = "quadrature x1 b",
-	[COUNTER_FUNCTION_QUADRATURE_X2_A] = "quadrature x2 a",
-	[COUNTER_FUNCTION_QUADRATURE_X2_B] = "quadrature x2 b",
-	[COUNTER_FUNCTION_QUADRATURE_X4] = "quadrature x4"
-};
-
-static ssize_t counter_function_show(struct device *dev,
-				     struct device_attribute *attr, char *buf)
-{
-	int err;
-	struct counter_device *const counter = dev_get_drvdata(dev);
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_count_unit *const component = devattr->component;
-	struct counter_count *const count = component->count;
-	size_t func_index;
-	enum counter_function function;
-
-	err = counter->ops->function_get(counter, count, &func_index);
-	if (err)
-		return err;
-
-	count->function = func_index;
-
-	function = count->functions_list[func_index];
-	return sprintf(buf, "%s\n", counter_function_str[function]);
-}
-
-static ssize_t counter_function_store(struct device *dev,
-				      struct device_attribute *attr,
-				      const char *buf, size_t len)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_count_unit *const component = devattr->component;
-	struct counter_count *const count = component->count;
-	const size_t num_functions = count->num_functions;
-	size_t func_index;
-	enum counter_function function;
-	int err;
-	struct counter_device *const counter = dev_get_drvdata(dev);
-
-	/* Find requested Count function mode */
-	for (func_index = 0; func_index < num_functions; func_index++) {
-		function = count->functions_list[func_index];
-		if (sysfs_streq(buf, counter_function_str[function]))
-			break;
-	}
-	/* Return error if requested Count function mode not found */
-	if (func_index >= num_functions)
-		return -EINVAL;
-
-	err = counter->ops->function_set(counter, count, func_index);
-	if (err)
-		return err;
-
-	count->function = func_index;
-
-	return len;
-}
-
-struct counter_count_ext_unit {
-	struct counter_count *count;
-	const struct counter_count_ext *ext;
-};
-
-static ssize_t counter_count_ext_show(struct device *dev,
-				      struct device_attribute *attr, char *buf)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_count_ext_unit *const comp = devattr->component;
-	const struct counter_count_ext *const ext = comp->ext;
-
-	return ext->read(dev_get_drvdata(dev), comp->count, ext->priv, buf);
-}
-
-static ssize_t counter_count_ext_store(struct device *dev,
-				       struct device_attribute *attr,
-				       const char *buf, size_t len)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_count_ext_unit *const comp = devattr->component;
-	const struct counter_count_ext *const ext = comp->ext;
-
-	return ext->write(dev_get_drvdata(dev), comp->count, ext->priv, buf,
-		len);
-}
-
-static int counter_count_ext_register(
-	struct counter_device_attr_group *const group,
-	struct counter_count *const count)
-{
-	size_t i;
-	const struct counter_count_ext *ext;
-	struct counter_count_ext_unit *count_ext_comp;
-	struct counter_attr_parm parm;
-	int err;
-
-	/* Create an attribute for each extension */
-	for (i = 0 ; i < count->num_ext; i++) {
-		ext = count->ext + i;
-
-		/* Allocate count_ext attribute component */
-		count_ext_comp = kmalloc(sizeof(*count_ext_comp), GFP_KERNEL);
-		if (!count_ext_comp) {
-			err = -ENOMEM;
-			goto err_free_attr_list;
-		}
-		count_ext_comp->count = count;
-		count_ext_comp->ext = ext;
-
-		/* Allocate count_ext attribute */
-		parm.group = group;
-		parm.prefix = "";
-		parm.name = ext->name;
-		parm.show = (ext->read) ? counter_count_ext_show : NULL;
-		parm.store = (ext->write) ? counter_count_ext_store : NULL;
-		parm.component = count_ext_comp;
-		err = counter_attribute_create(&parm);
-		if (err) {
-			kfree(count_ext_comp);
-			goto err_free_attr_list;
-		}
-	}
-
-	return 0;
-
-err_free_attr_list:
-	counter_device_attr_list_free(&group->attr_list);
-	return err;
-}
-
-struct counter_func_avail_unit {
-	const enum counter_function *functions_list;
-	size_t num_functions;
-};
-
-static ssize_t counter_function_available_show(struct device *dev,
-	struct device_attribute *attr, char *buf)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_func_avail_unit *const component = devattr->component;
-	const enum counter_function *const func_list = component->functions_list;
-	const size_t num_functions = component->num_functions;
-	size_t i;
-	enum counter_function function;
-	ssize_t len = 0;
-
-	for (i = 0; i < num_functions; i++) {
-		function = func_list[i];
-		len += sprintf(buf + len, "%s\n",
-			       counter_function_str[function]);
-	}
-
-	return len;
-}
-
-static int counter_count_attributes_create(
-	struct counter_device_attr_group *const group,
-	const struct counter_device *const counter,
-	struct counter_count *const count)
-{
-	struct counter_count_unit *count_comp;
-	struct counter_attr_parm parm;
-	int err;
-	struct counter_count_unit *func_comp;
-	struct counter_func_avail_unit *avail_comp;
-
-	/* Allocate count attribute component */
-	count_comp = kmalloc(sizeof(*count_comp), GFP_KERNEL);
-	if (!count_comp)
-		return -ENOMEM;
-	count_comp->count = count;
-
-	/* Create main Count attribute */
-	parm.group = group;
-	parm.prefix = "";
-	parm.name = "count";
-	parm.show = (counter->ops->count_read) ? counter_count_show : NULL;
-	parm.store = (counter->ops->count_write) ? counter_count_store : NULL;
-	parm.component = count_comp;
-	err = counter_attribute_create(&parm);
-	if (err) {
-		kfree(count_comp);
-		return err;
-	}
-
-	/* Allocate function attribute component */
-	func_comp = kmalloc(sizeof(*func_comp), GFP_KERNEL);
-	if (!func_comp) {
-		err = -ENOMEM;
-		goto err_free_attr_list;
-	}
-	func_comp->count = count;
-
-	/* Create Count function attribute */
-	parm.group = group;
-	parm.prefix = "";
-	parm.name = "function";
-	parm.show = (counter->ops->function_get) ? counter_function_show : NULL;
-	parm.store = (counter->ops->function_set) ? counter_function_store : NULL;
-	parm.component = func_comp;
-	err = counter_attribute_create(&parm);
-	if (err) {
-		kfree(func_comp);
-		goto err_free_attr_list;
-	}
-
-	/* Allocate function available attribute component */
-	avail_comp = kmalloc(sizeof(*avail_comp), GFP_KERNEL);
-	if (!avail_comp) {
-		err = -ENOMEM;
-		goto err_free_attr_list;
-	}
-	avail_comp->functions_list = count->functions_list;
-	avail_comp->num_functions = count->num_functions;
-
-	/* Create Count function_available attribute */
-	parm.group = group;
-	parm.prefix = "";
-	parm.name = "function_available";
-	parm.show = counter_function_available_show;
-	parm.store = NULL;
-	parm.component = avail_comp;
-	err = counter_attribute_create(&parm);
-	if (err) {
-		kfree(avail_comp);
-		goto err_free_attr_list;
-	}
-
-	/* Create Count name attribute */
-	err = counter_name_attribute_create(group, count->name);
-	if (err)
-		goto err_free_attr_list;
-
-	/* Register Count extension attributes */
-	err = counter_count_ext_register(group, count);
-	if (err)
-		goto err_free_attr_list;
-
-	return 0;
-
-err_free_attr_list:
-	counter_device_attr_list_free(&group->attr_list);
-	return err;
-}
-
-static int counter_counts_register(
-	struct counter_device_attr_group *const groups_list,
-	const struct counter_device *const counter)
-{
-	size_t i;
-	struct counter_count *count;
-	const char *name;
-	int err;
-
-	/* Register each Count */
-	for (i = 0; i < counter->num_counts; i++) {
-		count = counter->counts + i;
-
-		/* Generate Count attribute directory name */
-		name = kasprintf(GFP_KERNEL, "count%d", count->id);
-		if (!name) {
-			err = -ENOMEM;
-			goto err_free_attr_groups;
-		}
-		groups_list[i].attr_group.name = name;
-
-		/* Register the Synapses associated with each Count */
-		err = counter_synapses_register(groups_list + i, counter, count,
-						name);
-		if (err)
-			goto err_free_attr_groups;
-
-		/* Create all attributes associated with Count */
-		err = counter_count_attributes_create(groups_list + i, counter,
-						      count);
-		if (err)
-			goto err_free_attr_groups;
-	}
-
-	return 0;
-
-err_free_attr_groups:
-	do {
-		kfree(groups_list[i].attr_group.name);
-		counter_device_attr_list_free(&groups_list[i].attr_list);
-	} while (i--);
-	return err;
-}
-
-struct counter_size_unit {
-	size_t size;
-};
-
-static ssize_t counter_device_attr_size_show(struct device *dev,
-					     struct device_attribute *attr,
-					     char *buf)
-{
-	const struct counter_size_unit *const comp = to_counter_attr(attr)->component;
-
-	return sprintf(buf, "%zu\n", comp->size);
-}
-
-static int counter_size_attribute_create(
-	struct counter_device_attr_group *const group,
-	const size_t size, const char *const name)
-{
-	struct counter_size_unit *size_comp;
-	struct counter_attr_parm parm;
-	int err;
-
-	/* Allocate size attribute component */
-	size_comp = kmalloc(sizeof(*size_comp), GFP_KERNEL);
-	if (!size_comp)
-		return -ENOMEM;
-	size_comp->size = size;
-
-	parm.group = group;
-	parm.prefix = "";
-	parm.name = name;
-	parm.show = counter_device_attr_size_show;
-	parm.store = NULL;
-	parm.component = size_comp;
-	err = counter_attribute_create(&parm);
-	if (err)
-		goto err_free_size_comp;
-
-	return 0;
-
-err_free_size_comp:
-	kfree(size_comp);
-	return err;
-}
-
-struct counter_ext_unit {
-	const struct counter_device_ext *ext;
-};
-
-static ssize_t counter_device_ext_show(struct device *dev,
-				       struct device_attribute *attr, char *buf)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_ext_unit *const component = devattr->component;
-	const struct counter_device_ext *const ext = component->ext;
-
-	return ext->read(dev_get_drvdata(dev), ext->priv, buf);
-}
-
-static ssize_t counter_device_ext_store(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t len)
-{
-	const struct counter_device_attr *const devattr = to_counter_attr(attr);
-	const struct counter_ext_unit *const component = devattr->component;
-	const struct counter_device_ext *const ext = component->ext;
-
-	return ext->write(dev_get_drvdata(dev), ext->priv, buf, len);
-}
-
-static int counter_device_ext_register(
-	struct counter_device_attr_group *const group,
-	struct counter_device *const counter)
-{
-	size_t i;
-	struct counter_ext_unit *ext_comp;
-	struct counter_attr_parm parm;
-	int err;
-
-	/* Create an attribute for each extension */
-	for (i = 0 ; i < counter->num_ext; i++) {
-		/* Allocate extension attribute component */
-		ext_comp = kmalloc(sizeof(*ext_comp), GFP_KERNEL);
-		if (!ext_comp) {
-			err = -ENOMEM;
-			goto err_free_attr_list;
-		}
-
-		ext_comp->ext = counter->ext + i;
-
-		/* Allocate extension attribute */
-		parm.group = group;
-		parm.prefix = "";
-		parm.name = counter->ext[i].name;
-		parm.show = (counter->ext[i].read) ? counter_device_ext_show : NULL;
-		parm.store = (counter->ext[i].write) ? counter_device_ext_store : NULL;
-		parm.component = ext_comp;
-		err = counter_attribute_create(&parm);
-		if (err) {
-			kfree(ext_comp);
-			goto err_free_attr_list;
-		}
-	}
-
-	return 0;
-
-err_free_attr_list:
-	counter_device_attr_list_free(&group->attr_list);
-	return err;
-}
-
-static int counter_global_attr_register(
-	struct counter_device_attr_group *const group,
-	struct counter_device *const counter)
-{
-	int err;
-
-	/* Create name attribute */
-	err = counter_name_attribute_create(group, counter->name);
-	if (err)
-		return err;
-
-	/* Create num_counts attribute */
-	err = counter_size_attribute_create(group, counter->num_counts,
-					    "num_counts");
-	if (err)
-		goto err_free_attr_list;
-
-	/* Create num_signals attribute */
-	err = counter_size_attribute_create(group, counter->num_signals,
-					    "num_signals");
-	if (err)
-		goto err_free_attr_list;
-
-	/* Register Counter device extension attributes */
-	err = counter_device_ext_register(group, counter);
-	if (err)
-		goto err_free_attr_list;
-
-	return 0;
-
-err_free_attr_list:
-	counter_device_attr_list_free(&group->attr_list);
-	return err;
-}
-
-static void counter_device_groups_list_free(
-	struct counter_device_attr_group *const groups_list,
-	const size_t num_groups)
-{
-	struct counter_device_attr_group *group;
-	size_t i;
-
-	/* loop through all attribute groups (signals, counts, global, etc.) */
-	for (i = 0; i < num_groups; i++) {
-		group = groups_list + i;
-
-		/* free all attribute group and associated attributes memory */
-		kfree(group->attr_group.name);
-		kfree(group->attr_group.attrs);
-		counter_device_attr_list_free(&group->attr_list);
-	}
-
-	kfree(groups_list);
-}
-
-static int counter_device_groups_list_prepare(
-	struct counter_device *const counter)
-{
-	const size_t total_num_groups =
-		counter->num_signals + counter->num_counts + 1;
-	struct counter_device_attr_group *groups_list;
-	size_t i;
-	int err;
-	size_t num_groups = 0;
-
-	/* Allocate space for attribute groups (signals, counts, and ext) */
-	groups_list = kcalloc(total_num_groups, sizeof(*groups_list),
-			      GFP_KERNEL);
-	if (!groups_list)
-		return -ENOMEM;
-
-	/* Initialize attribute lists */
-	for (i = 0; i < total_num_groups; i++)
-		INIT_LIST_HEAD(&groups_list[i].attr_list);
-
-	/* Register Signals */
-	err = counter_signals_register(groups_list, counter);
-	if (err)
-		goto err_free_groups_list;
-	num_groups += counter->num_signals;
-
-	/* Register Counts and respective Synapses */
-	err = counter_counts_register(groups_list + num_groups, counter);
-	if (err)
-		goto err_free_groups_list;
-	num_groups += counter->num_counts;
-
-	/* Register Counter global attributes */
-	err = counter_global_attr_register(groups_list + num_groups, counter);
-	if (err)
-		goto err_free_groups_list;
-	num_groups++;
-
-	/* Store groups_list in device_state */
-	counter->device_state->groups_list = groups_list;
-	counter->device_state->num_groups = num_groups;
-
-	return 0;
-
-err_free_groups_list:
-	counter_device_groups_list_free(groups_list, num_groups);
-	return err;
-}
-
-static int counter_device_groups_prepare(
-	struct counter_device_state *const device_state)
-{
-	size_t i, j;
-	struct counter_device_attr_group *group;
-	int err;
-	struct counter_device_attr *p;
-
-	/* Allocate attribute groups for association with device */
-	device_state->groups = kcalloc(device_state->num_groups + 1,
-				       sizeof(*device_state->groups),
-				       GFP_KERNEL);
-	if (!device_state->groups)
-		return -ENOMEM;
-
-	/* Prepare each group of attributes for association */
-	for (i = 0; i < device_state->num_groups; i++) {
-		group = device_state->groups_list + i;
-
-		/* Allocate space for attribute pointers in attribute group */
-		group->attr_group.attrs = kcalloc(group->num_attr + 1,
-			sizeof(*group->attr_group.attrs), GFP_KERNEL);
-		if (!group->attr_group.attrs) {
-			err = -ENOMEM;
-			goto err_free_groups;
-		}
-
-		/* Add attribute pointers to attribute group */
-		j = 0;
-		list_for_each_entry(p, &group->attr_list, l)
-			group->attr_group.attrs[j++] = &p->dev_attr.attr;
-
-		/* Group attributes in attribute group */
-		device_state->groups[i] = &group->attr_group;
-	}
-	/* Associate attributes with device */
-	device_state->dev.groups = device_state->groups;
-
-	return 0;
-
-err_free_groups:
-	do {
-		group = device_state->groups_list + i;
-		kfree(group->attr_group.attrs);
-		group->attr_group.attrs = NULL;
-	} while (i--);
-	kfree(device_state->groups);
-	return err;
-}
-
-/* Provides a unique ID for each counter device */
-static DEFINE_IDA(counter_ida);
-
-static void counter_device_release(struct device *dev)
-{
-	struct counter_device *const counter = dev_get_drvdata(dev);
-	struct counter_device_state *const device_state = counter->device_state;
-
-	kfree(device_state->groups);
-	counter_device_groups_list_free(device_state->groups_list,
-					device_state->num_groups);
-	ida_simple_remove(&counter_ida, device_state->id);
-	kfree(device_state);
-}
-
-static struct device_type counter_device_type = {
-	.name = "counter_device",
-	.release = counter_device_release
-};
-
-static struct bus_type counter_bus_type = {
-	.name = "counter"
-};
-
-/**
- * counter_register - register Counter to the system
- * @counter:	pointer to Counter to register
- *
- * This function registers a Counter to the system. A sysfs "counter" directory
- * will be created and populated with sysfs attributes correlating with the
- * Counter Signals, Synapses, and Counts respectively.
- */
-int counter_register(struct counter_device *const counter)
-{
-	struct counter_device_state *device_state;
-	int err;
-
-	/* Allocate internal state container for Counter device */
-	device_state = kzalloc(sizeof(*device_state), GFP_KERNEL);
-	if (!device_state)
-		return -ENOMEM;
-	counter->device_state = device_state;
-
-	/* Acquire unique ID */
-	device_state->id = ida_simple_get(&counter_ida, 0, 0, GFP_KERNEL);
-	if (device_state->id < 0) {
-		err = device_state->id;
-		goto err_free_device_state;
-	}
-
-	/* Configure device structure for Counter */
-	device_state->dev.type = &counter_device_type;
-	device_state->dev.bus = &counter_bus_type;
-	if (counter->parent) {
-		device_state->dev.parent = counter->parent;
-		device_state->dev.of_node = counter->parent->of_node;
-	}
-	dev_set_name(&device_state->dev, "counter%d", device_state->id);
-	device_initialize(&device_state->dev);
-	dev_set_drvdata(&device_state->dev, counter);
-
-	/* Prepare device attributes */
-	err = counter_device_groups_list_prepare(counter);
-	if (err)
-		goto err_free_id;
-
-	/* Organize device attributes to groups and match to device */
-	err = counter_device_groups_prepare(device_state);
-	if (err)
-		goto err_free_groups_list;
-
-	/* Add device to system */
-	err = device_add(&device_state->dev);
-	if (err)
-		goto err_free_groups;
-
-	return 0;
-
-err_free_groups:
-	kfree(device_state->groups);
-err_free_groups_list:
-	counter_device_groups_list_free(device_state->groups_list,
-					device_state->num_groups);
-err_free_id:
-	ida_simple_remove(&counter_ida, device_state->id);
-err_free_device_state:
-	kfree(device_state);
-	return err;
-}
-EXPORT_SYMBOL_GPL(counter_register);
-
-/**
- * counter_unregister - unregister Counter from the system
- * @counter:	pointer to Counter to unregister
- *
- * The Counter is unregistered from the system; all allocated memory is freed.
- */
-void counter_unregister(struct counter_device *const counter)
-{
-	if (counter)
-		device_del(&counter->device_state->dev);
-}
-EXPORT_SYMBOL_GPL(counter_unregister);
-
-static void devm_counter_unreg(struct device *dev, void *res)
-{
-	counter_unregister(*(struct counter_device **)res);
-}
-
-/**
- * devm_counter_register - Resource-managed counter_register
- * @dev:	device to allocate counter_device for
- * @counter:	pointer to Counter to register
- *
- * Managed counter_register. The Counter registered with this function is
- * automatically unregistered on driver detach. This function calls
- * counter_register internally. Refer to that function for more information.
- *
- * If an Counter registered with this function needs to be unregistered
- * separately, devm_counter_unregister must be used.
- *
- * RETURNS:
- * 0 on success, negative error number on failure.
- */
-int devm_counter_register(struct device *dev,
-			  struct counter_device *const counter)
-{
-	struct counter_device **ptr;
-	int ret;
-
-	ptr = devres_alloc(devm_counter_unreg, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return -ENOMEM;
-
-	ret = counter_register(counter);
-	if (!ret) {
-		*ptr = counter;
-		devres_add(dev, ptr);
-	} else {
-		devres_free(ptr);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(devm_counter_register);
-
-static int devm_counter_match(struct device *dev, void *res, void *data)
-{
-	struct counter_device **r = res;
-
-	if (!r || !*r) {
-		WARN_ON(!r || !*r);
-		return 0;
-	}
-
-	return *r == data;
-}
-
-/**
- * devm_counter_unregister - Resource-managed counter_unregister
- * @dev:	device this counter_device belongs to
- * @counter:	pointer to Counter associated with the device
- *
- * Unregister Counter registered with devm_counter_register.
- */
-void devm_counter_unregister(struct device *dev,
-			     struct counter_device *const counter)
-{
-	int rc;
-
-	rc = devres_release(dev, devm_counter_unreg, devm_counter_match,
-			    counter);
-	WARN_ON(rc);
-}
-EXPORT_SYMBOL_GPL(devm_counter_unregister);
-
-static int __init counter_init(void)
-{
-	return bus_register(&counter_bus_type);
-}
-
-static void __exit counter_exit(void)
-{
-	bus_unregister(&counter_bus_type);
-}
-
-subsys_initcall(counter_init);
-module_exit(counter_exit);
-
-MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
-MODULE_DESCRIPTION("Generic Counter interface");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/counter/ftm-quaddec.c b/drivers/counter/ftm-quaddec.c
index 53c15f84909b..5ef0478709cd 100644
--- a/drivers/counter/ftm-quaddec.c
+++ b/drivers/counter/ftm-quaddec.c
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/counter.h>
 #include <linux/bitfield.h>
+#include <linux/types.h>
 
 #define FTM_FIELD_UPDATE(ftm, offset, mask, val)			\
 	({								\
@@ -115,8 +116,7 @@ static void ftm_quaddec_disable(void *ftm)
 }
 
 static int ftm_quaddec_get_prescaler(struct counter_device *counter,
-				     struct counter_count *count,
-				     size_t *cnt_mode)
+				     struct counter_count *count, u32 *cnt_mode)
 {
 	struct ftm_quaddec *ftm = counter->priv;
 	uint32_t scflags;
@@ -129,8 +129,7 @@ static int ftm_quaddec_get_prescaler(struct counter_device *counter,
 }
 
 static int ftm_quaddec_set_prescaler(struct counter_device *counter,
-				     struct counter_count *count,
-				     size_t cnt_mode)
+				     struct counter_count *count, u32 cnt_mode)
 {
 	struct ftm_quaddec *ftm = counter->priv;
 
@@ -151,33 +150,17 @@ static const char * const ftm_quaddec_prescaler[] = {
 	"1", "2", "4", "8", "16", "32", "64", "128"
 };
 
-static struct counter_count_enum_ext ftm_quaddec_prescaler_enum = {
-	.items = ftm_quaddec_prescaler,
-	.num_items = ARRAY_SIZE(ftm_quaddec_prescaler),
-	.get = ftm_quaddec_get_prescaler,
-	.set = ftm_quaddec_set_prescaler
-};
-
-enum ftm_quaddec_synapse_action {
-	FTM_QUADDEC_SYNAPSE_ACTION_BOTH_EDGES,
-};
-
 static const enum counter_synapse_action ftm_quaddec_synapse_actions[] = {
-	[FTM_QUADDEC_SYNAPSE_ACTION_BOTH_EDGES] =
 	COUNTER_SYNAPSE_ACTION_BOTH_EDGES
 };
 
-enum ftm_quaddec_count_function {
-	FTM_QUADDEC_COUNT_ENCODER_MODE_1,
-};
-
 static const enum counter_function ftm_quaddec_count_functions[] = {
-	[FTM_QUADDEC_COUNT_ENCODER_MODE_1] = COUNTER_FUNCTION_QUADRATURE_X4
+	COUNTER_FUNCTION_QUADRATURE_X4
 };
 
 static int ftm_quaddec_count_read(struct counter_device *counter,
 				  struct counter_count *count,
-				  unsigned long *val)
+				  u64 *val)
 {
 	struct ftm_quaddec *const ftm = counter->priv;
 	uint32_t cntval;
@@ -191,7 +174,7 @@ static int ftm_quaddec_count_read(struct counter_device *counter,
 
 static int ftm_quaddec_count_write(struct counter_device *counter,
 				   struct counter_count *count,
-				   const unsigned long val)
+				   const u64 val)
 {
 	struct ftm_quaddec *const ftm = counter->priv;
 
@@ -205,21 +188,21 @@ static int ftm_quaddec_count_write(struct counter_device *counter,
 	return 0;
 }
 
-static int ftm_quaddec_count_function_get(struct counter_device *counter,
-					  struct counter_count *count,
-					  size_t *function)
+static int ftm_quaddec_count_function_read(struct counter_device *counter,
+					   struct counter_count *count,
+					   enum counter_function *function)
 {
-	*function = FTM_QUADDEC_COUNT_ENCODER_MODE_1;
+	*function = COUNTER_FUNCTION_QUADRATURE_X4;
 
 	return 0;
 }
 
-static int ftm_quaddec_action_get(struct counter_device *counter,
-				  struct counter_count *count,
-				  struct counter_synapse *synapse,
-				  size_t *action)
+static int ftm_quaddec_action_read(struct counter_device *counter,
+				   struct counter_count *count,
+				   struct counter_synapse *synapse,
+				   enum counter_synapse_action *action)
 {
-	*action = FTM_QUADDEC_SYNAPSE_ACTION_BOTH_EDGES;
+	*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 
 	return 0;
 }
@@ -227,8 +210,8 @@ static int ftm_quaddec_action_get(struct counter_device *counter,
 static const struct counter_ops ftm_quaddec_cnt_ops = {
 	.count_read = ftm_quaddec_count_read,
 	.count_write = ftm_quaddec_count_write,
-	.function_get = ftm_quaddec_count_function_get,
-	.action_get = ftm_quaddec_action_get,
+	.function_read = ftm_quaddec_count_function_read,
+	.action_read = ftm_quaddec_action_read,
 };
 
 static struct counter_signal ftm_quaddec_signals[] = {
@@ -255,9 +238,12 @@ static struct counter_synapse ftm_quaddec_count_synapses[] = {
 	}
 };
 
-static const struct counter_count_ext ftm_quaddec_count_ext[] = {
-	COUNTER_COUNT_ENUM("prescaler", &ftm_quaddec_prescaler_enum),
-	COUNTER_COUNT_ENUM_AVAILABLE("prescaler", &ftm_quaddec_prescaler_enum),
+static DEFINE_COUNTER_ENUM(ftm_quaddec_prescaler_enum, ftm_quaddec_prescaler);
+
+static struct counter_comp ftm_quaddec_count_ext[] = {
+	COUNTER_COMP_COUNT_ENUM("prescaler", ftm_quaddec_get_prescaler,
+				ftm_quaddec_set_prescaler,
+				ftm_quaddec_prescaler_enum),
 };
 
 static struct counter_count ftm_quaddec_counts = {
diff --git a/drivers/counter/intel-qep.c b/drivers/counter/intel-qep.c
index 8a6847d5fb2b..0924d16de6e2 100644
--- a/drivers/counter/intel-qep.c
+++ b/drivers/counter/intel-qep.c
@@ -62,13 +62,6 @@
 
 #define INTEL_QEP_CLK_PERIOD_NS		10
 
-#define INTEL_QEP_COUNTER_EXT_RW(_name)				\
-{								\
-	.name = #_name,						\
-	.read = _name##_read,					\
-	.write = _name##_write,					\
-}
-
 struct intel_qep {
 	struct counter_device counter;
 	struct mutex lock;
@@ -114,8 +107,7 @@ static void intel_qep_init(struct intel_qep *qep)
 }
 
 static int intel_qep_count_read(struct counter_device *counter,
-				struct counter_count *count,
-				unsigned long *val)
+				struct counter_count *count, u64 *val)
 {
 	struct intel_qep *const qep = counter->priv;
 
@@ -130,11 +122,11 @@ static const enum counter_function intel_qep_count_functions[] = {
 	COUNTER_FUNCTION_QUADRATURE_X4,
 };
 
-static int intel_qep_function_get(struct counter_device *counter,
-				  struct counter_count *count,
-				  size_t *function)
+static int intel_qep_function_read(struct counter_device *counter,
+				   struct counter_count *count,
+				   enum counter_function *function)
 {
-	*function = 0;
+	*function = COUNTER_FUNCTION_QUADRATURE_X4;
 
 	return 0;
 }
@@ -143,19 +135,19 @@ static const enum counter_synapse_action intel_qep_synapse_actions[] = {
 	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
 };
 
-static int intel_qep_action_get(struct counter_device *counter,
-				struct counter_count *count,
-				struct counter_synapse *synapse,
-				size_t *action)
+static int intel_qep_action_read(struct counter_device *counter,
+				 struct counter_count *count,
+				 struct counter_synapse *synapse,
+				 enum counter_synapse_action *action)
 {
-	*action = 0;
+	*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 	return 0;
 }
 
 static const struct counter_ops intel_qep_counter_ops = {
 	.count_read = intel_qep_count_read,
-	.function_get = intel_qep_function_get,
-	.action_get = intel_qep_action_get,
+	.function_read = intel_qep_function_read,
+	.action_read = intel_qep_action_read,
 };
 
 #define INTEL_QEP_SIGNAL(_id, _name) {				\
@@ -181,31 +173,27 @@ static struct counter_synapse intel_qep_count_synapses[] = {
 	INTEL_QEP_SYNAPSE(2),
 };
 
-static ssize_t ceiling_read(struct counter_device *counter,
-			    struct counter_count *count,
-			    void *priv, char *buf)
+static int intel_qep_ceiling_read(struct counter_device *counter,
+				  struct counter_count *count, u64 *ceiling)
 {
 	struct intel_qep *qep = counter->priv;
-	u32 reg;
 
 	pm_runtime_get_sync(qep->dev);
-	reg = intel_qep_readl(qep, INTEL_QEPMAX);
+	*ceiling = intel_qep_readl(qep, INTEL_QEPMAX);
 	pm_runtime_put(qep->dev);
 
-	return sysfs_emit(buf, "%u\n", reg);
+	return 0;
 }
 
-static ssize_t ceiling_write(struct counter_device *counter,
-			     struct counter_count *count,
-			     void *priv, const char *buf, size_t len)
+static int intel_qep_ceiling_write(struct counter_device *counter,
+				   struct counter_count *count, u64 max)
 {
 	struct intel_qep *qep = counter->priv;
-	u32 max;
-	int ret;
+	int ret = 0;
 
-	ret = kstrtou32(buf, 0, &max);
-	if (ret < 0)
-		return ret;
+	/* Intel QEP ceiling configuration only supports 32-bit values */
+	if (max != (u32)max)
+		return -ERANGE;
 
 	mutex_lock(&qep->lock);
 	if (qep->enabled) {
@@ -216,34 +204,28 @@ static ssize_t ceiling_write(struct counter_device *counter,
 	pm_runtime_get_sync(qep->dev);
 	intel_qep_writel(qep, INTEL_QEPMAX, max);
 	pm_runtime_put(qep->dev);
-	ret = len;
 
 out:
 	mutex_unlock(&qep->lock);
 	return ret;
 }
 
-static ssize_t enable_read(struct counter_device *counter,
-			   struct counter_count *count,
-			   void *priv, char *buf)
+static int intel_qep_enable_read(struct counter_device *counter,
+				 struct counter_count *count, u8 *enable)
 {
 	struct intel_qep *qep = counter->priv;
 
-	return sysfs_emit(buf, "%u\n", qep->enabled);
+	*enable = qep->enabled;
+
+	return 0;
 }
 
-static ssize_t enable_write(struct counter_device *counter,
-			    struct counter_count *count,
-			    void *priv, const char *buf, size_t len)
+static int intel_qep_enable_write(struct counter_device *counter,
+				  struct counter_count *count, u8 val)
 {
 	struct intel_qep *qep = counter->priv;
 	u32 reg;
-	bool val, changed;
-	int ret;
-
-	ret = kstrtobool(buf, &val);
-	if (ret)
-		return ret;
+	bool changed;
 
 	mutex_lock(&qep->lock);
 	changed = val ^ qep->enabled;
@@ -267,12 +249,12 @@ static ssize_t enable_write(struct counter_device *counter,
 
 out:
 	mutex_unlock(&qep->lock);
-	return len;
+	return 0;
 }
 
-static ssize_t spike_filter_ns_read(struct counter_device *counter,
-				    struct counter_count *count,
-				    void *priv, char *buf)
+static int intel_qep_spike_filter_ns_read(struct counter_device *counter,
+					  struct counter_count *count,
+					  u64 *length)
 {
 	struct intel_qep *qep = counter->priv;
 	u32 reg;
@@ -281,33 +263,31 @@ static ssize_t spike_filter_ns_read(struct counter_device *counter,
 	reg = intel_qep_readl(qep, INTEL_QEPCON);
 	if (!(reg & INTEL_QEPCON_FLT_EN)) {
 		pm_runtime_put(qep->dev);
-		return sysfs_emit(buf, "0\n");
+		return 0;
 	}
 	reg = INTEL_QEPFLT_MAX_COUNT(intel_qep_readl(qep, INTEL_QEPFLT));
 	pm_runtime_put(qep->dev);
 
-	return sysfs_emit(buf, "%u\n", (reg + 2) * INTEL_QEP_CLK_PERIOD_NS);
+	*length = (reg + 2) * INTEL_QEP_CLK_PERIOD_NS;
+
+	return 0;
 }
 
-static ssize_t spike_filter_ns_write(struct counter_device *counter,
-				     struct counter_count *count,
-				     void *priv, const char *buf, size_t len)
+static int intel_qep_spike_filter_ns_write(struct counter_device *counter,
+					   struct counter_count *count,
+					   u64 length)
 {
 	struct intel_qep *qep = counter->priv;
-	u32 reg, length;
+	u32 reg;
 	bool enable;
-	int ret;
-
-	ret = kstrtou32(buf, 0, &length);
-	if (ret < 0)
-		return ret;
+	int ret = 0;
 
 	/*
 	 * Spike filter length is (MAX_COUNT + 2) clock periods.
 	 * Disable filter when userspace writes 0, enable for valid
 	 * nanoseconds values and error out otherwise.
 	 */
-	length /= INTEL_QEP_CLK_PERIOD_NS;
+	do_div(length, INTEL_QEP_CLK_PERIOD_NS);
 	if (length == 0) {
 		enable = false;
 		length = 0;
@@ -336,16 +316,15 @@ static ssize_t spike_filter_ns_write(struct counter_device *counter,
 	intel_qep_writel(qep, INTEL_QEPFLT, length);
 	intel_qep_writel(qep, INTEL_QEPCON, reg);
 	pm_runtime_put(qep->dev);
-	ret = len;
 
 out:
 	mutex_unlock(&qep->lock);
 	return ret;
 }
 
-static ssize_t preset_enable_read(struct counter_device *counter,
-				  struct counter_count *count,
-				  void *priv, char *buf)
+static int intel_qep_preset_enable_read(struct counter_device *counter,
+					struct counter_count *count,
+					u8 *preset_enable)
 {
 	struct intel_qep *qep = counter->priv;
 	u32 reg;
@@ -353,21 +332,18 @@ static ssize_t preset_enable_read(struct counter_device *counter,
 	pm_runtime_get_sync(qep->dev);
 	reg = intel_qep_readl(qep, INTEL_QEPCON);
 	pm_runtime_put(qep->dev);
-	return sysfs_emit(buf, "%u\n", !(reg & INTEL_QEPCON_COUNT_RST_MODE));
+
+	*preset_enable = !(reg & INTEL_QEPCON_COUNT_RST_MODE);
+
+	return 0;
 }
 
-static ssize_t preset_enable_write(struct counter_device *counter,
-				   struct counter_count *count,
-				   void *priv, const char *buf, size_t len)
+static int intel_qep_preset_enable_write(struct counter_device *counter,
+					 struct counter_count *count, u8 val)
 {
 	struct intel_qep *qep = counter->priv;
 	u32 reg;
-	bool val;
-	int ret;
-
-	ret = kstrtobool(buf, &val);
-	if (ret)
-		return ret;
+	int ret = 0;
 
 	mutex_lock(&qep->lock);
 	if (qep->enabled) {
@@ -384,7 +360,6 @@ static ssize_t preset_enable_write(struct counter_device *counter,
 
 	intel_qep_writel(qep, INTEL_QEPCON, reg);
 	pm_runtime_put(qep->dev);
-	ret = len;
 
 out:
 	mutex_unlock(&qep->lock);
@@ -392,11 +367,14 @@ out:
 	return ret;
 }
 
-static const struct counter_count_ext intel_qep_count_ext[] = {
-	INTEL_QEP_COUNTER_EXT_RW(ceiling),
-	INTEL_QEP_COUNTER_EXT_RW(enable),
-	INTEL_QEP_COUNTER_EXT_RW(spike_filter_ns),
-	INTEL_QEP_COUNTER_EXT_RW(preset_enable)
+static struct counter_comp intel_qep_count_ext[] = {
+	COUNTER_COMP_ENABLE(intel_qep_enable_read, intel_qep_enable_write),
+	COUNTER_COMP_CEILING(intel_qep_ceiling_read, intel_qep_ceiling_write),
+	COUNTER_COMP_PRESET_ENABLE(intel_qep_preset_enable_read,
+				   intel_qep_preset_enable_write),
+	COUNTER_COMP_COUNT_U64("spike_filter_ns",
+			       intel_qep_spike_filter_ns_read,
+			       intel_qep_spike_filter_ns_write),
 };
 
 static struct counter_count intel_qep_counter_count[] = {
diff --git a/drivers/counter/interrupt-cnt.c b/drivers/counter/interrupt-cnt.c
index 1de4243db488..8514a87fcbee 100644
--- a/drivers/counter/interrupt-cnt.c
+++ b/drivers/counter/interrupt-cnt.c
@@ -10,6 +10,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/types.h>
 
 #define INTERRUPT_CNT_NAME "interrupt-cnt"
 
@@ -33,30 +34,23 @@ static irqreturn_t interrupt_cnt_isr(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static ssize_t interrupt_cnt_enable_read(struct counter_device *counter,
-					 struct counter_count *count,
-					 void *private, char *buf)
+static int interrupt_cnt_enable_read(struct counter_device *counter,
+				     struct counter_count *count, u8 *enable)
 {
 	struct interrupt_cnt_priv *priv = counter->priv;
 
-	return sysfs_emit(buf, "%d\n", priv->enabled);
+	*enable = priv->enabled;
+
+	return 0;
 }
 
-static ssize_t interrupt_cnt_enable_write(struct counter_device *counter,
-					  struct counter_count *count,
-					  void *private, const char *buf,
-					  size_t len)
+static int interrupt_cnt_enable_write(struct counter_device *counter,
+				      struct counter_count *count, u8 enable)
 {
 	struct interrupt_cnt_priv *priv = counter->priv;
-	bool enable;
-	ssize_t ret;
-
-	ret = kstrtobool(buf, &enable);
-	if (ret)
-		return ret;
 
 	if (priv->enabled == enable)
-		return len;
+		return 0;
 
 	if (enable) {
 		priv->enabled = true;
@@ -66,33 +60,30 @@ static ssize_t interrupt_cnt_enable_write(struct counter_device *counter,
 		priv->enabled = false;
 	}
 
-	return len;
+	return 0;
 }
 
-static const struct counter_count_ext interrupt_cnt_ext[] = {
-	{
-		.name = "enable",
-		.read = interrupt_cnt_enable_read,
-		.write = interrupt_cnt_enable_write,
-	},
+static struct counter_comp interrupt_cnt_ext[] = {
+	COUNTER_COMP_ENABLE(interrupt_cnt_enable_read,
+			    interrupt_cnt_enable_write),
 };
 
 static const enum counter_synapse_action interrupt_cnt_synapse_actions[] = {
 	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
 };
 
-static int interrupt_cnt_action_get(struct counter_device *counter,
-				    struct counter_count *count,
-				    struct counter_synapse *synapse,
-				    size_t *action)
+static int interrupt_cnt_action_read(struct counter_device *counter,
+				     struct counter_count *count,
+				     struct counter_synapse *synapse,
+				     enum counter_synapse_action *action)
 {
-	*action = 0;
+	*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
 
 	return 0;
 }
 
 static int interrupt_cnt_read(struct counter_device *counter,
-			      struct counter_count *count, unsigned long *val)
+			      struct counter_count *count, u64 *val)
 {
 	struct interrupt_cnt_priv *priv = counter->priv;
 
@@ -102,8 +93,7 @@ static int interrupt_cnt_read(struct counter_device *counter,
 }
 
 static int interrupt_cnt_write(struct counter_device *counter,
-			       struct counter_count *count,
-			       const unsigned long val)
+			       struct counter_count *count, const u64 val)
 {
 	struct interrupt_cnt_priv *priv = counter->priv;
 
@@ -119,11 +109,11 @@ static const enum counter_function interrupt_cnt_functions[] = {
 	COUNTER_FUNCTION_INCREASE,
 };
 
-static int interrupt_cnt_function_get(struct counter_device *counter,
-				      struct counter_count *count,
-				      size_t *function)
+static int interrupt_cnt_function_read(struct counter_device *counter,
+				       struct counter_count *count,
+				       enum counter_function *function)
 {
-	*function = 0;
+	*function = COUNTER_FUNCTION_INCREASE;
 
 	return 0;
 }
@@ -148,10 +138,10 @@ static int interrupt_cnt_signal_read(struct counter_device *counter,
 }
 
 static const struct counter_ops interrupt_cnt_ops = {
-	.action_get = interrupt_cnt_action_get,
+	.action_read = interrupt_cnt_action_read,
 	.count_read = interrupt_cnt_read,
 	.count_write = interrupt_cnt_write,
-	.function_get = interrupt_cnt_function_get,
+	.function_read = interrupt_cnt_function_read,
 	.signal_read  = interrupt_cnt_signal_read,
 };
 
diff --git a/drivers/counter/microchip-tcb-capture.c b/drivers/counter/microchip-tcb-capture.c
index 1aa70b9c4833..79e0c84a3b81 100644
--- a/drivers/counter/microchip-tcb-capture.c
+++ b/drivers/counter/microchip-tcb-capture.c
@@ -32,28 +32,16 @@ struct mchp_tc_data {
 	bool trig_inverted;
 };
 
-enum mchp_tc_count_function {
-	MCHP_TC_FUNCTION_INCREASE,
-	MCHP_TC_FUNCTION_QUADRATURE,
-};
-
 static const enum counter_function mchp_tc_count_functions[] = {
-	[MCHP_TC_FUNCTION_INCREASE] = COUNTER_FUNCTION_INCREASE,
-	[MCHP_TC_FUNCTION_QUADRATURE] = COUNTER_FUNCTION_QUADRATURE_X4,
-};
-
-enum mchp_tc_synapse_action {
-	MCHP_TC_SYNAPSE_ACTION_NONE = 0,
-	MCHP_TC_SYNAPSE_ACTION_RISING_EDGE,
-	MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE,
-	MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE
+	COUNTER_FUNCTION_INCREASE,
+	COUNTER_FUNCTION_QUADRATURE_X4,
 };
 
 static const enum counter_synapse_action mchp_tc_synapse_actions[] = {
-	[MCHP_TC_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
-	[MCHP_TC_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE,
-	[MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE] = COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
-	[MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+	COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
 };
 
 static struct counter_signal mchp_tc_count_signals[] = {
@@ -80,23 +68,23 @@ static struct counter_synapse mchp_tc_count_synapses[] = {
 	}
 };
 
-static int mchp_tc_count_function_get(struct counter_device *counter,
-				      struct counter_count *count,
-				      size_t *function)
+static int mchp_tc_count_function_read(struct counter_device *counter,
+				       struct counter_count *count,
+				       enum counter_function *function)
 {
 	struct mchp_tc_data *const priv = counter->priv;
 
 	if (priv->qdec_mode)
-		*function = MCHP_TC_FUNCTION_QUADRATURE;
+		*function = COUNTER_FUNCTION_QUADRATURE_X4;
 	else
-		*function = MCHP_TC_FUNCTION_INCREASE;
+		*function = COUNTER_FUNCTION_INCREASE;
 
 	return 0;
 }
 
-static int mchp_tc_count_function_set(struct counter_device *counter,
-				      struct counter_count *count,
-				      size_t function)
+static int mchp_tc_count_function_write(struct counter_device *counter,
+					struct counter_count *count,
+					enum counter_function function)
 {
 	struct mchp_tc_data *const priv = counter->priv;
 	u32 bmr, cmr;
@@ -108,7 +96,7 @@ static int mchp_tc_count_function_set(struct counter_device *counter,
 	cmr &= ~ATMEL_TC_WAVE;
 
 	switch (function) {
-	case MCHP_TC_FUNCTION_INCREASE:
+	case COUNTER_FUNCTION_INCREASE:
 		priv->qdec_mode = 0;
 		/* Set highest rate based on whether soc has gclk or not */
 		bmr &= ~(ATMEL_TC_QDEN | ATMEL_TC_POSEN);
@@ -120,7 +108,7 @@ static int mchp_tc_count_function_set(struct counter_device *counter,
 		cmr |=  ATMEL_TC_CMR_MASK;
 		cmr &= ~(ATMEL_TC_ABETRG | ATMEL_TC_XC0);
 		break;
-	case MCHP_TC_FUNCTION_QUADRATURE:
+	case COUNTER_FUNCTION_QUADRATURE_X4:
 		if (!priv->tc_cfg->has_qdec)
 			return -EINVAL;
 		/* In QDEC mode settings both channels 0 and 1 are required */
@@ -176,10 +164,10 @@ static int mchp_tc_count_signal_read(struct counter_device *counter,
 	return 0;
 }
 
-static int mchp_tc_count_action_get(struct counter_device *counter,
-				    struct counter_count *count,
-				    struct counter_synapse *synapse,
-				    size_t *action)
+static int mchp_tc_count_action_read(struct counter_device *counter,
+				     struct counter_count *count,
+				     struct counter_synapse *synapse,
+				     enum counter_synapse_action *action)
 {
 	struct mchp_tc_data *const priv = counter->priv;
 	u32 cmr;
@@ -188,26 +176,26 @@ static int mchp_tc_count_action_get(struct counter_device *counter,
 
 	switch (cmr & ATMEL_TC_ETRGEDG) {
 	default:
-		*action = MCHP_TC_SYNAPSE_ACTION_NONE;
+		*action = COUNTER_SYNAPSE_ACTION_NONE;
 		break;
 	case ATMEL_TC_ETRGEDG_RISING:
-		*action = MCHP_TC_SYNAPSE_ACTION_RISING_EDGE;
+		*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
 		break;
 	case ATMEL_TC_ETRGEDG_FALLING:
-		*action = MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE;
+		*action = COUNTER_SYNAPSE_ACTION_FALLING_EDGE;
 		break;
 	case ATMEL_TC_ETRGEDG_BOTH:
-		*action = MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE;
+		*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		break;
 	}
 
 	return 0;
 }
 
-static int mchp_tc_count_action_set(struct counter_device *counter,
-				    struct counter_count *count,
-				    struct counter_synapse *synapse,
-				    size_t action)
+static int mchp_tc_count_action_write(struct counter_device *counter,
+				      struct counter_count *count,
+				      struct counter_synapse *synapse,
+				      enum counter_synapse_action action)
 {
 	struct mchp_tc_data *const priv = counter->priv;
 	u32 edge = ATMEL_TC_ETRGEDG_NONE;
@@ -217,16 +205,16 @@ static int mchp_tc_count_action_set(struct counter_device *counter,
 		return -EINVAL;
 
 	switch (action) {
-	case MCHP_TC_SYNAPSE_ACTION_NONE:
+	case COUNTER_SYNAPSE_ACTION_NONE:
 		edge = ATMEL_TC_ETRGEDG_NONE;
 		break;
-	case MCHP_TC_SYNAPSE_ACTION_RISING_EDGE:
+	case COUNTER_SYNAPSE_ACTION_RISING_EDGE:
 		edge = ATMEL_TC_ETRGEDG_RISING;
 		break;
-	case MCHP_TC_SYNAPSE_ACTION_FALLING_EDGE:
+	case COUNTER_SYNAPSE_ACTION_FALLING_EDGE:
 		edge = ATMEL_TC_ETRGEDG_FALLING;
 		break;
-	case MCHP_TC_SYNAPSE_ACTION_BOTH_EDGE:
+	case COUNTER_SYNAPSE_ACTION_BOTH_EDGES:
 		edge = ATMEL_TC_ETRGEDG_BOTH;
 		break;
 	default:
@@ -240,8 +228,7 @@ static int mchp_tc_count_action_set(struct counter_device *counter,
 }
 
 static int mchp_tc_count_read(struct counter_device *counter,
-			      struct counter_count *count,
-			      unsigned long *val)
+			      struct counter_count *count, u64 *val)
 {
 	struct mchp_tc_data *const priv = counter->priv;
 	u32 cnt;
@@ -264,12 +251,12 @@ static struct counter_count mchp_tc_counts[] = {
 };
 
 static const struct counter_ops mchp_tc_ops = {
-	.signal_read  = mchp_tc_count_signal_read,
-	.count_read   = mchp_tc_count_read,
-	.function_get = mchp_tc_count_function_get,
-	.function_set = mchp_tc_count_function_set,
-	.action_get   = mchp_tc_count_action_get,
-	.action_set   = mchp_tc_count_action_set
+	.signal_read    = mchp_tc_count_signal_read,
+	.count_read     = mchp_tc_count_read,
+	.function_read  = mchp_tc_count_function_read,
+	.function_write = mchp_tc_count_function_write,
+	.action_read    = mchp_tc_count_action_read,
+	.action_write   = mchp_tc_count_action_write
 };
 
 static const struct atmel_tcb_config tcb_rm9200_config = {
diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c
index 7367f46c6f91..5168833b1fdf 100644
--- a/drivers/counter/stm32-lptimer-cnt.c
+++ b/drivers/counter/stm32-lptimer-cnt.c
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
+#include <linux/types.h>
 
 struct stm32_lptim_cnt {
 	struct counter_device counter;
@@ -107,11 +108,7 @@ static int stm32_lptim_setup(struct stm32_lptim_cnt *priv, int enable)
 	return regmap_update_bits(priv->regmap, STM32_LPTIM_CFGR, mask, val);
 }
 
-/**
- * enum stm32_lptim_cnt_function - enumerates LPTimer counter & encoder modes
- * @STM32_LPTIM_COUNTER_INCREASE: up count on IN1 rising, falling or both edges
- * @STM32_LPTIM_ENCODER_BOTH_EDGE: count on both edges (IN1 & IN2 quadrature)
- *
+/*
  * In non-quadrature mode, device counts up on active edge.
  * In quadrature mode, encoder counting scenarios are as follows:
  * +---------+----------+--------------------+--------------------+
@@ -129,33 +126,20 @@ static int stm32_lptim_setup(struct stm32_lptim_cnt *priv, int enable)
  * | edges   | Low  ->  |   Up     |   Down  |   Down   |   Up    |
  * +---------+----------+----------+---------+----------+---------+
  */
-enum stm32_lptim_cnt_function {
-	STM32_LPTIM_COUNTER_INCREASE,
-	STM32_LPTIM_ENCODER_BOTH_EDGE,
-};
-
 static const enum counter_function stm32_lptim_cnt_functions[] = {
-	[STM32_LPTIM_COUNTER_INCREASE] = COUNTER_FUNCTION_INCREASE,
-	[STM32_LPTIM_ENCODER_BOTH_EDGE] = COUNTER_FUNCTION_QUADRATURE_X4,
-};
-
-enum stm32_lptim_synapse_action {
-	STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE = STM32_LPTIM_CKPOL_RISING_EDGE,
-	STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE = STM32_LPTIM_CKPOL_FALLING_EDGE,
-	STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES = STM32_LPTIM_CKPOL_BOTH_EDGES,
-	STM32_LPTIM_SYNAPSE_ACTION_NONE,
+	COUNTER_FUNCTION_INCREASE,
+	COUNTER_FUNCTION_QUADRATURE_X4,
 };
 
 static const enum counter_synapse_action stm32_lptim_cnt_synapse_actions[] = {
-	/* Index must match with stm32_lptim_cnt_polarity[] (priv->polarity) */
-	[STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE] = COUNTER_SYNAPSE_ACTION_RISING_EDGE,
-	[STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE] = COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
-	[STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
-	[STM32_LPTIM_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+	COUNTER_SYNAPSE_ACTION_NONE,
 };
 
 static int stm32_lptim_cnt_read(struct counter_device *counter,
-				struct counter_count *count, unsigned long *val)
+				struct counter_count *count, u64 *val)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
 	u32 cnt;
@@ -170,28 +154,28 @@ static int stm32_lptim_cnt_read(struct counter_device *counter,
 	return 0;
 }
 
-static int stm32_lptim_cnt_function_get(struct counter_device *counter,
-					struct counter_count *count,
-					size_t *function)
+static int stm32_lptim_cnt_function_read(struct counter_device *counter,
+					 struct counter_count *count,
+					 enum counter_function *function)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
 
 	if (!priv->quadrature_mode) {
-		*function = STM32_LPTIM_COUNTER_INCREASE;
+		*function = COUNTER_FUNCTION_INCREASE;
 		return 0;
 	}
 
-	if (priv->polarity == STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES) {
-		*function = STM32_LPTIM_ENCODER_BOTH_EDGE;
+	if (priv->polarity == STM32_LPTIM_CKPOL_BOTH_EDGES) {
+		*function = COUNTER_FUNCTION_QUADRATURE_X4;
 		return 0;
 	}
 
 	return -EINVAL;
 }
 
-static int stm32_lptim_cnt_function_set(struct counter_device *counter,
-					struct counter_count *count,
-					size_t function)
+static int stm32_lptim_cnt_function_write(struct counter_device *counter,
+					  struct counter_count *count,
+					  enum counter_function function)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
 
@@ -199,12 +183,12 @@ static int stm32_lptim_cnt_function_set(struct counter_device *counter,
 		return -EBUSY;
 
 	switch (function) {
-	case STM32_LPTIM_COUNTER_INCREASE:
+	case COUNTER_FUNCTION_INCREASE:
 		priv->quadrature_mode = 0;
 		return 0;
-	case STM32_LPTIM_ENCODER_BOTH_EDGE:
+	case COUNTER_FUNCTION_QUADRATURE_X4:
 		priv->quadrature_mode = 1;
-		priv->polarity = STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES;
+		priv->polarity = STM32_LPTIM_CKPOL_BOTH_EDGES;
 		return 0;
 	default:
 		/* should never reach this path */
@@ -212,9 +196,9 @@ static int stm32_lptim_cnt_function_set(struct counter_device *counter,
 	}
 }
 
-static ssize_t stm32_lptim_cnt_enable_read(struct counter_device *counter,
-					   struct counter_count *count,
-					   void *private, char *buf)
+static int stm32_lptim_cnt_enable_read(struct counter_device *counter,
+				       struct counter_count *count,
+				       u8 *enable)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
 	int ret;
@@ -223,22 +207,18 @@ static ssize_t stm32_lptim_cnt_enable_read(struct counter_device *counter,
 	if (ret < 0)
 		return ret;
 
-	return scnprintf(buf, PAGE_SIZE, "%u\n", ret);
+	*enable = ret;
+
+	return 0;
 }
 
-static ssize_t stm32_lptim_cnt_enable_write(struct counter_device *counter,
-					    struct counter_count *count,
-					    void *private,
-					    const char *buf, size_t len)
+static int stm32_lptim_cnt_enable_write(struct counter_device *counter,
+					struct counter_count *count,
+					u8 enable)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
-	bool enable;
 	int ret;
 
-	ret = kstrtobool(buf, &enable);
-	if (ret)
-		return ret;
-
 	/* Check nobody uses the timer, or already disabled/enabled */
 	ret = stm32_lptim_is_enabled(priv);
 	if ((ret < 0) || (!ret && !enable))
@@ -254,78 +234,81 @@ static ssize_t stm32_lptim_cnt_enable_write(struct counter_device *counter,
 	if (ret)
 		return ret;
 
-	return len;
+	return 0;
 }
 
-static ssize_t stm32_lptim_cnt_ceiling_read(struct counter_device *counter,
-					    struct counter_count *count,
-					    void *private, char *buf)
+static int stm32_lptim_cnt_ceiling_read(struct counter_device *counter,
+					struct counter_count *count,
+					u64 *ceiling)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
 
-	return snprintf(buf, PAGE_SIZE, "%u\n", priv->ceiling);
+	*ceiling = priv->ceiling;
+
+	return 0;
 }
 
-static ssize_t stm32_lptim_cnt_ceiling_write(struct counter_device *counter,
-					     struct counter_count *count,
-					     void *private,
-					     const char *buf, size_t len)
+static int stm32_lptim_cnt_ceiling_write(struct counter_device *counter,
+					 struct counter_count *count,
+					 u64 ceiling)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
-	unsigned int ceiling;
-	int ret;
 
 	if (stm32_lptim_is_enabled(priv))
 		return -EBUSY;
 
-	ret = kstrtouint(buf, 0, &ceiling);
-	if (ret)
-		return ret;
-
 	if (ceiling > STM32_LPTIM_MAX_ARR)
 		return -ERANGE;
 
 	priv->ceiling = ceiling;
 
-	return len;
+	return 0;
 }
 
-static const struct counter_count_ext stm32_lptim_cnt_ext[] = {
-	{
-		.name = "enable",
-		.read = stm32_lptim_cnt_enable_read,
-		.write = stm32_lptim_cnt_enable_write
-	},
-	{
-		.name = "ceiling",
-		.read = stm32_lptim_cnt_ceiling_read,
-		.write = stm32_lptim_cnt_ceiling_write
-	},
+static struct counter_comp stm32_lptim_cnt_ext[] = {
+	COUNTER_COMP_ENABLE(stm32_lptim_cnt_enable_read,
+			    stm32_lptim_cnt_enable_write),
+	COUNTER_COMP_CEILING(stm32_lptim_cnt_ceiling_read,
+			     stm32_lptim_cnt_ceiling_write),
 };
 
-static int stm32_lptim_cnt_action_get(struct counter_device *counter,
-				      struct counter_count *count,
-				      struct counter_synapse *synapse,
-				      size_t *action)
+static int stm32_lptim_cnt_action_read(struct counter_device *counter,
+				       struct counter_count *count,
+				       struct counter_synapse *synapse,
+				       enum counter_synapse_action *action)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
-	size_t function;
+	enum counter_function function;
 	int err;
 
-	err = stm32_lptim_cnt_function_get(counter, count, &function);
+	err = stm32_lptim_cnt_function_read(counter, count, &function);
 	if (err)
 		return err;
 
 	switch (function) {
-	case STM32_LPTIM_COUNTER_INCREASE:
+	case COUNTER_FUNCTION_INCREASE:
 		/* LP Timer acts as up-counter on input 1 */
-		if (synapse->signal->id == count->synapses[0].signal->id)
-			*action = priv->polarity;
-		else
-			*action = STM32_LPTIM_SYNAPSE_ACTION_NONE;
-		return 0;
-	case STM32_LPTIM_ENCODER_BOTH_EDGE:
-		*action = priv->polarity;
+		if (synapse->signal->id != count->synapses[0].signal->id) {
+			*action = COUNTER_SYNAPSE_ACTION_NONE;
+			return 0;
+		}
+
+		switch (priv->polarity) {
+		case STM32_LPTIM_CKPOL_RISING_EDGE:
+			*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
+			return 0;
+		case STM32_LPTIM_CKPOL_FALLING_EDGE:
+			*action = COUNTER_SYNAPSE_ACTION_FALLING_EDGE;
+			return 0;
+		case STM32_LPTIM_CKPOL_BOTH_EDGES:
+			*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
+			return 0;
+		default:
+			/* should never reach this path */
+			return -EINVAL;
+		}
+	case COUNTER_FUNCTION_QUADRATURE_X4:
+		*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		return 0;
 	default:
 		/* should never reach this path */
@@ -333,43 +316,48 @@ static int stm32_lptim_cnt_action_get(struct counter_device *counter,
 	}
 }
 
-static int stm32_lptim_cnt_action_set(struct counter_device *counter,
-				      struct counter_count *count,
-				      struct counter_synapse *synapse,
-				      size_t action)
+static int stm32_lptim_cnt_action_write(struct counter_device *counter,
+					struct counter_count *count,
+					struct counter_synapse *synapse,
+					enum counter_synapse_action action)
 {
 	struct stm32_lptim_cnt *const priv = counter->priv;
-	size_t function;
+	enum counter_function function;
 	int err;
 
 	if (stm32_lptim_is_enabled(priv))
 		return -EBUSY;
 
-	err = stm32_lptim_cnt_function_get(counter, count, &function);
+	err = stm32_lptim_cnt_function_read(counter, count, &function);
 	if (err)
 		return err;
 
 	/* only set polarity when in counter mode (on input 1) */
-	if (function == STM32_LPTIM_COUNTER_INCREASE
-	    && synapse->signal->id == count->synapses[0].signal->id) {
-		switch (action) {
-		case STM32_LPTIM_SYNAPSE_ACTION_RISING_EDGE:
-		case STM32_LPTIM_SYNAPSE_ACTION_FALLING_EDGE:
-		case STM32_LPTIM_SYNAPSE_ACTION_BOTH_EDGES:
-			priv->polarity = action;
-			return 0;
-		}
-	}
+	if (function != COUNTER_FUNCTION_INCREASE
+	    || synapse->signal->id != count->synapses[0].signal->id)
+		return -EINVAL;
 
-	return -EINVAL;
+	switch (action) {
+	case COUNTER_SYNAPSE_ACTION_RISING_EDGE:
+		priv->polarity = STM32_LPTIM_CKPOL_RISING_EDGE;
+		return 0;
+	case COUNTER_SYNAPSE_ACTION_FALLING_EDGE:
+		priv->polarity = STM32_LPTIM_CKPOL_FALLING_EDGE;
+		return 0;
+	case COUNTER_SYNAPSE_ACTION_BOTH_EDGES:
+		priv->polarity = STM32_LPTIM_CKPOL_BOTH_EDGES;
+		return 0;
+	default:
+		return -EINVAL;
+	}
 }
 
 static const struct counter_ops stm32_lptim_cnt_ops = {
 	.count_read = stm32_lptim_cnt_read,
-	.function_get = stm32_lptim_cnt_function_get,
-	.function_set = stm32_lptim_cnt_function_set,
-	.action_get = stm32_lptim_cnt_action_get,
-	.action_set = stm32_lptim_cnt_action_set,
+	.function_read = stm32_lptim_cnt_function_read,
+	.function_write = stm32_lptim_cnt_function_write,
+	.action_read = stm32_lptim_cnt_action_read,
+	.action_write = stm32_lptim_cnt_action_write,
 };
 
 static struct counter_signal stm32_lptim_cnt_signals[] = {
diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c
index 1fbc46f4ee66..0546e932db0c 100644
--- a/drivers/counter/stm32-timer-cnt.c
+++ b/drivers/counter/stm32-timer-cnt.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/platform_device.h>
+#include <linux/types.h>
 
 #define TIM_CCMR_CCXS	(BIT(8) | BIT(0))
 #define TIM_CCMR_MASK	(TIM_CCMR_CC1S | TIM_CCMR_CC2S | \
@@ -36,29 +37,15 @@ struct stm32_timer_cnt {
 	struct stm32_timer_regs bak;
 };
 
-/**
- * enum stm32_count_function - enumerates stm32 timer counter encoder modes
- * @STM32_COUNT_SLAVE_MODE_DISABLED: counts on internal clock when CEN=1
- * @STM32_COUNT_ENCODER_MODE_1: counts TI1FP1 edges, depending on TI2FP2 level
- * @STM32_COUNT_ENCODER_MODE_2: counts TI2FP2 edges, depending on TI1FP1 level
- * @STM32_COUNT_ENCODER_MODE_3: counts on both TI1FP1 and TI2FP2 edges
- */
-enum stm32_count_function {
-	STM32_COUNT_SLAVE_MODE_DISABLED,
-	STM32_COUNT_ENCODER_MODE_1,
-	STM32_COUNT_ENCODER_MODE_2,
-	STM32_COUNT_ENCODER_MODE_3,
-};
-
 static const enum counter_function stm32_count_functions[] = {
-	[STM32_COUNT_SLAVE_MODE_DISABLED] = COUNTER_FUNCTION_INCREASE,
-	[STM32_COUNT_ENCODER_MODE_1] = COUNTER_FUNCTION_QUADRATURE_X2_A,
-	[STM32_COUNT_ENCODER_MODE_2] = COUNTER_FUNCTION_QUADRATURE_X2_B,
-	[STM32_COUNT_ENCODER_MODE_3] = COUNTER_FUNCTION_QUADRATURE_X4,
+	COUNTER_FUNCTION_INCREASE,
+	COUNTER_FUNCTION_QUADRATURE_X2_A,
+	COUNTER_FUNCTION_QUADRATURE_X2_B,
+	COUNTER_FUNCTION_QUADRATURE_X4,
 };
 
 static int stm32_count_read(struct counter_device *counter,
-			    struct counter_count *count, unsigned long *val)
+			    struct counter_count *count, u64 *val)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
 	u32 cnt;
@@ -70,8 +57,7 @@ static int stm32_count_read(struct counter_device *counter,
 }
 
 static int stm32_count_write(struct counter_device *counter,
-			     struct counter_count *count,
-			     const unsigned long val)
+			     struct counter_count *count, const u64 val)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
 	u32 ceiling;
@@ -83,9 +69,9 @@ static int stm32_count_write(struct counter_device *counter,
 	return regmap_write(priv->regmap, TIM_CNT, val);
 }
 
-static int stm32_count_function_get(struct counter_device *counter,
-				    struct counter_count *count,
-				    size_t *function)
+static int stm32_count_function_read(struct counter_device *counter,
+				     struct counter_count *count,
+				     enum counter_function *function)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
 	u32 smcr;
@@ -94,40 +80,40 @@ static int stm32_count_function_get(struct counter_device *counter,
 
 	switch (smcr & TIM_SMCR_SMS) {
 	case TIM_SMCR_SMS_SLAVE_MODE_DISABLED:
-		*function = STM32_COUNT_SLAVE_MODE_DISABLED;
+		*function = COUNTER_FUNCTION_INCREASE;
 		return 0;
 	case TIM_SMCR_SMS_ENCODER_MODE_1:
-		*function = STM32_COUNT_ENCODER_MODE_1;
+		*function = COUNTER_FUNCTION_QUADRATURE_X2_A;
 		return 0;
 	case TIM_SMCR_SMS_ENCODER_MODE_2:
-		*function = STM32_COUNT_ENCODER_MODE_2;
+		*function = COUNTER_FUNCTION_QUADRATURE_X2_B;
 		return 0;
 	case TIM_SMCR_SMS_ENCODER_MODE_3:
-		*function = STM32_COUNT_ENCODER_MODE_3;
+		*function = COUNTER_FUNCTION_QUADRATURE_X4;
 		return 0;
 	default:
 		return -EINVAL;
 	}
 }
 
-static int stm32_count_function_set(struct counter_device *counter,
-				    struct counter_count *count,
-				    size_t function)
+static int stm32_count_function_write(struct counter_device *counter,
+				      struct counter_count *count,
+				      enum counter_function function)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
 	u32 cr1, sms;
 
 	switch (function) {
-	case STM32_COUNT_SLAVE_MODE_DISABLED:
+	case COUNTER_FUNCTION_INCREASE:
 		sms = TIM_SMCR_SMS_SLAVE_MODE_DISABLED;
 		break;
-	case STM32_COUNT_ENCODER_MODE_1:
+	case COUNTER_FUNCTION_QUADRATURE_X2_A:
 		sms = TIM_SMCR_SMS_ENCODER_MODE_1;
 		break;
-	case STM32_COUNT_ENCODER_MODE_2:
+	case COUNTER_FUNCTION_QUADRATURE_X2_B:
 		sms = TIM_SMCR_SMS_ENCODER_MODE_2;
 		break;
-	case STM32_COUNT_ENCODER_MODE_3:
+	case COUNTER_FUNCTION_QUADRATURE_X4:
 		sms = TIM_SMCR_SMS_ENCODER_MODE_3;
 		break;
 	default:
@@ -150,44 +136,37 @@ static int stm32_count_function_set(struct counter_device *counter,
 	return 0;
 }
 
-static ssize_t stm32_count_direction_read(struct counter_device *counter,
+static int stm32_count_direction_read(struct counter_device *counter,
 				      struct counter_count *count,
-				      void *private, char *buf)
+				      enum counter_count_direction *direction)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
-	const char *direction;
 	u32 cr1;
 
 	regmap_read(priv->regmap, TIM_CR1, &cr1);
-	direction = (cr1 & TIM_CR1_DIR) ? "backward" : "forward";
+	*direction = (cr1 & TIM_CR1_DIR) ? COUNTER_COUNT_DIRECTION_BACKWARD :
+		COUNTER_COUNT_DIRECTION_FORWARD;
 
-	return scnprintf(buf, PAGE_SIZE, "%s\n", direction);
+	return 0;
 }
 
-static ssize_t stm32_count_ceiling_read(struct counter_device *counter,
-					struct counter_count *count,
-					void *private, char *buf)
+static int stm32_count_ceiling_read(struct counter_device *counter,
+				    struct counter_count *count, u64 *ceiling)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
 	u32 arr;
 
 	regmap_read(priv->regmap, TIM_ARR, &arr);
 
-	return snprintf(buf, PAGE_SIZE, "%u\n", arr);
+	*ceiling = arr;
+
+	return 0;
 }
 
-static ssize_t stm32_count_ceiling_write(struct counter_device *counter,
-					 struct counter_count *count,
-					 void *private,
-					 const char *buf, size_t len)
+static int stm32_count_ceiling_write(struct counter_device *counter,
+				     struct counter_count *count, u64 ceiling)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
-	unsigned int ceiling;
-	int ret;
-
-	ret = kstrtouint(buf, 0, &ceiling);
-	if (ret)
-		return ret;
 
 	if (ceiling > priv->max_arr)
 		return -ERANGE;
@@ -196,34 +175,27 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter,
 	regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0);
 	regmap_write(priv->regmap, TIM_ARR, ceiling);
 
-	return len;
+	return 0;
 }
 
-static ssize_t stm32_count_enable_read(struct counter_device *counter,
-				       struct counter_count *count,
-				       void *private, char *buf)
+static int stm32_count_enable_read(struct counter_device *counter,
+				   struct counter_count *count, u8 *enable)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
 	u32 cr1;
 
 	regmap_read(priv->regmap, TIM_CR1, &cr1);
 
-	return scnprintf(buf, PAGE_SIZE, "%d\n", (bool)(cr1 & TIM_CR1_CEN));
+	*enable = cr1 & TIM_CR1_CEN;
+
+	return 0;
 }
 
-static ssize_t stm32_count_enable_write(struct counter_device *counter,
-					struct counter_count *count,
-					void *private,
-					const char *buf, size_t len)
+static int stm32_count_enable_write(struct counter_device *counter,
+				    struct counter_count *count, u8 enable)
 {
 	struct stm32_timer_cnt *const priv = counter->priv;
-	int err;
 	u32 cr1;
-	bool enable;
-
-	err = kstrtobool(buf, &enable);
-	if (err)
-		return err;
 
 	if (enable) {
 		regmap_read(priv->regmap, TIM_CR1, &cr1);
@@ -242,70 +214,55 @@ static ssize_t stm32_count_enable_write(struct counter_device *counter,
 	/* Keep enabled state to properly handle low power states */
 	priv->enabled = enable;
 
-	return len;
+	return 0;
 }
 
-static const struct counter_count_ext stm32_count_ext[] = {
-	{
-		.name = "direction",
-		.read = stm32_count_direction_read,
-	},
-	{
-		.name = "enable",
-		.read = stm32_count_enable_read,
-		.write = stm32_count_enable_write
-	},
-	{
-		.name = "ceiling",
-		.read = stm32_count_ceiling_read,
-		.write = stm32_count_ceiling_write
-	},
-};
-
-enum stm32_synapse_action {
-	STM32_SYNAPSE_ACTION_NONE,
-	STM32_SYNAPSE_ACTION_BOTH_EDGES
+static struct counter_comp stm32_count_ext[] = {
+	COUNTER_COMP_DIRECTION(stm32_count_direction_read),
+	COUNTER_COMP_ENABLE(stm32_count_enable_read, stm32_count_enable_write),
+	COUNTER_COMP_CEILING(stm32_count_ceiling_read,
+			     stm32_count_ceiling_write),
 };
 
 static const enum counter_synapse_action stm32_synapse_actions[] = {
-	[STM32_SYNAPSE_ACTION_NONE] = COUNTER_SYNAPSE_ACTION_NONE,
-	[STM32_SYNAPSE_ACTION_BOTH_EDGES] = COUNTER_SYNAPSE_ACTION_BOTH_EDGES
+	COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES
 };
 
-static int stm32_action_get(struct counter_device *counter,
-			    struct counter_count *count,
-			    struct counter_synapse *synapse,
-			    size_t *action)
+static int stm32_action_read(struct counter_device *counter,
+			     struct counter_count *count,
+			     struct counter_synapse *synapse,
+			     enum counter_synapse_action *action)
 {
-	size_t function;
+	enum counter_function function;
 	int err;
 
-	err = stm32_count_function_get(counter, count, &function);
+	err = stm32_count_function_read(counter, count, &function);
 	if (err)
 		return err;
 
 	switch (function) {
-	case STM32_COUNT_SLAVE_MODE_DISABLED:
+	case COUNTER_FUNCTION_INCREASE:
 		/* counts on internal clock when CEN=1 */
-		*action = STM32_SYNAPSE_ACTION_NONE;
+		*action = COUNTER_SYNAPSE_ACTION_NONE;
 		return 0;
-	case STM32_COUNT_ENCODER_MODE_1:
+	case COUNTER_FUNCTION_QUADRATURE_X2_A:
 		/* counts up/down on TI1FP1 edge depending on TI2FP2 level */
 		if (synapse->signal->id == count->synapses[0].signal->id)
-			*action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
+			*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		else
-			*action = STM32_SYNAPSE_ACTION_NONE;
+			*action = COUNTER_SYNAPSE_ACTION_NONE;
 		return 0;
-	case STM32_COUNT_ENCODER_MODE_2:
+	case COUNTER_FUNCTION_QUADRATURE_X2_B:
 		/* counts up/down on TI2FP2 edge depending on TI1FP1 level */
 		if (synapse->signal->id == count->synapses[1].signal->id)
-			*action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
+			*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		else
-			*action = STM32_SYNAPSE_ACTION_NONE;
+			*action = COUNTER_SYNAPSE_ACTION_NONE;
 		return 0;
-	case STM32_COUNT_ENCODER_MODE_3:
+	case COUNTER_FUNCTION_QUADRATURE_X4:
 		/* counts up/down on both TI1FP1 and TI2FP2 edges */
-		*action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
+		*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		return 0;
 	default:
 		return -EINVAL;
@@ -315,9 +272,9 @@ static int stm32_action_get(struct counter_device *counter,
 static const struct counter_ops stm32_timer_cnt_ops = {
 	.count_read = stm32_count_read,
 	.count_write = stm32_count_write,
-	.function_get = stm32_count_function_get,
-	.function_set = stm32_count_function_set,
-	.action_get = stm32_action_get,
+	.function_read = stm32_count_function_read,
+	.function_write = stm32_count_function_write,
+	.action_read = stm32_action_read,
 };
 
 static struct counter_signal stm32_signals[] = {
diff --git a/drivers/counter/ti-eqep.c b/drivers/counter/ti-eqep.c
index 94fe58bb3eab..09817c953f9a 100644
--- a/drivers/counter/ti-eqep.c
+++ b/drivers/counter/ti-eqep.c
@@ -13,6 +13,7 @@
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
+#include <linux/types.h>
 
 /* 32-bit registers */
 #define QPOSCNT		0x0
@@ -73,19 +74,13 @@ enum {
 };
 
 /* Position Counter Input Modes */
-enum {
+enum ti_eqep_count_func {
 	TI_EQEP_COUNT_FUNC_QUAD_COUNT,
 	TI_EQEP_COUNT_FUNC_DIR_COUNT,
 	TI_EQEP_COUNT_FUNC_UP_COUNT,
 	TI_EQEP_COUNT_FUNC_DOWN_COUNT,
 };
 
-enum {
-	TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES,
-	TI_EQEP_SYNAPSE_ACTION_RISING_EDGE,
-	TI_EQEP_SYNAPSE_ACTION_NONE,
-};
-
 struct ti_eqep_cnt {
 	struct counter_device counter;
 	struct regmap *regmap32;
@@ -93,7 +88,7 @@ struct ti_eqep_cnt {
 };
 
 static int ti_eqep_count_read(struct counter_device *counter,
-			      struct counter_count *count, unsigned long *val)
+			      struct counter_count *count, u64 *val)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
 	u32 cnt;
@@ -105,7 +100,7 @@ static int ti_eqep_count_read(struct counter_device *counter,
 }
 
 static int ti_eqep_count_write(struct counter_device *counter,
-			       struct counter_count *count, unsigned long val)
+			       struct counter_count *count, u64 val)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
 	u32 max;
@@ -117,64 +112,100 @@ static int ti_eqep_count_write(struct counter_device *counter,
 	return regmap_write(priv->regmap32, QPOSCNT, val);
 }
 
-static int ti_eqep_function_get(struct counter_device *counter,
-				struct counter_count *count, size_t *function)
+static int ti_eqep_function_read(struct counter_device *counter,
+				 struct counter_count *count,
+				 enum counter_function *function)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
 	u32 qdecctl;
 
 	regmap_read(priv->regmap16, QDECCTL, &qdecctl);
-	*function = (qdecctl & QDECCTL_QSRC) >> QDECCTL_QSRC_SHIFT;
+
+	switch ((qdecctl & QDECCTL_QSRC) >> QDECCTL_QSRC_SHIFT) {
+	case TI_EQEP_COUNT_FUNC_QUAD_COUNT:
+		*function = COUNTER_FUNCTION_QUADRATURE_X4;
+		break;
+	case TI_EQEP_COUNT_FUNC_DIR_COUNT:
+		*function = COUNTER_FUNCTION_PULSE_DIRECTION;
+		break;
+	case TI_EQEP_COUNT_FUNC_UP_COUNT:
+		*function = COUNTER_FUNCTION_INCREASE;
+		break;
+	case TI_EQEP_COUNT_FUNC_DOWN_COUNT:
+		*function = COUNTER_FUNCTION_DECREASE;
+		break;
+	}
 
 	return 0;
 }
 
-static int ti_eqep_function_set(struct counter_device *counter,
-				struct counter_count *count, size_t function)
+static int ti_eqep_function_write(struct counter_device *counter,
+				  struct counter_count *count,
+				  enum counter_function function)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
+	enum ti_eqep_count_func qsrc;
+
+	switch (function) {
+	case COUNTER_FUNCTION_QUADRATURE_X4:
+		qsrc = TI_EQEP_COUNT_FUNC_QUAD_COUNT;
+		break;
+	case COUNTER_FUNCTION_PULSE_DIRECTION:
+		qsrc = TI_EQEP_COUNT_FUNC_DIR_COUNT;
+		break;
+	case COUNTER_FUNCTION_INCREASE:
+		qsrc = TI_EQEP_COUNT_FUNC_UP_COUNT;
+		break;
+	case COUNTER_FUNCTION_DECREASE:
+		qsrc = TI_EQEP_COUNT_FUNC_DOWN_COUNT;
+		break;
+	default:
+		/* should never reach this path */
+		return -EINVAL;
+	}
 
 	return regmap_write_bits(priv->regmap16, QDECCTL, QDECCTL_QSRC,
-				 function << QDECCTL_QSRC_SHIFT);
+				 qsrc << QDECCTL_QSRC_SHIFT);
 }
 
-static int ti_eqep_action_get(struct counter_device *counter,
-			      struct counter_count *count,
-			      struct counter_synapse *synapse, size_t *action)
+static int ti_eqep_action_read(struct counter_device *counter,
+			       struct counter_count *count,
+			       struct counter_synapse *synapse,
+			       enum counter_synapse_action *action)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
-	size_t function;
+	enum counter_function function;
 	u32 qdecctl;
 	int err;
 
-	err = ti_eqep_function_get(counter, count, &function);
+	err = ti_eqep_function_read(counter, count, &function);
 	if (err)
 		return err;
 
 	switch (function) {
-	case TI_EQEP_COUNT_FUNC_QUAD_COUNT:
+	case COUNTER_FUNCTION_QUADRATURE_X4:
 		/* In quadrature mode, the rising and falling edge of both
 		 * QEPA and QEPB trigger QCLK.
 		 */
-		*action = TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES;
+		*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 		return 0;
-	case TI_EQEP_COUNT_FUNC_DIR_COUNT:
+	case COUNTER_FUNCTION_PULSE_DIRECTION:
 		/* In direction-count mode only rising edge of QEPA is counted
 		 * and QEPB gives direction.
 		 */
 		switch (synapse->signal->id) {
 		case TI_EQEP_SIGNAL_QEPA:
-			*action = TI_EQEP_SYNAPSE_ACTION_RISING_EDGE;
+			*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
 			return 0;
 		case TI_EQEP_SIGNAL_QEPB:
-			*action = TI_EQEP_SYNAPSE_ACTION_NONE;
+			*action = COUNTER_SYNAPSE_ACTION_NONE;
 			return 0;
 		default:
 			/* should never reach this path */
 			return -EINVAL;
 		}
-	case TI_EQEP_COUNT_FUNC_UP_COUNT:
-	case TI_EQEP_COUNT_FUNC_DOWN_COUNT:
+	case COUNTER_FUNCTION_INCREASE:
+	case COUNTER_FUNCTION_DECREASE:
 		/* In up/down-count modes only QEPA is counted and QEPB is not
 		 * used.
 		 */
@@ -185,12 +216,12 @@ static int ti_eqep_action_get(struct counter_device *counter,
 				return err;
 
 			if (qdecctl & QDECCTL_XCR)
-				*action = TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES;
+				*action = COUNTER_SYNAPSE_ACTION_BOTH_EDGES;
 			else
-				*action = TI_EQEP_SYNAPSE_ACTION_RISING_EDGE;
+				*action = COUNTER_SYNAPSE_ACTION_RISING_EDGE;
 			return 0;
 		case TI_EQEP_SIGNAL_QEPB:
-			*action = TI_EQEP_SYNAPSE_ACTION_NONE;
+			*action = COUNTER_SYNAPSE_ACTION_NONE;
 			return 0;
 		default:
 			/* should never reach this path */
@@ -205,82 +236,67 @@ static int ti_eqep_action_get(struct counter_device *counter,
 static const struct counter_ops ti_eqep_counter_ops = {
 	.count_read	= ti_eqep_count_read,
 	.count_write	= ti_eqep_count_write,
-	.function_get	= ti_eqep_function_get,
-	.function_set	= ti_eqep_function_set,
-	.action_get	= ti_eqep_action_get,
+	.function_read	= ti_eqep_function_read,
+	.function_write	= ti_eqep_function_write,
+	.action_read	= ti_eqep_action_read,
 };
 
-static ssize_t ti_eqep_position_ceiling_read(struct counter_device *counter,
-					     struct counter_count *count,
-					     void *ext_priv, char *buf)
+static int ti_eqep_position_ceiling_read(struct counter_device *counter,
+					 struct counter_count *count,
+					 u64 *ceiling)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
 	u32 qposmax;
 
 	regmap_read(priv->regmap32, QPOSMAX, &qposmax);
 
-	return sprintf(buf, "%u\n", qposmax);
+	*ceiling = qposmax;
+
+	return 0;
 }
 
-static ssize_t ti_eqep_position_ceiling_write(struct counter_device *counter,
-					      struct counter_count *count,
-					      void *ext_priv, const char *buf,
-					      size_t len)
+static int ti_eqep_position_ceiling_write(struct counter_device *counter,
+					  struct counter_count *count,
+					  u64 ceiling)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
-	int err;
-	u32 res;
 
-	err = kstrtouint(buf, 0, &res);
-	if (err < 0)
-		return err;
+	if (ceiling != (u32)ceiling)
+		return -ERANGE;
 
-	regmap_write(priv->regmap32, QPOSMAX, res);
+	regmap_write(priv->regmap32, QPOSMAX, ceiling);
 
-	return len;
+	return 0;
 }
 
-static ssize_t ti_eqep_position_enable_read(struct counter_device *counter,
-					    struct counter_count *count,
-					    void *ext_priv, char *buf)
+static int ti_eqep_position_enable_read(struct counter_device *counter,
+					struct counter_count *count, u8 *enable)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
 	u32 qepctl;
 
 	regmap_read(priv->regmap16, QEPCTL, &qepctl);
 
-	return sprintf(buf, "%u\n", !!(qepctl & QEPCTL_PHEN));
+	*enable = !!(qepctl & QEPCTL_PHEN);
+
+	return 0;
 }
 
-static ssize_t ti_eqep_position_enable_write(struct counter_device *counter,
-					     struct counter_count *count,
-					     void *ext_priv, const char *buf,
-					     size_t len)
+static int ti_eqep_position_enable_write(struct counter_device *counter,
+					 struct counter_count *count, u8 enable)
 {
 	struct ti_eqep_cnt *priv = counter->priv;
-	int err;
-	bool res;
-
-	err = kstrtobool(buf, &res);
-	if (err < 0)
-		return err;
 
-	regmap_write_bits(priv->regmap16, QEPCTL, QEPCTL_PHEN, res ? -1 : 0);
+	regmap_write_bits(priv->regmap16, QEPCTL, QEPCTL_PHEN, enable ? -1 : 0);
 
-	return len;
+	return 0;
 }
 
-static struct counter_count_ext ti_eqep_position_ext[] = {
-	{
-		.name	= "ceiling",
-		.read	= ti_eqep_position_ceiling_read,
-		.write	= ti_eqep_position_ceiling_write,
-	},
-	{
-		.name	= "enable",
-		.read	= ti_eqep_position_enable_read,
-		.write	= ti_eqep_position_enable_write,
-	},
+static struct counter_comp ti_eqep_position_ext[] = {
+	COUNTER_COMP_CEILING(ti_eqep_position_ceiling_read,
+			     ti_eqep_position_ceiling_write),
+	COUNTER_COMP_ENABLE(ti_eqep_position_enable_read,
+			    ti_eqep_position_enable_write),
 };
 
 static struct counter_signal ti_eqep_signals[] = {
@@ -295,16 +311,16 @@ static struct counter_signal ti_eqep_signals[] = {
 };
 
 static const enum counter_function ti_eqep_position_functions[] = {
-	[TI_EQEP_COUNT_FUNC_QUAD_COUNT]	= COUNTER_FUNCTION_QUADRATURE_X4,
-	[TI_EQEP_COUNT_FUNC_DIR_COUNT]	= COUNTER_FUNCTION_PULSE_DIRECTION,
-	[TI_EQEP_COUNT_FUNC_UP_COUNT]	= COUNTER_FUNCTION_INCREASE,
-	[TI_EQEP_COUNT_FUNC_DOWN_COUNT]	= COUNTER_FUNCTION_DECREASE,
+	COUNTER_FUNCTION_QUADRATURE_X4,
+	COUNTER_FUNCTION_PULSE_DIRECTION,
+	COUNTER_FUNCTION_INCREASE,
+	COUNTER_FUNCTION_DECREASE,
 };
 
 static const enum counter_synapse_action ti_eqep_position_synapse_actions[] = {
-	[TI_EQEP_SYNAPSE_ACTION_BOTH_EDGES]	= COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
-	[TI_EQEP_SYNAPSE_ACTION_RISING_EDGE]	= COUNTER_SYNAPSE_ACTION_RISING_EDGE,
-	[TI_EQEP_SYNAPSE_ACTION_NONE]		= COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	COUNTER_SYNAPSE_ACTION_NONE,
 };
 
 static struct counter_synapse ti_eqep_position_synapses[] = {
diff --git a/include/linux/counter.h b/include/linux/counter.h
index d16ce2819b48..b69277f5c4c5 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -6,42 +6,184 @@
 #ifndef _COUNTER_H_
 #define _COUNTER_H_
 
-#include <linux/counter_enum.h>
 #include <linux/device.h>
+#include <linux/kernel.h>
 #include <linux/types.h>
 
+struct counter_device;
+struct counter_count;
+struct counter_synapse;
+struct counter_signal;
+
+enum counter_comp_type {
+	COUNTER_COMP_U8,
+	COUNTER_COMP_U64,
+	COUNTER_COMP_BOOL,
+	COUNTER_COMP_SIGNAL_LEVEL,
+	COUNTER_COMP_FUNCTION,
+	COUNTER_COMP_SYNAPSE_ACTION,
+	COUNTER_COMP_ENUM,
+	COUNTER_COMP_COUNT_DIRECTION,
+	COUNTER_COMP_COUNT_MODE,
+};
+
+enum counter_scope {
+	COUNTER_SCOPE_DEVICE,
+	COUNTER_SCOPE_SIGNAL,
+	COUNTER_SCOPE_COUNT,
+};
+
 enum counter_count_direction {
-	COUNTER_COUNT_DIRECTION_FORWARD = 0,
-	COUNTER_COUNT_DIRECTION_BACKWARD
+	COUNTER_COUNT_DIRECTION_FORWARD,
+	COUNTER_COUNT_DIRECTION_BACKWARD,
 };
-extern const char *const counter_count_direction_str[2];
 
 enum counter_count_mode {
-	COUNTER_COUNT_MODE_NORMAL = 0,
+	COUNTER_COUNT_MODE_NORMAL,
 	COUNTER_COUNT_MODE_RANGE_LIMIT,
 	COUNTER_COUNT_MODE_NON_RECYCLE,
-	COUNTER_COUNT_MODE_MODULO_N
+	COUNTER_COUNT_MODE_MODULO_N,
 };
-extern const char *const counter_count_mode_str[4];
 
-struct counter_device;
-struct counter_signal;
+enum counter_function {
+	COUNTER_FUNCTION_INCREASE,
+	COUNTER_FUNCTION_DECREASE,
+	COUNTER_FUNCTION_PULSE_DIRECTION,
+	COUNTER_FUNCTION_QUADRATURE_X1_A,
+	COUNTER_FUNCTION_QUADRATURE_X1_B,
+	COUNTER_FUNCTION_QUADRATURE_X2_A,
+	COUNTER_FUNCTION_QUADRATURE_X2_B,
+	COUNTER_FUNCTION_QUADRATURE_X4,
+};
+
+enum counter_signal_level {
+	COUNTER_SIGNAL_LEVEL_LOW,
+	COUNTER_SIGNAL_LEVEL_HIGH,
+};
+
+enum counter_synapse_action {
+	COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+};
 
 /**
- * struct counter_signal_ext - Counter Signal extensions
- * @name:	attribute name
- * @read:	read callback for this attribute; may be NULL
- * @write:	write callback for this attribute; may be NULL
- * @priv:	data private to the driver
+ * struct counter_comp - Counter component node
+ * @type:		Counter component data type
+ * @name:		device-specific component name
+ * @priv:		component-relevant data
+ * @action_read		Synapse action mode read callback. The read value of the
+ *			respective Synapse action mode should be passed back via
+ *			the action parameter.
+ * @device_u8_read	Device u8 component read callback. The read value of the
+ *			respective Device u8 component should be passed back via
+ *			the val parameter.
+ * @count_u8_read	Count u8 component read callback. The read value of the
+ *			respective Count u8 component should be passed back via
+ *			the val parameter.
+ * @signal_u8_read	Signal u8 component read callback. The read value of the
+ *			respective Signal u8 component should be passed back via
+ *			the val parameter.
+ * @device_u32_read	Device u32 component read callback. The read value of
+ *			the respective Device u32 component should be passed
+ *			back via the val parameter.
+ * @count_u32_read	Count u32 component read callback. The read value of the
+ *			respective Count u32 component should be passed back via
+ *			the val parameter.
+ * @signal_u32_read	Signal u32 component read callback. The read value of
+ *			the respective Signal u32 component should be passed
+ *			back via the val parameter.
+ * @device_u64_read	Device u64 component read callback. The read value of
+ *			the respective Device u64 component should be passed
+ *			back via the val parameter.
+ * @count_u64_read	Count u64 component read callback. The read value of the
+ *			respective Count u64 component should be passed back via
+ *			the val parameter.
+ * @signal_u64_read	Signal u64 component read callback. The read value of
+ *			the respective Signal u64 component should be passed
+ *			back via the val parameter.
+ * @action_write	Synapse action mode write callback. The write value of
+ *			the respective Synapse action mode is passed via the
+ *			action parameter.
+ * @device_u8_write	Device u8 component write callback. The write value of
+ *			the respective Device u8 component is passed via the val
+ *			parameter.
+ * @count_u8_write	Count u8 component write callback. The write value of
+ *			the respective Count u8 component is passed via the val
+ *			parameter.
+ * @signal_u8_write	Signal u8 component write callback. The write value of
+ *			the respective Signal u8 component is passed via the val
+ *			parameter.
+ * @device_u32_write	Device u32 component write callback. The write value of
+ *			the respective Device u32 component is passed via the
+ *			val parameter.
+ * @count_u32_write	Count u32 component write callback. The write value of
+ *			the respective Count u32 component is passed via the val
+ *			parameter.
+ * @signal_u32_write	Signal u32 component write callback. The write value of
+ *			the respective Signal u32 component is passed via the
+ *			val parameter.
+ * @device_u64_write	Device u64 component write callback. The write value of
+ *			the respective Device u64 component is passed via the
+ *			val parameter.
+ * @count_u64_write	Count u64 component write callback. The write value of
+ *			the respective Count u64 component is passed via the val
+ *			parameter.
+ * @signal_u64_write	Signal u64 component write callback. The write value of
+ *			the respective Signal u64 component is passed via the
+ *			val parameter.
  */
-struct counter_signal_ext {
+struct counter_comp {
+	enum counter_comp_type type;
 	const char *name;
-	ssize_t (*read)(struct counter_device *counter,
-			struct counter_signal *signal, void *priv, char *buf);
-	ssize_t (*write)(struct counter_device *counter,
-			 struct counter_signal *signal, void *priv,
-			 const char *buf, size_t len);
 	void *priv;
+	union {
+		int (*action_read)(struct counter_device *counter,
+				   struct counter_count *count,
+				   struct counter_synapse *synapse,
+				   enum counter_synapse_action *action);
+		int (*device_u8_read)(struct counter_device *counter, u8 *val);
+		int (*count_u8_read)(struct counter_device *counter,
+				     struct counter_count *count, u8 *val);
+		int (*signal_u8_read)(struct counter_device *counter,
+				      struct counter_signal *signal, u8 *val);
+		int (*device_u32_read)(struct counter_device *counter,
+				       u32 *val);
+		int (*count_u32_read)(struct counter_device *counter,
+				      struct counter_count *count, u32 *val);
+		int (*signal_u32_read)(struct counter_device *counter,
+				       struct counter_signal *signal, u32 *val);
+		int (*device_u64_read)(struct counter_device *counter,
+				       u64 *val);
+		int (*count_u64_read)(struct counter_device *counter,
+				      struct counter_count *count, u64 *val);
+		int (*signal_u64_read)(struct counter_device *counter,
+				       struct counter_signal *signal, u64 *val);
+	};
+	union {
+		int (*action_write)(struct counter_device *counter,
+				    struct counter_count *count,
+				    struct counter_synapse *synapse,
+				    enum counter_synapse_action action);
+		int (*device_u8_write)(struct counter_device *counter, u8 val);
+		int (*count_u8_write)(struct counter_device *counter,
+				      struct counter_count *count, u8 val);
+		int (*signal_u8_write)(struct counter_device *counter,
+				       struct counter_signal *signal, u8 val);
+		int (*device_u32_write)(struct counter_device *counter,
+					u32 val);
+		int (*count_u32_write)(struct counter_device *counter,
+				       struct counter_count *count, u32 val);
+		int (*signal_u32_write)(struct counter_device *counter,
+					struct counter_signal *signal, u32 val);
+		int (*device_u64_write)(struct counter_device *counter,
+					u64 val);
+		int (*count_u64_write)(struct counter_device *counter,
+				       struct counter_count *count, u64 val);
+		int (*signal_u64_write)(struct counter_device *counter,
+					struct counter_signal *signal, u64 val);
+	};
 };
 
 /**
@@ -51,248 +193,52 @@ struct counter_signal_ext {
  *		as it appears in the datasheet documentation
  * @ext:	optional array of Counter Signal extensions
  * @num_ext:	number of Counter Signal extensions specified in @ext
- * @priv:	optional private data supplied by driver
  */
 struct counter_signal {
 	int id;
 	const char *name;
 
-	const struct counter_signal_ext *ext;
+	struct counter_comp *ext;
 	size_t num_ext;
-
-	void *priv;
-};
-
-/**
- * struct counter_signal_enum_ext - Signal enum extension attribute
- * @items:	Array of strings
- * @num_items:	Number of items specified in @items
- * @set:	Set callback function; may be NULL
- * @get:	Get callback function; may be NULL
- *
- * The counter_signal_enum_ext structure can be used to implement enum style
- * Signal extension attributes. Enum style attributes are those which have a set
- * of strings that map to unsigned integer values. The Generic Counter Signal
- * enum extension helper code takes care of mapping between value and string, as
- * well as generating a "_available" file which contains a list of all available
- * items. The get callback is used to query the currently active item; the index
- * of the item within the respective items array is returned via the 'item'
- * parameter. The set callback is called when the attribute is updated; the
- * 'item' parameter contains the index of the newly activated item within the
- * respective items array.
- */
-struct counter_signal_enum_ext {
-	const char * const *items;
-	size_t num_items;
-	int (*get)(struct counter_device *counter,
-		   struct counter_signal *signal, size_t *item);
-	int (*set)(struct counter_device *counter,
-		   struct counter_signal *signal, size_t item);
-};
-
-/**
- * COUNTER_SIGNAL_ENUM() - Initialize Signal enum extension
- * @_name:	Attribute name
- * @_e:		Pointer to a counter_signal_enum_ext structure
- *
- * This should usually be used together with COUNTER_SIGNAL_ENUM_AVAILABLE()
- */
-#define COUNTER_SIGNAL_ENUM(_name, _e) \
-{ \
-	.name = (_name), \
-	.read = counter_signal_enum_read, \
-	.write = counter_signal_enum_write, \
-	.priv = (_e) \
-}
-
-/**
- * COUNTER_SIGNAL_ENUM_AVAILABLE() - Initialize Signal enum available extension
- * @_name:	Attribute name ("_available" will be appended to the name)
- * @_e:		Pointer to a counter_signal_enum_ext structure
- *
- * Creates a read only attribute that lists all the available enum items in a
- * newline separated list. This should usually be used together with
- * COUNTER_SIGNAL_ENUM()
- */
-#define COUNTER_SIGNAL_ENUM_AVAILABLE(_name, _e) \
-{ \
-	.name = (_name "_available"), \
-	.read = counter_signal_enum_available_read, \
-	.priv = (_e) \
-}
-
-enum counter_synapse_action {
-	COUNTER_SYNAPSE_ACTION_NONE = 0,
-	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
-	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
-	COUNTER_SYNAPSE_ACTION_BOTH_EDGES
 };
 
 /**
  * struct counter_synapse - Counter Synapse node
- * @action:		index of current action mode
  * @actions_list:	array of available action modes
  * @num_actions:	number of action modes specified in @actions_list
  * @signal:		pointer to associated signal
  */
 struct counter_synapse {
-	size_t action;
 	const enum counter_synapse_action *actions_list;
 	size_t num_actions;
 
 	struct counter_signal *signal;
 };
 
-struct counter_count;
-
-/**
- * struct counter_count_ext - Counter Count extension
- * @name:	attribute name
- * @read:	read callback for this attribute; may be NULL
- * @write:	write callback for this attribute; may be NULL
- * @priv:	data private to the driver
- */
-struct counter_count_ext {
-	const char *name;
-	ssize_t (*read)(struct counter_device *counter,
-			struct counter_count *count, void *priv, char *buf);
-	ssize_t (*write)(struct counter_device *counter,
-			 struct counter_count *count, void *priv,
-			 const char *buf, size_t len);
-	void *priv;
-};
-
-enum counter_function {
-	COUNTER_FUNCTION_INCREASE = 0,
-	COUNTER_FUNCTION_DECREASE,
-	COUNTER_FUNCTION_PULSE_DIRECTION,
-	COUNTER_FUNCTION_QUADRATURE_X1_A,
-	COUNTER_FUNCTION_QUADRATURE_X1_B,
-	COUNTER_FUNCTION_QUADRATURE_X2_A,
-	COUNTER_FUNCTION_QUADRATURE_X2_B,
-	COUNTER_FUNCTION_QUADRATURE_X4
-};
-
 /**
  * struct counter_count - Counter Count node
  * @id:			unique ID used to identify Count
  * @name:		device-specific Count name; ideally, this should match
  *			the name as it appears in the datasheet documentation
- * @function:		index of current function mode
  * @functions_list:	array available function modes
  * @num_functions:	number of function modes specified in @functions_list
  * @synapses:		array of synapses for initialization
  * @num_synapses:	number of synapses specified in @synapses
  * @ext:		optional array of Counter Count extensions
  * @num_ext:		number of Counter Count extensions specified in @ext
- * @priv:		optional private data supplied by driver
  */
 struct counter_count {
 	int id;
 	const char *name;
 
-	size_t function;
 	const enum counter_function *functions_list;
 	size_t num_functions;
 
 	struct counter_synapse *synapses;
 	size_t num_synapses;
 
-	const struct counter_count_ext *ext;
+	struct counter_comp *ext;
 	size_t num_ext;
-
-	void *priv;
-};
-
-/**
- * struct counter_count_enum_ext - Count enum extension attribute
- * @items:	Array of strings
- * @num_items:	Number of items specified in @items
- * @set:	Set callback function; may be NULL
- * @get:	Get callback function; may be NULL
- *
- * The counter_count_enum_ext structure can be used to implement enum style
- * Count extension attributes. Enum style attributes are those which have a set
- * of strings that map to unsigned integer values. The Generic Counter Count
- * enum extension helper code takes care of mapping between value and string, as
- * well as generating a "_available" file which contains a list of all available
- * items. The get callback is used to query the currently active item; the index
- * of the item within the respective items array is returned via the 'item'
- * parameter. The set callback is called when the attribute is updated; the
- * 'item' parameter contains the index of the newly activated item within the
- * respective items array.
- */
-struct counter_count_enum_ext {
-	const char * const *items;
-	size_t num_items;
-	int (*get)(struct counter_device *counter, struct counter_count *count,
-		   size_t *item);
-	int (*set)(struct counter_device *counter, struct counter_count *count,
-		   size_t item);
-};
-
-/**
- * COUNTER_COUNT_ENUM() - Initialize Count enum extension
- * @_name:	Attribute name
- * @_e:		Pointer to a counter_count_enum_ext structure
- *
- * This should usually be used together with COUNTER_COUNT_ENUM_AVAILABLE()
- */
-#define COUNTER_COUNT_ENUM(_name, _e) \
-{ \
-	.name = (_name), \
-	.read = counter_count_enum_read, \
-	.write = counter_count_enum_write, \
-	.priv = (_e) \
-}
-
-/**
- * COUNTER_COUNT_ENUM_AVAILABLE() - Initialize Count enum available extension
- * @_name:	Attribute name ("_available" will be appended to the name)
- * @_e:		Pointer to a counter_count_enum_ext structure
- *
- * Creates a read only attribute that lists all the available enum items in a
- * newline separated list. This should usually be used together with
- * COUNTER_COUNT_ENUM()
- */
-#define COUNTER_COUNT_ENUM_AVAILABLE(_name, _e) \
-{ \
-	.name = (_name "_available"), \
-	.read = counter_count_enum_available_read, \
-	.priv = (_e) \
-}
-
-/**
- * struct counter_device_attr_group - internal container for attribute group
- * @attr_group:	Counter sysfs attributes group
- * @attr_list:	list to keep track of created Counter sysfs attributes
- * @num_attr:	number of Counter sysfs attributes
- */
-struct counter_device_attr_group {
-	struct attribute_group attr_group;
-	struct list_head attr_list;
-	size_t num_attr;
-};
-
-/**
- * struct counter_device_state - internal state container for a Counter device
- * @id:			unique ID used to identify the Counter
- * @dev:		internal device structure
- * @groups_list:	attribute groups list (for Signals, Counts, and ext)
- * @num_groups:		number of attribute groups containers
- * @groups:		Counter sysfs attribute groups (to populate @dev.groups)
- */
-struct counter_device_state {
-	int id;
-	struct device dev;
-	struct counter_device_attr_group *groups_list;
-	size_t num_groups;
-	const struct attribute_group **groups;
-};
-
-enum counter_signal_level {
-	COUNTER_SIGNAL_LEVEL_LOW,
-	COUNTER_SIGNAL_LEVEL_HIGH,
 };
 
 /**
@@ -306,117 +252,47 @@ enum counter_signal_level {
  * @count_write:	optional write callback for Count attribute. The write
  *			value for the respective Count is passed in via the val
  *			parameter.
- * @function_get:	function to get the current count function mode. Returns
- *			0 on success and negative error code on error. The index
- *			of the respective Count's returned function mode should
- *			be passed back via the function parameter.
- * @function_set:	function to set the count function mode. function is the
- *			index of the requested function mode from the respective
- *			Count's functions_list array.
- * @action_get:		function to get the current action mode. Returns 0 on
- *			success and negative error code on error. The index of
- *			the respective Synapse's returned action mode should be
- *			passed back via the action parameter.
- * @action_set:		function to set the action mode. action is the index of
- *			the requested action mode from the respective Synapse's
- *			actions_list array.
+ * @function_read:	read callback the Count function modes. The read
+ *			function mode of the respective Count should be passed
+ *			back via the function parameter.
+ * @function_write:	write callback for Count function modes. The function
+ *			mode to write for the respective Count is passed in via
+ *			the function parameter.
+ * @action_read:	read callback the Synapse action modes. The read action
+ *			mode of the respective Synapse should be passed back via
+ *			the action parameter.
+ * @action_write:	write callback for Synapse action modes. The action mode
+ *			to write for the respective Synapse is passed in via the
+ *			action parameter.
  */
 struct counter_ops {
 	int (*signal_read)(struct counter_device *counter,
 			   struct counter_signal *signal,
 			   enum counter_signal_level *level);
 	int (*count_read)(struct counter_device *counter,
-			  struct counter_count *count, unsigned long *val);
+			  struct counter_count *count, u64 *value);
 	int (*count_write)(struct counter_device *counter,
-			   struct counter_count *count, unsigned long val);
-	int (*function_get)(struct counter_device *counter,
-			    struct counter_count *count, size_t *function);
-	int (*function_set)(struct counter_device *counter,
-			    struct counter_count *count, size_t function);
-	int (*action_get)(struct counter_device *counter,
-			  struct counter_count *count,
-			  struct counter_synapse *synapse, size_t *action);
-	int (*action_set)(struct counter_device *counter,
-			  struct counter_count *count,
-			  struct counter_synapse *synapse, size_t action);
-};
-
-/**
- * struct counter_device_ext - Counter device extension
- * @name:	attribute name
- * @read:	read callback for this attribute; may be NULL
- * @write:	write callback for this attribute; may be NULL
- * @priv:	data private to the driver
- */
-struct counter_device_ext {
-	const char *name;
-	ssize_t (*read)(struct counter_device *counter, void *priv, char *buf);
-	ssize_t (*write)(struct counter_device *counter, void *priv,
-			 const char *buf, size_t len);
-	void *priv;
+			   struct counter_count *count, u64 value);
+	int (*function_read)(struct counter_device *counter,
+			     struct counter_count *count,
+			     enum counter_function *function);
+	int (*function_write)(struct counter_device *counter,
+			      struct counter_count *count,
+			      enum counter_function function);
+	int (*action_read)(struct counter_device *counter,
+			   struct counter_count *count,
+			   struct counter_synapse *synapse,
+			   enum counter_synapse_action *action);
+	int (*action_write)(struct counter_device *counter,
+			    struct counter_count *count,
+			    struct counter_synapse *synapse,
+			    enum counter_synapse_action action);
 };
 
-/**
- * struct counter_device_enum_ext - Counter enum extension attribute
- * @items:	Array of strings
- * @num_items:	Number of items specified in @items
- * @set:	Set callback function; may be NULL
- * @get:	Get callback function; may be NULL
- *
- * The counter_device_enum_ext structure can be used to implement enum style
- * Counter extension attributes. Enum style attributes are those which have a
- * set of strings that map to unsigned integer values. The Generic Counter enum
- * extension helper code takes care of mapping between value and string, as well
- * as generating a "_available" file which contains a list of all available
- * items. The get callback is used to query the currently active item; the index
- * of the item within the respective items array is returned via the 'item'
- * parameter. The set callback is called when the attribute is updated; the
- * 'item' parameter contains the index of the newly activated item within the
- * respective items array.
- */
-struct counter_device_enum_ext {
-	const char * const *items;
-	size_t num_items;
-	int (*get)(struct counter_device *counter, size_t *item);
-	int (*set)(struct counter_device *counter, size_t item);
-};
-
-/**
- * COUNTER_DEVICE_ENUM() - Initialize Counter enum extension
- * @_name:	Attribute name
- * @_e:		Pointer to a counter_device_enum_ext structure
- *
- * This should usually be used together with COUNTER_DEVICE_ENUM_AVAILABLE()
- */
-#define COUNTER_DEVICE_ENUM(_name, _e) \
-{ \
-	.name = (_name), \
-	.read = counter_device_enum_read, \
-	.write = counter_device_enum_write, \
-	.priv = (_e) \
-}
-
-/**
- * COUNTER_DEVICE_ENUM_AVAILABLE() - Initialize Counter enum available extension
- * @_name:	Attribute name ("_available" will be appended to the name)
- * @_e:		Pointer to a counter_device_enum_ext structure
- *
- * Creates a read only attribute that lists all the available enum items in a
- * newline separated list. This should usually be used together with
- * COUNTER_DEVICE_ENUM()
- */
-#define COUNTER_DEVICE_ENUM_AVAILABLE(_name, _e) \
-{ \
-	.name = (_name "_available"), \
-	.read = counter_device_enum_available_read, \
-	.priv = (_e) \
-}
-
 /**
  * struct counter_device - Counter data structure
  * @name:		name of the device as it appears in the datasheet
  * @parent:		optional parent device providing the counters
- * @device_state:	internal device state container
  * @ops:		callbacks from driver
  * @signals:		array of Signals
  * @num_signals:	number of Signals specified in @signals
@@ -425,11 +301,11 @@ struct counter_device_enum_ext {
  * @ext:		optional array of Counter device extensions
  * @num_ext:		number of Counter device extensions specified in @ext
  * @priv:		optional private data supplied by driver
+ * @dev:		internal device structure
  */
 struct counter_device {
 	const char *name;
 	struct device *parent;
-	struct counter_device_state *device_state;
 
 	const struct counter_ops *ops;
 
@@ -438,17 +314,159 @@ struct counter_device {
 	struct counter_count *counts;
 	size_t num_counts;
 
-	const struct counter_device_ext *ext;
+	struct counter_comp *ext;
 	size_t num_ext;
 
 	void *priv;
+
+	struct device dev;
 };
 
 int counter_register(struct counter_device *const counter);
 void counter_unregister(struct counter_device *const counter);
 int devm_counter_register(struct device *dev,
 			  struct counter_device *const counter);
-void devm_counter_unregister(struct device *dev,
-			     struct counter_device *const counter);
+
+#define COUNTER_COMP_DEVICE_U8(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_U8, \
+	.name = (_name), \
+	.device_u8_read = (_read), \
+	.device_u8_write = (_write), \
+}
+#define COUNTER_COMP_COUNT_U8(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_U8, \
+	.name = (_name), \
+	.count_u8_read = (_read), \
+	.count_u8_write = (_write), \
+}
+#define COUNTER_COMP_SIGNAL_U8(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_U8, \
+	.name = (_name), \
+	.signal_u8_read = (_read), \
+	.signal_u8_write = (_write), \
+}
+
+#define COUNTER_COMP_DEVICE_U64(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_U64, \
+	.name = (_name), \
+	.device_u64_read = (_read), \
+	.device_u64_write = (_write), \
+}
+#define COUNTER_COMP_COUNT_U64(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_U64, \
+	.name = (_name), \
+	.count_u64_read = (_read), \
+	.count_u64_write = (_write), \
+}
+#define COUNTER_COMP_SIGNAL_U64(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_U64, \
+	.name = (_name), \
+	.signal_u64_read = (_read), \
+	.signal_u64_write = (_write), \
+}
+
+#define COUNTER_COMP_DEVICE_BOOL(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_BOOL, \
+	.name = (_name), \
+	.device_u8_read = (_read), \
+	.device_u8_write = (_write), \
+}
+#define COUNTER_COMP_COUNT_BOOL(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_BOOL, \
+	.name = (_name), \
+	.count_u8_read = (_read), \
+	.count_u8_write = (_write), \
+}
+#define COUNTER_COMP_SIGNAL_BOOL(_name, _read, _write) \
+{ \
+	.type = COUNTER_COMP_BOOL, \
+	.name = (_name), \
+	.signal_u8_read = (_read), \
+	.signal_u8_write = (_write), \
+}
+
+struct counter_available {
+	union {
+		const u32 *enums;
+		const char *const *strs;
+	};
+	size_t num_items;
+};
+
+#define DEFINE_COUNTER_AVAILABLE(_name, _enums) \
+	struct counter_available _name = { \
+		.enums = (_enums), \
+		.num_items = ARRAY_SIZE(_enums), \
+	}
+
+#define DEFINE_COUNTER_ENUM(_name, _strs) \
+	struct counter_available _name = { \
+		.strs = (_strs), \
+		.num_items = ARRAY_SIZE(_strs), \
+	}
+
+#define COUNTER_COMP_DEVICE_ENUM(_name, _get, _set, _available) \
+{ \
+	.type = COUNTER_COMP_ENUM, \
+	.name = (_name), \
+	.device_u32_read = (_get), \
+	.device_u32_write = (_set), \
+	.priv = &(_available), \
+}
+#define COUNTER_COMP_COUNT_ENUM(_name, _get, _set, _available) \
+{ \
+	.type = COUNTER_COMP_ENUM, \
+	.name = (_name), \
+	.count_u32_read = (_get), \
+	.count_u32_write = (_set), \
+	.priv = &(_available), \
+}
+#define COUNTER_COMP_SIGNAL_ENUM(_name, _get, _set, _available) \
+{ \
+	.type = COUNTER_COMP_ENUM, \
+	.name = (_name), \
+	.signal_u32_read = (_get), \
+	.signal_u32_write = (_set), \
+	.priv = &(_available), \
+}
+
+#define COUNTER_COMP_CEILING(_read, _write) \
+	COUNTER_COMP_COUNT_U64("ceiling", _read, _write)
+
+#define COUNTER_COMP_COUNT_MODE(_read, _write, _available) \
+{ \
+	.type = COUNTER_COMP_COUNT_MODE, \
+	.name = "count_mode", \
+	.count_u32_read = (_read), \
+	.count_u32_write = (_write), \
+	.priv = &(_available), \
+}
+
+#define COUNTER_COMP_DIRECTION(_read) \
+{ \
+	.type = COUNTER_COMP_COUNT_DIRECTION, \
+	.name = "direction", \
+	.count_u32_read = (_read), \
+}
+
+#define COUNTER_COMP_ENABLE(_read, _write) \
+	COUNTER_COMP_COUNT_BOOL("enable", _read, _write)
+
+#define COUNTER_COMP_FLOOR(_read, _write) \
+	COUNTER_COMP_COUNT_U64("floor", _read, _write)
+
+#define COUNTER_COMP_PRESET(_read, _write) \
+	COUNTER_COMP_COUNT_U64("preset", _read, _write)
+
+#define COUNTER_COMP_PRESET_ENABLE(_read, _write) \
+	COUNTER_COMP_COUNT_BOOL("preset_enable", _read, _write)
 
 #endif /* _COUNTER_H_ */
diff --git a/include/linux/counter_enum.h b/include/linux/counter_enum.h
deleted file mode 100644
index 9f917298a88f..000000000000
--- a/include/linux/counter_enum.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Counter interface enum functions
- * Copyright (C) 2018 William Breathitt Gray
- */
-#ifndef _COUNTER_ENUM_H_
-#define _COUNTER_ENUM_H_
-
-#include <linux/types.h>
-
-struct counter_device;
-struct counter_signal;
-struct counter_count;
-
-ssize_t counter_signal_enum_read(struct counter_device *counter,
-				 struct counter_signal *signal, void *priv,
-				 char *buf);
-ssize_t counter_signal_enum_write(struct counter_device *counter,
-				  struct counter_signal *signal, void *priv,
-				  const char *buf, size_t len);
-
-ssize_t counter_signal_enum_available_read(struct counter_device *counter,
-					   struct counter_signal *signal,
-					   void *priv, char *buf);
-
-ssize_t counter_count_enum_read(struct counter_device *counter,
-				struct counter_count *count, void *priv,
-				char *buf);
-ssize_t counter_count_enum_write(struct counter_device *counter,
-				 struct counter_count *count, void *priv,
-				 const char *buf, size_t len);
-
-ssize_t counter_count_enum_available_read(struct counter_device *counter,
-					  struct counter_count *count,
-					  void *priv, char *buf);
-
-ssize_t counter_device_enum_read(struct counter_device *counter, void *priv,
-				 char *buf);
-ssize_t counter_device_enum_write(struct counter_device *counter, void *priv,
-				  const char *buf, size_t len);
-
-ssize_t counter_device_enum_available_read(struct counter_device *counter,
-					   void *priv, char *buf);
-
-#endif /* _COUNTER_ENUM_H_ */
-- 
cgit v1.2.3


From 712392f558ef4280b67659fb2d52854dcfc7c8ba Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Fri, 27 Aug 2021 12:47:48 +0900
Subject: counter: Update counter.h comments to reflect sysfs internalization

The Counter subsystem architecture and driver implementations have
changed in order to handle Counter sysfs interactions in a more
consistent way. This patch updates the Generic Counter interface
header file comments to reflect the changes.

Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/19da8ae0c05381b0967c8a334b67f86b814eb880.1630031207.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/counter/counter-core.c |  3 ++
 include/linux/counter.h        | 62 ++++++++++++++++++++----------------------
 2 files changed, 33 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
index c533a6ff12cf..3cda2c47bacb 100644
--- a/drivers/counter/counter-core.c
+++ b/drivers/counter/counter-core.c
@@ -38,6 +38,9 @@ static struct bus_type counter_bus_type = {
  * This function registers a Counter to the system. A sysfs "counter" directory
  * will be created and populated with sysfs attributes correlating with the
  * Counter Signals, Synapses, and Counts respectively.
+ *
+ * RETURNS:
+ * 0 on success, negative error number on failure.
  */
 int counter_register(struct counter_device *const counter)
 {
diff --git a/include/linux/counter.h b/include/linux/counter.h
index b69277f5c4c5..445f22d8bfe2 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -188,11 +188,10 @@ struct counter_comp {
 
 /**
  * struct counter_signal - Counter Signal node
- * @id:		unique ID used to identify signal
- * @name:	device-specific Signal name; ideally, this should match the name
- *		as it appears in the datasheet documentation
- * @ext:	optional array of Counter Signal extensions
- * @num_ext:	number of Counter Signal extensions specified in @ext
+ * @id:		unique ID used to identify the Signal
+ * @name:	device-specific Signal name
+ * @ext:	optional array of Signal extensions
+ * @num_ext:	number of Signal extensions specified in @ext
  */
 struct counter_signal {
 	int id;
@@ -206,7 +205,7 @@ struct counter_signal {
  * struct counter_synapse - Counter Synapse node
  * @actions_list:	array of available action modes
  * @num_actions:	number of action modes specified in @actions_list
- * @signal:		pointer to associated signal
+ * @signal:		pointer to the associated Signal
  */
 struct counter_synapse {
 	const enum counter_synapse_action *actions_list;
@@ -217,15 +216,14 @@ struct counter_synapse {
 
 /**
  * struct counter_count - Counter Count node
- * @id:			unique ID used to identify Count
- * @name:		device-specific Count name; ideally, this should match
- *			the name as it appears in the datasheet documentation
- * @functions_list:	array available function modes
+ * @id:			unique ID used to identify the Count
+ * @name:		device-specific Count name
+ * @functions_list:	array of available function modes
  * @num_functions:	number of function modes specified in @functions_list
- * @synapses:		array of synapses for initialization
- * @num_synapses:	number of synapses specified in @synapses
- * @ext:		optional array of Counter Count extensions
- * @num_ext:		number of Counter Count extensions specified in @ext
+ * @synapses:		array of Synapses for initialization
+ * @num_synapses:	number of Synapses specified in @synapses
+ * @ext:		optional array of Count extensions
+ * @num_ext:		number of Count extensions specified in @ext
  */
 struct counter_count {
 	int id;
@@ -243,27 +241,27 @@ struct counter_count {
 
 /**
  * struct counter_ops - Callbacks from driver
- * @signal_read:	optional read callback for Signal attribute. The read
- *			level of the respective Signal should be passed back via
- *			the level parameter.
- * @count_read:		optional read callback for Count attribute. The read
- *			value of the respective Count should be passed back via
- *			the val parameter.
- * @count_write:	optional write callback for Count attribute. The write
- *			value for the respective Count is passed in via the val
+ * @signal_read:	optional read callback for Signals. The read level of
+ *			the respective Signal should be passed back via the
+ *			level parameter.
+ * @count_read:		read callback for Counts. The read value of the
+ *			respective Count should be passed back via the value
+ *			parameter.
+ * @count_write:	optional write callback for Counts. The write value for
+ *			the respective Count is passed in via the value
  *			parameter.
  * @function_read:	read callback the Count function modes. The read
  *			function mode of the respective Count should be passed
  *			back via the function parameter.
- * @function_write:	write callback for Count function modes. The function
- *			mode to write for the respective Count is passed in via
- *			the function parameter.
- * @action_read:	read callback the Synapse action modes. The read action
- *			mode of the respective Synapse should be passed back via
- *			the action parameter.
- * @action_write:	write callback for Synapse action modes. The action mode
- *			to write for the respective Synapse is passed in via the
- *			action parameter.
+ * @function_write:	optional write callback for Count function modes. The
+ *			function mode to write for the respective Count is
+ *			passed in via the function parameter.
+ * @action_read:	optional read callback the Synapse action modes. The
+ *			read action mode of the respective Synapse should be
+ *			passed back via the action parameter.
+ * @action_write:	optional write callback for Synapse action modes. The
+ *			action mode to write for the respective Synapse is
+ *			passed in via the action parameter.
  */
 struct counter_ops {
 	int (*signal_read)(struct counter_device *counter,
@@ -291,7 +289,7 @@ struct counter_ops {
 
 /**
  * struct counter_device - Counter data structure
- * @name:		name of the device as it appears in the datasheet
+ * @name:		name of the device
  * @parent:		optional parent device providing the counters
  * @ops:		callbacks from driver
  * @signals:		array of Signals
-- 
cgit v1.2.3


From e65c26f413718ed2e6d788491adcd8cebc0f44b6 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 29 Sep 2021 12:15:58 +0900
Subject: counter: Move counter enums to uapi header

This is in preparation for a subsequent patch implementing a character
device interface for the Counter subsystem.

Reviewed-by: David Lechner <david@lechnology.com>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/962a5f2027fafcf4f77c10e1baf520463960d1ee.1632884256.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 MAINTAINERS                  |  1 +
 include/linux/counter.h      | 42 +--------------------------------
 include/uapi/linux/counter.h | 56 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 41 deletions(-)
 create mode 100644 include/uapi/linux/counter.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index b98a34fcec4b..8310a42abdab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4804,6 +4804,7 @@ F:	Documentation/ABI/testing/sysfs-bus-counter
 F:	Documentation/driver-api/generic-counter.rst
 F:	drivers/counter/
 F:	include/linux/counter.h
+F:	include/uapi/linux/counter.h
 
 CP2615 I2C DRIVER
 M:	Bence Csókás <bence98@sch.bme.hu>
diff --git a/include/linux/counter.h b/include/linux/counter.h
index 445f22d8bfe2..7c9f7e23953a 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -9,6 +9,7 @@
 #include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
+#include <uapi/linux/counter.h>
 
 struct counter_device;
 struct counter_count;
@@ -27,47 +28,6 @@ enum counter_comp_type {
 	COUNTER_COMP_COUNT_MODE,
 };
 
-enum counter_scope {
-	COUNTER_SCOPE_DEVICE,
-	COUNTER_SCOPE_SIGNAL,
-	COUNTER_SCOPE_COUNT,
-};
-
-enum counter_count_direction {
-	COUNTER_COUNT_DIRECTION_FORWARD,
-	COUNTER_COUNT_DIRECTION_BACKWARD,
-};
-
-enum counter_count_mode {
-	COUNTER_COUNT_MODE_NORMAL,
-	COUNTER_COUNT_MODE_RANGE_LIMIT,
-	COUNTER_COUNT_MODE_NON_RECYCLE,
-	COUNTER_COUNT_MODE_MODULO_N,
-};
-
-enum counter_function {
-	COUNTER_FUNCTION_INCREASE,
-	COUNTER_FUNCTION_DECREASE,
-	COUNTER_FUNCTION_PULSE_DIRECTION,
-	COUNTER_FUNCTION_QUADRATURE_X1_A,
-	COUNTER_FUNCTION_QUADRATURE_X1_B,
-	COUNTER_FUNCTION_QUADRATURE_X2_A,
-	COUNTER_FUNCTION_QUADRATURE_X2_B,
-	COUNTER_FUNCTION_QUADRATURE_X4,
-};
-
-enum counter_signal_level {
-	COUNTER_SIGNAL_LEVEL_LOW,
-	COUNTER_SIGNAL_LEVEL_HIGH,
-};
-
-enum counter_synapse_action {
-	COUNTER_SYNAPSE_ACTION_NONE,
-	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
-	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
-	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
-};
-
 /**
  * struct counter_comp - Counter component node
  * @type:		Counter component data type
diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h
new file mode 100644
index 000000000000..6113938a6044
--- /dev/null
+++ b/include/uapi/linux/counter.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Userspace ABI for Counter character devices
+ * Copyright (C) 2020 William Breathitt Gray
+ */
+#ifndef _UAPI_COUNTER_H_
+#define _UAPI_COUNTER_H_
+
+/* Component scope definitions */
+enum counter_scope {
+	COUNTER_SCOPE_DEVICE,
+	COUNTER_SCOPE_SIGNAL,
+	COUNTER_SCOPE_COUNT,
+};
+
+/* Count direction values */
+enum counter_count_direction {
+	COUNTER_COUNT_DIRECTION_FORWARD,
+	COUNTER_COUNT_DIRECTION_BACKWARD,
+};
+
+/* Count mode values */
+enum counter_count_mode {
+	COUNTER_COUNT_MODE_NORMAL,
+	COUNTER_COUNT_MODE_RANGE_LIMIT,
+	COUNTER_COUNT_MODE_NON_RECYCLE,
+	COUNTER_COUNT_MODE_MODULO_N,
+};
+
+/* Count function values */
+enum counter_function {
+	COUNTER_FUNCTION_INCREASE,
+	COUNTER_FUNCTION_DECREASE,
+	COUNTER_FUNCTION_PULSE_DIRECTION,
+	COUNTER_FUNCTION_QUADRATURE_X1_A,
+	COUNTER_FUNCTION_QUADRATURE_X1_B,
+	COUNTER_FUNCTION_QUADRATURE_X2_A,
+	COUNTER_FUNCTION_QUADRATURE_X2_B,
+	COUNTER_FUNCTION_QUADRATURE_X4,
+};
+
+/* Signal values */
+enum counter_signal_level {
+	COUNTER_SIGNAL_LEVEL_LOW,
+	COUNTER_SIGNAL_LEVEL_HIGH,
+};
+
+/* Action mode values */
+enum counter_synapse_action {
+	COUNTER_SYNAPSE_ACTION_NONE,
+	COUNTER_SYNAPSE_ACTION_RISING_EDGE,
+	COUNTER_SYNAPSE_ACTION_FALLING_EDGE,
+	COUNTER_SYNAPSE_ACTION_BOTH_EDGES,
+};
+
+#endif /* _UAPI_COUNTER_H_ */
-- 
cgit v1.2.3


From b6c50affda5957a3629b149a91c7f6688ffce7f7 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Wed, 29 Sep 2021 12:15:59 +0900
Subject: counter: Add character device interface

This patch introduces a character device interface for the Counter
subsystem. Device data is exposed through standard character device read
operations. Device data is gathered when a Counter event is pushed by
the respective Counter device driver. Configuration is handled via ioctl
operations on the respective Counter character device node.

Cc: David Lechner <david@lechnology.com>
Cc: Gwendal Grignou <gwendal@chromium.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/b8b8c64b4065aedff43699ad1f0e2f8d1419c15b.1632884256.git.vilhelm.gray@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/counter/Makefile         |   2 +-
 drivers/counter/counter-chrdev.c | 578 +++++++++++++++++++++++++++++++++++++++
 drivers/counter/counter-chrdev.h |  14 +
 drivers/counter/counter-core.c   |  56 +++-
 include/linux/counter.h          |  56 ++++
 include/uapi/linux/counter.h     |  98 +++++++
 6 files changed, 798 insertions(+), 6 deletions(-)
 create mode 100644 drivers/counter/counter-chrdev.c
 create mode 100644 drivers/counter/counter-chrdev.h

(limited to 'include/linux')

diff --git a/drivers/counter/Makefile b/drivers/counter/Makefile
index 1ab7e087fdc2..8fde6c100ebc 100644
--- a/drivers/counter/Makefile
+++ b/drivers/counter/Makefile
@@ -4,7 +4,7 @@
 #
 
 obj-$(CONFIG_COUNTER) += counter.o
-counter-y := counter-core.o counter-sysfs.o
+counter-y := counter-core.o counter-sysfs.o counter-chrdev.o
 
 obj-$(CONFIG_104_QUAD_8)	+= 104-quad-8.o
 obj-$(CONFIG_INTERRUPT_CNT)		+= interrupt-cnt.o
diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c
new file mode 100644
index 000000000000..967c94ae95bb
--- /dev/null
+++ b/drivers/counter/counter-chrdev.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic Counter character device interface
+ * Copyright (C) 2020 William Breathitt Gray
+ */
+#include <linux/atomic.h>
+#include <linux/cdev.h>
+#include <linux/counter.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/kfifo.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/nospec.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/timekeeping.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+
+#include "counter-chrdev.h"
+
+struct counter_comp_node {
+	struct list_head l;
+	struct counter_component component;
+	struct counter_comp comp;
+	void *parent;
+};
+
+#define counter_comp_read_is_equal(a, b) \
+	(a.action_read == b.action_read || \
+	a.device_u8_read == b.device_u8_read || \
+	a.count_u8_read == b.count_u8_read || \
+	a.signal_u8_read == b.signal_u8_read || \
+	a.device_u32_read == b.device_u32_read || \
+	a.count_u32_read == b.count_u32_read || \
+	a.signal_u32_read == b.signal_u32_read || \
+	a.device_u64_read == b.device_u64_read || \
+	a.count_u64_read == b.count_u64_read || \
+	a.signal_u64_read == b.signal_u64_read)
+
+#define counter_comp_read_is_set(comp) \
+	(comp.action_read || \
+	comp.device_u8_read || \
+	comp.count_u8_read || \
+	comp.signal_u8_read || \
+	comp.device_u32_read || \
+	comp.count_u32_read || \
+	comp.signal_u32_read || \
+	comp.device_u64_read || \
+	comp.count_u64_read || \
+	comp.signal_u64_read)
+
+static ssize_t counter_chrdev_read(struct file *filp, char __user *buf,
+				   size_t len, loff_t *f_ps)
+{
+	struct counter_device *const counter = filp->private_data;
+	int err;
+	unsigned int copied;
+
+	if (!counter->ops)
+		return -ENODEV;
+
+	if (len < sizeof(struct counter_event))
+		return -EINVAL;
+
+	do {
+		if (kfifo_is_empty(&counter->events)) {
+			if (filp->f_flags & O_NONBLOCK)
+				return -EAGAIN;
+
+			err = wait_event_interruptible(counter->events_wait,
+					!kfifo_is_empty(&counter->events) ||
+					!counter->ops);
+			if (err < 0)
+				return err;
+			if (!counter->ops)
+				return -ENODEV;
+		}
+
+		if (mutex_lock_interruptible(&counter->events_lock))
+			return -ERESTARTSYS;
+		err = kfifo_to_user(&counter->events, buf, len, &copied);
+		mutex_unlock(&counter->events_lock);
+		if (err < 0)
+			return err;
+	} while (!copied);
+
+	return copied;
+}
+
+static __poll_t counter_chrdev_poll(struct file *filp,
+				    struct poll_table_struct *pollt)
+{
+	struct counter_device *const counter = filp->private_data;
+	__poll_t events = 0;
+
+	if (!counter->ops)
+		return events;
+
+	poll_wait(filp, &counter->events_wait, pollt);
+
+	if (!kfifo_is_empty(&counter->events))
+		events = EPOLLIN | EPOLLRDNORM;
+
+	return events;
+}
+
+static void counter_events_list_free(struct list_head *const events_list)
+{
+	struct counter_event_node *p, *n;
+	struct counter_comp_node *q, *o;
+
+	list_for_each_entry_safe(p, n, events_list, l) {
+		/* Free associated component nodes */
+		list_for_each_entry_safe(q, o, &p->comp_list, l) {
+			list_del(&q->l);
+			kfree(q);
+		}
+
+		/* Free event node */
+		list_del(&p->l);
+		kfree(p);
+	}
+}
+
+static int counter_set_event_node(struct counter_device *const counter,
+				  struct counter_watch *const watch,
+				  const struct counter_comp_node *const cfg)
+{
+	struct counter_event_node *event_node;
+	int err = 0;
+	struct counter_comp_node *comp_node;
+
+	/* Search for event in the list */
+	list_for_each_entry(event_node, &counter->next_events_list, l)
+		if (event_node->event == watch->event &&
+		    event_node->channel == watch->channel)
+			break;
+
+	/* If event is not already in the list */
+	if (&event_node->l == &counter->next_events_list) {
+		/* Allocate new event node */
+		event_node = kmalloc(sizeof(*event_node), GFP_KERNEL);
+		if (!event_node)
+			return -ENOMEM;
+
+		/* Configure event node and add to the list */
+		event_node->event = watch->event;
+		event_node->channel = watch->channel;
+		INIT_LIST_HEAD(&event_node->comp_list);
+		list_add(&event_node->l, &counter->next_events_list);
+	}
+
+	/* Check if component watch has already been set before */
+	list_for_each_entry(comp_node, &event_node->comp_list, l)
+		if (comp_node->parent == cfg->parent &&
+		    counter_comp_read_is_equal(comp_node->comp, cfg->comp)) {
+			err = -EINVAL;
+			goto exit_free_event_node;
+		}
+
+	/* Allocate component node */
+	comp_node = kmalloc(sizeof(*comp_node), GFP_KERNEL);
+	if (!comp_node) {
+		err = -ENOMEM;
+		goto exit_free_event_node;
+	}
+	*comp_node = *cfg;
+
+	/* Add component node to event node */
+	list_add_tail(&comp_node->l, &event_node->comp_list);
+
+exit_free_event_node:
+	/* Free event node if no one else is watching */
+	if (list_empty(&event_node->comp_list)) {
+		list_del(&event_node->l);
+		kfree(event_node);
+	}
+
+	return err;
+}
+
+static int counter_enable_events(struct counter_device *const counter)
+{
+	unsigned long flags;
+	int err = 0;
+
+	mutex_lock(&counter->n_events_list_lock);
+	spin_lock_irqsave(&counter->events_list_lock, flags);
+
+	counter_events_list_free(&counter->events_list);
+	list_replace_init(&counter->next_events_list,
+			  &counter->events_list);
+
+	if (counter->ops->events_configure)
+		err = counter->ops->events_configure(counter);
+
+	spin_unlock_irqrestore(&counter->events_list_lock, flags);
+	mutex_unlock(&counter->n_events_list_lock);
+
+	return err;
+}
+
+static int counter_disable_events(struct counter_device *const counter)
+{
+	unsigned long flags;
+	int err = 0;
+
+	spin_lock_irqsave(&counter->events_list_lock, flags);
+
+	counter_events_list_free(&counter->events_list);
+
+	if (counter->ops->events_configure)
+		err = counter->ops->events_configure(counter);
+
+	spin_unlock_irqrestore(&counter->events_list_lock, flags);
+
+	mutex_lock(&counter->n_events_list_lock);
+
+	counter_events_list_free(&counter->next_events_list);
+
+	mutex_unlock(&counter->n_events_list_lock);
+
+	return err;
+}
+
+static int counter_add_watch(struct counter_device *const counter,
+			     const unsigned long arg)
+{
+	void __user *const uwatch = (void __user *)arg;
+	struct counter_watch watch;
+	struct counter_comp_node comp_node = {};
+	size_t parent, id;
+	struct counter_comp *ext;
+	size_t num_ext;
+	int err = 0;
+
+	if (copy_from_user(&watch, uwatch, sizeof(watch)))
+		return -EFAULT;
+
+	if (watch.component.type == COUNTER_COMPONENT_NONE)
+		goto no_component;
+
+	parent = watch.component.parent;
+
+	/* Configure parent component info for comp node */
+	switch (watch.component.scope) {
+	case COUNTER_SCOPE_DEVICE:
+		ext = counter->ext;
+		num_ext = counter->num_ext;
+		break;
+	case COUNTER_SCOPE_SIGNAL:
+		if (parent >= counter->num_signals)
+			return -EINVAL;
+		parent = array_index_nospec(parent, counter->num_signals);
+
+		comp_node.parent = counter->signals + parent;
+
+		ext = counter->signals[parent].ext;
+		num_ext = counter->signals[parent].num_ext;
+		break;
+	case COUNTER_SCOPE_COUNT:
+		if (parent >= counter->num_counts)
+			return -EINVAL;
+		parent = array_index_nospec(parent, counter->num_counts);
+
+		comp_node.parent = counter->counts + parent;
+
+		ext = counter->counts[parent].ext;
+		num_ext = counter->counts[parent].num_ext;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	id = watch.component.id;
+
+	/* Configure component info for comp node */
+	switch (watch.component.type) {
+	case COUNTER_COMPONENT_SIGNAL:
+		if (watch.component.scope != COUNTER_SCOPE_SIGNAL)
+			return -EINVAL;
+
+		comp_node.comp.type = COUNTER_COMP_SIGNAL_LEVEL;
+		comp_node.comp.signal_u32_read = counter->ops->signal_read;
+		break;
+	case COUNTER_COMPONENT_COUNT:
+		if (watch.component.scope != COUNTER_SCOPE_COUNT)
+			return -EINVAL;
+
+		comp_node.comp.type = COUNTER_COMP_U64;
+		comp_node.comp.count_u64_read = counter->ops->count_read;
+		break;
+	case COUNTER_COMPONENT_FUNCTION:
+		if (watch.component.scope != COUNTER_SCOPE_COUNT)
+			return -EINVAL;
+
+		comp_node.comp.type = COUNTER_COMP_FUNCTION;
+		comp_node.comp.count_u32_read = counter->ops->function_read;
+		break;
+	case COUNTER_COMPONENT_SYNAPSE_ACTION:
+		if (watch.component.scope != COUNTER_SCOPE_COUNT)
+			return -EINVAL;
+		if (id >= counter->counts[parent].num_synapses)
+			return -EINVAL;
+		id = array_index_nospec(id, counter->counts[parent].num_synapses);
+
+		comp_node.comp.type = COUNTER_COMP_SYNAPSE_ACTION;
+		comp_node.comp.action_read = counter->ops->action_read;
+		comp_node.comp.priv = counter->counts[parent].synapses + id;
+		break;
+	case COUNTER_COMPONENT_EXTENSION:
+		if (id >= num_ext)
+			return -EINVAL;
+		id = array_index_nospec(id, num_ext);
+
+		comp_node.comp = ext[id];
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (!counter_comp_read_is_set(comp_node.comp))
+		return -EOPNOTSUPP;
+
+no_component:
+	mutex_lock(&counter->n_events_list_lock);
+
+	if (counter->ops->watch_validate) {
+		err = counter->ops->watch_validate(counter, &watch);
+		if (err < 0)
+			goto err_exit;
+	}
+
+	comp_node.component = watch.component;
+
+	err = counter_set_event_node(counter, &watch, &comp_node);
+
+err_exit:
+	mutex_unlock(&counter->n_events_list_lock);
+
+	return err;
+}
+
+static long counter_chrdev_ioctl(struct file *filp, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct counter_device *const counter = filp->private_data;
+	int ret = -ENODEV;
+
+	mutex_lock(&counter->ops_exist_lock);
+
+	if (!counter->ops)
+		goto out_unlock;
+
+	switch (cmd) {
+	case COUNTER_ADD_WATCH_IOCTL:
+		ret = counter_add_watch(counter, arg);
+		break;
+	case COUNTER_ENABLE_EVENTS_IOCTL:
+		ret = counter_enable_events(counter);
+		break;
+	case COUNTER_DISABLE_EVENTS_IOCTL:
+		ret = counter_disable_events(counter);
+		break;
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+
+out_unlock:
+	mutex_unlock(&counter->ops_exist_lock);
+
+	return ret;
+}
+
+static int counter_chrdev_open(struct inode *inode, struct file *filp)
+{
+	struct counter_device *const counter = container_of(inode->i_cdev,
+							    typeof(*counter),
+							    chrdev);
+
+	/* Ensure chrdev is not opened more than 1 at a time */
+	if (!atomic_add_unless(&counter->chrdev_lock, 1, 1))
+		return -EBUSY;
+
+	get_device(&counter->dev);
+	filp->private_data = counter;
+
+	return nonseekable_open(inode, filp);
+}
+
+static int counter_chrdev_release(struct inode *inode, struct file *filp)
+{
+	struct counter_device *const counter = filp->private_data;
+	int ret = 0;
+
+	mutex_lock(&counter->ops_exist_lock);
+
+	if (!counter->ops) {
+		/* Free any lingering held memory */
+		counter_events_list_free(&counter->events_list);
+		counter_events_list_free(&counter->next_events_list);
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	ret = counter_disable_events(counter);
+	if (ret < 0) {
+		mutex_unlock(&counter->ops_exist_lock);
+		return ret;
+	}
+
+out_unlock:
+	mutex_unlock(&counter->ops_exist_lock);
+
+	put_device(&counter->dev);
+	atomic_dec(&counter->chrdev_lock);
+
+	return ret;
+}
+
+static const struct file_operations counter_fops = {
+	.owner = THIS_MODULE,
+	.llseek = no_llseek,
+	.read = counter_chrdev_read,
+	.poll = counter_chrdev_poll,
+	.unlocked_ioctl = counter_chrdev_ioctl,
+	.open = counter_chrdev_open,
+	.release = counter_chrdev_release,
+};
+
+int counter_chrdev_add(struct counter_device *const counter)
+{
+	/* Initialize Counter events lists */
+	INIT_LIST_HEAD(&counter->events_list);
+	INIT_LIST_HEAD(&counter->next_events_list);
+	spin_lock_init(&counter->events_list_lock);
+	mutex_init(&counter->n_events_list_lock);
+	init_waitqueue_head(&counter->events_wait);
+	mutex_init(&counter->events_lock);
+
+	/* Initialize character device */
+	atomic_set(&counter->chrdev_lock, 0);
+	cdev_init(&counter->chrdev, &counter_fops);
+
+	/* Allocate Counter events queue */
+	return kfifo_alloc(&counter->events, 64, GFP_KERNEL);
+}
+
+void counter_chrdev_remove(struct counter_device *const counter)
+{
+	kfifo_free(&counter->events);
+}
+
+static int counter_get_data(struct counter_device *const counter,
+			    const struct counter_comp_node *const comp_node,
+			    u64 *const value)
+{
+	const struct counter_comp *const comp = &comp_node->comp;
+	void *const parent = comp_node->parent;
+	u8 value_u8 = 0;
+	u32 value_u32 = 0;
+	int ret;
+
+	if (comp_node->component.type == COUNTER_COMPONENT_NONE)
+		return 0;
+
+	switch (comp->type) {
+	case COUNTER_COMP_U8:
+	case COUNTER_COMP_BOOL:
+		switch (comp_node->component.scope) {
+		case COUNTER_SCOPE_DEVICE:
+			ret = comp->device_u8_read(counter, &value_u8);
+			break;
+		case COUNTER_SCOPE_SIGNAL:
+			ret = comp->signal_u8_read(counter, parent, &value_u8);
+			break;
+		case COUNTER_SCOPE_COUNT:
+			ret = comp->count_u8_read(counter, parent, &value_u8);
+			break;
+		}
+		*value = value_u8;
+		return ret;
+	case COUNTER_COMP_SIGNAL_LEVEL:
+	case COUNTER_COMP_FUNCTION:
+	case COUNTER_COMP_ENUM:
+	case COUNTER_COMP_COUNT_DIRECTION:
+	case COUNTER_COMP_COUNT_MODE:
+		switch (comp_node->component.scope) {
+		case COUNTER_SCOPE_DEVICE:
+			ret = comp->device_u32_read(counter, &value_u32);
+			break;
+		case COUNTER_SCOPE_SIGNAL:
+			ret = comp->signal_u32_read(counter, parent,
+						    &value_u32);
+			break;
+		case COUNTER_SCOPE_COUNT:
+			ret = comp->count_u32_read(counter, parent, &value_u32);
+			break;
+		}
+		*value = value_u32;
+		return ret;
+	case COUNTER_COMP_U64:
+		switch (comp_node->component.scope) {
+		case COUNTER_SCOPE_DEVICE:
+			return comp->device_u64_read(counter, value);
+		case COUNTER_SCOPE_SIGNAL:
+			return comp->signal_u64_read(counter, parent, value);
+		case COUNTER_SCOPE_COUNT:
+			return comp->count_u64_read(counter, parent, value);
+		default:
+			return -EINVAL;
+		}
+	case COUNTER_COMP_SYNAPSE_ACTION:
+		ret = comp->action_read(counter, parent, comp->priv,
+					&value_u32);
+		*value = value_u32;
+		return ret;
+	default:
+		return -EINVAL;
+	}
+}
+
+/**
+ * counter_push_event - queue event for userspace reading
+ * @counter:	pointer to Counter structure
+ * @event:	triggered event
+ * @channel:	event channel
+ *
+ * Note: If no one is watching for the respective event, it is silently
+ * discarded.
+ */
+void counter_push_event(struct counter_device *const counter, const u8 event,
+			const u8 channel)
+{
+	struct counter_event ev;
+	unsigned int copied = 0;
+	unsigned long flags;
+	struct counter_event_node *event_node;
+	struct counter_comp_node *comp_node;
+
+	ev.timestamp = ktime_get_ns();
+	ev.watch.event = event;
+	ev.watch.channel = channel;
+
+	/* Could be in an interrupt context, so use a spin lock */
+	spin_lock_irqsave(&counter->events_list_lock, flags);
+
+	/* Search for event in the list */
+	list_for_each_entry(event_node, &counter->events_list, l)
+		if (event_node->event == event &&
+		    event_node->channel == channel)
+			break;
+
+	/* If event is not in the list */
+	if (&event_node->l == &counter->events_list)
+		goto exit_early;
+
+	/* Read and queue relevant comp for userspace */
+	list_for_each_entry(comp_node, &event_node->comp_list, l) {
+		ev.watch.component = comp_node->component;
+		ev.status = -counter_get_data(counter, comp_node, &ev.value);
+
+		copied += kfifo_in(&counter->events, &ev, 1);
+	}
+
+exit_early:
+	spin_unlock_irqrestore(&counter->events_list_lock, flags);
+
+	if (copied)
+		wake_up_poll(&counter->events_wait, EPOLLIN);
+}
+EXPORT_SYMBOL_GPL(counter_push_event);
diff --git a/drivers/counter/counter-chrdev.h b/drivers/counter/counter-chrdev.h
new file mode 100644
index 000000000000..5529d16703c4
--- /dev/null
+++ b/drivers/counter/counter-chrdev.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Counter character device interface
+ * Copyright (C) 2020 William Breathitt Gray
+ */
+#ifndef _COUNTER_CHRDEV_H_
+#define _COUNTER_CHRDEV_H_
+
+#include <linux/counter.h>
+
+int counter_chrdev_add(struct counter_device *const counter);
+void counter_chrdev_remove(struct counter_device *const counter);
+
+#endif /* _COUNTER_CHRDEV_H_ */
diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c
index 3cda2c47bacb..5acc54539623 100644
--- a/drivers/counter/counter-core.c
+++ b/drivers/counter/counter-core.c
@@ -3,14 +3,22 @@
  * Generic Counter interface
  * Copyright (C) 2020 William Breathitt Gray
  */
+#include <linux/cdev.h>
 #include <linux/counter.h>
 #include <linux/device.h>
+#include <linux/device/bus.h>
 #include <linux/export.h>
+#include <linux/fs.h>
 #include <linux/gfp.h>
 #include <linux/idr.h>
 #include <linux/init.h>
+#include <linux/kdev_t.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/wait.h>
 
+#include "counter-chrdev.h"
 #include "counter-sysfs.h"
 
 /* Provides a unique ID for each counter device */
@@ -18,6 +26,9 @@ static DEFINE_IDA(counter_ida);
 
 static void counter_device_release(struct device *dev)
 {
+	struct counter_device *const counter = dev_get_drvdata(dev);
+
+	counter_chrdev_remove(counter);
 	ida_free(&counter_ida, dev->id);
 }
 
@@ -31,6 +42,8 @@ static struct bus_type counter_bus_type = {
 	.dev_name = "counter",
 };
 
+static dev_t counter_devt;
+
 /**
  * counter_register - register Counter to the system
  * @counter:	pointer to Counter to register
@@ -53,10 +66,13 @@ int counter_register(struct counter_device *const counter)
 	if (id < 0)
 		return id;
 
+	mutex_init(&counter->ops_exist_lock);
+
 	/* Configure device structure for Counter */
 	dev->id = id;
 	dev->type = &counter_device_type;
 	dev->bus = &counter_bus_type;
+	dev->devt = MKDEV(MAJOR(counter_devt), id);
 	if (counter->parent) {
 		dev->parent = counter->parent;
 		dev->of_node = counter->parent->of_node;
@@ -64,18 +80,22 @@ int counter_register(struct counter_device *const counter)
 	device_initialize(dev);
 	dev_set_drvdata(dev, counter);
 
-	/* Add Counter sysfs attributes */
 	err = counter_sysfs_add(counter);
 	if (err < 0)
 		goto err_free_id;
 
-	/* Add device to system */
-	err = device_add(dev);
+	err = counter_chrdev_add(counter);
 	if (err < 0)
 		goto err_free_id;
 
+	err = cdev_device_add(&counter->chrdev, dev);
+	if (err < 0)
+		goto err_remove_chrdev;
+
 	return 0;
 
+err_remove_chrdev:
+	counter_chrdev_remove(counter);
 err_free_id:
 	put_device(dev);
 	return err;
@@ -93,7 +113,16 @@ void counter_unregister(struct counter_device *const counter)
 	if (!counter)
 		return;
 
-	device_unregister(&counter->dev);
+	cdev_device_del(&counter->chrdev, &counter->dev);
+
+	mutex_lock(&counter->ops_exist_lock);
+
+	counter->ops = NULL;
+	wake_up(&counter->events_wait);
+
+	mutex_unlock(&counter->ops_exist_lock);
+
+	put_device(&counter->dev);
 }
 EXPORT_SYMBOL_GPL(counter_unregister);
 
@@ -127,13 +156,30 @@ int devm_counter_register(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(devm_counter_register);
 
+#define COUNTER_DEV_MAX 256
+
 static int __init counter_init(void)
 {
-	return bus_register(&counter_bus_type);
+	int err;
+
+	err = bus_register(&counter_bus_type);
+	if (err < 0)
+		return err;
+
+	err = alloc_chrdev_region(&counter_devt, 0, COUNTER_DEV_MAX, "counter");
+	if (err < 0)
+		goto err_unregister_bus;
+
+	return 0;
+
+err_unregister_bus:
+	bus_unregister(&counter_bus_type);
+	return err;
 }
 
 static void __exit counter_exit(void)
 {
+	unregister_chrdev_region(counter_devt, COUNTER_DEV_MAX);
 	bus_unregister(&counter_bus_type);
 }
 
diff --git a/include/linux/counter.h b/include/linux/counter.h
index 7c9f7e23953a..22b14a552b1d 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -6,9 +6,14 @@
 #ifndef _COUNTER_H_
 #define _COUNTER_H_
 
+#include <linux/cdev.h>
 #include <linux/device.h>
 #include <linux/kernel.h>
+#include <linux/kfifo.h>
+#include <linux/mutex.h>
+#include <linux/spinlock_types.h>
 #include <linux/types.h>
+#include <linux/wait.h>
 #include <uapi/linux/counter.h>
 
 struct counter_device;
@@ -199,6 +204,20 @@ struct counter_count {
 	size_t num_ext;
 };
 
+/**
+ * struct counter_event_node - Counter Event node
+ * @l:		list of current watching Counter events
+ * @event:	event that triggers
+ * @channel:	event channel
+ * @comp_list:	list of components to watch when event triggers
+ */
+struct counter_event_node {
+	struct list_head l;
+	u8 event;
+	u8 channel;
+	struct list_head comp_list;
+};
+
 /**
  * struct counter_ops - Callbacks from driver
  * @signal_read:	optional read callback for Signals. The read level of
@@ -222,6 +241,13 @@ struct counter_count {
  * @action_write:	optional write callback for Synapse action modes. The
  *			action mode to write for the respective Synapse is
  *			passed in via the action parameter.
+ * @events_configure:	optional write callback to configure events. The list of
+ *			struct counter_event_node may be accessed via the
+ *			events_list member of the counter parameter.
+ * @watch_validate:	optional callback to validate a watch. The Counter
+ *			component watch configuration is passed in via the watch
+ *			parameter. A return value of 0 indicates a valid Counter
+ *			component watch configuration.
  */
 struct counter_ops {
 	int (*signal_read)(struct counter_device *counter,
@@ -245,6 +271,9 @@ struct counter_ops {
 			    struct counter_count *count,
 			    struct counter_synapse *synapse,
 			    enum counter_synapse_action action);
+	int (*events_configure)(struct counter_device *counter);
+	int (*watch_validate)(struct counter_device *counter,
+			      const struct counter_watch *watch);
 };
 
 /**
@@ -260,6 +289,16 @@ struct counter_ops {
  * @num_ext:		number of Counter device extensions specified in @ext
  * @priv:		optional private data supplied by driver
  * @dev:		internal device structure
+ * @chrdev:		internal character device structure
+ * @events_list:	list of current watching Counter events
+ * @events_list_lock:	lock to protect Counter events list operations
+ * @next_events_list:	list of next watching Counter events
+ * @n_events_list_lock:	lock to protect Counter next events list operations
+ * @events:		queue of detected Counter events
+ * @events_wait:	wait queue to allow blocking reads of Counter events
+ * @events_lock:	lock to protect Counter events queue read operations
+ * @chrdev_lock:	lock to limit chrdev to a single open at a time
+ * @ops_exist_lock:	lock to prevent use during removal
  */
 struct counter_device {
 	const char *name;
@@ -278,12 +317,29 @@ struct counter_device {
 	void *priv;
 
 	struct device dev;
+	struct cdev chrdev;
+	struct list_head events_list;
+	spinlock_t events_list_lock;
+	struct list_head next_events_list;
+	struct mutex n_events_list_lock;
+	DECLARE_KFIFO_PTR(events, struct counter_event);
+	wait_queue_head_t events_wait;
+	struct mutex events_lock;
+	/*
+	 * chrdev_lock is locked by counter_chrdev_open() and unlocked by
+	 * counter_chrdev_release(), so a mutex is not possible here because
+	 * chrdev_lock will invariably be held when returning to user space
+	 */
+	atomic_t chrdev_lock;
+	struct mutex ops_exist_lock;
 };
 
 int counter_register(struct counter_device *const counter);
 void counter_unregister(struct counter_device *const counter);
 int devm_counter_register(struct device *dev,
 			  struct counter_device *const counter);
+void counter_push_event(struct counter_device *const counter, const u8 event,
+			const u8 channel);
 
 #define COUNTER_COMP_DEVICE_U8(_name, _read, _write) \
 { \
diff --git a/include/uapi/linux/counter.h b/include/uapi/linux/counter.h
index 6113938a6044..d0aa95aeff7b 100644
--- a/include/uapi/linux/counter.h
+++ b/include/uapi/linux/counter.h
@@ -6,6 +6,19 @@
 #ifndef _UAPI_COUNTER_H_
 #define _UAPI_COUNTER_H_
 
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/* Component type definitions */
+enum counter_component_type {
+	COUNTER_COMPONENT_NONE,
+	COUNTER_COMPONENT_SIGNAL,
+	COUNTER_COMPONENT_COUNT,
+	COUNTER_COMPONENT_FUNCTION,
+	COUNTER_COMPONENT_SYNAPSE_ACTION,
+	COUNTER_COMPONENT_EXTENSION,
+};
+
 /* Component scope definitions */
 enum counter_scope {
 	COUNTER_SCOPE_DEVICE,
@@ -13,6 +26,91 @@ enum counter_scope {
 	COUNTER_SCOPE_COUNT,
 };
 
+/**
+ * struct counter_component - Counter component identification
+ * @type: component type (one of enum counter_component_type)
+ * @scope: component scope (one of enum counter_scope)
+ * @parent: parent ID (matching the ID suffix of the respective parent sysfs
+ *          path as described by the ABI documentation file
+ *          Documentation/ABI/testing/sysfs-bus-counter)
+ * @id: component ID (matching the ID provided by the respective *_component_id
+ *      sysfs attribute of the desired component)
+ *
+ * For example, if the Count 2 ceiling extension of Counter device 4 is desired,
+ * set type equal to COUNTER_COMPONENT_EXTENSION, scope equal to
+ * COUNTER_COUNT_SCOPE, parent equal to 2, and id equal to the value provided by
+ * the respective /sys/bus/counter/devices/counter4/count2/ceiling_component_id
+ * sysfs attribute.
+ */
+struct counter_component {
+	__u8 type;
+	__u8 scope;
+	__u8 parent;
+	__u8 id;
+};
+
+/* Event type definitions */
+enum counter_event_type {
+	/* Count value increased past ceiling */
+	COUNTER_EVENT_OVERFLOW,
+	/* Count value decreased past floor */
+	COUNTER_EVENT_UNDERFLOW,
+	/* Count value increased past ceiling, or decreased past floor */
+	COUNTER_EVENT_OVERFLOW_UNDERFLOW,
+	/* Count value reached threshold */
+	COUNTER_EVENT_THRESHOLD,
+	/* Index signal detected */
+	COUNTER_EVENT_INDEX,
+};
+
+/**
+ * struct counter_watch - Counter component watch configuration
+ * @component: component to watch when event triggers
+ * @event: event that triggers (one of enum counter_event_type)
+ * @channel: event channel (typically 0 unless the device supports concurrent
+ *	     events of the same type)
+ */
+struct counter_watch {
+	struct counter_component component;
+	__u8 event;
+	__u8 channel;
+};
+
+/*
+ * Queues a Counter watch for the specified event.
+ *
+ * The queued watches will not be applied until COUNTER_ENABLE_EVENTS_IOCTL is
+ * called.
+ */
+#define COUNTER_ADD_WATCH_IOCTL _IOW(0x3E, 0x00, struct counter_watch)
+/*
+ * Enables monitoring the events specified by the Counter watches that were
+ * queued by COUNTER_ADD_WATCH_IOCTL.
+ *
+ * If events are already enabled, the new set of watches replaces the old one.
+ * Calling this ioctl also has the effect of clearing the queue of watches added
+ * by COUNTER_ADD_WATCH_IOCTL.
+ */
+#define COUNTER_ENABLE_EVENTS_IOCTL _IO(0x3E, 0x01)
+/*
+ * Stops monitoring the previously enabled events.
+ */
+#define COUNTER_DISABLE_EVENTS_IOCTL _IO(0x3E, 0x02)
+
+/**
+ * struct counter_event - Counter event data
+ * @timestamp: best estimate of time of event occurrence, in nanoseconds
+ * @value: component value
+ * @watch: component watch configuration
+ * @status: return status (system error number)
+ */
+struct counter_event {
+	__aligned_u64 timestamp;
+	__aligned_u64 value;
+	struct counter_watch watch;
+	__u8 status;
+};
+
 /* Count direction values */
 enum counter_count_direction {
 	COUNTER_COUNT_DIRECTION_FORWARD,
-- 
cgit v1.2.3


From 914ff7719e8adaf87131dd7cd0a3520afcf89516 Mon Sep 17 00:00:00 2001
From: Kyung Min Park <kyung.min.park@intel.com>
Date: Thu, 14 Oct 2021 13:38:32 +0800
Subject: iommu/vt-d: Dump DMAR translation structure when DMA fault occurs

When the dmar translation fault happens, the kernel prints a single line
fault reason with corresponding hexadecimal code defined in the Intel VT-d
specification.

Currently, when user wants to debug the translation fault in detail,
debugfs is used for dumping the dmar_translation_struct, which is not
available when the kernel failed to boot.

Dump the DMAR translation structure, pagewalk the IO page table and print
the page table entry when the fault happens.

This takes effect only when CONFIG_DMAR_DEBUG is enabled.

Signed-off-by: Kyung Min Park <kyung.min.park@intel.com>
Link: https://lore.kernel.org/r/20210815203845.31287-1-kyung.min.park@intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20211014053839.727419-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/Kconfig |   4 ++
 drivers/iommu/intel/dmar.c  |  10 +++-
 drivers/iommu/intel/iommu.c | 113 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dmar.h        |   8 ++++
 4 files changed, 133 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 0ddb77115be7..247d0f2d5fdf 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -6,6 +6,9 @@ config DMAR_TABLE
 config DMAR_PERF
 	bool
 
+config DMAR_DEBUG
+	bool
+
 config INTEL_IOMMU
 	bool "Support for Intel IOMMU using DMA Remapping Devices"
 	depends on PCI_MSI && ACPI && (X86 || IA64)
@@ -31,6 +34,7 @@ config INTEL_IOMMU_DEBUGFS
 	bool "Export Intel IOMMU internals in Debugfs"
 	depends on IOMMU_DEBUGFS
 	select DMAR_PERF
+	select DMAR_DEBUG
 	help
 	  !!!WARNING!!!
 
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index b7708b93f3fa..915bff76fe96 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1941,12 +1941,16 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
 
 	reason = dmar_get_fault_reason(fault_reason, &fault_type);
 
-	if (fault_type == INTR_REMAP)
+	if (fault_type == INTR_REMAP) {
 		pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
 		       PCI_FUNC(source_id & 0xFF), addr >> 48,
 		       fault_reason, reason);
-	else if (pasid == INVALID_IOASID)
+
+		return 0;
+	}
+
+	if (pasid == INVALID_IOASID)
 		pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
 		       type ? "DMA Read" : "DMA Write",
 		       source_id >> 8, PCI_SLOT(source_id & 0xFF),
@@ -1959,6 +1963,8 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
 		       PCI_FUNC(source_id & 0xFF), addr,
 		       fault_reason, reason);
 
+	dmar_fault_dump_ptes(iommu, source_id, addr, pasid);
+
 	return 0;
 }
 
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 9a356075d345..abb2599998b1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -156,6 +156,8 @@ static struct intel_iommu **g_iommus;
 
 static void __init check_tylersburg_isoch(void);
 static int rwbf_quirk;
+static inline struct device_domain_info *
+dmar_search_domain_by_dev_info(int segment, int bus, int devfn);
 
 /*
  * set to 1 to panic kernel if can't successfully enable VT-d
@@ -996,6 +998,117 @@ out:
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
+#ifdef CONFIG_DMAR_DEBUG
+static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn, u8 bus, u8 devfn)
+{
+	struct device_domain_info *info;
+	struct dma_pte *parent, *pte;
+	struct dmar_domain *domain;
+	int offset, level;
+
+	info = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
+	if (!info || !info->domain) {
+		pr_info("device [%02x:%02x.%d] not probed\n",
+			bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+		return;
+	}
+
+	domain = info->domain;
+	level = agaw_to_level(domain->agaw);
+	parent = domain->pgd;
+	if (!parent) {
+		pr_info("no page table setup\n");
+		return;
+	}
+
+	while (1) {
+		offset = pfn_level_offset(pfn, level);
+		pte = &parent[offset];
+		if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
+			pr_info("PTE not present at level %d\n", level);
+			break;
+		}
+
+		pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
+
+		if (level == 1)
+			break;
+
+		parent = phys_to_virt(dma_pte_addr(pte));
+		level--;
+	}
+}
+
+void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
+			  unsigned long long addr, u32 pasid)
+{
+	struct pasid_dir_entry *dir, *pde;
+	struct pasid_entry *entries, *pte;
+	struct context_entry *ctx_entry;
+	struct root_entry *rt_entry;
+	u8 devfn = source_id & 0xff;
+	u8 bus = source_id >> 8;
+	int i, dir_index, index;
+
+	pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
+
+	/* root entry dump */
+	rt_entry = &iommu->root_entry[bus];
+	if (!rt_entry) {
+		pr_info("root table entry is not present\n");
+		return;
+	}
+
+	if (sm_supported(iommu))
+		pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
+			rt_entry->hi, rt_entry->lo);
+	else
+		pr_info("root entry: 0x%016llx", rt_entry->lo);
+
+	/* context entry dump */
+	ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
+	if (!ctx_entry) {
+		pr_info("context table entry is not present\n");
+		return;
+	}
+
+	pr_info("context entry: hi 0x%016llx, low 0x%016llx\n",
+		ctx_entry->hi, ctx_entry->lo);
+
+	/* legacy mode does not require PASID entries */
+	if (!sm_supported(iommu))
+		goto pgtable_walk;
+
+	/* get the pointer to pasid directory entry */
+	dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
+	if (!dir) {
+		pr_info("pasid directory entry is not present\n");
+		return;
+	}
+	/* For request-without-pasid, get the pasid from context entry */
+	if (intel_iommu_sm && pasid == INVALID_IOASID)
+		pasid = PASID_RID2PASID;
+
+	dir_index = pasid >> PASID_PDE_SHIFT;
+	pde = &dir[dir_index];
+	pr_info("pasid dir entry: 0x%016llx\n", pde->val);
+
+	/* get the pointer to the pasid table entry */
+	entries = get_pasid_table_from_pde(pde);
+	if (!entries) {
+		pr_info("pasid table entry is not present\n");
+		return;
+	}
+	index = pasid & PASID_PTE_MASK;
+	pte = &entries[index];
+	for (i = 0; i < ARRAY_SIZE(pte->val); i++)
+		pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
+
+pgtable_walk:
+	pgtable_walk(iommu, addr >> VTD_PAGE_SHIFT, bus, devfn);
+}
+#endif
+
 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 				      unsigned long pfn, int *target_level)
 {
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index e04436a7ff27..45e903d84733 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -131,6 +131,14 @@ static inline int dmar_res_noop(struct acpi_dmar_header *hdr, void *arg)
 	return 0;
 }
 
+#ifdef CONFIG_DMAR_DEBUG
+void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
+			  unsigned long long addr, u32 pasid);
+#else
+static inline void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
+					unsigned long long addr, u32 pasid) {}
+#endif
+
 #ifdef CONFIG_INTEL_IOMMU
 extern int iommu_detected, no_iommu;
 extern int intel_iommu_init(void);
-- 
cgit v1.2.3


From b34380a6d767c54480a937951e6189a7f9699443 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Thu, 14 Oct 2021 13:38:33 +0800
Subject: iommu/vt-d: Remove duplicate identity domain flag

The iommu_domain data structure already has the "type" field to keep the
type of a domain. It's unnecessary to have the DOMAIN_FLAG_STATIC_IDENTITY
flag in the vt-d implementation. This cleans it up with no functionality
change.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20210926114535.923263-1-baolu.lu@linux.intel.com
Link: https://lore.kernel.org/r/20211014053839.727419-4-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 9 ++++-----
 include/linux/intel-iommu.h | 3 ---
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index abb2599998b1..ec9ceb2dcf57 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -528,7 +528,7 @@ static inline void free_devinfo_mem(void *vaddr)
 
 static inline int domain_type_is_si(struct dmar_domain *domain)
 {
-	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
+	return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
 }
 
 static inline bool domain_use_first_level(struct dmar_domain *domain)
@@ -1996,7 +1996,7 @@ static bool first_level_by_default(void)
 	return scalable_mode_support() && intel_cap_flts_sanity();
 }
 
-static struct dmar_domain *alloc_domain(int flags)
+static struct dmar_domain *alloc_domain(unsigned int type)
 {
 	struct dmar_domain *domain;
 
@@ -2006,7 +2006,6 @@ static struct dmar_domain *alloc_domain(int flags)
 
 	memset(domain, 0, sizeof(*domain));
 	domain->nid = NUMA_NO_NODE;
-	domain->flags = flags;
 	if (first_level_by_default())
 		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
 	domain->has_iotlb_device = false;
@@ -2825,7 +2824,7 @@ static int __init si_domain_init(int hw)
 	struct device *dev;
 	int i, nid, ret;
 
-	si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
+	si_domain = alloc_domain(IOMMU_DOMAIN_IDENTITY);
 	if (!si_domain)
 		return -EFAULT;
 
@@ -4634,7 +4633,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 	case IOMMU_DOMAIN_DMA:
 	case IOMMU_DOMAIN_DMA_FQ:
 	case IOMMU_DOMAIN_UNMANAGED:
-		dmar_domain = alloc_domain(0);
+		dmar_domain = alloc_domain(type);
 		if (!dmar_domain) {
 			pr_err("Can't allocate dmar_domain\n");
 			return NULL;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 05a65eb155f7..65b15af3cf1a 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -517,9 +517,6 @@ struct context_entry {
 	u64 hi;
 };
 
-/* si_domain contains mulitple devices */
-#define DOMAIN_FLAG_STATIC_IDENTITY		BIT(0)
-
 /*
  * When VT-d works in the scalable mode, it allows DMA translation to
  * happen through either first level or second level page table. This
-- 
cgit v1.2.3


From 37c8041a818dec2cea22ea48a3f90d512a9e1fcd Mon Sep 17 00:00:00 2001
From: "Longpeng(Mike)" <longpeng2@huawei.com>
Date: Thu, 14 Oct 2021 13:38:38 +0800
Subject: iommu/vt-d: Convert the return type of first_pte_in_page to bool

The first_pte_in_page() returns true or false, so let's convert its
return type to bool. In addition, use 'IS_ALIGNED' to make the
code more readable and neater.

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Link: https://lore.kernel.org/r/20211008000433.1115-1-longpeng2@huawei.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20211014053839.727419-9-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 65b15af3cf1a..52481625838c 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -705,9 +705,9 @@ static inline bool dma_pte_superpage(struct dma_pte *pte)
 	return (pte->val & DMA_PTE_LARGE_PAGE);
 }
 
-static inline int first_pte_in_page(struct dma_pte *pte)
+static inline bool first_pte_in_page(struct dma_pte *pte)
 {
-	return !((unsigned long)pte & ~VTD_PAGE_MASK);
+	return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
 }
 
 extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
-- 
cgit v1.2.3


From 9906b9352a35427508d974db3a496eb3c49e2010 Mon Sep 17 00:00:00 2001
From: "Longpeng(Mike)" <longpeng2@huawei.com>
Date: Thu, 14 Oct 2021 13:38:39 +0800
Subject: iommu/vt-d: Avoid duplicate removing in __domain_mapping()

The __domain_mapping() always removes the pages in the range from
'iov_pfn' to 'end_pfn', but the 'end_pfn' is always the last pfn
of the range that the caller wants to map.

This would introduce too many duplicated removing and leads the
map operation take too long, for example:

  Map iova=0x100000,nr_pages=0x7d61800
    iov_pfn: 0x100000, end_pfn: 0x7e617ff
    iov_pfn: 0x140000, end_pfn: 0x7e617ff
    iov_pfn: 0x180000, end_pfn: 0x7e617ff
    iov_pfn: 0x1c0000, end_pfn: 0x7e617ff
    iov_pfn: 0x200000, end_pfn: 0x7e617ff
    ...
  it takes about 50ms in total.

We can reduce the cost by recalculate the 'end_pfn' and limit it
to the boundary of the end of this pte page.

  Map iova=0x100000,nr_pages=0x7d61800
    iov_pfn: 0x100000, end_pfn: 0x13ffff
    iov_pfn: 0x140000, end_pfn: 0x17ffff
    iov_pfn: 0x180000, end_pfn: 0x1bffff
    iov_pfn: 0x1c0000, end_pfn: 0x1fffff
    iov_pfn: 0x200000, end_pfn: 0x23ffff
    ...
  it only need 9ms now.

This also removes a meaningless BUG_ON() in __domain_mapping().

Signed-off-by: Longpeng(Mike) <longpeng2@huawei.com>
Tested-by: Liujunjie <liujunjie23@huawei.com>
Link: https://lore.kernel.org/r/20211008000433.1115-1-longpeng2@huawei.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20211014053839.727419-10-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 11 ++++++-----
 include/linux/intel-iommu.h |  6 ++++++
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 16a35669a9d0..0bde0c8b4126 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2479,12 +2479,17 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 				return -ENOMEM;
 			first_pte = pte;
 
+			lvl_pages = lvl_to_nr_pages(largepage_lvl);
+
 			/* It is large page*/
 			if (largepage_lvl > 1) {
 				unsigned long end_pfn;
+				unsigned long pages_to_remove;
 
 				pteval |= DMA_PTE_LARGE_PAGE;
-				end_pfn = ((iov_pfn + nr_pages) & level_mask(largepage_lvl)) - 1;
+				pages_to_remove = min_t(unsigned long, nr_pages,
+							nr_pte_to_next_page(pte) * lvl_pages);
+				end_pfn = iov_pfn + pages_to_remove - 1;
 				switch_to_super_page(domain, iov_pfn, end_pfn, largepage_lvl);
 			} else {
 				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
@@ -2506,10 +2511,6 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 			WARN_ON(1);
 		}
 
-		lvl_pages = lvl_to_nr_pages(largepage_lvl);
-
-		BUG_ON(nr_pages < lvl_pages);
-
 		nr_pages -= lvl_pages;
 		iov_pfn += lvl_pages;
 		phys_pfn += lvl_pages;
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 52481625838c..69230fd695ea 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -710,6 +710,12 @@ static inline bool first_pte_in_page(struct dma_pte *pte)
 	return IS_ALIGNED((unsigned long)pte, VTD_PAGE_SIZE);
 }
 
+static inline int nr_pte_to_next_page(struct dma_pte *pte)
+{
+	return first_pte_in_page(pte) ? BIT_ULL(VTD_STRIDE_SHIFT) :
+		(struct dma_pte *)ALIGN((unsigned long)pte, VTD_PAGE_SIZE) - pte;
+}
+
 extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
 extern int dmar_find_matched_atsr_unit(struct pci_dev *dev);
 
-- 
cgit v1.2.3


From 53c36de0701f2fdda6b1d209dc89e2bad4a9c7b8 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 30 Dec 2020 10:21:39 -0500
Subject: mm: Add kmap_local_folio()

This allows us to map a portion of a folio.  Callers can only expect
to access up to the next page boundary.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/highmem-internal.h | 11 +++++++++++
 include/linux/highmem.h          | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 4aa1031d3e4c..0a0b2b09b1b8 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -73,6 +73,12 @@ static inline void *kmap_local_page(struct page *page)
 	return __kmap_local_page_prot(page, kmap_prot);
 }
 
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+	struct page *page = folio_page(folio, offset / PAGE_SIZE);
+	return __kmap_local_page_prot(page, kmap_prot) + offset % PAGE_SIZE;
+}
+
 static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
 {
 	return __kmap_local_page_prot(page, prot);
@@ -171,6 +177,11 @@ static inline void *kmap_local_page(struct page *page)
 	return page_address(page);
 }
 
+static inline void *kmap_local_folio(struct folio *folio, size_t offset)
+{
+	return page_address(&folio->page) + offset;
+}
+
 static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
 {
 	return kmap_local_page(page);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index b4c49f9cc379..27cdd715c5f9 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -96,6 +96,43 @@ static inline void kmap_flush_unused(void);
  */
 static inline void *kmap_local_page(struct page *page);
 
+/**
+ * kmap_local_folio - Map a page in this folio for temporary usage
+ * @folio: The folio containing the page.
+ * @offset: The byte offset within the folio which identifies the page.
+ *
+ * Requires careful handling when nesting multiple mappings because the map
+ * management is stack based. The unmap has to be in the reverse order of
+ * the map operation::
+ *
+ *   addr1 = kmap_local_folio(folio1, offset1);
+ *   addr2 = kmap_local_folio(folio2, offset2);
+ *   ...
+ *   kunmap_local(addr2);
+ *   kunmap_local(addr1);
+ *
+ * Unmapping addr1 before addr2 is invalid and causes malfunction.
+ *
+ * Contrary to kmap() mappings the mapping is only valid in the context of
+ * the caller and cannot be handed to other contexts.
+ *
+ * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the
+ * virtual address of the direct mapping. Only real highmem pages are
+ * temporarily mapped.
+ *
+ * While it is significantly faster than kmap() for the higmem case it
+ * comes with restrictions about the pointer validity. Only use when really
+ * necessary.
+ *
+ * On HIGHMEM enabled systems mapping a highmem page has the side effect of
+ * disabling migration in order to keep the virtual address stable across
+ * preemption. No caller of kmap_local_folio() can rely on this side effect.
+ *
+ * Context: Can be invoked from any context.
+ * Return: The virtual address of @offset.
+ */
+static inline void *kmap_local_folio(struct folio *folio, size_t offset);
+
 /**
  * kmap_atomic - Atomically map a page for temporary usage - Deprecated!
  * @page:	Pointer to the page to be mapped
-- 
cgit v1.2.3


From b424de33c42dbd03e39ca8f521126d39acb6c335 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 24 Apr 2021 14:50:36 -0400
Subject: mm: Add arch_make_folio_accessible()

As a default implementation, call arch_make_page_accessible n times.
If an architecture can do better, it can override this.

Also move the default implementation of arch_make_page_accessible()
from gfp.h to mm.h.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/gfp.h |  6 ------
 include/linux/mm.h  | 23 +++++++++++++++++++++++
 2 files changed, 23 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 55b2ec1f965a..dc5ff40608ce 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -520,12 +520,6 @@ static inline void arch_free_page(struct page *page, int order) { }
 #ifndef HAVE_ARCH_ALLOC_PAGE
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
-#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
-static inline int arch_make_page_accessible(struct page *page)
-{
-	return 0;
-}
-#endif
 
 struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 		nodemask_t *nodemask);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 47143f3e7f0a..c28cace6fe6b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1734,6 +1734,29 @@ static inline size_t folio_size(struct folio *folio)
 	return PAGE_SIZE << folio_order(folio);
 }
 
+#ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
+static inline int arch_make_page_accessible(struct page *page)
+{
+	return 0;
+}
+#endif
+
+#ifndef HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
+static inline int arch_make_folio_accessible(struct folio *folio)
+{
+	int ret;
+	long i, nr = folio_nr_pages(folio);
+
+	for (i = 0; i < nr; i++) {
+		ret = arch_make_page_accessible(folio_page(folio, i));
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+#endif
+
 /*
  * Some inline functions in vmstat.h depend on page_zone()
  */
-- 
cgit v1.2.3


From 35a020ba0802f732ba070e03e50b140aea4373c4 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 27 Apr 2021 09:00:12 -0400
Subject: mm: Add folio_young and folio_idle

Idle page tracking is handled through page_ext on 32-bit architectures.
Add folio equivalents for 32-bit and move all the page compatibility
parts to common code.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 include/linux/page_idle.h | 99 +++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index d8a6aecf99cb..83abf95e9fa7 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -8,46 +8,16 @@
 
 #ifdef CONFIG_PAGE_IDLE_FLAG
 
-#ifdef CONFIG_64BIT
-static inline bool page_is_young(struct page *page)
-{
-	return PageYoung(page);
-}
-
-static inline void set_page_young(struct page *page)
-{
-	SetPageYoung(page);
-}
-
-static inline bool test_and_clear_page_young(struct page *page)
-{
-	return TestClearPageYoung(page);
-}
-
-static inline bool page_is_idle(struct page *page)
-{
-	return PageIdle(page);
-}
-
-static inline void set_page_idle(struct page *page)
-{
-	SetPageIdle(page);
-}
-
-static inline void clear_page_idle(struct page *page)
-{
-	ClearPageIdle(page);
-}
-#else /* !CONFIG_64BIT */
+#ifndef CONFIG_64BIT
 /*
  * If there is not enough space to store Idle and Young bits in page flags, use
  * page ext flags instead.
  */
 extern struct page_ext_operations page_idle_ops;
 
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = lookup_page_ext(&folio->page);
 
 	if (unlikely(!page_ext))
 		return false;
@@ -55,9 +25,9 @@ static inline bool page_is_young(struct page *page)
 	return test_bit(PAGE_EXT_YOUNG, &page_ext->flags);
 }
 
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = lookup_page_ext(&folio->page);
 
 	if (unlikely(!page_ext))
 		return;
@@ -65,9 +35,9 @@ static inline void set_page_young(struct page *page)
 	set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
 }
 
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = lookup_page_ext(&folio->page);
 
 	if (unlikely(!page_ext))
 		return false;
@@ -75,9 +45,9 @@ static inline bool test_and_clear_page_young(struct page *page)
 	return test_and_clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
 }
 
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = lookup_page_ext(&folio->page);
 
 	if (unlikely(!page_ext))
 		return false;
@@ -85,9 +55,9 @@ static inline bool page_is_idle(struct page *page)
 	return test_bit(PAGE_EXT_IDLE, &page_ext->flags);
 }
 
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = lookup_page_ext(&folio->page);
 
 	if (unlikely(!page_ext))
 		return;
@@ -95,46 +65,75 @@ static inline void set_page_idle(struct page *page)
 	set_bit(PAGE_EXT_IDLE, &page_ext->flags);
 }
 
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
 {
-	struct page_ext *page_ext = lookup_page_ext(page);
+	struct page_ext *page_ext = lookup_page_ext(&folio->page);
 
 	if (unlikely(!page_ext))
 		return;
 
 	clear_bit(PAGE_EXT_IDLE, &page_ext->flags);
 }
-#endif /* CONFIG_64BIT */
+#endif /* !CONFIG_64BIT */
 
 #else /* !CONFIG_PAGE_IDLE_FLAG */
 
-static inline bool page_is_young(struct page *page)
+static inline bool folio_test_young(struct folio *folio)
 {
 	return false;
 }
 
-static inline void set_page_young(struct page *page)
+static inline void folio_set_young(struct folio *folio)
 {
 }
 
-static inline bool test_and_clear_page_young(struct page *page)
+static inline bool folio_test_clear_young(struct folio *folio)
 {
 	return false;
 }
 
-static inline bool page_is_idle(struct page *page)
+static inline bool folio_test_idle(struct folio *folio)
 {
 	return false;
 }
 
-static inline void set_page_idle(struct page *page)
+static inline void folio_set_idle(struct folio *folio)
 {
 }
 
-static inline void clear_page_idle(struct page *page)
+static inline void folio_clear_idle(struct folio *folio)
 {
 }
 
 #endif /* CONFIG_PAGE_IDLE_FLAG */
 
+static inline bool page_is_young(struct page *page)
+{
+	return folio_test_young(page_folio(page));
+}
+
+static inline void set_page_young(struct page *page)
+{
+	folio_set_young(page_folio(page));
+}
+
+static inline bool test_and_clear_page_young(struct page *page)
+{
+	return folio_test_clear_young(page_folio(page));
+}
+
+static inline bool page_is_idle(struct page *page)
+{
+	return folio_test_idle(page_folio(page));
+}
+
+static inline void set_page_idle(struct page *page)
+{
+	folio_set_idle(page_folio(page));
+}
+
+static inline void clear_page_idle(struct page *page)
+{
+	folio_clear_idle(page_folio(page));
+}
 #endif /* _LINUX_MM_PAGE_IDLE_H */
-- 
cgit v1.2.3


From 76580b6529db622964b4ffee59ded25efcb73748 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 27 Apr 2021 10:47:39 -0400
Subject: mm/swap: Add folio_mark_accessed()

Convert mark_page_accessed() to folio_mark_accessed().  It already
operated on the entire compound page, but now we can avoid calling
compound_head quite so many times.  Shrinks the function from 424 bytes
to 295 bytes (shrinking by 129 bytes).  The compatibility wrapper is 30
bytes, plus the 8 bytes for the exported symbol means the kernel shrinks
by 91 bytes.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/swap.h |  3 ++-
 mm/folio-compat.c    |  7 +++++++
 mm/swap.c            | 34 ++++++++++++++++------------------
 3 files changed, 25 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0fc84797623f..067b7af5242c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -352,7 +352,8 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file,
 			  unsigned int nr_pages);
 extern void lru_note_cost_page(struct page *);
 extern void lru_cache_add(struct page *);
-extern void mark_page_accessed(struct page *);
+void mark_page_accessed(struct page *);
+void folio_mark_accessed(struct folio *);
 
 extern atomic_t lru_disable_count;
 
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 7044fcc8a8aa..a374747ae1c6 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/pagemap.h>
+#include <linux/swap.h>
 
 struct address_space *page_mapping(struct page *page)
 {
@@ -41,3 +42,9 @@ bool page_mapped(struct page *page)
 	return folio_mapped(page_folio(page));
 }
 EXPORT_SYMBOL(page_mapped);
+
+void mark_page_accessed(struct page *page)
+{
+	folio_mark_accessed(page_folio(page));
+}
+EXPORT_SYMBOL(mark_page_accessed);
diff --git a/mm/swap.c b/mm/swap.c
index 3860d6bc8c8a..b95fd6d05768 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -368,7 +368,7 @@ static void folio_activate(struct folio *folio)
 }
 #endif
 
-static void __lru_cache_activate_page(struct page *page)
+static void __lru_cache_activate_folio(struct folio *folio)
 {
 	struct pagevec *pvec;
 	int i;
@@ -389,8 +389,8 @@ static void __lru_cache_activate_page(struct page *page)
 	for (i = pagevec_count(pvec) - 1; i >= 0; i--) {
 		struct page *pagevec_page = pvec->pages[i];
 
-		if (pagevec_page == page) {
-			SetPageActive(page);
+		if (pagevec_page == &folio->page) {
+			folio_set_active(folio);
 			break;
 		}
 	}
@@ -408,36 +408,34 @@ static void __lru_cache_activate_page(struct page *page)
  * When a newly allocated page is not yet visible, so safe for non-atomic ops,
  * __SetPageReferenced(page) may be substituted for mark_page_accessed(page).
  */
-void mark_page_accessed(struct page *page)
+void folio_mark_accessed(struct folio *folio)
 {
-	page = compound_head(page);
-
-	if (!PageReferenced(page)) {
-		SetPageReferenced(page);
-	} else if (PageUnevictable(page)) {
+	if (!folio_test_referenced(folio)) {
+		folio_set_referenced(folio);
+	} else if (folio_test_unevictable(folio)) {
 		/*
 		 * Unevictable pages are on the "LRU_UNEVICTABLE" list. But,
 		 * this list is never rotated or maintained, so marking an
 		 * evictable page accessed has no effect.
 		 */
-	} else if (!PageActive(page)) {
+	} else if (!folio_test_active(folio)) {
 		/*
 		 * If the page is on the LRU, queue it for activation via
 		 * lru_pvecs.activate_page. Otherwise, assume the page is on a
 		 * pagevec, mark it active and it'll be moved to the active
 		 * LRU on the next drain.
 		 */
-		if (PageLRU(page))
-			folio_activate(page_folio(page));
+		if (folio_test_lru(folio))
+			folio_activate(folio);
 		else
-			__lru_cache_activate_page(page);
-		ClearPageReferenced(page);
-		workingset_activation(page_folio(page));
+			__lru_cache_activate_folio(folio);
+		folio_clear_referenced(folio);
+		workingset_activation(folio);
 	}
-	if (page_is_idle(page))
-		clear_page_idle(page);
+	if (folio_test_idle(folio))
+		folio_clear_idle(folio);
 }
-EXPORT_SYMBOL(mark_page_accessed);
+EXPORT_SYMBOL(folio_mark_accessed);
 
 /**
  * lru_cache_add - add a page to a page list
-- 
cgit v1.2.3


From d9c08e2232fbca4fa56b9350f7f84835c4aa3add Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 28 Feb 2021 16:02:57 -0500
Subject: mm/rmap: Add folio_mkclean()

Transform page_mkclean() into folio_mkclean() and add a page_mkclean()
wrapper around folio_mkclean().

folio_mkclean is 15 bytes smaller than page_mkclean, but the kernel
is enlarged by 33 bytes due to inlining page_folio() into each caller.
This will go away once the callers are converted to use folio_mkclean().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/rmap.h | 10 ++++++----
 mm/rmap.c            | 12 ++++++------
 2 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c976cc6de257..e704b1a4c06c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -235,7 +235,7 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
  *
  * returns the number of cleaned PTEs.
  */
-int page_mkclean(struct page *);
+int folio_mkclean(struct folio *);
 
 /*
  * called in munlock()/munmap() path to check for other vmas holding
@@ -295,12 +295,14 @@ static inline void try_to_unmap(struct page *page, enum ttu_flags flags)
 {
 }
 
-static inline int page_mkclean(struct page *page)
+static inline int folio_mkclean(struct folio *folio)
 {
 	return 0;
 }
-
-
 #endif	/* CONFIG_MMU */
 
+static inline int page_mkclean(struct page *page)
+{
+	return folio_mkclean(page_folio(page));
+}
 #endif	/* _LINUX_RMAP_H */
diff --git a/mm/rmap.c b/mm/rmap.c
index 059556dbefec..3a1059c284c3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -981,7 +981,7 @@ static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
 	return true;
 }
 
-int page_mkclean(struct page *page)
+int folio_mkclean(struct folio *folio)
 {
 	int cleaned = 0;
 	struct address_space *mapping;
@@ -991,20 +991,20 @@ int page_mkclean(struct page *page)
 		.invalid_vma = invalid_mkclean_vma,
 	};
 
-	BUG_ON(!PageLocked(page));
+	BUG_ON(!folio_test_locked(folio));
 
-	if (!page_mapped(page))
+	if (!folio_mapped(folio))
 		return 0;
 
-	mapping = page_mapping(page);
+	mapping = folio_mapping(folio);
 	if (!mapping)
 		return 0;
 
-	rmap_walk(page, &rwc);
+	rmap_walk(&folio->page, &rwc);
 
 	return cleaned;
 }
-EXPORT_SYMBOL_GPL(page_mkclean);
+EXPORT_SYMBOL_GPL(folio_mkclean);
 
 /**
  * page_move_anon_rmap - move a page to our anon_vma
-- 
cgit v1.2.3


From 3417013e0d183be9b42d794082eec0ec1c5b5f15 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 7 May 2021 07:28:40 -0400
Subject: mm/migrate: Add folio_migrate_mapping()

Reimplement migrate_page_move_mapping() as a wrapper around
folio_migrate_mapping().  Saves 193 bytes of kernel text.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/migrate.h |  2 ++
 mm/folio-compat.c       | 11 +++++++
 mm/migrate.c            | 85 +++++++++++++++++++++++++------------------------
 3 files changed, 57 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index c8077e936691..58b3af645e20 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -57,6 +57,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
 extern int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page, int extra_count);
+int folio_migrate_mapping(struct address_space *mapping,
+		struct folio *newfolio, struct folio *folio, int extra_count);
 #else
 
 static inline void putback_movable_pages(struct list_head *l) {}
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index a374747ae1c6..d883d964fd52 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -4,6 +4,7 @@
  * eventually.
  */
 
+#include <linux/migrate.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 
@@ -48,3 +49,13 @@ void mark_page_accessed(struct page *page)
 	folio_mark_accessed(page_folio(page));
 }
 EXPORT_SYMBOL(mark_page_accessed);
+
+#ifdef CONFIG_MIGRATION
+int migrate_page_move_mapping(struct address_space *mapping,
+		struct page *newpage, struct page *page, int extra_count)
+{
+	return folio_migrate_mapping(mapping, page_folio(newpage),
+					page_folio(page), extra_count);
+}
+EXPORT_SYMBOL(migrate_page_move_mapping);
+#endif
diff --git a/mm/migrate.c b/mm/migrate.c
index bfb8ba490479..0e408ee4b7b2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -364,7 +364,7 @@ static int expected_page_refs(struct address_space *mapping, struct page *page)
 	 */
 	expected_count += is_device_private_page(page);
 	if (mapping)
-		expected_count += thp_nr_pages(page) + page_has_private(page);
+		expected_count += compound_nr(page) + page_has_private(page);
 
 	return expected_count;
 }
@@ -377,74 +377,75 @@ static int expected_page_refs(struct address_space *mapping, struct page *page)
  * 2 for pages with a mapping
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
-int migrate_page_move_mapping(struct address_space *mapping,
-		struct page *newpage, struct page *page, int extra_count)
+int folio_migrate_mapping(struct address_space *mapping,
+		struct folio *newfolio, struct folio *folio, int extra_count)
 {
-	XA_STATE(xas, &mapping->i_pages, page_index(page));
+	XA_STATE(xas, &mapping->i_pages, folio_index(folio));
 	struct zone *oldzone, *newzone;
 	int dirty;
-	int expected_count = expected_page_refs(mapping, page) + extra_count;
-	int nr = thp_nr_pages(page);
+	int expected_count = expected_page_refs(mapping, &folio->page) + extra_count;
+	long nr = folio_nr_pages(folio);
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
-		if (page_count(page) != expected_count)
+		if (folio_ref_count(folio) != expected_count)
 			return -EAGAIN;
 
 		/* No turning back from here */
-		newpage->index = page->index;
-		newpage->mapping = page->mapping;
-		if (PageSwapBacked(page))
-			__SetPageSwapBacked(newpage);
+		newfolio->index = folio->index;
+		newfolio->mapping = folio->mapping;
+		if (folio_test_swapbacked(folio))
+			__folio_set_swapbacked(newfolio);
 
 		return MIGRATEPAGE_SUCCESS;
 	}
 
-	oldzone = page_zone(page);
-	newzone = page_zone(newpage);
+	oldzone = folio_zone(folio);
+	newzone = folio_zone(newfolio);
 
 	xas_lock_irq(&xas);
-	if (page_count(page) != expected_count || xas_load(&xas) != page) {
+	if (folio_ref_count(folio) != expected_count ||
+	    xas_load(&xas) != folio) {
 		xas_unlock_irq(&xas);
 		return -EAGAIN;
 	}
 
-	if (!page_ref_freeze(page, expected_count)) {
+	if (!folio_ref_freeze(folio, expected_count)) {
 		xas_unlock_irq(&xas);
 		return -EAGAIN;
 	}
 
 	/*
-	 * Now we know that no one else is looking at the page:
+	 * Now we know that no one else is looking at the folio:
 	 * no turning back from here.
 	 */
-	newpage->index = page->index;
-	newpage->mapping = page->mapping;
-	page_ref_add(newpage, nr); /* add cache reference */
-	if (PageSwapBacked(page)) {
-		__SetPageSwapBacked(newpage);
-		if (PageSwapCache(page)) {
-			SetPageSwapCache(newpage);
-			set_page_private(newpage, page_private(page));
+	newfolio->index = folio->index;
+	newfolio->mapping = folio->mapping;
+	folio_ref_add(newfolio, nr); /* add cache reference */
+	if (folio_test_swapbacked(folio)) {
+		__folio_set_swapbacked(newfolio);
+		if (folio_test_swapcache(folio)) {
+			folio_set_swapcache(newfolio);
+			newfolio->private = folio_get_private(folio);
 		}
 	} else {
-		VM_BUG_ON_PAGE(PageSwapCache(page), page);
+		VM_BUG_ON_FOLIO(folio_test_swapcache(folio), folio);
 	}
 
 	/* Move dirty while page refs frozen and newpage not yet exposed */
-	dirty = PageDirty(page);
+	dirty = folio_test_dirty(folio);
 	if (dirty) {
-		ClearPageDirty(page);
-		SetPageDirty(newpage);
+		folio_clear_dirty(folio);
+		folio_set_dirty(newfolio);
 	}
 
-	xas_store(&xas, newpage);
-	if (PageTransHuge(page)) {
+	xas_store(&xas, newfolio);
+	if (nr > 1) {
 		int i;
 
 		for (i = 1; i < nr; i++) {
 			xas_next(&xas);
-			xas_store(&xas, newpage);
+			xas_store(&xas, newfolio);
 		}
 	}
 
@@ -453,7 +454,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 * to one less reference.
 	 * We know this isn't the last reference.
 	 */
-	page_ref_unfreeze(page, expected_count - nr);
+	folio_ref_unfreeze(folio, expected_count - nr);
 
 	xas_unlock(&xas);
 	/* Leave irq disabled to prevent preemption while updating stats */
@@ -472,18 +473,18 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		struct lruvec *old_lruvec, *new_lruvec;
 		struct mem_cgroup *memcg;
 
-		memcg = page_memcg(page);
+		memcg = folio_memcg(folio);
 		old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
 		new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);
 
 		__mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
 		__mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
-		if (PageSwapBacked(page) && !PageSwapCache(page)) {
+		if (folio_test_swapbacked(folio) && !folio_test_swapcache(folio)) {
 			__mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
 			__mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
 		}
 #ifdef CONFIG_SWAP
-		if (PageSwapCache(page)) {
+		if (folio_test_swapcache(folio)) {
 			__mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr);
 			__mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr);
 		}
@@ -499,11 +500,11 @@ int migrate_page_move_mapping(struct address_space *mapping,
 
 	return MIGRATEPAGE_SUCCESS;
 }
-EXPORT_SYMBOL(migrate_page_move_mapping);
+EXPORT_SYMBOL(folio_migrate_mapping);
 
 /*
  * The expected number of remaining references is the same as that
- * of migrate_page_move_mapping().
+ * of folio_migrate_mapping().
  */
 int migrate_huge_page_move_mapping(struct address_space *mapping,
 				   struct page *newpage, struct page *page)
@@ -564,7 +565,7 @@ void migrate_page_states(struct page *newpage, struct page *page)
 	if (PageMappedToDisk(page))
 		SetPageMappedToDisk(newpage);
 
-	/* Move dirty on pages not done by migrate_page_move_mapping() */
+	/* Move dirty on pages not done by folio_migrate_mapping() */
 	if (PageDirty(page))
 		SetPageDirty(newpage);
 
@@ -640,11 +641,13 @@ int migrate_page(struct address_space *mapping,
 		struct page *newpage, struct page *page,
 		enum migrate_mode mode)
 {
+	struct folio *newfolio = page_folio(newpage);
+	struct folio *folio = page_folio(page);
 	int rc;
 
-	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
+	BUG_ON(folio_test_writeback(folio));	/* Writeback must be complete */
 
-	rc = migrate_page_move_mapping(mapping, newpage, page, 0);
+	rc = folio_migrate_mapping(mapping, newfolio, folio, 0);
 
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -2470,7 +2473,7 @@ static void migrate_vma_collect(struct migrate_vma *migrate)
  * @page: struct page to check
  *
  * Pinned pages cannot be migrated. This is the same test as in
- * migrate_page_move_mapping(), except that here we allow migration of a
+ * folio_migrate_mapping(), except that here we allow migration of a
  * ZONE_DEVICE page.
  */
 static bool migrate_vma_check_page(struct page *page)
-- 
cgit v1.2.3


From 19138349ed59b90ce58aca319b873eca2e04ad43 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 7 May 2021 15:26:29 -0400
Subject: mm/migrate: Add folio_migrate_flags()

Turn migrate_page_states() into a wrapper around folio_migrate_flags().
Also convert two functions only called from folio_migrate_flags() to
be folio-based.  ksm_migrate_page() becomes folio_migrate_ksm() and
copy_page_owner() becomes folio_copy_owner().  folio_migrate_flags()
alone shrinks by two thirds -- 1967 bytes down to 642 bytes.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/ksm.h        |  4 +--
 include/linux/migrate.h    |  1 +
 include/linux/page_owner.h |  8 ++---
 mm/folio-compat.c          |  6 ++++
 mm/ksm.c                   | 31 ++++++++++-------
 mm/migrate.c               | 84 ++++++++++++++++++++++------------------------
 mm/page_owner.c            | 10 +++---
 7 files changed, 77 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 161e8164abcf..a38a5bca1ba5 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -52,7 +52,7 @@ struct page *ksm_might_need_to_copy(struct page *page,
 			struct vm_area_struct *vma, unsigned long address);
 
 void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc);
-void ksm_migrate_page(struct page *newpage, struct page *oldpage);
+void folio_migrate_ksm(struct folio *newfolio, struct folio *folio);
 
 #else  /* !CONFIG_KSM */
 
@@ -83,7 +83,7 @@ static inline void rmap_walk_ksm(struct page *page,
 {
 }
 
-static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+static inline void folio_migrate_ksm(struct folio *newfolio, struct folio *old)
 {
 }
 #endif /* CONFIG_MMU */
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 58b3af645e20..aa875b83f7ba 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -57,6 +57,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
 extern int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page, int extra_count);
+void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
 		struct folio *newfolio, struct folio *folio, int extra_count);
 #else
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 719bfe5108c5..43c638c51c1f 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -12,7 +12,7 @@ extern void __reset_page_owner(struct page *page, unsigned int order);
 extern void __set_page_owner(struct page *page,
 			unsigned int order, gfp_t gfp_mask);
 extern void __split_page_owner(struct page *page, unsigned int nr);
-extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
+extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
 extern void __dump_page_owner(const struct page *page);
 extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
@@ -36,10 +36,10 @@ static inline void split_page_owner(struct page *page, unsigned int nr)
 	if (static_branch_unlikely(&page_owner_inited))
 		__split_page_owner(page, nr);
 }
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *old)
 {
 	if (static_branch_unlikely(&page_owner_inited))
-		__copy_page_owner(oldpage, newpage);
+		__folio_copy_owner(newfolio, old);
 }
 static inline void set_page_owner_migrate_reason(struct page *page, int reason)
 {
@@ -63,7 +63,7 @@ static inline void split_page_owner(struct page *page,
 			unsigned int order)
 {
 }
-static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
+static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
 {
 }
 static inline void set_page_owner_migrate_reason(struct page *page, int reason)
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index d883d964fd52..3f00ad92d1ff 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -58,4 +58,10 @@ int migrate_page_move_mapping(struct address_space *mapping,
 					page_folio(page), extra_count);
 }
 EXPORT_SYMBOL(migrate_page_move_mapping);
+
+void migrate_page_states(struct page *newpage, struct page *page)
+{
+	folio_migrate_flags(page_folio(newpage), page_folio(page));
+}
+EXPORT_SYMBOL(migrate_page_states);
 #endif
diff --git a/mm/ksm.c b/mm/ksm.c
index c246a0b0ac75..0662093237e4 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -751,7 +751,7 @@ stale:
 	/*
 	 * We come here from above when page->mapping or !PageSwapCache
 	 * suggests that the node is stale; but it might be under migration.
-	 * We need smp_rmb(), matching the smp_wmb() in ksm_migrate_page(),
+	 * We need smp_rmb(), matching the smp_wmb() in folio_migrate_ksm(),
 	 * before checking whether node->kpfn has been changed.
 	 */
 	smp_rmb();
@@ -852,9 +852,14 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 	return err;
 }
 
+static inline struct stable_node *folio_stable_node(struct folio *folio)
+{
+	return folio_test_ksm(folio) ? folio_raw_mapping(folio) : NULL;
+}
+
 static inline struct stable_node *page_stable_node(struct page *page)
 {
-	return PageKsm(page) ? page_rmapping(page) : NULL;
+	return folio_stable_node(page_folio(page));
 }
 
 static inline void set_page_stable_node(struct page *page,
@@ -2659,26 +2664,26 @@ again:
 }
 
 #ifdef CONFIG_MIGRATION
-void ksm_migrate_page(struct page *newpage, struct page *oldpage)
+void folio_migrate_ksm(struct folio *newfolio, struct folio *folio)
 {
 	struct stable_node *stable_node;
 
-	VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
-	VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
-	VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio);
+	VM_BUG_ON_FOLIO(newfolio->mapping != folio->mapping, newfolio);
 
-	stable_node = page_stable_node(newpage);
+	stable_node = folio_stable_node(folio);
 	if (stable_node) {
-		VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage);
-		stable_node->kpfn = page_to_pfn(newpage);
+		VM_BUG_ON_FOLIO(stable_node->kpfn != folio_pfn(folio), folio);
+		stable_node->kpfn = folio_pfn(newfolio);
 		/*
-		 * newpage->mapping was set in advance; now we need smp_wmb()
+		 * newfolio->mapping was set in advance; now we need smp_wmb()
 		 * to make sure that the new stable_node->kpfn is visible
-		 * to get_ksm_page() before it can see that oldpage->mapping
-		 * has gone stale (or that PageSwapCache has been cleared).
+		 * to get_ksm_page() before it can see that folio->mapping
+		 * has gone stale (or that folio_test_swapcache has been cleared).
 		 */
 		smp_wmb();
-		set_page_stable_node(oldpage, NULL);
+		set_page_stable_node(&folio->page, NULL);
 	}
 }
 #endif /* CONFIG_MIGRATION */
diff --git a/mm/migrate.c b/mm/migrate.c
index 0e408ee4b7b2..f6e0017ef0bf 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -539,82 +539,80 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 }
 
 /*
- * Copy the page to its new location
+ * Copy the flags and some other ancillary information
  */
-void migrate_page_states(struct page *newpage, struct page *page)
+void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
-	struct folio *newfolio = page_folio(newpage);
 	int cpupid;
 
-	if (PageError(page))
-		SetPageError(newpage);
-	if (PageReferenced(page))
-		SetPageReferenced(newpage);
-	if (PageUptodate(page))
-		SetPageUptodate(newpage);
-	if (TestClearPageActive(page)) {
-		VM_BUG_ON_PAGE(PageUnevictable(page), page);
-		SetPageActive(newpage);
-	} else if (TestClearPageUnevictable(page))
-		SetPageUnevictable(newpage);
-	if (PageWorkingset(page))
-		SetPageWorkingset(newpage);
-	if (PageChecked(page))
-		SetPageChecked(newpage);
-	if (PageMappedToDisk(page))
-		SetPageMappedToDisk(newpage);
+	if (folio_test_error(folio))
+		folio_set_error(newfolio);
+	if (folio_test_referenced(folio))
+		folio_set_referenced(newfolio);
+	if (folio_test_uptodate(folio))
+		folio_mark_uptodate(newfolio);
+	if (folio_test_clear_active(folio)) {
+		VM_BUG_ON_FOLIO(folio_test_unevictable(folio), folio);
+		folio_set_active(newfolio);
+	} else if (folio_test_clear_unevictable(folio))
+		folio_set_unevictable(newfolio);
+	if (folio_test_workingset(folio))
+		folio_set_workingset(newfolio);
+	if (folio_test_checked(folio))
+		folio_set_checked(newfolio);
+	if (folio_test_mappedtodisk(folio))
+		folio_set_mappedtodisk(newfolio);
 
 	/* Move dirty on pages not done by folio_migrate_mapping() */
-	if (PageDirty(page))
-		SetPageDirty(newpage);
+	if (folio_test_dirty(folio))
+		folio_set_dirty(newfolio);
 
-	if (page_is_young(page))
-		set_page_young(newpage);
-	if (page_is_idle(page))
-		set_page_idle(newpage);
+	if (folio_test_young(folio))
+		folio_set_young(newfolio);
+	if (folio_test_idle(folio))
+		folio_set_idle(newfolio);
 
 	/*
 	 * Copy NUMA information to the new page, to prevent over-eager
 	 * future migrations of this same page.
 	 */
-	cpupid = page_cpupid_xchg_last(page, -1);
-	page_cpupid_xchg_last(newpage, cpupid);
+	cpupid = page_cpupid_xchg_last(&folio->page, -1);
+	page_cpupid_xchg_last(&newfolio->page, cpupid);
 
-	ksm_migrate_page(newpage, page);
+	folio_migrate_ksm(newfolio, folio);
 	/*
 	 * Please do not reorder this without considering how mm/ksm.c's
 	 * get_ksm_page() depends upon ksm_migrate_page() and PageSwapCache().
 	 */
-	if (PageSwapCache(page))
-		ClearPageSwapCache(page);
-	ClearPagePrivate(page);
+	if (folio_test_swapcache(folio))
+		folio_clear_swapcache(folio);
+	folio_clear_private(folio);
 
 	/* page->private contains hugetlb specific flags */
-	if (!PageHuge(page))
-		set_page_private(page, 0);
+	if (!folio_test_hugetlb(folio))
+		folio->private = NULL;
 
 	/*
 	 * If any waiters have accumulated on the new page then
 	 * wake them up.
 	 */
-	if (PageWriteback(newpage))
-		end_page_writeback(newpage);
+	if (folio_test_writeback(newfolio))
+		folio_end_writeback(newfolio);
 
 	/*
 	 * PG_readahead shares the same bit with PG_reclaim.  The above
 	 * end_page_writeback() may clear PG_readahead mistakenly, so set the
 	 * bit after that.
 	 */
-	if (PageReadahead(page))
-		SetPageReadahead(newpage);
+	if (folio_test_readahead(folio))
+		folio_set_readahead(newfolio);
 
-	copy_page_owner(page, newpage);
+	folio_copy_owner(newfolio, folio);
 
-	if (!PageHuge(page))
+	if (!folio_test_hugetlb(folio))
 		mem_cgroup_migrate(folio, newfolio);
 }
-EXPORT_SYMBOL(migrate_page_states);
+EXPORT_SYMBOL(folio_migrate_flags);
 
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
@@ -655,7 +653,7 @@ int migrate_page(struct address_space *mapping,
 	if (mode != MIGRATE_SYNC_NO_COPY)
 		migrate_page_copy(newpage, page);
 	else
-		migrate_page_states(newpage, page);
+		folio_migrate_flags(newfolio, folio);
 	return MIGRATEPAGE_SUCCESS;
 }
 EXPORT_SYMBOL(migrate_page);
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 62402d22539b..d24ed221357c 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -210,10 +210,10 @@ void __split_page_owner(struct page *page, unsigned int nr)
 	}
 }
 
-void __copy_page_owner(struct page *oldpage, struct page *newpage)
+void __folio_copy_owner(struct folio *newfolio, struct folio *old)
 {
-	struct page_ext *old_ext = lookup_page_ext(oldpage);
-	struct page_ext *new_ext = lookup_page_ext(newpage);
+	struct page_ext *old_ext = lookup_page_ext(&old->page);
+	struct page_ext *new_ext = lookup_page_ext(&newfolio->page);
 	struct page_owner *old_page_owner, *new_page_owner;
 
 	if (unlikely(!old_ext || !new_ext))
@@ -231,11 +231,11 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
 	new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
 
 	/*
-	 * We don't clear the bit on the oldpage as it's going to be freed
+	 * We don't clear the bit on the old folio as it's going to be freed
 	 * after migration. Until then, the info can be useful in case of
 	 * a bug, and the overall stats will be off a bit only temporarily.
 	 * Also, migrate_misplaced_transhuge_page() can still fail the
-	 * migration and then we want the oldpage to retain the info. But
+	 * migration and then we want the old folio to retain the info. But
 	 * in that case we also don't need to explicitly clear the info from
 	 * the new page, which will be freed.
 	 */
-- 
cgit v1.2.3


From 715cbfd6c5c595bc8b7a6f9ad1fe9fec0122bb20 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 7 May 2021 15:05:06 -0400
Subject: mm/migrate: Add folio_migrate_copy()

This is the folio equivalent of migrate_page_copy(), which is retained
as a wrapper for filesystems which are not yet converted to folios.
Also convert copy_huge_page() to folio_copy().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/migrate.h |  1 +
 include/linux/mm.h      |  2 +-
 mm/folio-compat.c       |  6 ++++++
 mm/hugetlb.c            |  2 +-
 mm/migrate.c            | 14 +++++---------
 mm/util.c               | 21 +++++++++++++++++----
 6 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index aa875b83f7ba..0d2aeb9b0f66 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -58,6 +58,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 extern int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page, int extra_count);
 void folio_migrate_flags(struct folio *newfolio, struct folio *folio);
+void folio_migrate_copy(struct folio *newfolio, struct folio *folio);
 int folio_migrate_mapping(struct address_space *mapping,
 		struct folio *newfolio, struct folio *folio, int extra_count);
 #else
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c28cace6fe6b..93d5fbe2e4e3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -912,7 +912,7 @@ void __put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
 void split_page(struct page *page, unsigned int order);
-void copy_huge_page(struct page *dst, struct page *src);
+void folio_copy(struct folio *dst, struct folio *src);
 
 /*
  * Compound pages have a destructor function.  Provide a
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 3f00ad92d1ff..2ccd8f213fc4 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -64,4 +64,10 @@ void migrate_page_states(struct page *newpage, struct page *page)
 	folio_migrate_flags(page_folio(newpage), page_folio(page));
 }
 EXPORT_SYMBOL(migrate_page_states);
+
+void migrate_page_copy(struct page *newpage, struct page *page)
+{
+	folio_migrate_copy(page_folio(newpage), page_folio(page));
+}
+EXPORT_SYMBOL(migrate_page_copy);
 #endif
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 95dc7b83381f..6378c1066459 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5302,7 +5302,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 			*pagep = NULL;
 			goto out;
 		}
-		copy_huge_page(page, *pagep);
+		folio_copy(page_folio(page), page_folio(*pagep));
 		put_page(*pagep);
 		*pagep = NULL;
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index f6e0017ef0bf..433c453b47f9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -614,16 +614,12 @@ void folio_migrate_flags(struct folio *newfolio, struct folio *folio)
 }
 EXPORT_SYMBOL(folio_migrate_flags);
 
-void migrate_page_copy(struct page *newpage, struct page *page)
+void folio_migrate_copy(struct folio *newfolio, struct folio *folio)
 {
-	if (PageHuge(page) || PageTransHuge(page))
-		copy_huge_page(newpage, page);
-	else
-		copy_highpage(newpage, page);
-
-	migrate_page_states(newpage, page);
+	folio_copy(newfolio, folio);
+	folio_migrate_flags(newfolio, folio);
 }
-EXPORT_SYMBOL(migrate_page_copy);
+EXPORT_SYMBOL(folio_migrate_copy);
 
 /************************************************************
  *                    Migration functions
@@ -651,7 +647,7 @@ int migrate_page(struct address_space *mapping,
 		return rc;
 
 	if (mode != MIGRATE_SYNC_NO_COPY)
-		migrate_page_copy(newpage, page);
+		folio_migrate_copy(newfolio, folio);
 	else
 		folio_migrate_flags(newfolio, folio);
 	return MIGRATEPAGE_SUCCESS;
diff --git a/mm/util.c b/mm/util.c
index bf860838282c..e58151a61255 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -747,13 +747,26 @@ int __page_mapcount(struct page *page)
 }
 EXPORT_SYMBOL_GPL(__page_mapcount);
 
-void copy_huge_page(struct page *dst, struct page *src)
+/**
+ * folio_copy - Copy the contents of one folio to another.
+ * @dst: Folio to copy to.
+ * @src: Folio to copy from.
+ *
+ * The bytes in the folio represented by @src are copied to @dst.
+ * Assumes the caller has validated that @dst is at least as large as @src.
+ * Can be called in atomic context for order-0 folios, but if the folio is
+ * larger, it may sleep.
+ */
+void folio_copy(struct folio *dst, struct folio *src)
 {
-	unsigned i, nr = compound_nr(src);
+	long i = 0;
+	long nr = folio_nr_pages(src);
 
-	for (i = 0; i < nr; i++) {
+	for (;;) {
+		copy_highpage(folio_page(dst, i), folio_page(src, i));
+		if (++i == nr)
+			break;
 		cond_resched();
-		copy_highpage(nth_page(dst, i), nth_page(src, i));
 	}
 }
 
-- 
cgit v1.2.3


From bd3488e7b4d61780eb3dfaca1cc6f4026bcffd48 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 19 Mar 2021 08:39:50 -0400
Subject: mm/writeback: Rename __add_wb_stat() to wb_stat_mod()

Make this look like the newly renamed vmstat functions.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/backing-dev.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ac7f231b8825..a62e72dd829f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -64,7 +64,7 @@ static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
 	return atomic_long_read(&bdi->tot_write_bandwidth);
 }
 
-static inline void __add_wb_stat(struct bdi_writeback *wb,
+static inline void wb_stat_mod(struct bdi_writeback *wb,
 				 enum wb_stat_item item, s64 amount)
 {
 	percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
@@ -72,12 +72,12 @@ static inline void __add_wb_stat(struct bdi_writeback *wb,
 
 static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	__add_wb_stat(wb, item, 1);
+	wb_stat_mod(wb, item, 1);
 }
 
 static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
 {
-	__add_wb_stat(wb, item, -1);
+	wb_stat_mod(wb, item, -1);
 }
 
 static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
-- 
cgit v1.2.3


From be5f1797523004d0d9aaee81a523d86e3b890007 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 20 Mar 2021 16:34:54 -0400
Subject: flex_proportions: Allow N events instead of 1

When batching events (such as writing back N pages in a single I/O), it
is better to do one flex_proportion operation instead of N.  There is
only one caller of __fprop_inc_percpu_max(), and it's the one we're
going to change in the next patch, so rename it instead of adding a
compatibility wrapper.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 include/linux/flex_proportions.h |  9 +++++----
 lib/flex_proportions.c           | 28 +++++++++++++++++++---------
 mm/page-writeback.c              |  4 ++--
 3 files changed, 26 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h
index c12df59d3f5f..3e378b1fb0bc 100644
--- a/include/linux/flex_proportions.h
+++ b/include/linux/flex_proportions.h
@@ -83,9 +83,10 @@ struct fprop_local_percpu {
 
 int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp);
 void fprop_local_destroy_percpu(struct fprop_local_percpu *pl);
-void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl);
-void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl,
-			    int max_frac);
+void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl,
+		long nr);
+void __fprop_add_percpu_max(struct fprop_global *p,
+		struct fprop_local_percpu *pl, int max_frac, long nr);
 void fprop_fraction_percpu(struct fprop_global *p,
 	struct fprop_local_percpu *pl, unsigned long *numerator,
 	unsigned long *denominator);
@@ -96,7 +97,7 @@ void fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__fprop_inc_percpu(p, pl);
+	__fprop_add_percpu(p, pl, 1);
 	local_irq_restore(flags);
 }
 
diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c
index 451543937524..53e7eb1dd76c 100644
--- a/lib/flex_proportions.c
+++ b/lib/flex_proportions.c
@@ -217,11 +217,12 @@ static void fprop_reflect_period_percpu(struct fprop_global *p,
 }
 
 /* Event of type pl happened */
-void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl)
+void __fprop_add_percpu(struct fprop_global *p, struct fprop_local_percpu *pl,
+		long nr)
 {
 	fprop_reflect_period_percpu(p, pl);
-	percpu_counter_add_batch(&pl->events, 1, PROP_BATCH);
-	percpu_counter_add(&p->events, 1);
+	percpu_counter_add_batch(&pl->events, nr, PROP_BATCH);
+	percpu_counter_add(&p->events, nr);
 }
 
 void fprop_fraction_percpu(struct fprop_global *p,
@@ -253,20 +254,29 @@ void fprop_fraction_percpu(struct fprop_global *p,
 }
 
 /*
- * Like __fprop_inc_percpu() except that event is counted only if the given
+ * Like __fprop_add_percpu() except that event is counted only if the given
  * type has fraction smaller than @max_frac/FPROP_FRAC_BASE
  */
-void __fprop_inc_percpu_max(struct fprop_global *p,
-			    struct fprop_local_percpu *pl, int max_frac)
+void __fprop_add_percpu_max(struct fprop_global *p,
+		struct fprop_local_percpu *pl, int max_frac, long nr)
 {
 	if (unlikely(max_frac < FPROP_FRAC_BASE)) {
 		unsigned long numerator, denominator;
+		s64 tmp;
 
 		fprop_fraction_percpu(p, pl, &numerator, &denominator);
-		if (numerator >
-		    (((u64)denominator) * max_frac) >> FPROP_FRAC_SHIFT)
+		/* Adding 'nr' to fraction exceeds max_frac/FPROP_FRAC_BASE? */
+		tmp = (u64)denominator * max_frac -
+					((u64)numerator << FPROP_FRAC_SHIFT);
+		if (tmp < 0) {
+			/* Maximum fraction already exceeded? */
 			return;
+		} else if (tmp < nr * (FPROP_FRAC_BASE - max_frac)) {
+			/* Add just enough for the fraction to saturate */
+			nr = div_u64(tmp + FPROP_FRAC_BASE - max_frac - 1,
+					FPROP_FRAC_BASE - max_frac);
+		}
 	}
 
-	__fprop_inc_percpu(p, pl);
+	__fprop_add_percpu(p, pl, nr);
 }
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 1d8f2ee2e065..cb7387d0e77d 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -566,8 +566,8 @@ static void wb_domain_writeout_inc(struct wb_domain *dom,
 				   struct fprop_local_percpu *completions,
 				   unsigned int max_prop_frac)
 {
-	__fprop_inc_percpu_max(&dom->completions, completions,
-			       max_prop_frac);
+	__fprop_add_percpu_max(&dom->completions, completions,
+			       max_prop_frac, 1);
 	/* First event after period switching was turned off? */
 	if (unlikely(!dom->period_time)) {
 		/*
-- 
cgit v1.2.3


From 269ccca3899f6bce49e004f50f623e0b161fb027 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 15 Jan 2021 23:34:16 -0500
Subject: mm/writeback: Add __folio_end_writeback()

test_clear_page_writeback() is actually an mm-internal function, although
it's named as if it's a pagecache function.  Move it to mm/internal.h,
rename it to __folio_end_writeback() and change the return type to bool.

The conversion from page to folio is mostly about accounting the number
of pages being written back, although it does eliminate a couple of
calls to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/page-flags.h |  1 -
 mm/filemap.c               |  2 +-
 mm/internal.h              |  1 +
 mm/page-writeback.c        | 29 +++++++++++++++--------------
 4 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 2491e84b8e52..3aebb2060a26 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -657,7 +657,6 @@ static __always_inline void SetPageUptodate(struct page *page)
 
 CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
 
-int test_clear_page_writeback(struct page *page);
 int __test_set_page_writeback(struct page *page, bool keep_write);
 
 #define test_set_page_writeback(page)			\
diff --git a/mm/filemap.c b/mm/filemap.c
index 5368a4dcc35e..5cb18399a1b6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1590,7 +1590,7 @@ void folio_end_writeback(struct folio *folio)
 	 * reused before the folio_wake().
 	 */
 	folio_get(folio);
-	if (!test_clear_page_writeback(&folio->page))
+	if (!__folio_end_writeback(folio))
 		BUG();
 
 	smp_mb__after_atomic();
diff --git a/mm/internal.h b/mm/internal.h
index 187a032fed4d..08fb03b16d3d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -43,6 +43,7 @@ static inline void *folio_raw_mapping(struct folio *folio)
 
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 void folio_rotate_reclaimable(struct folio *folio);
+bool __folio_end_writeback(struct folio *folio);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 943cf74a76ed..9ff9a33bced1 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -583,7 +583,7 @@ static void wb_domain_writeout_add(struct wb_domain *dom,
 
 /*
  * Increment @wb's writeout completion count and the global writeout
- * completion count. Called from test_clear_page_writeback().
+ * completion count. Called from __folio_end_writeback().
  */
 static inline void __wb_writeout_add(struct bdi_writeback *wb, long nr)
 {
@@ -2766,27 +2766,28 @@ static void wb_inode_writeback_end(struct bdi_writeback *wb)
 	queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
 }
 
-int test_clear_page_writeback(struct page *page)
+bool __folio_end_writeback(struct folio *folio)
 {
-	struct address_space *mapping = page_mapping(page);
-	int ret;
+	long nr = folio_nr_pages(folio);
+	struct address_space *mapping = folio_mapping(folio);
+	bool ret;
 
-	lock_page_memcg(page);
+	folio_memcg_lock(folio);
 	if (mapping && mapping_use_writeback_tags(mapping)) {
 		struct inode *inode = mapping->host;
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 		unsigned long flags;
 
 		xa_lock_irqsave(&mapping->i_pages, flags);
-		ret = TestClearPageWriteback(page);
+		ret = folio_test_clear_writeback(folio);
 		if (ret) {
-			__xa_clear_mark(&mapping->i_pages, page_index(page),
+			__xa_clear_mark(&mapping->i_pages, folio_index(folio),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
-				dec_wb_stat(wb, WB_WRITEBACK);
-				__wb_writeout_add(wb, 1);
+				wb_stat_mod(wb, WB_WRITEBACK, -nr);
+				__wb_writeout_add(wb, nr);
 				if (!mapping_tagged(mapping,
 						    PAGECACHE_TAG_WRITEBACK))
 					wb_inode_writeback_end(wb);
@@ -2799,14 +2800,14 @@ int test_clear_page_writeback(struct page *page)
 
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
 	} else {
-		ret = TestClearPageWriteback(page);
+		ret = folio_test_clear_writeback(folio);
 	}
 	if (ret) {
-		dec_lruvec_page_state(page, NR_WRITEBACK);
-		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-		inc_node_page_state(page, NR_WRITTEN);
+		lruvec_stat_mod_folio(folio, NR_WRITEBACK, -nr);
+		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
+		node_stat_mod_folio(folio, NR_WRITTEN, nr);
 	}
-	unlock_page_memcg(page);
+	folio_memcg_unlock(folio);
 	return ret;
 }
 
-- 
cgit v1.2.3


From f143f1ea5a5380b2682e6836fcd24338a2aa82c7 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 24 Apr 2021 12:00:48 -0400
Subject: mm/writeback: Add folio_start_writeback()

Rename set_page_writeback() to folio_start_writeback() to match
folio_end_writeback().  Do not bother with wrappers that return void;
callers are perfectly capable of ignoring return values.

Add wrappers for set_page_writeback(), set_page_writeback_keepwrite() and
test_set_page_writeback() for compatibililty with existing filesystems.
The main advantage of this patch is getting the statistics right,
although it does eliminate a couple of calls to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/page-flags.h | 19 ++++++++++---------
 mm/folio-compat.c          |  6 ++++++
 mm/page-writeback.c        | 39 ++++++++++++++++++++-------------------
 3 files changed, 36 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3aebb2060a26..a68af80649a4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -657,21 +657,22 @@ static __always_inline void SetPageUptodate(struct page *page)
 
 CLEARPAGEFLAG(Uptodate, uptodate, PF_NO_TAIL)
 
-int __test_set_page_writeback(struct page *page, bool keep_write);
+bool __folio_start_writeback(struct folio *folio, bool keep_write);
+bool set_page_writeback(struct page *page);
 
-#define test_set_page_writeback(page)			\
-	__test_set_page_writeback(page, false)
-#define test_set_page_writeback_keepwrite(page)	\
-	__test_set_page_writeback(page, true)
+#define folio_start_writeback(folio)			\
+	__folio_start_writeback(folio, false)
+#define folio_start_writeback_keepwrite(folio)	\
+	__folio_start_writeback(folio, true)
 
-static inline void set_page_writeback(struct page *page)
+static inline void set_page_writeback_keepwrite(struct page *page)
 {
-	test_set_page_writeback(page);
+	folio_start_writeback_keepwrite(page_folio(page));
 }
 
-static inline void set_page_writeback_keepwrite(struct page *page)
+static inline bool test_set_page_writeback(struct page *page)
 {
-	test_set_page_writeback_keepwrite(page);
+	return set_page_writeback(page);
 }
 
 __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 2ccd8f213fc4..10ce5582d869 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -71,3 +71,9 @@ void migrate_page_copy(struct page *newpage, struct page *page)
 }
 EXPORT_SYMBOL(migrate_page_copy);
 #endif
+
+bool set_page_writeback(struct page *page)
+{
+	return folio_start_writeback(page_folio(page));
+}
+EXPORT_SYMBOL(set_page_writeback);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 9ff9a33bced1..82938b037103 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2811,21 +2811,23 @@ bool __folio_end_writeback(struct folio *folio)
 	return ret;
 }
 
-int __test_set_page_writeback(struct page *page, bool keep_write)
+bool __folio_start_writeback(struct folio *folio, bool keep_write)
 {
-	struct address_space *mapping = page_mapping(page);
-	int ret, access_ret;
+	long nr = folio_nr_pages(folio);
+	struct address_space *mapping = folio_mapping(folio);
+	bool ret;
+	int access_ret;
 
-	lock_page_memcg(page);
+	folio_memcg_lock(folio);
 	if (mapping && mapping_use_writeback_tags(mapping)) {
-		XA_STATE(xas, &mapping->i_pages, page_index(page));
+		XA_STATE(xas, &mapping->i_pages, folio_index(folio));
 		struct inode *inode = mapping->host;
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 		unsigned long flags;
 
 		xas_lock_irqsave(&xas, flags);
 		xas_load(&xas);
-		ret = TestSetPageWriteback(page);
+		ret = folio_test_set_writeback(folio);
 		if (!ret) {
 			bool on_wblist;
 
@@ -2836,43 +2838,42 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 			if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
 
-				inc_wb_stat(wb, WB_WRITEBACK);
+				wb_stat_mod(wb, WB_WRITEBACK, nr);
 				if (!on_wblist)
 					wb_inode_writeback_start(wb);
 			}
 
 			/*
-			 * We can come through here when swapping anonymous
-			 * pages, so we don't necessarily have an inode to track
-			 * for sync.
+			 * We can come through here when swapping
+			 * anonymous folios, so we don't necessarily
+			 * have an inode to track for sync.
 			 */
 			if (mapping->host && !on_wblist)
 				sb_mark_inode_writeback(mapping->host);
 		}
-		if (!PageDirty(page))
+		if (!folio_test_dirty(folio))
 			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
 		if (!keep_write)
 			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
 		xas_unlock_irqrestore(&xas, flags);
 	} else {
-		ret = TestSetPageWriteback(page);
+		ret = folio_test_set_writeback(folio);
 	}
 	if (!ret) {
-		inc_lruvec_page_state(page, NR_WRITEBACK);
-		inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
+		lruvec_stat_mod_folio(folio, NR_WRITEBACK, nr);
+		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
 	}
-	unlock_page_memcg(page);
-	access_ret = arch_make_page_accessible(page);
+	folio_memcg_unlock(folio);
+	access_ret = arch_make_folio_accessible(folio);
 	/*
 	 * If writeback has been triggered on a page that cannot be made
 	 * accessible, it is too late to recover here.
 	 */
-	VM_BUG_ON_PAGE(access_ret != 0, page);
+	VM_BUG_ON_FOLIO(access_ret != 0, folio);
 
 	return ret;
-
 }
-EXPORT_SYMBOL(__test_set_page_writeback);
+EXPORT_SYMBOL(__folio_start_writeback);
 
 /**
  * folio_wait_writeback - Wait for a folio to finish writeback.
-- 
cgit v1.2.3


From b5e84594cafb934e023ee7d4ea4208b6c6b65fcc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 26 Apr 2021 23:53:10 -0400
Subject: mm/writeback: Add folio_mark_dirty()

Reimplement set_page_dirty() as a wrapper around folio_mark_dirty().
There is no change to filesystems as they were already being called
with the compound_head of the page being marked dirty.  We avoid
several calls to compound_head(), both statically (through
using folio_test_dirty() instead of PageDirty() and dynamically by
calling folio_mapping() instead of page_mapping().

Also return bool instead of int to show the range of values actually
returned, and add kernel-doc.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h  |  3 ++-
 mm/folio-compat.c   |  6 ++++++
 mm/page-writeback.c | 35 +++++++++++++++++++----------------
 3 files changed, 27 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 93d5fbe2e4e3..00510b02ffe1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2008,7 +2008,8 @@ int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
 void account_page_cleaned(struct page *page, struct address_space *mapping,
 			  struct bdi_writeback *wb);
-int set_page_dirty(struct page *page);
+bool folio_mark_dirty(struct folio *folio);
+bool set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
 void __cancel_dirty_page(struct page *page);
 static inline void cancel_dirty_page(struct page *page)
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 10ce5582d869..2c2b3917b5dc 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -77,3 +77,9 @@ bool set_page_writeback(struct page *page)
 	return folio_start_writeback(page_folio(page));
 }
 EXPORT_SYMBOL(set_page_writeback);
+
+bool set_page_dirty(struct page *page)
+{
+	return folio_mark_dirty(page_folio(page));
+}
+EXPORT_SYMBOL(set_page_dirty);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 82938b037103..c79801656f5b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2581,18 +2581,21 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
 }
 EXPORT_SYMBOL(redirty_page_for_writepage);
 
-/*
- * Dirty a page.
+/**
+ * folio_mark_dirty - Mark a folio as being modified.
+ * @folio: The folio.
+ *
+ * For folios with a mapping this should be done under the page lock
+ * for the benefit of asynchronous memory errors who prefer a consistent
+ * dirty state. This rule can be broken in some special cases,
+ * but should be better not to.
  *
- * For pages with a mapping this should be done under the page lock for the
- * benefit of asynchronous memory errors who prefer a consistent dirty state.
- * This rule can be broken in some special cases, but should be better not to.
+ * Return: True if the folio was newly dirtied, false if it was already dirty.
  */
-int set_page_dirty(struct page *page)
+bool folio_mark_dirty(struct folio *folio)
 {
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping = folio_mapping(folio);
 
-	page = compound_head(page);
 	if (likely(mapping)) {
 		/*
 		 * readahead/lru_deactivate_page could remain
@@ -2604,17 +2607,17 @@ int set_page_dirty(struct page *page)
 		 * it will confuse readahead and make it restart the size rampup
 		 * process. But it's a trivial problem.
 		 */
-		if (PageReclaim(page))
-			ClearPageReclaim(page);
-		return mapping->a_ops->set_page_dirty(page);
+		if (folio_test_reclaim(folio))
+			folio_clear_reclaim(folio);
+		return mapping->a_ops->set_page_dirty(&folio->page);
 	}
-	if (!PageDirty(page)) {
-		if (!TestSetPageDirty(page))
-			return 1;
+	if (!folio_test_dirty(folio)) {
+		if (!folio_test_set_dirty(folio))
+			return true;
 	}
-	return 0;
+	return false;
 }
-EXPORT_SYMBOL(set_page_dirty);
+EXPORT_SYMBOL(folio_mark_dirty);
 
 /*
  * set_page_dirty() is racy if the caller has no reference against
-- 
cgit v1.2.3


From 203a31516616111b8eaaf00c16fa3fcaa25e6f81 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 4 May 2021 11:01:10 -0400
Subject: mm/writeback: Add __folio_mark_dirty()

Turn __set_page_dirty() into a wrapper around __folio_mark_dirty().
Convert account_page_dirtied() into folio_account_dirtied() and account
the number of pages in the folio to support multi-page folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/memcontrol.h |  5 ++---
 include/linux/pagemap.h    |  7 ++++++-
 mm/page-writeback.c        | 41 +++++++++++++++++++++--------------------
 3 files changed, 29 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 7bd78c13d1fa..e34bf0cbdf55 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1615,10 +1615,9 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
 					     struct bdi_writeback *wb);
 
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
 						  struct bdi_writeback *wb)
 {
-	struct folio *folio = page_folio(page);
 	if (mem_cgroup_disabled())
 		return;
 
@@ -1643,7 +1642,7 @@ static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb,
 {
 }
 
-static inline void mem_cgroup_track_foreign_dirty(struct page *page,
+static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
 						  struct bdi_writeback *wb)
 {
 }
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index bdbd7be67812..aa9a083083df 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -772,8 +772,13 @@ void end_page_writeback(struct page *page);
 void folio_end_writeback(struct folio *folio);
 void wait_for_stable_page(struct page *page);
 void folio_wait_stable(struct folio *folio);
+void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
+static inline void __set_page_dirty(struct page *page,
+		struct address_space *mapping, int warn)
+{
+	__folio_mark_dirty(page_folio(page), mapping, warn);
+}
 
-void __set_page_dirty(struct page *, struct address_space *, int warn);
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c79801656f5b..40d77597385f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2438,29 +2438,30 @@ EXPORT_SYMBOL(__set_page_dirty_no_writeback);
  *
  * NOTE: This relies on being atomic wrt interrupts.
  */
-static void account_page_dirtied(struct page *page,
+static void folio_account_dirtied(struct folio *folio,
 		struct address_space *mapping)
 {
 	struct inode *inode = mapping->host;
 
-	trace_writeback_dirty_page(page, mapping);
+	trace_writeback_dirty_page(&folio->page, mapping);
 
 	if (mapping_can_writeback(mapping)) {
 		struct bdi_writeback *wb;
+		long nr = folio_nr_pages(folio);
 
-		inode_attach_wb(inode, page);
+		inode_attach_wb(inode, &folio->page);
 		wb = inode_to_wb(inode);
 
-		__inc_lruvec_page_state(page, NR_FILE_DIRTY);
-		__inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-		__inc_node_page_state(page, NR_DIRTIED);
-		inc_wb_stat(wb, WB_RECLAIMABLE);
-		inc_wb_stat(wb, WB_DIRTIED);
-		task_io_account_write(PAGE_SIZE);
-		current->nr_dirtied++;
-		__this_cpu_inc(bdp_ratelimits);
+		__lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, nr);
+		__zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, nr);
+		__node_stat_mod_folio(folio, NR_DIRTIED, nr);
+		wb_stat_mod(wb, WB_RECLAIMABLE, nr);
+		wb_stat_mod(wb, WB_DIRTIED, nr);
+		task_io_account_write(nr * PAGE_SIZE);
+		current->nr_dirtied += nr;
+		__this_cpu_add(bdp_ratelimits, nr);
 
-		mem_cgroup_track_foreign_dirty(page, wb);
+		mem_cgroup_track_foreign_dirty(folio, wb);
 	}
 }
 
@@ -2481,24 +2482,24 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 }
 
 /*
- * Mark the page dirty, and set it dirty in the page cache, and mark the inode
- * dirty.
+ * Mark the folio dirty, and set it dirty in the page cache, and mark
+ * the inode dirty.
  *
- * If warn is true, then emit a warning if the page is not uptodate and has
+ * If warn is true, then emit a warning if the folio is not uptodate and has
  * not been truncated.
  *
  * The caller must hold lock_page_memcg().
  */
-void __set_page_dirty(struct page *page, struct address_space *mapping,
+void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
 			     int warn)
 {
 	unsigned long flags;
 
 	xa_lock_irqsave(&mapping->i_pages, flags);
-	if (page->mapping) {	/* Race with truncate? */
-		WARN_ON_ONCE(warn && !PageUptodate(page));
-		account_page_dirtied(page, mapping);
-		__xa_set_mark(&mapping->i_pages, page_index(page),
+	if (folio->mapping) {	/* Race with truncate? */
+		WARN_ON_ONCE(warn && !folio_test_uptodate(folio));
+		folio_account_dirtied(folio, mapping);
+		__xa_set_mark(&mapping->i_pages, folio_index(folio),
 				PAGECACHE_TAG_DIRTY);
 	}
 	xa_unlock_irqrestore(&mapping->i_pages, flags);
-- 
cgit v1.2.3


From 85d4d2ebc86f02740c5f5f72ec43cc47d3560720 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 3 May 2021 23:30:44 -0400
Subject: mm/writeback: Add filemap_dirty_folio()

Reimplement __set_page_dirty_nobuffers() as a wrapper around
filemap_dirty_folio().  Eventually folio_mark_dirty() will pass
the folio's mapping to the address space's ->dirty_folio()
operation, so add the parameter to filemap_dirty_folio() now.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/writeback.h |  1 +
 mm/folio-compat.c         |  6 +++++
 mm/page-writeback.c       | 60 ++++++++++++++++++++++++-----------------------
 3 files changed, 38 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d1f65adf6a26..d4f24eba066f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -393,6 +393,7 @@ void writeback_set_ratelimit(void);
 void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end);
 
+bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
 void account_page_redirty(struct page *page);
 
 void sb_mark_inode_writeback(struct inode *inode);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 2c2b3917b5dc..dad962b920e5 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -83,3 +83,9 @@ bool set_page_dirty(struct page *page)
 	return folio_mark_dirty(page_folio(page));
 }
 EXPORT_SYMBOL(set_page_dirty);
+
+int __set_page_dirty_nobuffers(struct page *page)
+{
+	return filemap_dirty_folio(page_mapping(page), page_folio(page));
+}
+EXPORT_SYMBOL(__set_page_dirty_nobuffers);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index ebbd15f0d841..a501dad430af 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2505,41 +2505,43 @@ void __folio_mark_dirty(struct folio *folio, struct address_space *mapping,
 	xa_unlock_irqrestore(&mapping->i_pages, flags);
 }
 
-/*
- * For address_spaces which do not use buffers.  Just tag the page as dirty in
- * the xarray.
- *
- * This is also used when a single buffer is being dirtied: we want to set the
- * page dirty in that case, but not all the buffers.  This is a "bottom-up"
- * dirtying, whereas __set_page_dirty_buffers() is a "top-down" dirtying.
- *
- * The caller must ensure this doesn't race with truncation.  Most will simply
- * hold the page lock, but e.g. zap_pte_range() calls with the page mapped and
- * the pte lock held, which also locks out truncation.
+/**
+ * filemap_dirty_folio - Mark a folio dirty for filesystems which do not use buffer_heads.
+ * @mapping: Address space this folio belongs to.
+ * @folio: Folio to be marked as dirty.
+ *
+ * Filesystems which do not use buffer heads should call this function
+ * from their set_page_dirty address space operation.  It ignores the
+ * contents of folio_get_private(), so if the filesystem marks individual
+ * blocks as dirty, the filesystem should handle that itself.
+ *
+ * This is also sometimes used by filesystems which use buffer_heads when
+ * a single buffer is being dirtied: we want to set the folio dirty in
+ * that case, but not all the buffers.  This is a "bottom-up" dirtying,
+ * whereas __set_page_dirty_buffers() is a "top-down" dirtying.
+ *
+ * The caller must ensure this doesn't race with truncation.  Most will
+ * simply hold the folio lock, but e.g. zap_pte_range() calls with the
+ * folio mapped and the pte lock held, which also locks out truncation.
  */
-int __set_page_dirty_nobuffers(struct page *page)
+bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
 {
-	lock_page_memcg(page);
-	if (!TestSetPageDirty(page)) {
-		struct address_space *mapping = page_mapping(page);
+	folio_memcg_lock(folio);
+	if (folio_test_set_dirty(folio)) {
+		folio_memcg_unlock(folio);
+		return false;
+	}
 
-		if (!mapping) {
-			unlock_page_memcg(page);
-			return 1;
-		}
-		__set_page_dirty(page, mapping, !PagePrivate(page));
-		unlock_page_memcg(page);
+	__folio_mark_dirty(folio, mapping, !folio_test_private(folio));
+	folio_memcg_unlock(folio);
 
-		if (mapping->host) {
-			/* !PageAnon && !swapper_space */
-			__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
-		}
-		return 1;
+	if (mapping->host) {
+		/* !PageAnon && !swapper_space */
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 	}
-	unlock_page_memcg(page);
-	return 0;
+	return true;
 }
-EXPORT_SYMBOL(__set_page_dirty_nobuffers);
+EXPORT_SYMBOL(filemap_dirty_folio);
 
 /*
  * Call this whenever redirtying a page, to de-account the dirty counters
-- 
cgit v1.2.3


From fc9b6a538b222eaeb4d1e3f93e716b6626d9b653 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 4 May 2021 16:12:09 -0400
Subject: mm/writeback: Add folio_account_cleaned()

Get the statistics right; compound pages were being accounted as a
single page.  This didn't matter before now as no filesystem which
supported compound pages did writeback.  Also move the declaration
to pagemap.h since this is part of the page cache.  Add a wrapper for
account_page_cleaned().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h      |  3 ---
 include/linux/pagemap.h |  7 +++++++
 mm/page-writeback.c     | 11 ++++++-----
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 00510b02ffe1..f47af4ca4873 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -39,7 +39,6 @@ struct anon_vma_chain;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
-struct bdi_writeback;
 struct pt_regs;
 
 extern int sysctl_page_lock_unfairness;
@@ -2006,8 +2005,6 @@ extern void do_invalidatepage(struct page *page, unsigned int offset,
 
 int redirty_page_for_writepage(struct writeback_control *wbc,
 				struct page *page);
-void account_page_cleaned(struct page *page, struct address_space *mapping,
-			  struct bdi_writeback *wb);
 bool folio_mark_dirty(struct folio *folio);
 bool set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index aa9a083083df..8ba7da513ac4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -778,6 +778,13 @@ static inline void __set_page_dirty(struct page *page,
 {
 	__folio_mark_dirty(page_folio(page), mapping, warn);
 }
+void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
+			  struct bdi_writeback *wb);
+static inline void account_page_cleaned(struct page *page,
+		struct address_space *mapping, struct bdi_writeback *wb)
+{
+	return folio_account_cleaned(page_folio(page), mapping, wb);
+}
 
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a501dad430af..82e3bc3d4eae 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2470,14 +2470,15 @@ static void folio_account_dirtied(struct folio *folio,
  *
  * Caller must hold lock_page_memcg().
  */
-void account_page_cleaned(struct page *page, struct address_space *mapping,
+void folio_account_cleaned(struct folio *folio, struct address_space *mapping,
 			  struct bdi_writeback *wb)
 {
 	if (mapping_can_writeback(mapping)) {
-		dec_lruvec_page_state(page, NR_FILE_DIRTY);
-		dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-		dec_wb_stat(wb, WB_RECLAIMABLE);
-		task_io_account_cancelled_write(PAGE_SIZE);
+		long nr = folio_nr_pages(folio);
+		lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
+		zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
+		wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
+		task_io_account_cancelled_write(nr * PAGE_SIZE);
 	}
 }
 
-- 
cgit v1.2.3


From fdaf532a23795e320c47e1caab50a53c202135c5 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 8 Mar 2021 16:43:04 -0500
Subject: mm/writeback: Add folio_cancel_dirty()

Turn __cancel_dirty_page() into __folio_cancel_dirty() and add wrappers.
Move the prototypes into pagemap.h since this is page cache functionality.
Saves 44 bytes of kernel text in total; 33 bytes from __folio_cancel_dirty
and 11 from two callers of cancel_dirty_page().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h      |  7 -------
 include/linux/pagemap.h | 11 +++++++++++
 mm/page-writeback.c     | 16 ++++++++--------
 3 files changed, 19 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f47af4ca4873..3bc394aafbc0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2008,13 +2008,6 @@ int redirty_page_for_writepage(struct writeback_control *wbc,
 bool folio_mark_dirty(struct folio *folio);
 bool set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
-void __cancel_dirty_page(struct page *page);
-static inline void cancel_dirty_page(struct page *page)
-{
-	/* Avoid atomic ops, locking, etc. when not actually needed. */
-	if (PageDirty(page))
-		__cancel_dirty_page(page);
-}
 int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8ba7da513ac4..8f4a9ba0bfb0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -785,6 +785,17 @@ static inline void account_page_cleaned(struct page *page,
 {
 	return folio_account_cleaned(page_folio(page), mapping, wb);
 }
+void __folio_cancel_dirty(struct folio *folio);
+static inline void folio_cancel_dirty(struct folio *folio)
+{
+	/* Avoid atomic ops, locking, etc. when not actually needed. */
+	if (folio_test_dirty(folio))
+		__folio_cancel_dirty(folio);
+}
+static inline void cancel_dirty_page(struct page *page)
+{
+	folio_cancel_dirty(page_folio(page));
+}
 
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 82e3bc3d4eae..16c4ab0cc1c9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2657,28 +2657,28 @@ EXPORT_SYMBOL(set_page_dirty_lock);
  * page without actually doing it through the VM. Can you say "ext3 is
  * horribly ugly"? Thought you could.
  */
-void __cancel_dirty_page(struct page *page)
+void __folio_cancel_dirty(struct folio *folio)
 {
-	struct address_space *mapping = page_mapping(page);
+	struct address_space *mapping = folio_mapping(folio);
 
 	if (mapping_can_writeback(mapping)) {
 		struct inode *inode = mapping->host;
 		struct bdi_writeback *wb;
 		struct wb_lock_cookie cookie = {};
 
-		lock_page_memcg(page);
+		folio_memcg_lock(folio);
 		wb = unlocked_inode_to_wb_begin(inode, &cookie);
 
-		if (TestClearPageDirty(page))
-			account_page_cleaned(page, mapping, wb);
+		if (folio_test_clear_dirty(folio))
+			folio_account_cleaned(folio, mapping, wb);
 
 		unlocked_inode_to_wb_end(inode, &cookie);
-		unlock_page_memcg(page);
+		folio_memcg_unlock(folio);
 	} else {
-		ClearPageDirty(page);
+		folio_clear_dirty(folio);
 	}
 }
-EXPORT_SYMBOL(__cancel_dirty_page);
+EXPORT_SYMBOL(__folio_cancel_dirty);
 
 /*
  * Clear a page's dirty flag, while caring for dirty memory accounting.
-- 
cgit v1.2.3


From 9350f20a070d27a6c6a292a2b6f6091e7ea90a65 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 28 Feb 2021 16:21:20 -0500
Subject: mm/writeback: Add folio_clear_dirty_for_io()

Transform clear_page_dirty_for_io() into folio_clear_dirty_for_io()
and add a compatibility wrapper.  Also move the declaration to pagemap.h
as this is page cache functionality that doesn't need to be used by the
rest of the kernel.

Increases the size of the kernel by 79 bytes.  While we remove a few
calls to compound_head(), we add a call to folio_nr_pages() to get the
stats correct for the eventual support of multi-page folios.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h      |  1 -
 include/linux/pagemap.h |  2 ++
 mm/folio-compat.c       |  6 +++++
 mm/page-writeback.c     | 63 +++++++++++++++++++++++++------------------------
 4 files changed, 40 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3bc394aafbc0..dd5c6970ba67 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2008,7 +2008,6 @@ int redirty_page_for_writepage(struct writeback_control *wbc,
 bool folio_mark_dirty(struct folio *folio);
 bool set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
-int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 8f4a9ba0bfb0..0dda6459d2d1 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -796,6 +796,8 @@ static inline void cancel_dirty_page(struct page *page)
 {
 	folio_cancel_dirty(page_folio(page));
 }
+bool folio_clear_dirty_for_io(struct folio *folio);
+bool clear_page_dirty_for_io(struct page *page);
 
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index dad962b920e5..39f5a8d963b1 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -89,3 +89,9 @@ int __set_page_dirty_nobuffers(struct page *page)
 	return filemap_dirty_folio(page_mapping(page), page_folio(page));
 }
 EXPORT_SYMBOL(__set_page_dirty_nobuffers);
+
+bool clear_page_dirty_for_io(struct page *page)
+{
+	return folio_clear_dirty_for_io(page_folio(page));
+}
+EXPORT_SYMBOL(clear_page_dirty_for_io);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 16c4ab0cc1c9..85c04445de97 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2681,25 +2681,25 @@ void __folio_cancel_dirty(struct folio *folio)
 EXPORT_SYMBOL(__folio_cancel_dirty);
 
 /*
- * Clear a page's dirty flag, while caring for dirty memory accounting.
- * Returns true if the page was previously dirty.
- *
- * This is for preparing to put the page under writeout.  We leave the page
- * tagged as dirty in the xarray so that a concurrent write-for-sync
- * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
- * implementation will run either set_page_writeback() or set_page_dirty(),
- * at which stage we bring the page's dirty flag and xarray dirty tag
- * back into sync.
- *
- * This incoherency between the page's dirty flag and xarray tag is
- * unfortunate, but it only exists while the page is locked.
+ * Clear a folio's dirty flag, while caring for dirty memory accounting.
+ * Returns true if the folio was previously dirty.
+ *
+ * This is for preparing to put the folio under writeout.  We leave
+ * the folio tagged as dirty in the xarray so that a concurrent
+ * write-for-sync can discover it via a PAGECACHE_TAG_DIRTY walk.
+ * The ->writepage implementation will run either folio_start_writeback()
+ * or folio_mark_dirty(), at which stage we bring the folio's dirty flag
+ * and xarray dirty tag back into sync.
+ *
+ * This incoherency between the folio's dirty flag and xarray tag is
+ * unfortunate, but it only exists while the folio is locked.
  */
-int clear_page_dirty_for_io(struct page *page)
+bool folio_clear_dirty_for_io(struct folio *folio)
 {
-	struct address_space *mapping = page_mapping(page);
-	int ret = 0;
+	struct address_space *mapping = folio_mapping(folio);
+	bool ret = false;
 
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 
 	if (mapping && mapping_can_writeback(mapping)) {
 		struct inode *inode = mapping->host;
@@ -2712,48 +2712,49 @@ int clear_page_dirty_for_io(struct page *page)
 		 * We use this sequence to make sure that
 		 *  (a) we account for dirty stats properly
 		 *  (b) we tell the low-level filesystem to
-		 *      mark the whole page dirty if it was
+		 *      mark the whole folio dirty if it was
 		 *      dirty in a pagetable. Only to then
-		 *  (c) clean the page again and return 1 to
+		 *  (c) clean the folio again and return 1 to
 		 *      cause the writeback.
 		 *
 		 * This way we avoid all nasty races with the
 		 * dirty bit in multiple places and clearing
 		 * them concurrently from different threads.
 		 *
-		 * Note! Normally the "set_page_dirty(page)"
+		 * Note! Normally the "folio_mark_dirty(folio)"
 		 * has no effect on the actual dirty bit - since
 		 * that will already usually be set. But we
 		 * need the side effects, and it can help us
 		 * avoid races.
 		 *
-		 * We basically use the page "master dirty bit"
+		 * We basically use the folio "master dirty bit"
 		 * as a serialization point for all the different
 		 * threads doing their things.
 		 */
-		if (page_mkclean(page))
-			set_page_dirty(page);
+		if (folio_mkclean(folio))
+			folio_mark_dirty(folio);
 		/*
 		 * We carefully synchronise fault handlers against
-		 * installing a dirty pte and marking the page dirty
+		 * installing a dirty pte and marking the folio dirty
 		 * at this point.  We do this by having them hold the
-		 * page lock while dirtying the page, and pages are
+		 * page lock while dirtying the folio, and folios are
 		 * always locked coming in here, so we get the desired
 		 * exclusion.
 		 */
 		wb = unlocked_inode_to_wb_begin(inode, &cookie);
-		if (TestClearPageDirty(page)) {
-			dec_lruvec_page_state(page, NR_FILE_DIRTY);
-			dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
-			dec_wb_stat(wb, WB_RECLAIMABLE);
-			ret = 1;
+		if (folio_test_clear_dirty(folio)) {
+			long nr = folio_nr_pages(folio);
+			lruvec_stat_mod_folio(folio, NR_FILE_DIRTY, -nr);
+			zone_stat_mod_folio(folio, NR_ZONE_WRITE_PENDING, -nr);
+			wb_stat_mod(wb, WB_RECLAIMABLE, -nr);
+			ret = true;
 		}
 		unlocked_inode_to_wb_end(inode, &cookie);
 		return ret;
 	}
-	return TestClearPageDirty(page);
+	return folio_test_clear_dirty(folio);
 }
-EXPORT_SYMBOL(clear_page_dirty_for_io);
+EXPORT_SYMBOL(folio_clear_dirty_for_io);
 
 static void wb_inode_writeback_start(struct bdi_writeback *wb)
 {
-- 
cgit v1.2.3


From 25ff8b15537dfa0e1a62d55cfcc48f3c8bd8a76c Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 3 May 2021 10:06:55 -0400
Subject: mm/writeback: Add folio_account_redirty()

Account the number of pages in the folio that we're redirtying.
Turn account_page_dirty() into a wrapper around it.  Also turn
the comment on folio_account_redirty() into kernel-doc and
edit it slightly so it makes sense to its potential callers.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/writeback.h |  6 +++++-
 mm/page-writeback.c       | 32 +++++++++++++++++++-------------
 2 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d4f24eba066f..852100bea351 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -394,7 +394,11 @@ void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end);
 
 bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
-void account_page_redirty(struct page *page);
+void folio_account_redirty(struct folio *folio);
+static inline void account_page_redirty(struct page *page)
+{
+	folio_account_redirty(page_folio(page));
+}
 
 void sb_mark_inode_writeback(struct inode *inode);
 void sb_clear_inode_writeback(struct inode *inode);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 85c04445de97..7ef904363fb7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1084,7 +1084,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
 	 * write_bandwidth = ---------------------------------------------------
 	 *                                          period
 	 *
-	 * @written may have decreased due to account_page_redirty().
+	 * @written may have decreased due to folio_account_redirty().
 	 * Avoid underflowing @bw calculation.
 	 */
 	bw = written - min(written, wb->written_stamp);
@@ -2544,30 +2544,36 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
 }
 EXPORT_SYMBOL(filemap_dirty_folio);
 
-/*
- * Call this whenever redirtying a page, to de-account the dirty counters
- * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written
- * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to
- * systematic errors in balanced_dirty_ratelimit and the dirty pages position
- * control.
+/**
+ * folio_account_redirty - Manually account for redirtying a page.
+ * @folio: The folio which is being redirtied.
+ *
+ * Most filesystems should call folio_redirty_for_writepage() instead
+ * of this fuction.  If your filesystem is doing writeback outside the
+ * context of a writeback_control(), it can call this when redirtying
+ * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED,
+ * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN,
+ * WB_WRITTEN) in long term. The mismatches will lead to systematic errors
+ * in balanced_dirty_ratelimit and the dirty pages position control.
  */
-void account_page_redirty(struct page *page)
+void folio_account_redirty(struct folio *folio)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = folio->mapping;
 
 	if (mapping && mapping_can_writeback(mapping)) {
 		struct inode *inode = mapping->host;
 		struct bdi_writeback *wb;
 		struct wb_lock_cookie cookie = {};
+		long nr = folio_nr_pages(folio);
 
 		wb = unlocked_inode_to_wb_begin(inode, &cookie);
-		current->nr_dirtied--;
-		dec_node_page_state(page, NR_DIRTIED);
-		dec_wb_stat(wb, WB_DIRTIED);
+		current->nr_dirtied -= nr;
+		node_stat_mod_folio(folio, NR_DIRTIED, -nr);
+		wb_stat_mod(wb, WB_DIRTIED, -nr);
 		unlocked_inode_to_wb_end(inode, &cookie);
 	}
 }
-EXPORT_SYMBOL(account_page_redirty);
+EXPORT_SYMBOL(folio_account_redirty);
 
 /*
  * When a writepage implementation decides that it doesn't want to write this
-- 
cgit v1.2.3


From cd78ab11a8810dd297f4751d17cc53e3dce36024 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 2 May 2021 23:22:52 -0400
Subject: mm/writeback: Add folio_redirty_for_writepage()

Reimplement redirty_page_for_writepage() as a wrapper around
folio_redirty_for_writepage().  Account the number of pages in the
folio, add kernel-doc and move the prototype to writeback.h.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 fs/jfs/jfs_metapage.c     |  1 +
 include/linux/mm.h        |  4 ----
 include/linux/writeback.h |  2 ++
 mm/folio-compat.c         |  7 +++++++
 mm/page-writeback.c       | 30 ++++++++++++++++++++----------
 5 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 176580f54af9..104ae698443e 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -13,6 +13,7 @@
 #include <linux/buffer_head.h>
 #include <linux/mempool.h>
 #include <linux/seq_file.h>
+#include <linux/writeback.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_filsys.h"
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dd5c6970ba67..23ab040aa65a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -36,9 +36,7 @@
 struct mempolicy;
 struct anon_vma;
 struct anon_vma_chain;
-struct file_ra_state;
 struct user_struct;
-struct writeback_control;
 struct pt_regs;
 
 extern int sysctl_page_lock_unfairness;
@@ -2003,8 +2001,6 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
 extern void do_invalidatepage(struct page *page, unsigned int offset,
 			      unsigned int length);
 
-int redirty_page_for_writepage(struct writeback_control *wbc,
-				struct page *page);
 bool folio_mark_dirty(struct folio *folio);
 bool set_page_dirty(struct page *page);
 int set_page_dirty_lock(struct page *page);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 852100bea351..3571f383dfb6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -399,6 +399,8 @@ static inline void account_page_redirty(struct page *page)
 {
 	folio_account_redirty(page_folio(page));
 }
+bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
+bool redirty_page_for_writepage(struct writeback_control *, struct page *);
 
 void sb_mark_inode_writeback(struct inode *inode);
 void sb_clear_inode_writeback(struct inode *inode);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 39f5a8d963b1..c1e01bc36d32 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -95,3 +95,10 @@ bool clear_page_dirty_for_io(struct page *page)
 	return folio_clear_dirty_for_io(page_folio(page));
 }
 EXPORT_SYMBOL(clear_page_dirty_for_io);
+
+bool redirty_page_for_writepage(struct writeback_control *wbc,
+		struct page *page)
+{
+	return folio_redirty_for_writepage(wbc, page_folio(page));
+}
+EXPORT_SYMBOL(redirty_page_for_writepage);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7ef904363fb7..a0c35a9d8029 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2575,21 +2575,31 @@ void folio_account_redirty(struct folio *folio)
 }
 EXPORT_SYMBOL(folio_account_redirty);
 
-/*
- * When a writepage implementation decides that it doesn't want to write this
- * page for some reason, it should redirty the locked page via
- * redirty_page_for_writepage() and it should then unlock the page and return 0
+/**
+ * folio_redirty_for_writepage - Decline to write a dirty folio.
+ * @wbc: The writeback control.
+ * @folio: The folio.
+ *
+ * When a writepage implementation decides that it doesn't want to write
+ * @folio for some reason, it should call this function, unlock @folio and
+ * return 0.
+ *
+ * Return: True if we redirtied the folio.  False if someone else dirtied
+ * it first.
  */
-int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
+bool folio_redirty_for_writepage(struct writeback_control *wbc,
+		struct folio *folio)
 {
-	int ret;
+	bool ret;
+	long nr = folio_nr_pages(folio);
+
+	wbc->pages_skipped += nr;
+	ret = filemap_dirty_folio(folio->mapping, folio);
+	folio_account_redirty(folio);
 
-	wbc->pages_skipped++;
-	ret = __set_page_dirty_nobuffers(page);
-	account_page_redirty(page);
 	return ret;
 }
-EXPORT_SYMBOL(redirty_page_for_writepage);
+EXPORT_SYMBOL(folio_redirty_for_writepage);
 
 /**
  * folio_mark_dirty - Mark a folio as being modified.
-- 
cgit v1.2.3


From 9eb7c76dd31a53331bec795faaf7daa7ffb80071 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 27 Apr 2021 23:11:28 -0400
Subject: mm/filemap: Add i_blocks_per_folio()

Reimplement i_blocks_per_page() as a wrapper around i_blocks_per_folio().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/pagemap.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0dda6459d2d1..5431d9920dc0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1149,19 +1149,25 @@ static inline int page_mkwrite_check_truncate(struct page *page,
 }
 
 /**
- * i_blocks_per_page - How many blocks fit in this page.
+ * i_blocks_per_folio - How many blocks fit in this folio.
  * @inode: The inode which contains the blocks.
- * @page: The page (head page if the page is a THP).
+ * @folio: The folio.
  *
- * If the block size is larger than the size of this page, return zero.
+ * If the block size is larger than the size of this folio, return zero.
  *
- * Context: The caller should hold a refcount on the page to prevent it
+ * Context: The caller should hold a refcount on the folio to prevent it
  * from being split.
- * Return: The number of filesystem blocks covered by this page.
+ * Return: The number of filesystem blocks covered by this folio.
  */
+static inline
+unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
+{
+	return folio_size(folio) >> inode->i_blkbits;
+}
+
 static inline
 unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
 {
-	return thp_size(page) >> inode->i_blkbits;
+	return i_blocks_per_folio(inode, page_folio(page));
 }
 #endif /* _LINUX_PAGEMAP_H */
-- 
cgit v1.2.3


From f705bf84eab2aa3b9842974b8443587727ccb23a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 28 Apr 2021 22:30:06 -0400
Subject: mm/filemap: Add folio_mkwrite_check_truncate()

This is the folio equivalent of page_mkwrite_check_truncate().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 include/linux/pagemap.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5431d9920dc0..4b74d83c5571 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1120,6 +1120,34 @@ static inline unsigned long dir_pages(struct inode *inode)
 			       PAGE_SHIFT;
 }
 
+/**
+ * folio_mkwrite_check_truncate - check if folio was truncated
+ * @folio: the folio to check
+ * @inode: the inode to check the folio against
+ *
+ * Return: the number of bytes in the folio up to EOF,
+ * or -EFAULT if the folio was truncated.
+ */
+static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
+					      struct inode *inode)
+{
+	loff_t size = i_size_read(inode);
+	pgoff_t index = size >> PAGE_SHIFT;
+	size_t offset = offset_in_folio(folio, size);
+
+	if (!folio->mapping)
+		return -EFAULT;
+
+	/* folio is wholly inside EOF */
+	if (folio_next_index(folio) - 1 < index)
+		return folio_size(folio);
+	/* folio is wholly past EOF */
+	if (folio->index > index || !offset)
+		return -EFAULT;
+	/* folio is partially inside EOF */
+	return offset;
+}
+
 /**
  * page_mkwrite_check_truncate - check if page was truncated
  * @page: the page to check
-- 
cgit v1.2.3


From 9bf70167e3c61473b95f40771decc3778bf0fb9f Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 27 Apr 2021 16:37:09 -0400
Subject: mm/filemap: Add readahead_folio()

The pointers stored in the page cache are folios, by definition.
This change comes with a behaviour change -- callers of readahead_folio()
are no longer required to put the page reference themselves.  This matches
how readpage works, rather than matching how readpages used to work.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/pagemap.h | 54 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 4b74d83c5571..fea9615bab35 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -987,33 +987,57 @@ void page_cache_async_readahead(struct address_space *mapping,
 	page_cache_async_ra(&ractl, page, req_count);
 }
 
+static inline struct folio *__readahead_folio(struct readahead_control *ractl)
+{
+	struct folio *folio;
+
+	BUG_ON(ractl->_batch_count > ractl->_nr_pages);
+	ractl->_nr_pages -= ractl->_batch_count;
+	ractl->_index += ractl->_batch_count;
+
+	if (!ractl->_nr_pages) {
+		ractl->_batch_count = 0;
+		return NULL;
+	}
+
+	folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	ractl->_batch_count = folio_nr_pages(folio);
+
+	return folio;
+}
+
 /**
  * readahead_page - Get the next page to read.
- * @rac: The current readahead request.
+ * @ractl: The current readahead request.
  *
  * Context: The page is locked and has an elevated refcount.  The caller
  * should decreases the refcount once the page has been submitted for I/O
  * and unlock the page once all I/O to that page has completed.
  * Return: A pointer to the next page, or %NULL if we are done.
  */
-static inline struct page *readahead_page(struct readahead_control *rac)
+static inline struct page *readahead_page(struct readahead_control *ractl)
 {
-	struct page *page;
+	struct folio *folio = __readahead_folio(ractl);
 
-	BUG_ON(rac->_batch_count > rac->_nr_pages);
-	rac->_nr_pages -= rac->_batch_count;
-	rac->_index += rac->_batch_count;
-
-	if (!rac->_nr_pages) {
-		rac->_batch_count = 0;
-		return NULL;
-	}
+	return &folio->page;
+}
 
-	page = xa_load(&rac->mapping->i_pages, rac->_index);
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	rac->_batch_count = thp_nr_pages(page);
+/**
+ * readahead_folio - Get the next folio to read.
+ * @ractl: The current readahead request.
+ *
+ * Context: The folio is locked.  The caller should unlock the folio once
+ * all I/O to that folio has completed.
+ * Return: A pointer to the next folio, or %NULL if we are done.
+ */
+static inline struct folio *readahead_folio(struct readahead_control *ractl)
+{
+	struct folio *folio = __readahead_folio(ractl);
 
-	return page;
+	if (folio)
+		folio_put(folio);
+	return folio;
 }
 
 static inline unsigned int __readahead_batch(struct readahead_control *rac,
-- 
cgit v1.2.3


From 0995d7e568141226f10f8216aa4965e06ab5db8a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 29 Apr 2021 10:27:16 -0400
Subject: mm/workingset: Convert workingset_refault() to take a folio

This nets us 178 bytes of savings from removing calls to compound_head.
The three callers all grow a little, but each of them will be converted
to use folios soon, so that's fine.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/swap.h   |  4 ++--
 include/linux/vmstat.h |  6 ------
 mm/filemap.c           |  2 +-
 mm/memory.c            |  3 ++-
 mm/swap.c              |  7 +++----
 mm/swap_state.c        |  2 +-
 mm/workingset.c        | 42 ++++++++++++++++++++++--------------------
 7 files changed, 31 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 067b7af5242c..892faac942da 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -329,7 +329,7 @@ static inline swp_entry_t folio_swap_entry(struct folio *folio)
 /* linux/mm/workingset.c */
 void workingset_age_nonresident(struct lruvec *lruvec, unsigned long nr_pages);
 void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg);
-void workingset_refault(struct page *page, void *shadow);
+void workingset_refault(struct folio *folio, void *shadow);
 void workingset_activation(struct folio *folio);
 
 /* Only track the nodes of mappings with shadow entries */
@@ -350,7 +350,7 @@ extern unsigned long nr_free_buffer_pages(void);
 /* linux/mm/swap.c */
 extern void lru_note_cost(struct lruvec *lruvec, bool file,
 			  unsigned int nr_pages);
-extern void lru_note_cost_page(struct page *);
+extern void lru_note_cost_folio(struct folio *);
 extern void lru_cache_add(struct page *);
 void mark_page_accessed(struct page *);
 void folio_mark_accessed(struct folio *);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 241bd0f53fb9..bfe38869498d 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -597,12 +597,6 @@ static inline void mod_lruvec_page_state(struct page *page,
 
 #endif /* CONFIG_MEMCG */
 
-static inline void inc_lruvec_state(struct lruvec *lruvec,
-				    enum node_stat_item idx)
-{
-	mod_lruvec_state(lruvec, idx, 1);
-}
-
 static inline void __inc_lruvec_page_state(struct page *page,
 					   enum node_stat_item idx)
 {
diff --git a/mm/filemap.c b/mm/filemap.c
index 5cb18399a1b6..c4a7f3fdca12 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -998,7 +998,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 		 */
 		WARN_ON_ONCE(PageActive(page));
 		if (!(gfp_mask & __GFP_WRITE) && shadow)
-			workingset_refault(page, shadow);
+			workingset_refault(page_folio(page), shadow);
 		lru_cache_add(page);
 	}
 	return ret;
diff --git a/mm/memory.c b/mm/memory.c
index b67d80526bee..6308eeaed136 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3539,7 +3539,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 
 				shadow = get_shadow_from_swap_cache(entry);
 				if (shadow)
-					workingset_refault(page, shadow);
+					workingset_refault(page_folio(page),
+								shadow);
 
 				lru_cache_add(page);
 
diff --git a/mm/swap.c b/mm/swap.c
index b95fd6d05768..5c1674c98f82 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -293,11 +293,10 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
 	} while ((lruvec = parent_lruvec(lruvec)));
 }
 
-void lru_note_cost_page(struct page *page)
+void lru_note_cost_folio(struct folio *folio)
 {
-	struct folio *folio = page_folio(page);
-	lru_note_cost(folio_lruvec(folio),
-		      page_is_file_lru(page), thp_nr_pages(page));
+	lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio),
+			folio_nr_pages(folio));
 }
 
 static void __folio_activate(struct folio *folio, struct lruvec *lruvec)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index bc7cee6b2ec5..8d4104242100 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -498,7 +498,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	mem_cgroup_swapin_uncharge_swap(entry);
 
 	if (shadow)
-		workingset_refault(page, shadow);
+		workingset_refault(page_folio(page), shadow);
 
 	/* Caller will initiate read into locked page */
 	lru_cache_add(page);
diff --git a/mm/workingset.c b/mm/workingset.c
index 1c96ed525a0e..109ab978251a 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -273,17 +273,17 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
 }
 
 /**
- * workingset_refault - evaluate the refault of a previously evicted page
- * @page: the freshly allocated replacement page
- * @shadow: shadow entry of the evicted page
+ * workingset_refault - Evaluate the refault of a previously evicted folio.
+ * @folio: The freshly allocated replacement folio.
+ * @shadow: Shadow entry of the evicted folio.
  *
  * Calculates and evaluates the refault distance of the previously
- * evicted page in the context of the node and the memcg whose memory
+ * evicted folio in the context of the node and the memcg whose memory
  * pressure caused the eviction.
  */
-void workingset_refault(struct page *page, void *shadow)
+void workingset_refault(struct folio *folio, void *shadow)
 {
-	bool file = page_is_file_lru(page);
+	bool file = folio_is_file_lru(folio);
 	struct mem_cgroup *eviction_memcg;
 	struct lruvec *eviction_lruvec;
 	unsigned long refault_distance;
@@ -295,16 +295,17 @@ void workingset_refault(struct page *page, void *shadow)
 	unsigned long refault;
 	bool workingset;
 	int memcgid;
+	long nr;
 
 	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
 
 	rcu_read_lock();
 	/*
 	 * Look up the memcg associated with the stored ID. It might
-	 * have been deleted since the page's eviction.
+	 * have been deleted since the folio's eviction.
 	 *
 	 * Note that in rare events the ID could have been recycled
-	 * for a new cgroup that refaults a shared page. This is
+	 * for a new cgroup that refaults a shared folio. This is
 	 * impossible to tell from the available data. However, this
 	 * should be a rare and limited disturbance, and activations
 	 * are always speculative anyway. Ultimately, it's the aging
@@ -340,17 +341,18 @@ void workingset_refault(struct page *page, void *shadow)
 	refault_distance = (refault - eviction) & EVICTION_MASK;
 
 	/*
-	 * The activation decision for this page is made at the level
+	 * The activation decision for this folio is made at the level
 	 * where the eviction occurred, as that is where the LRU order
-	 * during page reclaim is being determined.
+	 * during folio reclaim is being determined.
 	 *
-	 * However, the cgroup that will own the page is the one that
+	 * However, the cgroup that will own the folio is the one that
 	 * is actually experiencing the refault event.
 	 */
-	memcg = page_memcg(page);
+	nr = folio_nr_pages(folio);
+	memcg = folio_memcg(folio);
 	lruvec = mem_cgroup_lruvec(memcg, pgdat);
 
-	inc_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file);
+	mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
 
 	mem_cgroup_flush_stats();
 	/*
@@ -376,16 +378,16 @@ void workingset_refault(struct page *page, void *shadow)
 	if (refault_distance > workingset_size)
 		goto out;
 
-	SetPageActive(page);
-	workingset_age_nonresident(lruvec, thp_nr_pages(page));
-	inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file);
+	folio_set_active(folio);
+	workingset_age_nonresident(lruvec, nr);
+	mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + file, nr);
 
-	/* Page was active prior to eviction */
+	/* Folio was active prior to eviction */
 	if (workingset) {
-		SetPageWorkingset(page);
+		folio_set_workingset(folio);
 		/* XXX: Move to lru_cache_add() when it supports new vs putback */
-		lru_note_cost_page(page);
-		inc_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file);
+		lru_note_cost_folio(folio);
+		mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr);
 	}
 out:
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 0d31125d2d3267ec349796418a16761bbda20387 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 29 Apr 2021 11:09:31 -0400
Subject: mm/lru: Add folio_add_lru()

Reimplement lru_cache_add() as a wrapper around folio_add_lru().
Saves 159 bytes of kernel text due to removing calls to compound_head().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/swap.h |  1 +
 mm/folio-compat.c    |  6 ++++++
 mm/swap.c            | 22 +++++++++++-----------
 3 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 892faac942da..cdf0957a88a4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -351,6 +351,7 @@ extern unsigned long nr_free_buffer_pages(void);
 extern void lru_note_cost(struct lruvec *lruvec, bool file,
 			  unsigned int nr_pages);
 extern void lru_note_cost_folio(struct folio *);
+extern void folio_add_lru(struct folio *);
 extern void lru_cache_add(struct page *);
 void mark_page_accessed(struct page *);
 void folio_mark_accessed(struct folio *);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index c1e01bc36d32..6de3cd78a4ae 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -102,3 +102,9 @@ bool redirty_page_for_writepage(struct writeback_control *wbc,
 	return folio_redirty_for_writepage(wbc, page_folio(page));
 }
 EXPORT_SYMBOL(redirty_page_for_writepage);
+
+void lru_cache_add(struct page *page)
+{
+	folio_add_lru(page_folio(page));
+}
+EXPORT_SYMBOL(lru_cache_add);
diff --git a/mm/swap.c b/mm/swap.c
index 858b4a8220ca..8ff9ba7cf2de 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -437,29 +437,29 @@ void folio_mark_accessed(struct folio *folio)
 EXPORT_SYMBOL(folio_mark_accessed);
 
 /**
- * lru_cache_add - add a page to a page list
- * @page: the page to be added to the LRU.
+ * folio_add_lru - Add a folio to an LRU list.
+ * @folio: The folio to be added to the LRU.
  *
- * Queue the page for addition to the LRU via pagevec. The decision on whether
+ * Queue the folio for addition to the LRU. The decision on whether
  * to add the page to the [in]active [file|anon] list is deferred until the
- * pagevec is drained. This gives a chance for the caller of lru_cache_add()
- * have the page added to the active list using mark_page_accessed().
+ * pagevec is drained. This gives a chance for the caller of folio_add_lru()
+ * have the folio added to the active list using folio_mark_accessed().
  */
-void lru_cache_add(struct page *page)
+void folio_add_lru(struct folio *folio)
 {
 	struct pagevec *pvec;
 
-	VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
-	VM_BUG_ON_PAGE(PageLRU(page), page);
+	VM_BUG_ON_FOLIO(folio_test_active(folio) && folio_test_unevictable(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
 
-	get_page(page);
+	folio_get(folio);
 	local_lock(&lru_pvecs.lock);
 	pvec = this_cpu_ptr(&lru_pvecs.lru_add);
-	if (pagevec_add_and_need_flush(pvec, page))
+	if (pagevec_add_and_need_flush(pvec, &folio->page))
 		__pagevec_lru_add(pvec);
 	local_unlock(&lru_pvecs.lock);
 }
-EXPORT_SYMBOL(lru_cache_add);
+EXPORT_SYMBOL(folio_add_lru);
 
 /**
  * lru_cache_add_inactive_or_unevictable
-- 
cgit v1.2.3


From cc09cb134124a42fbe3bdcebefdc54e286d8f3e5 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 15 Dec 2020 22:55:54 -0500
Subject: mm/page_alloc: Add folio allocation functions

The __folio_alloc(), __folio_alloc_node() and folio_alloc() functions
are mostly for type safety, but they also ensure that the page allocator
allocates a compound page and initialises the deferred list if the page
is large enough to have one.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/gfp.h | 16 ++++++++++++++++
 mm/mempolicy.c      | 10 ++++++++++
 mm/page_alloc.c     | 12 ++++++++++++
 3 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index dc5ff40608ce..3745efd21cf6 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -523,6 +523,8 @@ static inline void arch_alloc_page(struct page *page, int order) { }
 
 struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 		nodemask_t *nodemask);
+struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+		nodemask_t *nodemask);
 
 unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 				nodemask_t *nodemask, int nr_pages,
@@ -564,6 +566,15 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
 	return __alloc_pages(gfp_mask, order, nid, NULL);
 }
 
+static inline
+struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid)
+{
+	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
+	VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid));
+
+	return __folio_alloc(gfp, order, nid, NULL);
+}
+
 /*
  * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
  * prefer the current CPU's closest node. Otherwise node must be valid and
@@ -580,6 +591,7 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 
 #ifdef CONFIG_NUMA
 struct page *alloc_pages(gfp_t gfp, unsigned int order);
+struct folio *folio_alloc(gfp_t gfp, unsigned order);
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
 			int node, bool hugepage);
@@ -590,6 +602,10 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
 {
 	return alloc_pages_node(numa_node_id(), gfp_mask, order);
 }
+static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
+{
+	return __folio_alloc_node(gfp, order, numa_node_id());
+}
 #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
 	alloc_pages(gfp_mask, order)
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1592b081c58e..251df91ddc80 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2202,6 +2202,16 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
 }
 EXPORT_SYMBOL(alloc_pages);
 
+struct folio *folio_alloc(gfp_t gfp, unsigned order)
+{
+	struct page *page = alloc_pages(gfp | __GFP_COMP, order);
+
+	if (page && order > 1)
+		prep_transhuge_page(page);
+	return (struct folio *)page;
+}
+EXPORT_SYMBOL(folio_alloc);
+
 int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
 {
 	struct mempolicy *pol = mpol_dup(vma_policy(src));
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 869d0b06e1ef..544ff5a11cb8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5400,6 +5400,18 @@ out:
 }
 EXPORT_SYMBOL(__alloc_pages);
 
+struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid,
+		nodemask_t *nodemask)
+{
+	struct page *page = __alloc_pages(gfp | __GFP_COMP, order,
+			preferred_nid, nodemask);
+
+	if (page && order > 1)
+		prep_transhuge_page(page);
+	return (struct folio *)page;
+}
+EXPORT_SYMBOL(__folio_alloc);
+
 /*
  * Common helper functions. Never use with __GFP_HIGHMEM because the returned
  * address cannot represent highmem pages. Use alloc_pages and then kmap if
-- 
cgit v1.2.3


From bb3c579e25e5757bc5bac1333f4a56dfebf7cb91 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 15 Dec 2020 23:11:07 -0500
Subject: mm/filemap: Add filemap_alloc_folio

Reimplement __page_cache_alloc as a wrapper around filemap_alloc_folio
to allow filesystems to be converted at our leisure.  Increases
kernel text size by 133 bytes, mostly in cachefiles_read_backing_file().
pagecache_get_page() shrinks by 32 bytes, though.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/pagemap.h | 11 ++++++++---
 mm/filemap.c            | 14 +++++++-------
 2 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index fea9615bab35..51c190ae3b0b 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -262,14 +262,19 @@ static inline void *detach_page_private(struct page *page)
 }
 
 #ifdef CONFIG_NUMA
-extern struct page *__page_cache_alloc(gfp_t gfp);
+struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
 #else
-static inline struct page *__page_cache_alloc(gfp_t gfp)
+static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
 {
-	return alloc_pages(gfp, 0);
+	return folio_alloc(gfp, order);
 }
 #endif
 
+static inline struct page *__page_cache_alloc(gfp_t gfp)
+{
+	return &filemap_alloc_folio(gfp, 0)->page;
+}
+
 static inline struct page *page_cache_alloc(struct address_space *x)
 {
 	return __page_cache_alloc(mapping_gfp_mask(x));
diff --git a/mm/filemap.c b/mm/filemap.c
index c4a7f3fdca12..6976fb4af390 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1006,24 +1006,24 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
 
 #ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
 {
 	int n;
-	struct page *page;
+	struct folio *folio;
 
 	if (cpuset_do_page_mem_spread()) {
 		unsigned int cpuset_mems_cookie;
 		do {
 			cpuset_mems_cookie = read_mems_allowed_begin();
 			n = cpuset_mem_spread_node();
-			page = __alloc_pages_node(n, gfp, 0);
-		} while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
+			folio = __folio_alloc_node(gfp, order, n);
+		} while (!folio && read_mems_allowed_retry(cpuset_mems_cookie));
 
-		return page;
+		return folio;
 	}
-	return alloc_pages(gfp, 0);
+	return folio_alloc(gfp, order);
 }
-EXPORT_SYMBOL(__page_cache_alloc);
+EXPORT_SYMBOL(filemap_alloc_folio);
 #endif
 
 /*
-- 
cgit v1.2.3


From 9dd3d069406cea073fc633e77bc59abbfde8c6c4 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 8 Dec 2020 08:56:28 -0500
Subject: mm/filemap: Add filemap_add_folio()

Convert __add_to_page_cache_locked() into __filemap_add_folio().
Add an assertion to it that (for !hugetlbfs), the folio is naturally
aligned within the file.  Move the prototype from mm.h to pagemap.h.
Convert add_to_page_cache_lru() into filemap_add_folio().  Add a
compatibility wrapper for unconverted callers.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/mm.h      |  7 -----
 include/linux/pagemap.h | 10 +++++--
 kernel/bpf/verifier.c   |  2 +-
 mm/filemap.c            | 70 ++++++++++++++++++++++++-------------------------
 mm/folio-compat.c       |  7 +++++
 5 files changed, 50 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 23ab040aa65a..efcb06fc6116 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -213,13 +213,6 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
 int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
-/*
- * Any attempt to mark this function as static leads to build failure
- * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked()
- * is referred to by BPF code. This must be visible for error injection.
- */
-int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
-		pgoff_t index, gfp_t gfp, void **shadowp);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 51c190ae3b0b..a10eb6dc3866 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -876,9 +876,11 @@ static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
 }
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
-				pgoff_t index, gfp_t gfp_mask);
+		pgoff_t index, gfp_t gfp);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
-				pgoff_t index, gfp_t gfp_mask);
+		pgoff_t index, gfp_t gfp);
+int filemap_add_folio(struct address_space *mapping, struct folio *folio,
+		pgoff_t index, gfp_t gfp);
 extern void delete_from_page_cache(struct page *page);
 extern void __delete_from_page_cache(struct page *page, void *shadow);
 void replace_page_cache_page(struct page *old, struct page *new);
@@ -903,6 +905,10 @@ static inline int add_to_page_cache(struct page *page,
 	return error;
 }
 
+/* Must be non-static for BPF error injection */
+int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
+		pgoff_t index, gfp_t gfp, void **shadowp);
+
 /**
  * struct readahead_control - Describes a readahead request.
  *
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e76b55917905..de006552be8a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13319,7 +13319,7 @@ BTF_SET_START(btf_non_sleepable_error_inject)
 /* Three functions below can be called from sleepable and non-sleepable context.
  * Assume non-sleepable from bpf safety point of view.
  */
-BTF_ID(func, __add_to_page_cache_locked)
+BTF_ID(func, __filemap_add_folio)
 BTF_ID(func, should_fail_alloc_page)
 BTF_ID(func, should_failslab)
 BTF_SET_END(btf_non_sleepable_error_inject)
diff --git a/mm/filemap.c b/mm/filemap.c
index 6976fb4af390..3902e7e8877d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -872,26 +872,25 @@ void replace_page_cache_page(struct page *old, struct page *new)
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
-noinline int __add_to_page_cache_locked(struct page *page,
-					struct address_space *mapping,
-					pgoff_t offset, gfp_t gfp,
-					void **shadowp)
+noinline int __filemap_add_folio(struct address_space *mapping,
+		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
 {
-	XA_STATE(xas, &mapping->i_pages, offset);
-	int huge = PageHuge(page);
+	XA_STATE(xas, &mapping->i_pages, index);
+	int huge = folio_test_hugetlb(folio);
 	int error;
 	bool charged = false;
 
-	VM_BUG_ON_PAGE(!PageLocked(page), page);
-	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+	VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
 	mapping_set_update(&xas, mapping);
 
-	get_page(page);
-	page->mapping = mapping;
-	page->index = offset;
+	folio_get(folio);
+	folio->mapping = mapping;
+	folio->index = index;
 
 	if (!huge) {
-		error = mem_cgroup_charge(page_folio(page), NULL, gfp);
+		error = mem_cgroup_charge(folio, NULL, gfp);
+		VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
 		if (error)
 			goto error;
 		charged = true;
@@ -903,7 +902,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
 		unsigned int order = xa_get_order(xas.xa, xas.xa_index);
 		void *entry, *old = NULL;
 
-		if (order > thp_order(page))
+		if (order > folio_order(folio))
 			xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
 					order, gfp);
 		xas_lock_irq(&xas);
@@ -920,13 +919,13 @@ noinline int __add_to_page_cache_locked(struct page *page,
 				*shadowp = old;
 			/* entry may have been split before we acquired lock */
 			order = xa_get_order(xas.xa, xas.xa_index);
-			if (order > thp_order(page)) {
+			if (order > folio_order(folio)) {
 				xas_split(&xas, old, order);
 				xas_reset(&xas);
 			}
 		}
 
-		xas_store(&xas, page);
+		xas_store(&xas, folio);
 		if (xas_error(&xas))
 			goto unlock;
 
@@ -934,7 +933,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
 
 		/* hugetlb pages do not participate in page cache accounting */
 		if (!huge)
-			__inc_lruvec_page_state(page, NR_FILE_PAGES);
+			__lruvec_stat_add_folio(folio, NR_FILE_PAGES);
 unlock:
 		xas_unlock_irq(&xas);
 	} while (xas_nomem(&xas, gfp));
@@ -942,19 +941,19 @@ unlock:
 	if (xas_error(&xas)) {
 		error = xas_error(&xas);
 		if (charged)
-			mem_cgroup_uncharge(page_folio(page));
+			mem_cgroup_uncharge(folio);
 		goto error;
 	}
 
-	trace_mm_filemap_add_to_page_cache(page);
+	trace_mm_filemap_add_to_page_cache(&folio->page);
 	return 0;
 error:
-	page->mapping = NULL;
+	folio->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
-	put_page(page);
+	folio_put(folio);
 	return error;
 }
-ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
+ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
 
 /**
  * add_to_page_cache_locked - add a locked page to the pagecache
@@ -971,39 +970,38 @@ ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 		pgoff_t offset, gfp_t gfp_mask)
 {
-	return __add_to_page_cache_locked(page, mapping, offset,
+	return __filemap_add_folio(mapping, page_folio(page), offset,
 					  gfp_mask, NULL);
 }
 EXPORT_SYMBOL(add_to_page_cache_locked);
 
-int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
-				pgoff_t offset, gfp_t gfp_mask)
+int filemap_add_folio(struct address_space *mapping, struct folio *folio,
+				pgoff_t index, gfp_t gfp)
 {
 	void *shadow = NULL;
 	int ret;
 
-	__SetPageLocked(page);
-	ret = __add_to_page_cache_locked(page, mapping, offset,
-					 gfp_mask, &shadow);
+	__folio_set_locked(folio);
+	ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow);
 	if (unlikely(ret))
-		__ClearPageLocked(page);
+		__folio_clear_locked(folio);
 	else {
 		/*
-		 * The page might have been evicted from cache only
+		 * The folio might have been evicted from cache only
 		 * recently, in which case it should be activated like
-		 * any other repeatedly accessed page.
-		 * The exception is pages getting rewritten; evicting other
+		 * any other repeatedly accessed folio.
+		 * The exception is folios getting rewritten; evicting other
 		 * data from the working set, only to cache data that will
 		 * get overwritten with something else, is a waste of memory.
 		 */
-		WARN_ON_ONCE(PageActive(page));
-		if (!(gfp_mask & __GFP_WRITE) && shadow)
-			workingset_refault(page_folio(page), shadow);
-		lru_cache_add(page);
+		WARN_ON_ONCE(folio_test_active(folio));
+		if (!(gfp & __GFP_WRITE) && shadow)
+			workingset_refault(folio, shadow);
+		folio_add_lru(folio);
 	}
 	return ret;
 }
-EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
+EXPORT_SYMBOL_GPL(filemap_add_folio);
 
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 6de3cd78a4ae..6b19bc4ed6b0 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -108,3 +108,10 @@ void lru_cache_add(struct page *page)
 	folio_add_lru(page_folio(page));
 }
 EXPORT_SYMBOL(lru_cache_add);
+
+int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
+		pgoff_t index, gfp_t gfp)
+{
+	return filemap_add_folio(mapping, page_folio(page), index, gfp);
+}
+EXPORT_SYMBOL(add_to_page_cache_lru);
-- 
cgit v1.2.3


From 3f0c6a07fee6a1c2618a2e12ffb8e9aa24384fde Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Mon, 8 Mar 2021 11:45:35 -0500
Subject: mm/filemap: Add filemap_get_folio

filemap_get_folio() is a replacement for find_get_page().
Turn pagecache_get_page() into a wrapper around __filemap_get_folio().
Remove find_lock_head() as this use case is now covered by
filemap_get_folio().

Reduces overall kernel size by 209 bytes.  __filemap_get_folio() is
316 bytes shorter than pagecache_get_page() was, but the new
pagecache_get_page() wrapper is 99 bytes.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/pagemap.h | 41 +++++++++++-----------
 mm/filemap.c            | 92 +++++++++++++++++++++++--------------------------
 mm/folio-compat.c       | 12 +++++++
 3 files changed, 76 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a10eb6dc3866..401a90336a2c 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -302,8 +302,26 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_HEAD		0x00000080
 #define FGP_ENTRY		0x00000100
 
-struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
-		int fgp_flags, gfp_t cache_gfp_mask);
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+		int fgp_flags, gfp_t gfp);
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
+		int fgp_flags, gfp_t gfp);
+
+/**
+ * filemap_get_folio - Find and get a folio.
+ * @mapping: The address_space to search.
+ * @index: The page index.
+ *
+ * Looks up the page cache entry at @mapping & @index.  If a folio is
+ * present, it is returned with an increased refcount.
+ *
+ * Otherwise, %NULL is returned.
+ */
+static inline struct folio *filemap_get_folio(struct address_space *mapping,
+					pgoff_t index)
+{
+	return __filemap_get_folio(mapping, index, 0, 0);
+}
 
 /**
  * find_get_page - find and get a page reference
@@ -346,25 +364,6 @@ static inline struct page *find_lock_page(struct address_space *mapping,
 	return pagecache_get_page(mapping, index, FGP_LOCK, 0);
 }
 
-/**
- * find_lock_head - Locate, pin and lock a pagecache page.
- * @mapping: The address_space to search.
- * @index: The page index.
- *
- * Looks up the page cache entry at @mapping & @index.  If there is a
- * page cache page, its head page is returned locked and with an increased
- * refcount.
- *
- * Context: May sleep.
- * Return: A struct page which is !PageTail, or %NULL if there is no page
- * in the cache for this index.
- */
-static inline struct page *find_lock_head(struct address_space *mapping,
-					pgoff_t index)
-{
-	return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
-}
-
 /**
  * find_or_create_page - locate or add a pagecache page
  * @mapping: the page's address_space
diff --git a/mm/filemap.c b/mm/filemap.c
index 5c99fd9a789d..de4cc1b9aa9b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1849,93 +1849,89 @@ out:
 }
 
 /**
- * pagecache_get_page - Find and get a reference to a page.
+ * __filemap_get_folio - Find and get a reference to a folio.
  * @mapping: The address_space to search.
  * @index: The page index.
- * @fgp_flags: %FGP flags modify how the page is returned.
- * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified.
+ * @fgp_flags: %FGP flags modify how the folio is returned.
+ * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
  *
  * Looks up the page cache entry at @mapping & @index.
  *
  * @fgp_flags can be zero or more of these flags:
  *
- * * %FGP_ACCESSED - The page will be marked accessed.
- * * %FGP_LOCK - The page is returned locked.
- * * %FGP_HEAD - If the page is present and a THP, return the head page
- *   rather than the exact page specified by the index.
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
  * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it
- *   instead of allocating a new page to replace it.
+ *   instead of allocating a new folio to replace it.
  * * %FGP_CREAT - If no page is present then a new page is allocated using
- *   @gfp_mask and added to the page cache and the VM's LRU list.
+ *   @gfp and added to the page cache and the VM's LRU list.
  *   The page is returned locked and with an increased refcount.
  * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
  *   page is already in cache.  If the page was allocated, unlock it before
  *   returning so the caller can do the same dance.
- * * %FGP_WRITE - The page will be written
- * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask
- * * %FGP_NOWAIT - Don't get blocked by page lock
+ * * %FGP_WRITE - The page will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't get blocked by page lock.
  *
  * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
  * if the %GFP flags specified for %FGP_CREAT are atomic.
  *
  * If there is a page cache page, it is returned with an increased refcount.
  *
- * Return: The found page or %NULL otherwise.
+ * Return: The found folio or %NULL otherwise.
  */
-struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp_mask)
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+		int fgp_flags, gfp_t gfp)
 {
-	struct page *page;
+	struct folio *folio;
 
 repeat:
-	page = mapping_get_entry(mapping, index);
-	if (xa_is_value(page)) {
+	folio = mapping_get_entry(mapping, index);
+	if (xa_is_value(folio)) {
 		if (fgp_flags & FGP_ENTRY)
-			return page;
-		page = NULL;
+			return folio;
+		folio = NULL;
 	}
-	if (!page)
+	if (!folio)
 		goto no_page;
 
 	if (fgp_flags & FGP_LOCK) {
 		if (fgp_flags & FGP_NOWAIT) {
-			if (!trylock_page(page)) {
-				put_page(page);
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
 				return NULL;
 			}
 		} else {
-			lock_page(page);
+			folio_lock(folio);
 		}
 
 		/* Has the page been truncated? */
-		if (unlikely(page->mapping != mapping)) {
-			unlock_page(page);
-			put_page(page);
+		if (unlikely(folio->mapping != mapping)) {
+			folio_unlock(folio);
+			folio_put(folio);
 			goto repeat;
 		}
-		VM_BUG_ON_PAGE(!thp_contains(page, index), page);
+		VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
 	}
 
 	if (fgp_flags & FGP_ACCESSED)
-		mark_page_accessed(page);
+		folio_mark_accessed(folio);
 	else if (fgp_flags & FGP_WRITE) {
 		/* Clear idle flag for buffer write */
-		if (page_is_idle(page))
-			clear_page_idle(page);
+		if (folio_test_idle(folio))
+			folio_clear_idle(folio);
 	}
-	if (!(fgp_flags & FGP_HEAD))
-		page = find_subpage(page, index);
 
 no_page:
-	if (!page && (fgp_flags & FGP_CREAT)) {
+	if (!folio && (fgp_flags & FGP_CREAT)) {
 		int err;
 		if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
-			gfp_mask |= __GFP_WRITE;
+			gfp |= __GFP_WRITE;
 		if (fgp_flags & FGP_NOFS)
-			gfp_mask &= ~__GFP_FS;
+			gfp &= ~__GFP_FS;
 
-		page = __page_cache_alloc(gfp_mask);
-		if (!page)
+		folio = filemap_alloc_folio(gfp, 0);
+		if (!folio)
 			return NULL;
 
 		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
@@ -1943,27 +1939,27 @@ no_page:
 
 		/* Init accessed so avoid atomic mark_page_accessed later */
 		if (fgp_flags & FGP_ACCESSED)
-			__SetPageReferenced(page);
+			__folio_set_referenced(folio);
 
-		err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+		err = filemap_add_folio(mapping, folio, index, gfp);
 		if (unlikely(err)) {
-			put_page(page);
-			page = NULL;
+			folio_put(folio);
+			folio = NULL;
 			if (err == -EEXIST)
 				goto repeat;
 		}
 
 		/*
-		 * add_to_page_cache_lru locks the page, and for mmap we expect
-		 * an unlocked page.
+		 * filemap_add_folio locks the page, and for mmap
+		 * we expect an unlocked page.
 		 */
-		if (page && (fgp_flags & FGP_FOR_MMAP))
-			unlock_page(page);
+		if (folio && (fgp_flags & FGP_FOR_MMAP))
+			folio_unlock(folio);
 	}
 
-	return page;
+	return folio;
 }
-EXPORT_SYMBOL(pagecache_get_page);
+EXPORT_SYMBOL(__filemap_get_folio);
 
 static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
 		xa_mark_t mark)
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 6b19bc4ed6b0..e833e680e944 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -115,3 +115,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 	return filemap_add_folio(mapping, page_folio(page), index, gfp);
 }
 EXPORT_SYMBOL(add_to_page_cache_lru);
+
+struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
+		int fgp_flags, gfp_t gfp)
+{
+	struct folio *folio;
+
+	folio = __filemap_get_folio(mapping, index, fgp_flags, gfp);
+	if ((fgp_flags & FGP_HEAD) || !folio || xa_is_value(folio))
+		return &folio->page;
+	return folio_file_page(folio, index);
+}
+EXPORT_SYMBOL(pagecache_get_page);
-- 
cgit v1.2.3


From b27652d935f41793c5e229a1e8b3a8bb3afe3cc1 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 24 Dec 2020 12:55:56 -0500
Subject: mm/filemap: Add FGP_STABLE

Allow filemap_get_folio() to wait for writeback to complete (if the
filesystem wants that behaviour).  This is the folio equivalent of
grab_cache_page_write_begin(), which is moved into the folio-compat
file as a reminder to migrate all the code using it.  This paves the
way for getting rid of AOP_FLAG_NOFS once grab_cache_page_write_begin()
is removed.

Kernel grows by 11 bytes.  filemap_get_folio() grows by 33 bytes but
grab_cache_page_write_begin() shrinks by 22 bytes to make up for it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
---
 include/linux/pagemap.h |  1 +
 mm/filemap.c            | 25 +++----------------------
 mm/folio-compat.c       | 13 +++++++++++++
 3 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 401a90336a2c..b68b053e581f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -301,6 +301,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_FOR_MMAP		0x00000040
 #define FGP_HEAD		0x00000080
 #define FGP_ENTRY		0x00000100
+#define FGP_STABLE		0x00000200
 
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		int fgp_flags, gfp_t gfp);
diff --git a/mm/filemap.c b/mm/filemap.c
index de4cc1b9aa9b..3e9feb5cc570 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1872,6 +1872,7 @@ out:
  * * %FGP_WRITE - The page will be written to by the caller.
  * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
  * * %FGP_NOWAIT - Don't get blocked by page lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
  *
  * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
  * if the %GFP flags specified for %FGP_CREAT are atomic.
@@ -1922,6 +1923,8 @@ repeat:
 			folio_clear_idle(folio);
 	}
 
+	if (fgp_flags & FGP_STABLE)
+		folio_wait_stable(folio);
 no_page:
 	if (!folio && (fgp_flags & FGP_CREAT)) {
 		int err;
@@ -3704,28 +3707,6 @@ out:
 }
 EXPORT_SYMBOL(generic_file_direct_write);
 
-/*
- * Find or create a page at the given pagecache position. Return the locked
- * page. This function is specifically for buffered writes.
- */
-struct page *grab_cache_page_write_begin(struct address_space *mapping,
-					pgoff_t index, unsigned flags)
-{
-	struct page *page;
-	int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
-
-	if (flags & AOP_FLAG_NOFS)
-		fgp_flags |= FGP_NOFS;
-
-	page = pagecache_get_page(mapping, index, fgp_flags,
-			mapping_gfp_mask(mapping));
-	if (page)
-		wait_for_stable_page(page);
-
-	return page;
-}
-EXPORT_SYMBOL(grab_cache_page_write_begin);
-
 ssize_t generic_perform_write(struct file *file,
 				struct iov_iter *i, loff_t pos)
 {
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index e833e680e944..5b6ae1da314e 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -116,6 +116,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 }
 EXPORT_SYMBOL(add_to_page_cache_lru);
 
+noinline
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
 		int fgp_flags, gfp_t gfp)
 {
@@ -127,3 +128,15 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
 	return folio_file_page(folio, index);
 }
 EXPORT_SYMBOL(pagecache_get_page);
+
+struct page *grab_cache_page_write_begin(struct address_space *mapping,
+					pgoff_t index, unsigned flags)
+{
+	unsigned fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+
+	if (flags & AOP_FLAG_NOFS)
+		fgp_flags |= FGP_NOFS;
+	return pagecache_get_page(mapping, index, fgp_flags,
+			mapping_gfp_mask(mapping));
+}
+EXPORT_SYMBOL(grab_cache_page_write_begin);
-- 
cgit v1.2.3


From 121703c1c817b3c77f61002466d0bfca7e39f25d Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 9 Mar 2021 13:48:03 -0500
Subject: mm/writeback: Add folio_write_one

Transform write_one_page() into folio_write_one() and add a compatibility
wrapper.  Also move the declaration to pagemap.h as this is page cache
functionality that doesn't need to be used by the rest of the kernel.

Saves 58 bytes of kernel text.  While folio_write_one() is 101 bytes
smaller than write_one_page(), the inlined call to page_folio() expands
each caller.  There are fewer than ten callers so it doesn't seem worth
putting a wrapper in the core.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Howells <dhowells@redhat.com>
---
 include/linux/mm.h      |  4 ----
 include/linux/pagemap.h |  5 +++++
 mm/page-writeback.c     | 30 +++++++++++++++---------------
 3 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index efcb06fc6116..40ff114aaf9e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2788,10 +2788,6 @@ extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
 		pgoff_t start_pgoff, pgoff_t end_pgoff);
 extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
 
-/* mm/page-writeback.c */
-int __must_check write_one_page(struct page *page);
-void task_dirty_inc(struct task_struct *tsk);
-
 extern unsigned long stack_guard_gap;
 /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b68b053e581f..013cdc90f5fd 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -803,6 +803,11 @@ static inline void cancel_dirty_page(struct page *page)
 }
 bool folio_clear_dirty_for_io(struct folio *folio);
 bool clear_page_dirty_for_io(struct page *page);
+int __must_check folio_write_one(struct folio *folio);
+static inline int __must_check write_one_page(struct page *page)
+{
+	return folio_write_one(page_folio(page));
+}
 
 int __set_page_dirty_nobuffers(struct page *page);
 int __set_page_dirty_no_writeback(struct page *page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a0c35a9d8029..9c64490171e0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2381,44 +2381,44 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 }
 
 /**
- * write_one_page - write out a single page and wait on I/O
- * @page: the page to write
+ * folio_write_one - write out a single folio and wait on I/O.
+ * @folio: The folio to write.
  *
- * The page must be locked by the caller and will be unlocked upon return.
+ * The folio must be locked by the caller and will be unlocked upon return.
  *
  * Note that the mapping's AS_EIO/AS_ENOSPC flags will be cleared when this
  * function returns.
  *
  * Return: %0 on success, negative error code otherwise
  */
-int write_one_page(struct page *page)
+int folio_write_one(struct folio *folio)
 {
-	struct address_space *mapping = page->mapping;
+	struct address_space *mapping = folio->mapping;
 	int ret = 0;
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 1,
+		.nr_to_write = folio_nr_pages(folio),
 	};
 
-	BUG_ON(!PageLocked(page));
+	BUG_ON(!folio_test_locked(folio));
 
-	wait_on_page_writeback(page);
+	folio_wait_writeback(folio);
 
-	if (clear_page_dirty_for_io(page)) {
-		get_page(page);
-		ret = mapping->a_ops->writepage(page, &wbc);
+	if (folio_clear_dirty_for_io(folio)) {
+		folio_get(folio);
+		ret = mapping->a_ops->writepage(&folio->page, &wbc);
 		if (ret == 0)
-			wait_on_page_writeback(page);
-		put_page(page);
+			folio_wait_writeback(folio);
+		folio_put(folio);
 	} else {
-		unlock_page(page);
+		folio_unlock(folio);
 	}
 
 	if (!ret)
 		ret = filemap_check_errors(mapping);
 	return ret;
 }
-EXPORT_SYMBOL(write_one_page);
+EXPORT_SYMBOL(folio_write_one);
 
 /*
  * For address_spaces which do not use buffers nor write back.
-- 
cgit v1.2.3


From f2efdb17928924c9c935c136dea764a081032006 Mon Sep 17 00:00:00 2001
From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
Date: Sat, 16 Oct 2021 10:49:06 +0200
Subject: u64_stats: Introduce u64_stats_set()

Allow to directly set a u64_stats_t value which is used to provide an init
function which sets it directly to zero intead of memset() the value.

Add u64_stats_set() to the u64_stats API.

[bigeasy: commit message. ]

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/u64_stats_sync.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h
index e81856c0ba13..e8ec116c916b 100644
--- a/include/linux/u64_stats_sync.h
+++ b/include/linux/u64_stats_sync.h
@@ -83,6 +83,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
 	return local64_read(&p->v);
 }
 
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+	local64_set(&p->v, val);
+}
+
 static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
 {
 	local64_add(val, &p->v);
@@ -104,6 +109,11 @@ static inline u64 u64_stats_read(const u64_stats_t *p)
 	return p->v;
 }
 
+static inline void u64_stats_set(u64_stats_t *p, u64 val)
+{
+	p->v = val;
+}
+
 static inline void u64_stats_add(u64_stats_t *p, unsigned long val)
 {
 	p->v += val;
-- 
cgit v1.2.3


From 29cbcd85828372333aa87542c51f2b2b0fd4380c Mon Sep 17 00:00:00 2001
From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
Date: Sat, 16 Oct 2021 10:49:10 +0200
Subject: net: sched: Remove Qdisc::running sequence counter

The Qdisc::running sequence counter has two uses:

  1. Reliably reading qdisc's tc statistics while the qdisc is running
     (a seqcount read/retry loop at gnet_stats_add_basic()).

  2. As a flag, indicating whether the qdisc in question is running
     (without any retry loops).

For the first usage, the Qdisc::running sequence counter write section,
qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what
is actually needed: the raw qdisc's bstats update. A u64_stats sync
point was thus introduced (in previous commits) inside the bstats
structure itself. A local u64_stats write section is then started and
stopped for the bstats updates.

Use that u64_stats sync point mechanism for the bstats read/retry loop
at gnet_stats_add_basic().

For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag,
accessed with atomic bitops, is sufficient. Using a bit flag instead of
a sequence counter at qdisc_run_begin/end() and qdisc_is_running() leads
to the SMP barriers implicitly added through raw_read_seqcount() and
write_seqcount_begin/end() getting removed. All call sites have been
surveyed though, and no required ordering was identified.

Now that the qdisc->running sequence counter is no longer used, remove
it.

Note, using u64_stats implies no sequence counter protection for 64-bit
architectures. This can lead to the qdisc tc statistics "packets" vs.
"bytes" values getting out of sync on rare occasions. The individual
values will still be valid.

Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ----
 include/net/gen_stats.h   | 19 ++++++++----------
 include/net/sch_generic.h | 33 +++++++++++++------------------
 net/core/gen_estimator.c  | 16 +++++++++------
 net/core/gen_stats.c      | 50 ++++++++++++++++++++++++++---------------------
 net/sched/act_api.c       |  9 +++++----
 net/sched/act_police.c    |  2 +-
 net/sched/sch_api.c       | 16 +++------------
 net/sched/sch_atm.c       |  3 +--
 net/sched/sch_cbq.c       |  9 +++------
 net/sched/sch_drr.c       | 10 +++-------
 net/sched/sch_ets.c       |  3 +--
 net/sched/sch_generic.c   | 10 ++--------
 net/sched/sch_hfsc.c      |  8 +++-----
 net/sched/sch_htb.c       |  7 +++----
 net/sched/sch_mq.c        |  7 +++----
 net/sched/sch_mqprio.c    | 14 ++++++-------
 net/sched/sch_multiq.c    |  3 +--
 net/sched/sch_prio.c      |  4 ++--
 net/sched/sch_qfq.c       |  7 +++----
 net/sched/sch_taprio.c    |  2 +-
 21 files changed, 102 insertions(+), 134 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 173984414f38..f9cd6fea213f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type {
  *	@sfp_bus:	attached &struct sfp_bus structure.
  *
  *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
- *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
  *
  *	@proto_down:	protocol port state information can be sent to the
  *			switch driver and used to set the phys state of the
@@ -2250,7 +2249,6 @@ struct net_device {
 	struct phy_device	*phydev;
 	struct sfp_bus		*sfp_bus;
 	struct lock_class_key	*qdisc_tx_busylock;
-	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
 	unsigned		wol_enabled:1;
 	unsigned		threaded:1;
@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 #define netdev_lockdep_set_classes(dev)				\
 {								\
 	static struct lock_class_key qdisc_tx_busylock_key;	\
-	static struct lock_class_key qdisc_running_key;		\
 	static struct lock_class_key qdisc_xmit_lock_key;	\
 	static struct lock_class_key dev_addr_list_lock_key;	\
 	unsigned int i;						\
 								\
 	(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;	\
-	(dev)->qdisc_running_key = &qdisc_running_key;		\
 	lockdep_set_class(&(dev)->addr_list_lock,		\
 			  &dev_addr_list_lock_key);		\
 	for (i = 0; i < (dev)->num_tx_queues; i++)		\
diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h
index 52b87588f467..7aa2b8e1fb29 100644
--- a/include/net/gen_stats.h
+++ b/include/net/gen_stats.h
@@ -46,18 +46,15 @@ int gnet_stats_start_copy_compat(struct sk_buff *skb, int type,
 				 spinlock_t *lock, struct gnet_dump *d,
 				 int padattr);
 
-int gnet_stats_copy_basic(const seqcount_t *running,
-			  struct gnet_dump *d,
+int gnet_stats_copy_basic(struct gnet_dump *d,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b);
-void gnet_stats_add_basic(const seqcount_t *running,
-			  struct gnet_stats_basic_sync *bstats,
+			  struct gnet_stats_basic_sync *b, bool running);
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b);
-int gnet_stats_copy_basic_hw(const seqcount_t *running,
-			     struct gnet_dump *d,
+			  struct gnet_stats_basic_sync *b, bool running);
+int gnet_stats_copy_basic_hw(struct gnet_dump *d,
 			     struct gnet_stats_basic_sync __percpu *cpu,
-			     struct gnet_stats_basic_sync *b);
+			     struct gnet_stats_basic_sync *b, bool running);
 int gnet_stats_copy_rate_est(struct gnet_dump *d,
 			     struct net_rate_estimator __rcu **ptr);
 int gnet_stats_copy_queue(struct gnet_dump *d,
@@ -74,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
 		      spinlock_t *lock,
-		      seqcount_t *running, struct nlattr *opt);
+		      bool running, struct nlattr *opt);
 void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
 int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **ptr,
 			  spinlock_t *lock,
-			  seqcount_t *running, struct nlattr *opt);
+			  bool running, struct nlattr *opt);
 bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
 bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
 			struct gnet_stats_rate_est64 *sample);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7882e3aa6448..baad2ab4d971 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -38,6 +38,10 @@ enum qdisc_state_t {
 	__QDISC_STATE_DEACTIVATED,
 	__QDISC_STATE_MISSED,
 	__QDISC_STATE_DRAINING,
+	/* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
+	 * Use qdisc_run_begin/end() or qdisc_is_running() instead.
+	 */
+	__QDISC_STATE_RUNNING,
 };
 
 #define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
@@ -108,7 +112,6 @@ struct Qdisc {
 	struct sk_buff_head	gso_skb ____cacheline_aligned_in_smp;
 	struct qdisc_skb_head	q;
 	struct gnet_stats_basic_sync bstats;
-	seqcount_t		running;
 	struct gnet_stats_queue	qstats;
 	unsigned long		state;
 	struct Qdisc            *next_sched;
@@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc)
 	return NULL;
 }
 
+/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
+ * root_lock section, or provide their own memory barriers -- ordering
+ * against qdisc_run_begin/end() atomic bit operations.
+ */
 static inline bool qdisc_is_running(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK)
 		return spin_is_locked(&qdisc->seqlock);
-	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
+	return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
 }
 
 static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
@@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 	return !READ_ONCE(qdisc->q.qlen);
 }
 
+/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
+ * the qdisc root lock acquired.
+ */
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK) {
@@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 		 * after it releases the lock at the end of qdisc_run_end().
 		 */
 		return spin_trylock(&qdisc->seqlock);
-	} else if (qdisc_is_running(qdisc)) {
-		return false;
 	}
-	/* Variant of write_seqcount_begin() telling lockdep a trylock
-	 * was attempted.
-	 */
-	raw_write_seqcount_begin(&qdisc->running);
-	seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
-	return true;
+	return test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
 }
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
@@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct Qdisc *qdisc)
 				      &qdisc->state)))
 			__netif_schedule(qdisc);
 	} else {
-		write_seqcount_end(&qdisc->running);
+		clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
 	}
 }
 
@@ -592,14 +595,6 @@ static inline spinlock_t *qdisc_root_sleeping_lock(const struct Qdisc *qdisc)
 	return qdisc_lock(root);
 }
 
-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
-{
-	struct Qdisc *root = qdisc_root_sleeping(qdisc);
-
-	ASSERT_RTNL();
-	return &root->running;
-}
-
 static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
 {
 	return qdisc->dev_queue->dev;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index a73ad0bf324c..4fcbdd71c59f 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -42,7 +42,7 @@
 struct net_rate_estimator {
 	struct gnet_stats_basic_sync	*bstats;
 	spinlock_t		*stats_lock;
-	seqcount_t		*running;
+	bool			running;
 	struct gnet_stats_basic_sync __percpu *cpu_bstats;
 	u8			ewma_log;
 	u8			intvl_log; /* period : (250ms << intvl_log) */
@@ -66,7 +66,7 @@ static void est_fetch_counters(struct net_rate_estimator *e,
 	if (e->stats_lock)
 		spin_lock(e->stats_lock);
 
-	gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats);
+	gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
 
 	if (e->stats_lock)
 		spin_unlock(e->stats_lock);
@@ -113,7 +113,9 @@ static void est_timer(struct timer_list *t)
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @lock: lock for statistics and control path
- * @running: qdisc running seqcount
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ *           internal values might change during basic reads. Only used
+ *           if @bstats_cpu is NULL
  * @opt: rate estimator configuration TLV
  *
  * Creates a new rate estimator with &bstats as source and &rate_est
@@ -129,7 +131,7 @@ int gen_new_estimator(struct gnet_stats_basic_sync *bstats,
 		      struct gnet_stats_basic_sync __percpu *cpu_bstats,
 		      struct net_rate_estimator __rcu **rate_est,
 		      spinlock_t *lock,
-		      seqcount_t *running,
+		      bool running,
 		      struct nlattr *opt)
 {
 	struct gnet_estimator *parm = nla_data(opt);
@@ -218,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
  * @cpu_bstats: bstats per cpu
  * @rate_est: rate estimator statistics
  * @lock: lock for statistics and control path
- * @running: qdisc running seqcount (might be NULL)
+ * @running: true if @bstats represents a running qdisc, thus @bstats'
+ *           internal values might change during basic reads. Only used
+ *           if @cpu_bstats is NULL
  * @opt: rate estimator configuration TLV
  *
  * Replaces the configuration of a rate estimator by calling
@@ -230,7 +234,7 @@ int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu_bstats,
 			  struct net_rate_estimator __rcu **rate_est,
 			  spinlock_t *lock,
-			  seqcount_t *running, struct nlattr *opt)
+			  bool running, struct nlattr *opt)
 {
 	return gen_new_estimator(bstats, cpu_bstats, rate_est,
 				 lock, running, opt);
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 5f57f761def6..5516ea0d5da0 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -146,42 +146,42 @@ static void gnet_stats_add_basic_cpu(struct gnet_stats_basic_sync *bstats,
 	_bstats_update(bstats, t_bytes, t_packets);
 }
 
-void gnet_stats_add_basic(const seqcount_t *running,
-			  struct gnet_stats_basic_sync *bstats,
+void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
 			  struct gnet_stats_basic_sync __percpu *cpu,
-			  struct gnet_stats_basic_sync *b)
+			  struct gnet_stats_basic_sync *b, bool running)
 {
-	unsigned int seq;
+	unsigned int start;
 	u64 bytes = 0;
 	u64 packets = 0;
 
+	WARN_ON_ONCE((cpu || running) && !in_task());
+
 	if (cpu) {
 		gnet_stats_add_basic_cpu(bstats, cpu);
 		return;
 	}
 	do {
 		if (running)
-			seq = read_seqcount_begin(running);
+			start = u64_stats_fetch_begin_irq(&b->syncp);
 		bytes = u64_stats_read(&b->bytes);
 		packets = u64_stats_read(&b->packets);
-	} while (running && read_seqcount_retry(running, seq));
+	} while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
 
 	_bstats_update(bstats, bytes, packets);
 }
 EXPORT_SYMBOL(gnet_stats_add_basic);
 
 static int
-___gnet_stats_copy_basic(const seqcount_t *running,
-			 struct gnet_dump *d,
+___gnet_stats_copy_basic(struct gnet_dump *d,
 			 struct gnet_stats_basic_sync __percpu *cpu,
 			 struct gnet_stats_basic_sync *b,
-			 int type)
+			 int type, bool running)
 {
 	struct gnet_stats_basic_sync bstats;
 	u64 bstats_bytes, bstats_packets;
 
 	gnet_stats_basic_sync_init(&bstats);
-	gnet_stats_add_basic(running, &bstats, cpu, b);
+	gnet_stats_add_basic(&bstats, cpu, b, running);
 
 	bstats_bytes = u64_stats_read(&bstats.bytes);
 	bstats_packets = u64_stats_read(&bstats.packets);
@@ -210,10 +210,14 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
 
 /**
  * gnet_stats_copy_basic - copy basic statistics into statistic TLV
- * @running: seqcount_t pointer
  * @d: dumping handle
  * @cpu: copy statistic per cpu
  * @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ *           internal values might change during basic reads.
+ *           Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
  *
  * Appends the basic statistics to the top level TLV created by
  * gnet_stats_start_copy().
@@ -222,22 +226,25 @@ ___gnet_stats_copy_basic(const seqcount_t *running,
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic(const seqcount_t *running,
-		      struct gnet_dump *d,
+gnet_stats_copy_basic(struct gnet_dump *d,
 		      struct gnet_stats_basic_sync __percpu *cpu,
-		      struct gnet_stats_basic_sync *b)
+		      struct gnet_stats_basic_sync *b,
+		      bool running)
 {
-	return ___gnet_stats_copy_basic(running, d, cpu, b,
-					TCA_STATS_BASIC);
+	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
 }
 EXPORT_SYMBOL(gnet_stats_copy_basic);
 
 /**
  * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
- * @running: seqcount_t pointer
  * @d: dumping handle
  * @cpu: copy statistic per cpu
  * @b: basic statistics
+ * @running: true if @b represents a running qdisc, thus @b's
+ *           internal values might change during basic reads.
+ *           Only used if @cpu is NULL
+ *
+ * Context: task; must not be run from IRQ or BH contexts
  *
  * Appends the basic statistics to the top level TLV created by
  * gnet_stats_start_copy().
@@ -246,13 +253,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
  * if the room in the socket buffer was not sufficient.
  */
 int
-gnet_stats_copy_basic_hw(const seqcount_t *running,
-			 struct gnet_dump *d,
+gnet_stats_copy_basic_hw(struct gnet_dump *d,
 			 struct gnet_stats_basic_sync __percpu *cpu,
-			 struct gnet_stats_basic_sync *b)
+			 struct gnet_stats_basic_sync *b,
+			 bool running)
 {
-	return ___gnet_stats_copy_basic(running, d, cpu, b,
-					TCA_STATS_BASIC_HW);
+	return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
 }
 EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
 
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 585829ffa0c4..3258da3d5bed 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -501,7 +501,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
 	if (est) {
 		err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
 					&p->tcfa_rate_est,
-					&p->tcfa_lock, NULL, est);
+					&p->tcfa_lock, false, est);
 		if (err)
 			goto err4;
 	}
@@ -1173,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
 	if (err < 0)
 		goto errout;
 
-	if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
-	    gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
-				     &p->tcfa_bstats_hw) < 0 ||
+	if (gnet_stats_copy_basic(&d, p->cpu_bstats,
+				  &p->tcfa_bstats, false) < 0 ||
+	    gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
+				     &p->tcfa_bstats_hw, false) < 0 ||
 	    gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, p->cpu_qstats,
 				  &p->tcfa_qstats,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index c9383805222d..9e77ba8401e5 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 					    police->common.cpu_bstats,
 					    &police->tcf_rate_est,
 					    &police->tcf_lock,
-					    NULL, est);
+					    false, est);
 		if (err)
 			goto failure;
 	} else if (tb[TCA_POLICE_AVRATE] &&
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 70f006cbf212..efcd0b5e9a32 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -943,8 +943,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
 		cpu_qstats = q->cpu_qstats;
 	}
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
-				  &d, cpu_bstats, &q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
 		goto nla_put_failure;
@@ -1265,26 +1264,17 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 		rcu_assign_pointer(sch->stab, stab);
 	}
 	if (tca[TCA_RATE]) {
-		seqcount_t *running;
-
 		err = -EOPNOTSUPP;
 		if (sch->flags & TCQ_F_MQROOT) {
 			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
 			goto err_out4;
 		}
 
-		if (sch->parent != TC_H_ROOT &&
-		    !(sch->flags & TCQ_F_INGRESS) &&
-		    (!p || !(p->flags & TCQ_F_MQROOT)))
-			running = qdisc_root_sleeping_running(sch);
-		else
-			running = &sch->running;
-
 		err = gen_new_estimator(&sch->bstats,
 					sch->cpu_bstats,
 					&sch->rate_est,
 					NULL,
-					running,
+					true,
 					tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
@@ -1360,7 +1350,7 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
 				      sch->cpu_bstats,
 				      &sch->rate_est,
 				      NULL,
-				      qdisc_root_sleeping_running(sch),
+				      true,
 				      tca[TCA_RATE]);
 	}
 out:
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index fbfe4ce9497b..4c8e994cf0a5 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -653,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 {
 	struct atm_flow_data *flow = (struct atm_flow_data *)arg;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &flow->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
 		return -1;
 
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index f0b1282fae11..02d9f0dfe356 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1383,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	if (cl->undertime != PSCHED_PASTPERFECT)
 		cl->xstats.undertime = cl->undertime - q->now;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
@@ -1518,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err) {
 				NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
@@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
 
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL,
-					qdisc_root_sleeping_running(sch),
-					tca[TCA_RATE]);
+					NULL, true, tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
 			tcf_block_put(cl->block);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 7243617a3595..18e4f7a0b291 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		if (tca[TCA_RATE]) {
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
-						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    NULL, true,
 						    tca[TCA_RATE]);
 			if (err) {
 				NL_SET_ERR_MSG(extack, "Failed to replace estimator");
@@ -119,9 +118,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE]) {
 		err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
-					    NULL,
-					    qdisc_root_sleeping_running(sch),
-					    tca[TCA_RATE]);
+					    NULL, true, tca[TCA_RATE]);
 		if (err) {
 			NL_SET_ERR_MSG(extack, "Failed to replace estimator");
 			qdisc_put(cl->qdisc);
@@ -268,8 +265,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	if (qlen)
 		xstats.deficit = cl->deficit;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
 		return -1;
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
index af56d155e7fc..0eae9ff5edf6 100644
--- a/net/sched/sch_ets.c
+++ b/net/sched/sch_ets.c
@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
 	struct ets_class *cl = ets_class_from_arg(sch, arg);
 	struct Qdisc *cl_q = cl->qdisc;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl_q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 989186e7f1a0..b0ff0dff2773 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -304,8 +304,8 @@ trace:
 
 /*
  * Transmit possibly several skbs, and handle the return status as
- * required. Owning running seqcount bit guarantees that
- * only one CPU can execute this function.
+ * required. Owning qdisc running bit guarantees that only one CPU
+ * can execute this function.
  *
  * Returns to the caller:
  *				false  - hardware queue frozen backoff
@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = {
 	.ops		=	&noop_qdisc_ops,
 	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
 	.dev_queue	=	&noop_netdev_queue,
-	.running	=	SEQCNT_ZERO(noop_qdisc.running),
 	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
 	.gso_skb = {
 		.next = (struct sk_buff *)&noop_qdisc.gso_skb,
@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 EXPORT_SYMBOL(pfifo_fast_ops);
 
 static struct lock_class_key qdisc_tx_busylock;
-static struct lock_class_key qdisc_running_key;
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  const struct Qdisc_ops *ops,
@@ -917,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	lockdep_set_class(&sch->seqlock,
 			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
 
-	seqcount_init(&sch->running);
-	lockdep_set_class(&sch->running,
-			  dev->qdisc_running_key ?: &qdisc_running_key);
-
 	sch->ops = ops;
 	sch->flags = ops->static_flags;
 	sch->enqueue = ops->enqueue;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 181c2905ff98..d3979a6000e7 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 
 	if (tca[TCA_RATE]) {
 		err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
-					NULL,
-					qdisc_root_sleeping_running(sch),
-					tca[TCA_RATE]);
+					NULL, true, tca[TCA_RATE]);
 		if (err) {
 			tcf_block_put(cl->block);
 			kfree(cl);
@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	xstats.work    = cl->cl_total;
 	xstats.rtwork  = cl->cl_cumul;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index adceb9e210f6..cf1d45db4e84 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1368,8 +1368,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 		}
 	}
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
 		return -1;
@@ -1865,7 +1864,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			err = gen_new_estimator(&cl->bstats, NULL,
 						&cl->rate_est,
 						NULL,
-						qdisc_root_sleeping_running(sch),
+						true,
 						tca[TCA_RATE] ? : &est.nla);
 			if (err)
 				goto err_block_put;
@@ -1991,7 +1990,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index cedd0b3ef9cf..83d2e54bf303 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -144,8 +144,8 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
 		spin_lock_bh(qdisc_lock(qdisc));
 
-		gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats);
+		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, false);
 		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
 		sch->q.qlen += qdisc_qlen(qdisc);
@@ -231,8 +231,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
 
 	sch = dev_queue->qdisc_sleeping;
-	if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
-				  &sch->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3f7f756f92ca..b29f3453c6ea 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -402,8 +402,8 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
 		qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
 		spin_lock_bh(qdisc_lock(qdisc));
 
-		gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats,
-				     &qdisc->bstats);
+		gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
+				     &qdisc->bstats, false);
 		gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
 				     &qdisc->qstats);
 		sch->q.qlen += qdisc_qlen(qdisc);
@@ -519,8 +519,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
 			spin_lock_bh(qdisc_lock(qdisc));
 
-			gnet_stats_add_basic(NULL, &bstats, qdisc->cpu_bstats,
-					     &qdisc->bstats);
+			gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
+					     &qdisc->bstats, false);
 			gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
 					     &qdisc->qstats);
 			sch->q.qlen += qdisc_qlen(qdisc);
@@ -532,15 +532,15 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 		/* Reclaim root sleeping lock before completing stats */
 		if (d->lock)
 			spin_lock_bh(d->lock);
-		if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
+		if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
 		    gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
 			return -1;
 	} else {
 		struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
 
 		sch = dev_queue->qdisc_sleeping;
-		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
-					  sch->cpu_bstats, &sch->bstats) < 0 ||
+		if (gnet_stats_copy_basic(d, sch->cpu_bstats,
+					  &sch->bstats, true) < 0 ||
 		    qdisc_qstats_copy(d, sch) < 0)
 			return -1;
 	}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index e282e7382117..cd8ab90c4765 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct Qdisc *cl_q;
 
 	cl_q = q->queues[cl - 1];
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 03fdf31ccb6a..3b8d7197c06b 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct Qdisc *cl_q;
 
 	cl_q = q->queues[cl - 1];
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
+				  &cl_q->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index a35200f591a2..0b7f9ba28deb 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 			err = gen_replace_estimator(&cl->bstats, NULL,
 						    &cl->rate_est,
 						    NULL,
-						    qdisc_root_sleeping_running(sch),
+						    true,
 						    tca[TCA_RATE]);
 			if (err)
 				return err;
@@ -478,7 +478,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 		err = gen_new_estimator(&cl->bstats, NULL,
 					&cl->rate_est,
 					NULL,
-					qdisc_root_sleeping_running(sch),
+					true,
 					tca[TCA_RATE]);
 		if (err)
 			goto destroy_class;
@@ -640,8 +640,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	xstats.weight = cl->agg->class_weight;
 	xstats.lmax = cl->agg->lmax;
 
-	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
-				  d, NULL, &cl->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
 	    qdisc_qstats_copy(d, cl->qdisc) < 0)
 		return -1;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index b9fd18d98646..9ab068fa2672 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1977,7 +1977,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
 
 	sch = dev_queue->qdisc_sleeping;
-	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
+	if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
 	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
-- 
cgit v1.2.3


From 82a8daaecfd9382e9450a05f86be8a274cf69a27 Mon Sep 17 00:00:00 2001
From: Marc Bonnici <marc.bonnici@arm.com>
Date: Fri, 15 Oct 2021 17:57:42 +0100
Subject: firmware: arm_ffa: Add support for MEM_LEND

As part of the FF-A spec, an endpoint is allowed to transfer access of,
or lend, a memory region to one or more borrowers.

Extend the existing memory sharing implementation to support
FF-A MEM_LEND functionality and expose this to other kernel drivers.

Note that upon a successful MEM_LEND request the caller must ensure that
the memory region specified is not accessed until a successful
MEM_RECALIM call has been made. On systems with a hypervisor present
this will been enforced, however on systems without a hypervisor the
responsibility falls to the calling kernel driver to prevent access.

Link: https://lore.kernel.org/r/20211015165742.2513065-1-marc.bonnici@arm.com
Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
Signed-off-by: Marc Bonnici <marc.bonnici@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_ffa/driver.c | 17 +++++++++++++++++
 include/linux/arm_ffa.h           |  2 ++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
index 6e0c883ab708..12f4c87c4555 100644
--- a/drivers/firmware/arm_ffa/driver.c
+++ b/drivers/firmware/arm_ffa/driver.c
@@ -613,6 +613,22 @@ ffa_memory_share(struct ffa_device *dev, struct ffa_mem_ops_args *args)
 	return ffa_memory_ops(FFA_FN_NATIVE(MEM_SHARE), args);
 }
 
+static int
+ffa_memory_lend(struct ffa_device *dev, struct ffa_mem_ops_args *args)
+{
+	/* Note that upon a successful MEM_LEND request the caller
+	 * must ensure that the memory region specified is not accessed
+	 * until a successful MEM_RECALIM call has been made.
+	 * On systems with a hypervisor present this will been enforced,
+	 * however on systems without a hypervisor the responsibility
+	 * falls to the calling kernel driver to prevent access.
+	 */
+	if (dev->mode_32bit)
+		return ffa_memory_ops(FFA_MEM_LEND, args);
+
+	return ffa_memory_ops(FFA_FN_NATIVE(MEM_LEND), args);
+}
+
 static const struct ffa_dev_ops ffa_ops = {
 	.api_version_get = ffa_api_version_get,
 	.partition_info_get = ffa_partition_info_get,
@@ -620,6 +636,7 @@ static const struct ffa_dev_ops ffa_ops = {
 	.sync_send_receive = ffa_sync_send_receive,
 	.memory_reclaim = ffa_memory_reclaim,
 	.memory_share = ffa_memory_share,
+	.memory_lend = ffa_memory_lend,
 };
 
 const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev)
diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h
index 505c679b6a9b..85651e41ded8 100644
--- a/include/linux/arm_ffa.h
+++ b/include/linux/arm_ffa.h
@@ -262,6 +262,8 @@ struct ffa_dev_ops {
 	int (*memory_reclaim)(u64 g_handle, u32 flags);
 	int (*memory_share)(struct ffa_device *dev,
 			    struct ffa_mem_ops_args *args);
+	int (*memory_lend)(struct ffa_device *dev,
+			   struct ffa_mem_ops_args *args);
 };
 
 #endif /* _LINUX_ARM_FFA_H */
-- 
cgit v1.2.3


From 348332e000697b4ca82ef96719e02876434b8346 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:12 +0200
Subject: mm: don't include <linux/blk-cgroup.h> in <linux/writeback.h>

blk-cgroup.h pulls in blkdev.h and thus pretty much all the block
headers.  Break this dependency chain by turning wbc_blkcg_css into a
macro and dropping the blk-cgroup.h include.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/gpu/drm/i915/i915_utils.h |  1 +
 fs/btrfs/inode.c                  |  1 +
 fs/quota/quota.c                  |  1 +
 include/linux/writeback.h         | 14 +++++---------
 lib/random32.c                    |  1 +
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 5259edacde38..066a9118c374 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -30,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
+#include <linux/sched/clock.h>
 
 struct drm_i915_private;
 struct timer_list;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 487533c35ddb..4a9077c52444 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6,6 +6,7 @@
 #include <crypto/hash.h>
 #include <linux/kernel.h>
 #include <linux/bio.h>
+#include <linux/blk-cgroup.h>
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 2bcc9a6f1bfc..052f143e2e0e 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -10,6 +10,7 @@
 #include <linux/namei.h>
 #include <linux/slab.h>
 #include <asm/current.h>
+#include <linux/blkdev.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
 #include <linux/security.h>
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index d1f65adf6a26..8eb165760752 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -11,7 +11,6 @@
 #include <linux/flex_proportions.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/blk_types.h>
-#include <linux/blk-cgroup.h>
 
 struct bio;
 
@@ -109,15 +108,12 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc)
 	return flags;
 }
 
-static inline struct cgroup_subsys_state *
-wbc_blkcg_css(struct writeback_control *wbc)
-{
 #ifdef CONFIG_CGROUP_WRITEBACK
-	if (wbc->wb)
-		return wbc->wb->blkcg_css;
-#endif
-	return blkcg_root_css;
-}
+#define wbc_blkcg_css(wbc) \
+	((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css)
+#else
+#define wbc_blkcg_css(wbc)		(blkcg_root_css)
+#endif /* CONFIG_CGROUP_WRITEBACK */
 
 /*
  * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
diff --git a/lib/random32.c b/lib/random32.c
index 4d0e05e471d7..a57a0e18819d 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -39,6 +39,7 @@
 #include <linux/random.h>
 #include <linux/sched.h>
 #include <linux/bitops.h>
+#include <linux/slab.h>
 #include <asm/unaligned.h>
 #include <trace/events/random.h>
 
-- 
cgit v1.2.3


From e41d12f539f7c8bac9b0897031fee6cc9158a6be Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:13 +0200
Subject: mm: don't include <linux/blk-cgroup.h> in <linux/backing-dev.h>

There is no need to pull blk-cgroup.h and thus blkdev.h in here, so
break the include chain.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/powerpc/platforms/cell/spufs/inode.c | 1 +
 block/blk-iolatency.c                     | 1 +
 block/blk-mq.c                            | 1 +
 block/bounce.c                            | 1 +
 drivers/md/dm-ima.c                       | 1 +
 fs/btrfs/compression.c                    | 1 +
 fs/btrfs/ctree.c                          | 1 +
 fs/f2fs/compress.c                        | 1 +
 fs/orangefs/super.c                       | 1 +
 fs/ramfs/inode.c                          | 1 +
 include/linux/backing-dev.h               | 3 ++-
 mm/backing-dev.c                          | 3 ++-
 mm/swapfile.c                             | 2 +-
 13 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index bed05b644c2c..cb25acccd746 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -21,6 +21,7 @@
 #include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/poll.h>
+#include <linux/seq_file.h>
 #include <linux/slab.h>
 
 #include <asm/prom.h>
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index c0545f9da549..6593c7123b97 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -74,6 +74,7 @@
 #include <linux/sched/signal.h>
 #include <trace/events/block.h>
 #include <linux/blk-mq.h>
+#include <linux/blk-cgroup.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 #include "blk.h"
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bc026372de43..0fefe00cebd4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -16,6 +16,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/smp.h>
+#include <linux/interrupt.h>
 #include <linux/llist.h>
 #include <linux/list_sort.h>
 #include <linux/cpu.h>
diff --git a/block/bounce.c b/block/bounce.c
index 05fc7148489d..7af1a72835b9 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -14,6 +14,7 @@
 #include <linux/pagemap.h>
 #include <linux/mempool.h>
 #include <linux/blkdev.h>
+#include <linux/blk-cgroup.h>
 #include <linux/backing-dev.h>
 #include <linux/init.h>
 #include <linux/hash.h>
diff --git a/drivers/md/dm-ima.c b/drivers/md/dm-ima.c
index 2c5edfbd7711..957999998d70 100644
--- a/drivers/md/dm-ima.c
+++ b/drivers/md/dm-ima.c
@@ -12,6 +12,7 @@
 #include "dm-ima.h"
 
 #include <linux/ima.h>
+#include <linux/sched/mm.h>
 #include <crypto/hash.h>
 #include <linux/crypto.h>
 #include <crypto/hash_info.h>
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 7869ad12bc6e..ddc4f5436cc9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -9,6 +9,7 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
+#include <linux/kthread.h>
 #include <linux/time.h>
 #include <linux/init.h>
 #include <linux/string.h>
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 84627cbd5b5b..66290b214f2b 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/mm.h>
+#include <linux/error-injection.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index c1bf9ad4c220..20a083dc9042 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -7,6 +7,7 @@
 
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
+#include <linux/moduleparam.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/lzo.h>
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
index 2f2e430461b2..8bb0a53a658b 100644
--- a/fs/orangefs/super.c
+++ b/fs/orangefs/super.c
@@ -11,6 +11,7 @@
 
 #include <linux/parser.h>
 #include <linux/hashtable.h>
+#include <linux/seq_file.h>
 
 /* a cache for orangefs-inode objects (i.e. orangefs inode private data) */
 static struct kmem_cache *orangefs_inode_cache;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 65e7e56005b8..e2302342a67f 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -38,6 +38,7 @@
 #include <linux/uaccess.h>
 #include <linux/fs_context.h>
 #include <linux/fs_parser.h>
+#include <linux/seq_file.h>
 #include "internal.h"
 
 struct ramfs_mount_opts {
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ac7f231b8825..843bf4be675f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -15,10 +15,11 @@
 #include <linux/blkdev.h>
 #include <linux/device.h>
 #include <linux/writeback.h>
-#include <linux/blk-cgroup.h>
 #include <linux/backing-dev-defs.h>
 #include <linux/slab.h>
 
+struct blkcg;
+
 static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
 {
 	kref_get(&bdi->refcnt);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 4a9d4e27d0d9..5245744437dc 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -2,8 +2,9 @@
 
 #include <linux/wait.h>
 #include <linux/rbtree.h>
-#include <linux/backing-dev.h>
 #include <linux/kthread.h>
+#include <linux/backing-dev.h>
+#include <linux/blk-cgroup.h>
 #include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/pagemap.h>
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 22d10f713848..30db362749c0 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -18,7 +18,7 @@
 #include <linux/pagemap.h>
 #include <linux/namei.h>
 #include <linux/shmem_fs.h>
-#include <linux/blkdev.h>
+#include <linux/blk-cgroup.h>
 #include <linux/random.h>
 #include <linux/writeback.h>
 #include <linux/proc_fs.h>
-- 
cgit v1.2.3


From ccdf774189b6466457ca9c7de1fe9ed18547d249 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:14 +0200
Subject: mm: don't include <linux/blkdev.h> in <linux/backing-dev.h>

Move inode_to_bdi out of line to avoid having to include blkdev.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/mtd/mtdsuper.c      |  1 +
 fs/ntfs/file.c              |  1 +
 fs/ntfs3/file.c             |  1 +
 fs/orangefs/inode.c         |  2 +-
 include/linux/backing-dev.h | 16 +---------------
 mm/backing-dev.c            | 16 ++++++++++++++++
 6 files changed, 21 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c
index 38b6aa849c63..5ff001140ef4 100644
--- a/drivers/mtd/mtdsuper.c
+++ b/drivers/mtd/mtdsuper.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/major.h>
 #include <linux/backing-dev.h>
+#include <linux/blkdev.h>
 #include <linux/fs_context.h>
 #include "mtdcore.h"
 
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index ab4f3362466d..373dbb627657 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2001-2015 Anton Altaparmakov and Tuxera Inc.
  */
 
+#include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/gfp.h>
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 43b1451bff53..a3cd3c3f091e 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -8,6 +8,7 @@
  */
 
 #include <linux/backing-dev.h>
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/compat.h>
 #include <linux/falloc.h>
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index c1bb4c4b5d67..e5e3e500ed46 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -10,7 +10,7 @@
  *  Linux VFS inode operations.
  */
 
-#include <linux/bvec.h>
+#include <linux/blkdev.h>
 #include <linux/fileattr.h>
 #include "protocol.h"
 #include "orangefs-kernel.h"
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 843bf4be675f..4ac7ce096013 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -12,7 +12,6 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/sched.h>
-#include <linux/blkdev.h>
 #include <linux/device.h>
 #include <linux/writeback.h>
 #include <linux/backing-dev-defs.h>
@@ -134,20 +133,7 @@ static inline bool writeback_in_progress(struct bdi_writeback *wb)
 	return test_bit(WB_writeback_running, &wb->state);
 }
 
-static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
-{
-	struct super_block *sb;
-
-	if (!inode)
-		return &noop_backing_dev_info;
-
-	sb = inode->i_sb;
-#ifdef CONFIG_BLOCK
-	if (sb_is_blkdev_sb(sb))
-		return I_BDEV(inode)->bd_disk->bdi;
-#endif
-	return sb->s_bdi;
-}
+struct backing_dev_info *inode_to_bdi(struct inode *inode);
 
 static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 {
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 5245744437dc..c878d995af06 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -978,6 +978,22 @@ void bdi_put(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_put);
 
+struct backing_dev_info *inode_to_bdi(struct inode *inode)
+{
+	struct super_block *sb;
+
+	if (!inode)
+		return &noop_backing_dev_info;
+
+	sb = inode->i_sb;
+#ifdef CONFIG_BLOCK
+	if (sb_is_blkdev_sb(sb))
+		return I_BDEV(inode)->bd_disk->bdi;
+#endif
+	return sb->s_bdi;
+}
+EXPORT_SYMBOL(inode_to_bdi);
+
 const char *bdi_dev_name(struct backing_dev_info *bdi)
 {
 	if (!bdi || !bdi->dev)
-- 
cgit v1.2.3


From 1d9433cdd04adf7cfd5e3ef81f5acb29d80aae9b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:19 +0200
Subject: block: remove the unused rq_end_sector macro

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-rq.c       | 1 -
 include/linux/elevator.h | 1 -
 2 files changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index a896dea9750e..579ab6183d4d 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -7,7 +7,6 @@
 #include "dm-core.h"
 #include "dm-rq.h"
 
-#include <linux/elevator.h> /* for rq_end_sector() */
 #include <linux/blk-mq.h>
 
 #define DM_MSG_PREFIX "core-rq"
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index ef9ceead3db1..80633f3b600c 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -162,7 +162,6 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t);
 #define ELEVATOR_INSERT_FLUSH	5
 #define ELEVATOR_INSERT_SORT_MERGE	6
 
-#define rq_end_sector(rq)	(blk_rq_pos(rq) + blk_rq_sectors(rq))
 #define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
 
 #define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
-- 
cgit v1.2.3


From 90138237a56282f99d71130ab7c68903a2eba5a7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:20 +0200
Subject: block: remove the unused blk_queue_state enum

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-10-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 12b9dbcc980e..9e367509bea1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -262,11 +262,6 @@ enum blk_eh_timer_return {
 	BLK_EH_RESET_TIMER,	/* reset timer and try again */
 };
 
-enum blk_queue_state {
-	Queue_down,
-	Queue_up,
-};
-
 #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
 #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
 
-- 
cgit v1.2.3


From 713e4e11088875bf7aee3df81b167c8b2f6c5d65 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:21 +0200
Subject: block: remove the cmd_size field from struct request_queue

Entirely unused.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9e367509bea1..6737e484280d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -544,8 +544,6 @@ struct request_queue {
 
 	bool			mq_sysfs_init_done;
 
-	size_t			cmd_size;
-
 #define BLK_MAX_WRITE_HINTS	5
 	u64			write_hints[BLK_MAX_WRITE_HINTS];
 };
-- 
cgit v1.2.3


From 9778ac77c2027827ffdbb33d3e936b3a0ae9f0f9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:22 +0200
Subject: block: remove the struct blk_queue_ctx forward declaration

This type doesn't exist at all, so no need to forward declare it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6737e484280d..4bcbb1ae2d66 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -253,8 +253,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
 
 #include <linux/elevator.h>
 
-struct blk_queue_ctx;
-
 struct bio_vec;
 
 enum blk_eh_timer_return {
-- 
cgit v1.2.3


From 2e9bc3465ac54d282b855b073409c2c3a7d1ae00 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:23 +0200
Subject: block: move elevator.h to block/

Except for the features passed to blk_queue_required_elevator_features,
elevator.h is only needed internally to the block layer.  Move the
ELEVATOR_F_* definitions to blkdev.h, and the move elevator.h to
block/, dropping all the spurious includes outside of that.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-13-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c       |   2 +-
 block/bfq-iosched.c      |   2 +-
 block/blk-mq-sched.h     |   1 +
 block/blk-mq-tag.h       |   2 +
 block/blk.h              |   2 +
 block/elevator.c         |   2 +-
 block/elevator.h         | 166 +++++++++++++++++++++++++++++++++++++++++++
 block/kyber-iosched.c    |   2 +-
 block/mq-deadline.c      |   2 +-
 drivers/block/amiflop.c  |   1 -
 drivers/scsi/lpfc/lpfc.h |   1 +
 include/linux/blkdev.h   |  11 ++-
 include/linux/elevator.h | 180 -----------------------------------------------
 init/main.c              |   1 -
 14 files changed, 186 insertions(+), 189 deletions(-)
 create mode 100644 block/elevator.h
 delete mode 100644 include/linux/elevator.h

(limited to 'include/linux')

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 85b8e1c3a762..882ec4bc51ad 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -6,13 +6,13 @@
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/cgroup.h>
-#include <linux/elevator.h>
 #include <linux/ktime.h>
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
 #include <linux/sbitmap.h>
 #include <linux/delay.h>
 
+#include "elevator.h"
 #include "bfq-iosched.h"
 
 #ifdef CONFIG_BFQ_CGROUP_DEBUG
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 480e1a134859..b5ef23f63d51 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -117,7 +117,6 @@
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/cgroup.h>
-#include <linux/elevator.h>
 #include <linux/ktime.h>
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
@@ -127,6 +126,7 @@
 
 #include <trace/events/block.h>
 
+#include "elevator.h"
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 5246ae040704..5181487db792 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -2,6 +2,7 @@
 #ifndef BLK_MQ_SCHED_H
 #define BLK_MQ_SCHED_H
 
+#include "elevator.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 8ed55af08427..f0a0ee758a55 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -2,6 +2,8 @@
 #ifndef INT_BLK_MQ_TAG_H
 #define INT_BLK_MQ_TAG_H
 
+struct blk_mq_alloc_data;
+
 /*
  * Tag address space map.
  */
diff --git a/block/blk.h b/block/blk.h
index 6c3c00a8fe19..f24569980905 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -12,6 +12,8 @@
 #include "blk-mq.h"
 #include "blk-mq-sched.h"
 
+struct elevator_type;
+
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT		(5 * HZ)
 
diff --git a/block/elevator.c b/block/elevator.c
index ff45d8388f48..57be09cd7f6d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -26,7 +26,6 @@
 #include <linux/kernel.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
-#include <linux/elevator.h>
 #include <linux/bio.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -40,6 +39,7 @@
 
 #include <trace/events/block.h>
 
+#include "elevator.h"
 #include "blk.h"
 #include "blk-mq-sched.h"
 #include "blk-pm.h"
diff --git a/block/elevator.h b/block/elevator.h
new file mode 100644
index 000000000000..16cd8bdedb7e
--- /dev/null
+++ b/block/elevator.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ELEVATOR_H
+#define _ELEVATOR_H
+
+#include <linux/percpu.h>
+#include <linux/hashtable.h>
+
+struct io_cq;
+struct elevator_type;
+struct blk_mq_debugfs_attr;
+
+/*
+ * Return values from elevator merger
+ */
+enum elv_merge {
+	ELEVATOR_NO_MERGE	= 0,
+	ELEVATOR_FRONT_MERGE	= 1,
+	ELEVATOR_BACK_MERGE	= 2,
+	ELEVATOR_DISCARD_MERGE	= 3,
+};
+
+struct blk_mq_alloc_data;
+struct blk_mq_hw_ctx;
+
+struct elevator_mq_ops {
+	int (*init_sched)(struct request_queue *, struct elevator_type *);
+	void (*exit_sched)(struct elevator_queue *);
+	int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
+	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
+	void (*depth_updated)(struct blk_mq_hw_ctx *);
+
+	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
+	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
+	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
+	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
+	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
+	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
+	void (*prepare_request)(struct request *);
+	void (*finish_request)(struct request *);
+	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
+	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
+	bool (*has_work)(struct blk_mq_hw_ctx *);
+	void (*completed_request)(struct request *, u64);
+	void (*requeue_request)(struct request *);
+	struct request *(*former_request)(struct request_queue *, struct request *);
+	struct request *(*next_request)(struct request_queue *, struct request *);
+	void (*init_icq)(struct io_cq *);
+	void (*exit_icq)(struct io_cq *);
+};
+
+#define ELV_NAME_MAX	(16)
+
+struct elv_fs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct elevator_queue *, char *);
+	ssize_t (*store)(struct elevator_queue *, const char *, size_t);
+};
+
+/*
+ * identifies an elevator type, such as AS or deadline
+ */
+struct elevator_type
+{
+	/* managed by elevator core */
+	struct kmem_cache *icq_cache;
+
+	/* fields provided by elevator implementation */
+	struct elevator_mq_ops ops;
+
+	size_t icq_size;	/* see iocontext.h */
+	size_t icq_align;	/* ditto */
+	struct elv_fs_entry *elevator_attrs;
+	const char *elevator_name;
+	const char *elevator_alias;
+	const unsigned int elevator_features;
+	struct module *elevator_owner;
+#ifdef CONFIG_BLK_DEBUG_FS
+	const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
+	const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
+#endif
+
+	/* managed by elevator core */
+	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
+	struct list_head list;
+};
+
+#define ELV_HASH_BITS 6
+
+void elv_rqhash_del(struct request_queue *q, struct request *rq);
+void elv_rqhash_add(struct request_queue *q, struct request *rq);
+void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
+struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
+
+/*
+ * each queue has an elevator_queue associated with it
+ */
+struct elevator_queue
+{
+	struct elevator_type *type;
+	void *elevator_data;
+	struct kobject kobj;
+	struct mutex sysfs_lock;
+	unsigned int registered:1;
+	DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
+};
+
+/*
+ * block elevator interface
+ */
+extern enum elv_merge elv_merge(struct request_queue *, struct request **,
+		struct bio *);
+extern void elv_merge_requests(struct request_queue *, struct request *,
+			       struct request *);
+extern void elv_merged_request(struct request_queue *, struct request *,
+		enum elv_merge);
+extern bool elv_attempt_insert_merge(struct request_queue *, struct request *,
+				     struct list_head *);
+extern struct request *elv_former_request(struct request_queue *, struct request *);
+extern struct request *elv_latter_request(struct request_queue *, struct request *);
+void elevator_init_mq(struct request_queue *q);
+
+/*
+ * io scheduler registration
+ */
+extern int elv_register(struct elevator_type *);
+extern void elv_unregister(struct elevator_type *);
+
+/*
+ * io scheduler sysfs switching
+ */
+extern ssize_t elv_iosched_show(struct request_queue *, char *);
+extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
+
+extern bool elv_bio_merge_ok(struct request *, struct bio *);
+extern struct elevator_queue *elevator_alloc(struct request_queue *,
+					struct elevator_type *);
+
+/*
+ * Helper functions.
+ */
+extern struct request *elv_rb_former_request(struct request_queue *, struct request *);
+extern struct request *elv_rb_latter_request(struct request_queue *, struct request *);
+
+/*
+ * rb support functions.
+ */
+extern void elv_rb_add(struct rb_root *, struct request *);
+extern void elv_rb_del(struct rb_root *, struct request *);
+extern struct request *elv_rb_find(struct rb_root *, sector_t);
+
+/*
+ * Insertion selection
+ */
+#define ELEVATOR_INSERT_FRONT	1
+#define ELEVATOR_INSERT_BACK	2
+#define ELEVATOR_INSERT_SORT	3
+#define ELEVATOR_INSERT_REQUEUE	4
+#define ELEVATOR_INSERT_FLUSH	5
+#define ELEVATOR_INSERT_SORT_MERGE	6
+
+#define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
+
+#define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
+#define rq_fifo_clear(rq)	list_del_init(&(rq)->queuelist)
+
+#endif /* _ELEVATOR_H */
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index a0ffbabfac2c..53dafc4bcd72 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -9,12 +9,12 @@
 #include <linux/kernel.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
-#include <linux/elevator.h>
 #include <linux/module.h>
 #include <linux/sbitmap.h>
 
 #include <trace/events/block.h>
 
+#include "elevator.h"
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 7f3c3932b723..47f042fa6a68 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -9,7 +9,6 @@
 #include <linux/fs.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
-#include <linux/elevator.h>
 #include <linux/bio.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -20,6 +19,7 @@
 
 #include <trace/events/block.h>
 
+#include "elevator.h"
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 8b1714021498..b892e5185d6f 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -64,7 +64,6 @@
 #include <linux/mutex.h>
 #include <linux/fs.h>
 #include <linux/blk-mq.h>
-#include <linux/elevator.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index befeb7c34290..337e6ed24821 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -22,6 +22,7 @@
  *******************************************************************/
 
 #include <scsi/scsi_host.h>
+#include <linux/hashtable.h>
 #include <linux/ktime.h>
 #include <linux/workqueue.h>
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4bcbb1ae2d66..d815238f61ed 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -251,8 +251,6 @@ static inline unsigned short req_get_ioprio(struct request *req)
 	return req->ioprio;
 }
 
-#include <linux/elevator.h>
-
 struct bio_vec;
 
 enum blk_eh_timer_return {
@@ -1124,6 +1122,15 @@ extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
+
+/*
+ * Elevator features for blk_queue_required_elevator_features:
+ */
+/* Supports zoned block devices sequential write constraint */
+#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
+/* Supports scheduling on multiple hardware queues */
+#define ELEVATOR_F_MQ_AWARE		(1U << 1)
+
 extern void blk_queue_required_elevator_features(struct request_queue *q,
 						 unsigned int features);
 extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
deleted file mode 100644
index 80633f3b600c..000000000000
--- a/include/linux/elevator.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_ELEVATOR_H
-#define _LINUX_ELEVATOR_H
-
-#include <linux/percpu.h>
-#include <linux/hashtable.h>
-
-#ifdef CONFIG_BLOCK
-
-struct io_cq;
-struct elevator_type;
-#ifdef CONFIG_BLK_DEBUG_FS
-struct blk_mq_debugfs_attr;
-#endif
-
-/*
- * Return values from elevator merger
- */
-enum elv_merge {
-	ELEVATOR_NO_MERGE	= 0,
-	ELEVATOR_FRONT_MERGE	= 1,
-	ELEVATOR_BACK_MERGE	= 2,
-	ELEVATOR_DISCARD_MERGE	= 3,
-};
-
-struct blk_mq_alloc_data;
-struct blk_mq_hw_ctx;
-
-struct elevator_mq_ops {
-	int (*init_sched)(struct request_queue *, struct elevator_type *);
-	void (*exit_sched)(struct elevator_queue *);
-	int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
-	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
-	void (*depth_updated)(struct blk_mq_hw_ctx *);
-
-	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
-	bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
-	int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
-	void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
-	void (*requests_merged)(struct request_queue *, struct request *, struct request *);
-	void (*limit_depth)(unsigned int, struct blk_mq_alloc_data *);
-	void (*prepare_request)(struct request *);
-	void (*finish_request)(struct request *);
-	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
-	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
-	bool (*has_work)(struct blk_mq_hw_ctx *);
-	void (*completed_request)(struct request *, u64);
-	void (*requeue_request)(struct request *);
-	struct request *(*former_request)(struct request_queue *, struct request *);
-	struct request *(*next_request)(struct request_queue *, struct request *);
-	void (*init_icq)(struct io_cq *);
-	void (*exit_icq)(struct io_cq *);
-};
-
-#define ELV_NAME_MAX	(16)
-
-struct elv_fs_entry {
-	struct attribute attr;
-	ssize_t (*show)(struct elevator_queue *, char *);
-	ssize_t (*store)(struct elevator_queue *, const char *, size_t);
-};
-
-/*
- * identifies an elevator type, such as AS or deadline
- */
-struct elevator_type
-{
-	/* managed by elevator core */
-	struct kmem_cache *icq_cache;
-
-	/* fields provided by elevator implementation */
-	struct elevator_mq_ops ops;
-
-	size_t icq_size;	/* see iocontext.h */
-	size_t icq_align;	/* ditto */
-	struct elv_fs_entry *elevator_attrs;
-	const char *elevator_name;
-	const char *elevator_alias;
-	const unsigned int elevator_features;
-	struct module *elevator_owner;
-#ifdef CONFIG_BLK_DEBUG_FS
-	const struct blk_mq_debugfs_attr *queue_debugfs_attrs;
-	const struct blk_mq_debugfs_attr *hctx_debugfs_attrs;
-#endif
-
-	/* managed by elevator core */
-	char icq_cache_name[ELV_NAME_MAX + 6];	/* elvname + "_io_cq" */
-	struct list_head list;
-};
-
-#define ELV_HASH_BITS 6
-
-void elv_rqhash_del(struct request_queue *q, struct request *rq);
-void elv_rqhash_add(struct request_queue *q, struct request *rq);
-void elv_rqhash_reposition(struct request_queue *q, struct request *rq);
-struct request *elv_rqhash_find(struct request_queue *q, sector_t offset);
-
-/*
- * each queue has an elevator_queue associated with it
- */
-struct elevator_queue
-{
-	struct elevator_type *type;
-	void *elevator_data;
-	struct kobject kobj;
-	struct mutex sysfs_lock;
-	unsigned int registered:1;
-	DECLARE_HASHTABLE(hash, ELV_HASH_BITS);
-};
-
-/*
- * block elevator interface
- */
-extern enum elv_merge elv_merge(struct request_queue *, struct request **,
-		struct bio *);
-extern void elv_merge_requests(struct request_queue *, struct request *,
-			       struct request *);
-extern void elv_merged_request(struct request_queue *, struct request *,
-		enum elv_merge);
-extern bool elv_attempt_insert_merge(struct request_queue *, struct request *,
-				     struct list_head *);
-extern struct request *elv_former_request(struct request_queue *, struct request *);
-extern struct request *elv_latter_request(struct request_queue *, struct request *);
-void elevator_init_mq(struct request_queue *q);
-
-/*
- * io scheduler registration
- */
-extern int elv_register(struct elevator_type *);
-extern void elv_unregister(struct elevator_type *);
-
-/*
- * io scheduler sysfs switching
- */
-extern ssize_t elv_iosched_show(struct request_queue *, char *);
-extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
-
-extern bool elv_bio_merge_ok(struct request *, struct bio *);
-extern struct elevator_queue *elevator_alloc(struct request_queue *,
-					struct elevator_type *);
-
-/*
- * Helper functions.
- */
-extern struct request *elv_rb_former_request(struct request_queue *, struct request *);
-extern struct request *elv_rb_latter_request(struct request_queue *, struct request *);
-
-/*
- * rb support functions.
- */
-extern void elv_rb_add(struct rb_root *, struct request *);
-extern void elv_rb_del(struct rb_root *, struct request *);
-extern struct request *elv_rb_find(struct rb_root *, sector_t);
-
-/*
- * Insertion selection
- */
-#define ELEVATOR_INSERT_FRONT	1
-#define ELEVATOR_INSERT_BACK	2
-#define ELEVATOR_INSERT_SORT	3
-#define ELEVATOR_INSERT_REQUEUE	4
-#define ELEVATOR_INSERT_FLUSH	5
-#define ELEVATOR_INSERT_SORT_MERGE	6
-
-#define rb_entry_rq(node)	rb_entry((node), struct request, rb_node)
-
-#define rq_entry_fifo(ptr)	list_entry((ptr), struct request, queuelist)
-#define rq_fifo_clear(rq)	list_del_init(&(rq)->queuelist)
-
-/*
- * Elevator features.
- */
-
-/* Supports zoned block devices sequential write constraint */
-#define ELEVATOR_F_ZBD_SEQ_WRITE	(1U << 0)
-/* Supports scheduling on multiple hardware queues */
-#define ELEVATOR_F_MQ_AWARE		(1U << 1)
-
-#endif /* CONFIG_BLOCK */
-#endif
diff --git a/init/main.c b/init/main.c
index 3c4054a95545..4162d7f3179f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -83,7 +83,6 @@
 #include <linux/ptrace.h>
 #include <linux/pti.h>
 #include <linux/blkdev.h>
-#include <linux/elevator.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/task.h>
 #include <linux/sched/task_stack.h>
-- 
cgit v1.2.3


From 3ab0bc78e96bd03a5096e4801550926d81b3e19d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:24 +0200
Subject: block: drop unused includes in <linux/blkdev.h>

Drop various include not actually used in blkdev.h itself.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/dm-ps-historical-service-time.c | 1 +
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c     | 1 +
 include/linux/blkdev.h                     | 7 -------
 3 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ps-historical-service-time.c b/drivers/md/dm-ps-historical-service-time.c
index 1856a1b125cc..875bca30a0dd 100644
--- a/drivers/md/dm-ps-historical-service-time.c
+++ b/drivers/md/dm-ps-historical-service-time.c
@@ -27,6 +27,7 @@
 #include <linux/blkdev.h>
 #include <linux/slab.h>
 #include <linux/module.h>
+#include <linux/sched/clock.h>
 
 
 #define DM_MSG_PREFIX	"multipath historical-service-time"
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 3ab669dc806f..27884f3106ab 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2017 Hisilicon Limited.
  */
 
+#include <linux/sched/clock.h>
 #include "hisi_sas.h"
 #define DRV_NAME "hisi_sas_v3_hw"
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d815238f61ed..46a703394f7f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -3,8 +3,6 @@
 #define _LINUX_BLKDEV_H
 
 #include <linux/sched.h>
-#include <linux/sched/clock.h>
-#include <linux/major.h>
 #include <linux/genhd.h>
 #include <linux/list.h>
 #include <linux/llist.h>
@@ -12,17 +10,12 @@
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/wait.h>
-#include <linux/mempool.h>
-#include <linux/pfn.h>
 #include <linux/bio.h>
-#include <linux/stringify.h>
 #include <linux/gfp.h>
-#include <linux/smp.h>
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
 #include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
-#include <linux/pm.h>
 #include <linux/sbitmap.h>
 
 struct module;
-- 
cgit v1.2.3


From b81e0c2372e65e5627864ba034433b64b2fc73f5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:25 +0200
Subject: block: drop unused includes in <linux/genhd.h>

Drop various include not actually used in genhd.h itself, and
move the remaning includes closer together.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/um/drivers/ubd_kern.c      |  1 +
 block/genhd.c                   |  1 +
 block/holder.c                  |  1 +
 block/partitions/core.c         |  1 +
 drivers/block/amiflop.c         |  1 +
 drivers/block/ataflop.c         |  1 +
 drivers/block/floppy.c          |  1 +
 drivers/block/swim.c            |  1 +
 drivers/block/xen-blkfront.c    |  1 +
 drivers/md/md.c                 |  1 +
 drivers/s390/block/dasd_genhd.c |  1 +
 drivers/scsi/sd.c               |  1 +
 drivers/scsi/sg.c               |  1 +
 drivers/scsi/sr.c               |  1 +
 drivers/scsi/st.c               |  1 +
 include/linux/genhd.h           | 14 ++------------
 include/linux/part_stat.h       |  1 +
 17 files changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index cd9dc0556e91..fefd343412c7 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -27,6 +27,7 @@
 #include <linux/blk-mq.h>
 #include <linux/ata.h>
 #include <linux/hdreg.h>
+#include <linux/major.h>
 #include <linux/cdrom.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
diff --git a/block/genhd.c b/block/genhd.c
index b49858550fa6..ffbdb9b24555 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -19,6 +19,7 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/kmod.h>
+#include <linux/major.h>
 #include <linux/mutex.h>
 #include <linux/idr.h>
 #include <linux/log2.h>
diff --git a/block/holder.c b/block/holder.c
index 9dc084182337..27cddce1b446 100644
--- a/block/holder.c
+++ b/block/holder.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/genhd.h>
+#include <linux/slab.h>
 
 struct bd_holder_disk {
 	struct list_head	list;
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 58c4c362c94f..3a4898433c43 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2020 Christoph Hellwig
  */
 #include <linux/fs.h>
+#include <linux/major.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
 #include <linux/genhd.h>
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index b892e5185d6f..2909fd9e72fb 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -61,6 +61,7 @@
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/major.h>
 #include <linux/mutex.h>
 #include <linux/fs.h>
 #include <linux/blk-mq.h>
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index a093644ac39f..58e921ab5729 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -68,6 +68,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/blk-mq.h>
+#include <linux/major.h>
 #include <linux/mutex.h>
 #include <linux/completion.h>
 #include <linux/wait.h>
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index fef79ea52e3e..6288ce888414 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -184,6 +184,7 @@ static int print_unex = 1;
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/major.h>
 #include <linux/platform_device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/mutex.h>
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 7ccc8d2a41bc..3911d0833e1b 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -16,6 +16,7 @@
 #include <linux/fd.h>
 #include <linux/slab.h>
 #include <linux/blk-mq.h>
+#include <linux/major.h>
 #include <linux/mutex.h>
 #include <linux/hdreg.h>
 #include <linux/kernel.h>
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 72902104f111..df0deb927760 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -42,6 +42,7 @@
 #include <linux/cdrom.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/major.h>
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 6c0c3d0d905a..1b9bc8dbd95d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -51,6 +51,7 @@
 #include <linux/hdreg.h>
 #include <linux/proc_fs.h>
 #include <linux/random.h>
+#include <linux/major.h>
 #include <linux/module.h>
 #include <linux/reboot.h>
 #include <linux/file.h>
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index fa966e0db6ca..3a6f3af240fa 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -14,6 +14,7 @@
 #define KMSG_COMPONENT "dasd"
 
 #include <linux/interrupt.h>
+#include <linux/major.h>
 #include <linux/fs.h>
 #include <linux/blkpg.h>
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 523bf2fdc253..d8f6add416c0 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -48,6 +48,7 @@
 #include <linux/blkpg.h>
 #include <linux/blk-pm.h>
 #include <linux/delay.h>
+#include <linux/major.h>
 #include <linux/mutex.h>
 #include <linux/string_helpers.h>
 #include <linux/async.h>
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 8f05248920e8..3c98f08dc25d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -31,6 +31,7 @@ static int sg_version_num = 30536;	/* 2 digits for each component */
 #include <linux/errno.h>
 #include <linux/mtio.h>
 #include <linux/ioctl.h>
+#include <linux/major.h>
 #include <linux/slab.h>
 #include <linux/fcntl.h>
 #include <linux/init.h>
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 8b17b35283aa..115f7ef7a5de 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -44,6 +44,7 @@
 #include <linux/cdrom.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
+#include <linux/major.h>
 #include <linux/blkdev.h>
 #include <linux/blk-pm.h>
 #include <linux/mutex.h>
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index ae8636d3780b..9933722acfd9 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -32,6 +32,7 @@ static const char *verstr = "20160209";
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/mtio.h>
+#include <linux/major.h>
 #include <linux/cdrom.h>
 #include <linux/ioctl.h>
 #include <linux/fcntl.h>
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0f5315c2b5a3..0b48a0cf4262 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -12,12 +12,10 @@
 
 #include <linux/types.h>
 #include <linux/kdev_t.h>
-#include <linux/rcupdate.h>
-#include <linux/slab.h>
-#include <linux/percpu-refcount.h>
 #include <linux/uuid.h>
 #include <linux/blk_types.h>
-#include <asm/local.h>
+#include <linux/device.h>
+#include <linux/xarray.h>
 
 extern const struct device_type disk_type;
 extern struct device_type part_type;
@@ -26,14 +24,6 @@ extern struct class block_class;
 #define DISK_MAX_PARTS			256
 #define DISK_NAME_LEN			32
 
-#include <linux/major.h>
-#include <linux/device.h>
-#include <linux/smp.h>
-#include <linux/string.h>
-#include <linux/fs.h>
-#include <linux/workqueue.h>
-#include <linux/xarray.h>
-
 #define PARTITION_META_INFO_VOLNAMELTH	64
 /*
  * Enough for the string representation of any kind of UUID plus NULL.
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index d2558121d48c..6f7949b2fd8d 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -3,6 +3,7 @@
 #define _LINUX_PART_STAT_H
 
 #include <linux/genhd.h>
+#include <asm/local.h>
 
 struct disk_stats {
 	u64 nsecs[NR_STAT_GROUPS];
-- 
cgit v1.2.3


From badf7f64378796d460c79eb0f182fa7282eb65d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:26 +0200
Subject: block: move a few merge helpers out of <linux/blkdev.h>

These are block-layer internal helpers, so move them to block/blk.h and
block/blk-merge.c.  Also update a comment a bit to use better grammar.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-16-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c      | 24 +++++++++++++++++++
 block/blk.h            | 38 ++++++++++++++++++++++++++++++
 include/linux/blkdev.h | 64 --------------------------------------------------
 3 files changed, 62 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7a5c81c02c80..39f210da399a 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -558,6 +558,23 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
 	return queue_max_segments(rq->q);
 }
 
+static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
+						  sector_t offset)
+{
+	struct request_queue *q = rq->q;
+
+	if (blk_rq_is_passthrough(rq))
+		return q->limits.max_hw_sectors;
+
+	if (!q->limits.chunk_sectors ||
+	    req_op(rq) == REQ_OP_DISCARD ||
+	    req_op(rq) == REQ_OP_SECURE_ERASE)
+		return blk_queue_get_max_sectors(q, req_op(rq));
+
+	return min(blk_max_size_offset(q, offset, 0),
+			blk_queue_get_max_sectors(q, req_op(rq)));
+}
+
 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
 		unsigned int nr_phys_segs)
 {
@@ -718,6 +735,13 @@ static enum elv_merge blk_try_req_merge(struct request *req,
 	return ELEVATOR_NO_MERGE;
 }
 
+static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
+{
+	if (bio_page(a) == bio_page(b) && bio_offset(a) == bio_offset(b))
+		return true;
+	return false;
+}
+
 /*
  * For non-mq, this has to be called with the request spinlock acquired.
  * For mq with scheduling, the appropriate queue wide lock should be held.
diff --git a/block/blk.h b/block/blk.h
index f24569980905..fa90691cfdab 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -96,6 +96,44 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
+static inline bool rq_mergeable(struct request *rq)
+{
+	if (blk_rq_is_passthrough(rq))
+		return false;
+
+	if (req_op(rq) == REQ_OP_FLUSH)
+		return false;
+
+	if (req_op(rq) == REQ_OP_WRITE_ZEROES)
+		return false;
+
+	if (req_op(rq) == REQ_OP_ZONE_APPEND)
+		return false;
+
+	if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
+		return false;
+	if (rq->rq_flags & RQF_NOMERGE_FLAGS)
+		return false;
+
+	return true;
+}
+
+/*
+ * There are two different ways to handle DISCARD merges:
+ *  1) If max_discard_segments > 1, the driver treats every bio as a range and
+ *     send the bios to controller together. The ranges don't need to be
+ *     contiguous.
+ *  2) Otherwise, the request will be normal read/write requests.  The ranges
+ *     need to be contiguous.
+ */
+static inline bool blk_discard_mergable(struct request *req)
+{
+	if (req_op(req) == REQ_OP_DISCARD &&
+	    queue_max_discard_segments(req->q) > 1)
+		return true;
+	return false;
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
 bool __bio_integrity_endio(struct bio *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 46a703394f7f..be534040ca9c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -745,37 +745,6 @@ static inline bool rq_is_sync(struct request *rq)
 	return op_is_sync(rq->cmd_flags);
 }
 
-static inline bool rq_mergeable(struct request *rq)
-{
-	if (blk_rq_is_passthrough(rq))
-		return false;
-
-	if (req_op(rq) == REQ_OP_FLUSH)
-		return false;
-
-	if (req_op(rq) == REQ_OP_WRITE_ZEROES)
-		return false;
-
-	if (req_op(rq) == REQ_OP_ZONE_APPEND)
-		return false;
-
-	if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
-		return false;
-	if (rq->rq_flags & RQF_NOMERGE_FLAGS)
-		return false;
-
-	return true;
-}
-
-static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
-{
-	if (bio_page(a) == bio_page(b) &&
-	    bio_offset(a) == bio_offset(b))
-		return true;
-
-	return false;
-}
-
 static inline unsigned int blk_queue_depth(struct request_queue *q)
 {
 	if (q->queue_depth)
@@ -1030,23 +999,6 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
 	return min(q->limits.max_sectors, chunk_sectors);
 }
 
-static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
-						  sector_t offset)
-{
-	struct request_queue *q = rq->q;
-
-	if (blk_rq_is_passthrough(rq))
-		return q->limits.max_hw_sectors;
-
-	if (!q->limits.chunk_sectors ||
-	    req_op(rq) == REQ_OP_DISCARD ||
-	    req_op(rq) == REQ_OP_SECURE_ERASE)
-		return blk_queue_get_max_sectors(q, req_op(rq));
-
-	return min(blk_max_size_offset(q, offset, 0),
-			blk_queue_get_max_sectors(q, req_op(rq)));
-}
-
 static inline unsigned int blk_rq_count_bios(struct request *rq)
 {
 	unsigned int nr_bios = 0;
@@ -1490,22 +1442,6 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector
 	return offset << SECTOR_SHIFT;
 }
 
-/*
- * Two cases of handling DISCARD merge:
- * If max_discard_segments > 1, the driver takes every bio
- * as a range and send them to controller together. The ranges
- * needn't to be contiguous.
- * Otherwise, the bios/requests will be handled as same as
- * others which should be contiguous.
- */
-static inline bool blk_discard_mergable(struct request *req)
-{
-	if (req_op(req) == REQ_OP_DISCARD &&
-	    queue_max_discard_segments(req->q) > 1)
-		return true;
-	return false;
-}
-
 static inline int bdev_discard_alignment(struct block_device *bdev)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-- 
cgit v1.2.3


From fe45e630a1035aea94c29016f2598bbde149bbe3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:27 +0200
Subject: block: move integrity handling out of <linux/blkdev.h>

Split the integrity/metadata handling definitions out into a new header.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-17-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bdev.c                        |   1 +
 block/bio-integrity.c               |   2 +-
 block/blk-core.c                    |   1 +
 block/blk-integrity.c               |   2 +-
 block/blk-merge.c                   |   1 +
 block/blk-mq.c                      |   1 +
 block/keyslot-manager.c             |   1 +
 block/t10-pi.c                      |   2 +-
 drivers/md/dm-bio-record.h          |   1 +
 drivers/md/dm-crypt.c               |   1 +
 drivers/md/dm-table.c               |   1 +
 drivers/md/md.c                     |   1 +
 drivers/nvdimm/core.c               |   1 +
 drivers/nvme/host/core.c            |   1 +
 drivers/nvme/host/pci.c             |   1 +
 drivers/nvme/host/rdma.c            |   1 +
 drivers/nvme/target/io-cmd-bdev.c   |   1 +
 drivers/nvme/target/rdma.c          |   1 +
 drivers/scsi/scsi_lib.c             |   1 +
 drivers/scsi/sd_dif.c               |   2 +-
 drivers/scsi/virtio_scsi.c          |   1 +
 drivers/target/target_core_iblock.c |   1 +
 include/linux/blk-integrity.h       | 183 ++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h              | 183 ------------------------------------
 24 files changed, 205 insertions(+), 187 deletions(-)
 create mode 100644 include/linux/blk-integrity.h

(limited to 'include/linux')

diff --git a/block/bdev.c b/block/bdev.c
index 485a258b0ab3..93b1188d7e58 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -12,6 +12,7 @@
 #include <linux/major.h>
 #include <linux/device_cgroup.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/backing-dev.h>
 #include <linux/module.h>
 #include <linux/blkpg.h>
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 6b47cddbbca1..21234ff966d9 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -6,7 +6,7 @@
  * Written by: Martin K. Petersen <martin.petersen@oracle.com>
  */
 
-#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/mempool.h>
 #include <linux/export.h>
 #include <linux/bio.h>
diff --git a/block/blk-core.c b/block/blk-core.c
index 4d8f5fe91588..d2c6caadef89 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -18,6 +18,7 @@
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-pm.h>
+#include <linux/blk-integrity.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 16d5d5338392..cef534a7cbc9 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -6,7 +6,7 @@
  * Written by: Martin K. Petersen <martin.petersen@oracle.com>
  */
 
-#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/backing-dev.h>
 #include <linux/mempool.h>
 #include <linux/bio.h>
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 39f210da399a..5b4f23014df8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -6,6 +6,7 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/scatterlist.h>
 
 #include <trace/events/block.h>
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0fefe00cebd4..2539ba976949 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -10,6 +10,7 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/kmemleak.h>
 #include <linux/mm.h>
 #include <linux/init.h>
diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c
index 2c4a55bea6ca..1792159d12d1 100644
--- a/block/keyslot-manager.c
+++ b/block/keyslot-manager.c
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/wait.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 
 struct blk_ksm_keyslot {
 	atomic_t slot_refs;
diff --git a/block/t10-pi.c b/block/t10-pi.c
index 00c203b2a921..25a52a2a09a8 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -5,7 +5,7 @@
  */
 
 #include <linux/t10-pi.h>
-#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/crc-t10dif.h>
 #include <linux/module.h>
 #include <net/checksum.h>
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index a3b71350eec8..745e3ab4aa0a 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -8,6 +8,7 @@
 #define DM_BIO_RECORD_H
 
 #include <linux/bio.h>
+#include <linux/blk-integrity.h>
 
 /*
  * There are lots of mutable fields in the bio struct that get
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 916b7da16de2..292f7896f733 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -15,6 +15,7 @@
 #include <linux/key.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/crypto.h>
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 2111daaacaba..1fa4d5582dca 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/namei.h>
 #include <linux/ctype.h>
 #include <linux/string.h>
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1b9bc8dbd95d..ec09083ff0ef 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -41,6 +41,7 @@
 #include <linux/sched/signal.h>
 #include <linux/kthread.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/badblocks.h>
 #include <linux/sysctl.h>
 #include <linux/seq_file.h>
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 7de592d7eff4..6a45fa91e8a3 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/device.h>
 #include <linux/ctype.h>
 #include <linux/ndctl.h>
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index f8dd664b2eda..3d444b13cd69 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -6,6 +6,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/blk-integrity.h>
 #include <linux/compat.h>
 #include <linux/delay.h>
 #include <linux/errno.h>
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 149ecf73df38..896328271471 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -10,6 +10,7 @@
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-mq-pci.h>
+#include <linux/blk-integrity.h>
 #include <linux/dmi.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 042c594bc57e..40317e1b9183 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -13,6 +13,7 @@
 #include <linux/atomic.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-mq-rdma.h>
+#include <linux/blk-integrity.h>
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 0fc2781ab970..6139e1de50a6 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -5,6 +5,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/module.h>
 #include "nvmet.h"
 
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 891174ccd44b..38d1f292ecc2 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -5,6 +5,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/atomic.h>
+#include <linux/blk-integrity.h>
 #include <linux/ctype.h>
 #include <linux/delay.h>
 #include <linux/err.h>
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 572673873ddf..33fd9a01330c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -21,6 +21,7 @@
 #include <linux/hardirq.h>
 #include <linux/scatterlist.h>
 #include <linux/blk-mq.h>
+#include <linux/blk-integrity.h>
 #include <linux/ratelimit.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 4cadb26070a8..349950616adc 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -6,7 +6,7 @@
  * Written by: Martin K. Petersen <martin.petersen@oracle.com>
  */
 
-#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/t10-pi.h>
 
 #include <scsi/scsi.h>
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 07d0250f17c3..b8455fcbf18b 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -22,6 +22,7 @@
 #include <linux/virtio_scsi.h>
 #include <linux/cpu.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_cmnd.h>
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 4069a1edcfa3..d39b87e2ed10 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -16,6 +16,7 @@
 #include <linux/timer.h>
 #include <linux/fs.h>
 #include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/bio.h>
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
new file mode 100644
index 000000000000..8a038ea0717e
--- /dev/null
+++ b/include/linux/blk-integrity.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BLK_INTEGRITY_H
+#define _LINUX_BLK_INTEGRITY_H
+
+#include <linux/blk-mq.h>
+
+struct request;
+
+enum blk_integrity_flags {
+	BLK_INTEGRITY_VERIFY		= 1 << 0,
+	BLK_INTEGRITY_GENERATE		= 1 << 1,
+	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
+	BLK_INTEGRITY_IP_CHECKSUM	= 1 << 3,
+};
+
+struct blk_integrity_iter {
+	void			*prot_buf;
+	void			*data_buf;
+	sector_t		seed;
+	unsigned int		data_size;
+	unsigned short		interval;
+	const char		*disk_name;
+};
+
+typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
+typedef void (integrity_prepare_fn) (struct request *);
+typedef void (integrity_complete_fn) (struct request *, unsigned int);
+
+struct blk_integrity_profile {
+	integrity_processing_fn		*generate_fn;
+	integrity_processing_fn		*verify_fn;
+	integrity_prepare_fn		*prepare_fn;
+	integrity_complete_fn		*complete_fn;
+	const char			*name;
+};
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+void blk_integrity_register(struct gendisk *, struct blk_integrity *);
+void blk_integrity_unregister(struct gendisk *);
+int blk_integrity_compare(struct gendisk *, struct gendisk *);
+int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
+				   struct scatterlist *);
+int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
+
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+	struct blk_integrity *bi = &disk->queue->integrity;
+
+	if (!bi->profile)
+		return NULL;
+
+	return bi;
+}
+
+static inline struct blk_integrity *
+bdev_get_integrity(struct block_device *bdev)
+{
+	return blk_get_integrity(bdev->bd_disk);
+}
+
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
+{
+	return q->integrity.profile;
+}
+
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+						    unsigned int segs)
+{
+	q->limits.max_integrity_segments = segs;
+}
+
+static inline unsigned short
+queue_max_integrity_segments(const struct request_queue *q)
+{
+	return q->limits.max_integrity_segments;
+}
+
+/**
+ * bio_integrity_intervals - Return number of integrity intervals for a bio
+ * @bi:		blk_integrity profile for device
+ * @sectors:	Size of the bio in 512-byte sectors
+ *
+ * Description: The block layer calculates everything in 512 byte
+ * sectors but integrity metadata is done in terms of the data integrity
+ * interval size of the storage device.  Convert the block layer sectors
+ * to the appropriate number of integrity intervals.
+ */
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
+{
+	return sectors >> (bi->interval_exp - 9);
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
+{
+	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
+}
+
+static inline bool blk_integrity_rq(struct request *rq)
+{
+	return rq->cmd_flags & REQ_INTEGRITY;
+}
+
+/*
+ * Return the first bvec that contains integrity data.  Only drivers that are
+ * limited to a single integrity segment should use this helper.
+ */
+static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+{
+	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
+		return NULL;
+	return rq->bio->bi_integrity->bip_vec;
+}
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+static inline int blk_rq_count_integrity_sg(struct request_queue *q,
+					    struct bio *b)
+{
+	return 0;
+}
+static inline int blk_rq_map_integrity_sg(struct request_queue *q,
+					  struct bio *b,
+					  struct scatterlist *s)
+{
+	return 0;
+}
+static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
+{
+	return NULL;
+}
+static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
+{
+	return NULL;
+}
+static inline bool
+blk_integrity_queue_supports_integrity(struct request_queue *q)
+{
+	return false;
+}
+static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
+{
+	return 0;
+}
+static inline void blk_integrity_register(struct gendisk *d,
+					 struct blk_integrity *b)
+{
+}
+static inline void blk_integrity_unregister(struct gendisk *d)
+{
+}
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+						    unsigned int segs)
+{
+}
+static inline unsigned short
+queue_max_integrity_segments(const struct request_queue *q)
+{
+	return 0;
+}
+
+static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
+						   unsigned int sectors)
+{
+	return 0;
+}
+
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
+{
+	return 0;
+}
+static inline int blk_integrity_rq(struct request *rq)
+{
+	return 0;
+}
+
+static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+{
+	return NULL;
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+#endif /* _LINUX_BLK_INTEGRITY_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index be534040ca9c..56e60e5c09d0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1555,189 +1555,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo
 #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-*")
 
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-
-enum blk_integrity_flags {
-	BLK_INTEGRITY_VERIFY		= 1 << 0,
-	BLK_INTEGRITY_GENERATE		= 1 << 1,
-	BLK_INTEGRITY_DEVICE_CAPABLE	= 1 << 2,
-	BLK_INTEGRITY_IP_CHECKSUM	= 1 << 3,
-};
-
-struct blk_integrity_iter {
-	void			*prot_buf;
-	void			*data_buf;
-	sector_t		seed;
-	unsigned int		data_size;
-	unsigned short		interval;
-	const char		*disk_name;
-};
-
-typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *);
-typedef void (integrity_prepare_fn) (struct request *);
-typedef void (integrity_complete_fn) (struct request *, unsigned int);
-
-struct blk_integrity_profile {
-	integrity_processing_fn		*generate_fn;
-	integrity_processing_fn		*verify_fn;
-	integrity_prepare_fn		*prepare_fn;
-	integrity_complete_fn		*complete_fn;
-	const char			*name;
-};
-
-extern void blk_integrity_register(struct gendisk *, struct blk_integrity *);
-extern void blk_integrity_unregister(struct gendisk *);
-extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
-extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
-				   struct scatterlist *);
-extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
-
-static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
-{
-	struct blk_integrity *bi = &disk->queue->integrity;
-
-	if (!bi->profile)
-		return NULL;
-
-	return bi;
-}
-
-static inline
-struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
-{
-	return blk_get_integrity(bdev->bd_disk);
-}
-
-static inline bool
-blk_integrity_queue_supports_integrity(struct request_queue *q)
-{
-	return q->integrity.profile;
-}
-
-static inline bool blk_integrity_rq(struct request *rq)
-{
-	return rq->cmd_flags & REQ_INTEGRITY;
-}
-
-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
-						    unsigned int segs)
-{
-	q->limits.max_integrity_segments = segs;
-}
-
-static inline unsigned short
-queue_max_integrity_segments(const struct request_queue *q)
-{
-	return q->limits.max_integrity_segments;
-}
-
-/**
- * bio_integrity_intervals - Return number of integrity intervals for a bio
- * @bi:		blk_integrity profile for device
- * @sectors:	Size of the bio in 512-byte sectors
- *
- * Description: The block layer calculates everything in 512 byte
- * sectors but integrity metadata is done in terms of the data integrity
- * interval size of the storage device.  Convert the block layer sectors
- * to the appropriate number of integrity intervals.
- */
-static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
-						   unsigned int sectors)
-{
-	return sectors >> (bi->interval_exp - 9);
-}
-
-static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
-					       unsigned int sectors)
-{
-	return bio_integrity_intervals(bi, sectors) * bi->tuple_size;
-}
-
-/*
- * Return the first bvec that contains integrity data.  Only drivers that are
- * limited to a single integrity segment should use this helper.
- */
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
-{
-	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
-		return NULL;
-	return rq->bio->bi_integrity->bip_vec;
-}
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-
-struct bio;
-struct block_device;
-struct gendisk;
-struct blk_integrity;
-
-static inline int blk_integrity_rq(struct request *rq)
-{
-	return 0;
-}
-static inline int blk_rq_count_integrity_sg(struct request_queue *q,
-					    struct bio *b)
-{
-	return 0;
-}
-static inline int blk_rq_map_integrity_sg(struct request_queue *q,
-					  struct bio *b,
-					  struct scatterlist *s)
-{
-	return 0;
-}
-static inline struct blk_integrity *bdev_get_integrity(struct block_device *b)
-{
-	return NULL;
-}
-static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk)
-{
-	return NULL;
-}
-static inline bool
-blk_integrity_queue_supports_integrity(struct request_queue *q)
-{
-	return false;
-}
-static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b)
-{
-	return 0;
-}
-static inline void blk_integrity_register(struct gendisk *d,
-					 struct blk_integrity *b)
-{
-}
-static inline void blk_integrity_unregister(struct gendisk *d)
-{
-}
-static inline void blk_queue_max_integrity_segments(struct request_queue *q,
-						    unsigned int segs)
-{
-}
-static inline unsigned short queue_max_integrity_segments(const struct request_queue *q)
-{
-	return 0;
-}
-
-static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
-						   unsigned int sectors)
-{
-	return 0;
-}
-
-static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
-					       unsigned int sectors)
-{
-	return 0;
-}
-
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
-{
-	return NULL;
-}
-
-#endif /* CONFIG_BLK_DEV_INTEGRITY */
-
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 
 bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q);
-- 
cgit v1.2.3


From 24b83deb29b7f06a5573b65f2ce96f5482755d43 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 20 Sep 2021 14:33:28 +0200
Subject: block: move struct request to blk-mq.h

struct request is only used by blk-mq drivers, so move it and all
related declarations to blk-mq.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20210920123328.1399408-18-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-crypto-fallback.c         |   1 +
 block/blk-crypto-internal.h         |   2 +-
 drivers/block/rnbd/rnbd-proto.h     |   2 +-
 drivers/md/dm-verity-target.c       |   1 +
 drivers/mmc/core/sd.c               |   1 +
 drivers/target/target_core_file.c   |   1 +
 drivers/target/target_core_iblock.c |   1 +
 include/linux/blk-mq.h              | 465 +++++++++++++++++++++++++++++++++++
 include/linux/blk_types.h           |   2 -
 include/linux/blkdev.h              | 469 +-----------------------------------
 include/linux/blktrace_api.h        |   2 +-
 include/linux/t10-pi.h              |   2 +-
 include/scsi/scsi_device.h          |   2 +-
 13 files changed, 476 insertions(+), 475 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index c322176a1e09..ec4c7823541c 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -18,6 +18,7 @@
 #include <linux/mempool.h>
 #include <linux/module.h>
 #include <linux/random.h>
+#include <linux/scatterlist.h>
 
 #include "blk-crypto-internal.h"
 
diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h
index 0d36aae538d7..2fb0d65a464c 100644
--- a/block/blk-crypto-internal.h
+++ b/block/blk-crypto-internal.h
@@ -7,7 +7,7 @@
 #define __LINUX_BLK_CRYPTO_INTERNAL_H
 
 #include <linux/bio.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 
 /* Represents a crypto mode supported by blk-crypto  */
 struct blk_crypto_mode {
diff --git a/drivers/block/rnbd/rnbd-proto.h b/drivers/block/rnbd/rnbd-proto.h
index c1bc5c0fef71..de5d5a8df81d 100644
--- a/drivers/block/rnbd/rnbd-proto.h
+++ b/drivers/block/rnbd/rnbd-proto.h
@@ -10,7 +10,7 @@
 #define RNBD_PROTO_H
 
 #include <linux/types.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/limits.h>
 #include <linux/inet.h>
 #include <linux/in.h>
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 88288c8d6bc8..aae48a8b1a04 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -18,6 +18,7 @@
 #include "dm-verity-verify-sig.h"
 #include <linux/module.h>
 #include <linux/reboot.h>
+#include <linux/scatterlist.h>
 
 #define DM_MSG_PREFIX			"verity"
 
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 4646b7a03db6..c9db24e16af1 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index ef4a8e189fba..02f64453b4c5 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -20,6 +20,7 @@
 #include <linux/vmalloc.h>
 #include <linux/falloc.h>
 #include <linux/uio.h>
+#include <linux/scatterlist.h>
 #include <scsi/scsi_proto.h>
 #include <asm/unaligned.h>
 
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index d39b87e2ed10..31df20abe141 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -23,6 +23,7 @@
 #include <linux/genhd.h>
 #include <linux/file.h>
 #include <linux/module.h>
+#include <linux/scatterlist.h>
 #include <scsi/scsi_proto.h>
 #include <asm/unaligned.h>
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 13ba1861e688..bd4086a6f28e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -6,10 +6,218 @@
 #include <linux/sbitmap.h>
 #include <linux/srcu.h>
 #include <linux/lockdep.h>
+#include <linux/scatterlist.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
 
+#define BLKDEV_MIN_RQ	4
+#define BLKDEV_MAX_RQ	128	/* Default maximum */
+
+typedef void (rq_end_io_fn)(struct request *, blk_status_t);
+
+/*
+ * request flags */
+typedef __u32 __bitwise req_flags_t;
+
+/* drive already may have started this one */
+#define RQF_STARTED		((__force req_flags_t)(1 << 1))
+/* may not be passed by ioscheduler */
+#define RQF_SOFTBARRIER		((__force req_flags_t)(1 << 3))
+/* request for flush sequence */
+#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
+/* merge of different types, fail separately */
+#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << 5))
+/* track inflight for MQ */
+#define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
+/* don't call prep for this one */
+#define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
+/* vaguely specified driver internal error.  Ignored by the block layer */
+#define RQF_FAILED		((__force req_flags_t)(1 << 10))
+/* don't warn about errors */
+#define RQF_QUIET		((__force req_flags_t)(1 << 11))
+/* elevator private data attached */
+#define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
+/* account into disk and partition IO statistics */
+#define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
+/* runtime pm request */
+#define RQF_PM			((__force req_flags_t)(1 << 15))
+/* on IO scheduler merge hash */
+#define RQF_HASHED		((__force req_flags_t)(1 << 16))
+/* track IO completion time */
+#define RQF_STATS		((__force req_flags_t)(1 << 17))
+/* Look at ->special_vec for the actual data payload instead of the
+   bio chain. */
+#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
+/* The per-zone write lock is held for this request */
+#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
+/* already slept for hybrid poll */
+#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 20))
+/* ->timeout has been called, don't expire again */
+#define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
+
+/* flags that prevent us from merging requests: */
+#define RQF_NOMERGE_FLAGS \
+	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
+
+enum mq_rq_state {
+	MQ_RQ_IDLE		= 0,
+	MQ_RQ_IN_FLIGHT		= 1,
+	MQ_RQ_COMPLETE		= 2,
+};
+
+/*
+ * Try to put the fields that are referenced together in the same cacheline.
+ *
+ * If you modify this structure, make sure to update blk_rq_init() and
+ * especially blk_mq_rq_ctx_init() to take care of the added fields.
+ */
+struct request {
+	struct request_queue *q;
+	struct blk_mq_ctx *mq_ctx;
+	struct blk_mq_hw_ctx *mq_hctx;
+
+	unsigned int cmd_flags;		/* op and common flags */
+	req_flags_t rq_flags;
+
+	int tag;
+	int internal_tag;
+
+	/* the following two fields are internal, NEVER access directly */
+	unsigned int __data_len;	/* total data len */
+	sector_t __sector;		/* sector cursor */
+
+	struct bio *bio;
+	struct bio *biotail;
+
+	struct list_head queuelist;
+
+	/*
+	 * The hash is used inside the scheduler, and killed once the
+	 * request reaches the dispatch list. The ipi_list is only used
+	 * to queue the request for softirq completion, which is long
+	 * after the request has been unhashed (and even removed from
+	 * the dispatch list).
+	 */
+	union {
+		struct hlist_node hash;	/* merge hash */
+		struct llist_node ipi_list;
+	};
+
+	/*
+	 * The rb_node is only used inside the io scheduler, requests
+	 * are pruned when moved to the dispatch queue. So let the
+	 * completion_data share space with the rb_node.
+	 */
+	union {
+		struct rb_node rb_node;	/* sort/lookup */
+		struct bio_vec special_vec;
+		void *completion_data;
+		int error_count; /* for legacy drivers, don't use */
+	};
+
+	/*
+	 * Three pointers are available for the IO schedulers, if they need
+	 * more they have to dynamically allocate it.  Flush requests are
+	 * never put on the IO scheduler. So let the flush fields share
+	 * space with the elevator data.
+	 */
+	union {
+		struct {
+			struct io_cq		*icq;
+			void			*priv[2];
+		} elv;
+
+		struct {
+			unsigned int		seq;
+			struct list_head	list;
+			rq_end_io_fn		*saved_end_io;
+		} flush;
+	};
+
+	struct gendisk *rq_disk;
+	struct block_device *part;
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+	/* Time that the first bio started allocating this request. */
+	u64 alloc_time_ns;
+#endif
+	/* Time that this request was allocated for this IO. */
+	u64 start_time_ns;
+	/* Time that I/O was submitted to the device. */
+	u64 io_start_time_ns;
+
+#ifdef CONFIG_BLK_WBT
+	unsigned short wbt_flags;
+#endif
+	/*
+	 * rq sectors used for blk stats. It has the same value
+	 * with blk_rq_sectors(rq), except that it never be zeroed
+	 * by completion.
+	 */
+	unsigned short stats_sectors;
+
+	/*
+	 * Number of scatter-gather DMA addr+len pairs after
+	 * physical address coalescing is performed.
+	 */
+	unsigned short nr_phys_segments;
+
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+	unsigned short nr_integrity_segments;
+#endif
+
+#ifdef CONFIG_BLK_INLINE_ENCRYPTION
+	struct bio_crypt_ctx *crypt_ctx;
+	struct blk_ksm_keyslot *crypt_keyslot;
+#endif
+
+	unsigned short write_hint;
+	unsigned short ioprio;
+
+	enum mq_rq_state state;
+	refcount_t ref;
+
+	unsigned int timeout;
+	unsigned long deadline;
+
+	union {
+		struct __call_single_data csd;
+		u64 fifo_time;
+	};
+
+	/*
+	 * completion callback.
+	 */
+	rq_end_io_fn *end_io;
+	void *end_io_data;
+};
+
+#define req_op(req) \
+	((req)->cmd_flags & REQ_OP_MASK)
+
+static inline bool blk_rq_is_passthrough(struct request *rq)
+{
+	return blk_op_is_passthrough(req_op(rq));
+}
+
+static inline unsigned short req_get_ioprio(struct request *req)
+{
+	return req->ioprio;
+}
+
+#define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
+
+#define rq_dma_dir(rq) \
+	(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
+
+enum blk_eh_timer_return {
+	BLK_EH_DONE,		/* drivers has completed the command */
+	BLK_EH_RESET_TIMER,	/* reset timer and try again */
+};
+
+#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
+#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
+
 /**
  * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware
  * block device
@@ -637,4 +845,261 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio);
 void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
 		struct lock_class_key *key);
 
+static inline bool rq_is_sync(struct request *rq)
+{
+	return op_is_sync(rq->cmd_flags);
+}
+
+void blk_rq_init(struct request_queue *q, struct request *rq);
+void blk_put_request(struct request *rq);
+struct request *blk_get_request(struct request_queue *q, unsigned int op,
+		blk_mq_req_flags_t flags);
+int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
+		struct bio_set *bs, gfp_t gfp_mask,
+		int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
+void blk_rq_unprep_clone(struct request *rq);
+blk_status_t blk_insert_cloned_request(struct request_queue *q,
+		struct request *rq);
+
+struct rq_map_data {
+	struct page **pages;
+	int page_order;
+	int nr_entries;
+	unsigned long offset;
+	int null_mapped;
+	int from_user;
+};
+
+int blk_rq_map_user(struct request_queue *, struct request *,
+		struct rq_map_data *, void __user *, unsigned long, gfp_t);
+int blk_rq_map_user_iov(struct request_queue *, struct request *,
+		struct rq_map_data *, const struct iov_iter *, gfp_t);
+int blk_rq_unmap_user(struct bio *);
+int blk_rq_map_kern(struct request_queue *, struct request *, void *,
+		unsigned int, gfp_t);
+int blk_rq_append_bio(struct request *rq, struct bio *bio);
+void blk_execute_rq_nowait(struct gendisk *, struct request *, int,
+		rq_end_io_fn *);
+blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq,
+		int at_head);
+
+struct req_iterator {
+	struct bvec_iter iter;
+	struct bio *bio;
+};
+
+#define __rq_for_each_bio(_bio, rq)	\
+	if ((rq->bio))			\
+		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
+
+#define rq_for_each_segment(bvl, _rq, _iter)			\
+	__rq_for_each_bio(_iter.bio, _rq)			\
+		bio_for_each_segment(bvl, _iter.bio, _iter.iter)
+
+#define rq_for_each_bvec(bvl, _rq, _iter)			\
+	__rq_for_each_bio(_iter.bio, _rq)			\
+		bio_for_each_bvec(bvl, _iter.bio, _iter.iter)
+
+#define rq_iter_last(bvec, _iter)				\
+		(_iter.bio->bi_next == NULL &&			\
+		 bio_iter_last(bvec, _iter.iter))
+
+/*
+ * blk_rq_pos()			: the current sector
+ * blk_rq_bytes()		: bytes left in the entire request
+ * blk_rq_cur_bytes()		: bytes left in the current segment
+ * blk_rq_err_bytes()		: bytes left till the next error boundary
+ * blk_rq_sectors()		: sectors left in the entire request
+ * blk_rq_cur_sectors()		: sectors left in the current segment
+ * blk_rq_stats_sectors()	: sectors of the entire request used for stats
+ */
+static inline sector_t blk_rq_pos(const struct request *rq)
+{
+	return rq->__sector;
+}
+
+static inline unsigned int blk_rq_bytes(const struct request *rq)
+{
+	return rq->__data_len;
+}
+
+static inline int blk_rq_cur_bytes(const struct request *rq)
+{
+	return rq->bio ? bio_cur_bytes(rq->bio) : 0;
+}
+
+unsigned int blk_rq_err_bytes(const struct request *rq);
+
+static inline unsigned int blk_rq_sectors(const struct request *rq)
+{
+	return blk_rq_bytes(rq) >> SECTOR_SHIFT;
+}
+
+static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
+{
+	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
+}
+
+static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
+{
+	return rq->stats_sectors;
+}
+
+/*
+ * Some commands like WRITE SAME have a payload or data transfer size which
+ * is different from the size of the request.  Any driver that supports such
+ * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
+ * calculate the data transfer size.
+ */
+static inline unsigned int blk_rq_payload_bytes(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return rq->special_vec.bv_len;
+	return blk_rq_bytes(rq);
+}
+
+/*
+ * Return the first full biovec in the request.  The caller needs to check that
+ * there are any bvecs before calling this helper.
+ */
+static inline struct bio_vec req_bvec(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return rq->special_vec;
+	return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
+}
+
+static inline unsigned int blk_rq_count_bios(struct request *rq)
+{
+	unsigned int nr_bios = 0;
+	struct bio *bio;
+
+	__rq_for_each_bio(bio, rq)
+		nr_bios++;
+
+	return nr_bios;
+}
+
+void blk_steal_bios(struct bio_list *list, struct request *rq);
+
+/*
+ * Request completion related functions.
+ *
+ * blk_update_request() completes given number of bytes and updates
+ * the request without completing it.
+ */
+bool blk_update_request(struct request *rq, blk_status_t error,
+			       unsigned int nr_bytes);
+void blk_abort_request(struct request *);
+
+/*
+ * Number of physical segments as sent to the device.
+ *
+ * Normally this is the number of discontiguous data segments sent by the
+ * submitter.  But for data-less command like discard we might have no
+ * actual data segments submitted, but the driver might have to add it's
+ * own special payload.  In that case we still return 1 here so that this
+ * special payload will be mapped.
+ */
+static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
+{
+	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+		return 1;
+	return rq->nr_phys_segments;
+}
+
+/*
+ * Number of discard segments (or ranges) the driver needs to fill in.
+ * Each discard bio merged into a request is counted as one segment.
+ */
+static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
+{
+	return max_t(unsigned short, rq->nr_phys_segments, 1);
+}
+
+int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
+		struct scatterlist *sglist, struct scatterlist **last_sg);
+static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
+		struct scatterlist *sglist)
+{
+	struct scatterlist *last_sg = NULL;
+
+	return __blk_rq_map_sg(q, rq, sglist, &last_sg);
+}
+void blk_dump_rq_flags(struct request *, char *);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+}
+
+static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
+{
+	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
+}
+
+bool blk_req_needs_zone_write_lock(struct request *rq);
+bool blk_req_zone_write_trylock(struct request *rq);
+void __blk_req_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_unlock(struct request *rq);
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+	if (blk_req_needs_zone_write_lock(rq))
+		__blk_req_zone_write_lock(rq);
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
+		__blk_req_zone_write_unlock(rq);
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return rq->q->seq_zones_wlock &&
+		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	if (!blk_req_needs_zone_write_lock(rq))
+		return true;
+	return !blk_req_zone_is_write_locked(rq);
+}
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+	return false;
+}
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+}
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+	return false;
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+	return true;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+# error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
 #endif
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+void rq_flush_dcache_pages(struct request *rq);
+#else
+static inline void rq_flush_dcache_pages(struct request *rq)
+{
+}
+#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
+#endif /* BLK_MQ_H */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index be622b5a21ed..3b967053e9f5 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -431,8 +431,6 @@ enum stat_group {
 
 #define bio_op(bio) \
 	((bio)->bi_opf & REQ_OP_MASK)
-#define req_op(req) \
-	((req)->cmd_flags & REQ_OP_MASK)
 
 /* obsolete, don't use in new code */
 static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 56e60e5c09d0..0e960d74615e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -14,7 +14,6 @@
 #include <linux/gfp.h>
 #include <linux/rcupdate.h>
 #include <linux/percpu-refcount.h>
-#include <linux/scatterlist.h>
 #include <linux/blkzoned.h>
 #include <linux/sbitmap.h>
 
@@ -32,9 +31,6 @@ struct blk_queue_stats;
 struct blk_stat_callback;
 struct blk_keyslot_manager;
 
-#define BLKDEV_MIN_RQ	4
-#define BLKDEV_MAX_RQ	128	/* Default maximum */
-
 /* Must be consistent with blk_mq_poll_stats_bkt() */
 #define BLK_MQ_POLL_STATS_BKTS 16
 
@@ -47,213 +43,12 @@ struct blk_keyslot_manager;
  */
 #define BLKCG_MAX_POLS		6
 
-typedef void (rq_end_io_fn)(struct request *, blk_status_t);
-
-/*
- * request flags */
-typedef __u32 __bitwise req_flags_t;
-
-/* drive already may have started this one */
-#define RQF_STARTED		((__force req_flags_t)(1 << 1))
-/* may not be passed by ioscheduler */
-#define RQF_SOFTBARRIER		((__force req_flags_t)(1 << 3))
-/* request for flush sequence */
-#define RQF_FLUSH_SEQ		((__force req_flags_t)(1 << 4))
-/* merge of different types, fail separately */
-#define RQF_MIXED_MERGE		((__force req_flags_t)(1 << 5))
-/* track inflight for MQ */
-#define RQF_MQ_INFLIGHT		((__force req_flags_t)(1 << 6))
-/* don't call prep for this one */
-#define RQF_DONTPREP		((__force req_flags_t)(1 << 7))
-/* vaguely specified driver internal error.  Ignored by the block layer */
-#define RQF_FAILED		((__force req_flags_t)(1 << 10))
-/* don't warn about errors */
-#define RQF_QUIET		((__force req_flags_t)(1 << 11))
-/* elevator private data attached */
-#define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
-/* account into disk and partition IO statistics */
-#define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
-/* runtime pm request */
-#define RQF_PM			((__force req_flags_t)(1 << 15))
-/* on IO scheduler merge hash */
-#define RQF_HASHED		((__force req_flags_t)(1 << 16))
-/* track IO completion time */
-#define RQF_STATS		((__force req_flags_t)(1 << 17))
-/* Look at ->special_vec for the actual data payload instead of the
-   bio chain. */
-#define RQF_SPECIAL_PAYLOAD	((__force req_flags_t)(1 << 18))
-/* The per-zone write lock is held for this request */
-#define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
-/* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 20))
-/* ->timeout has been called, don't expire again */
-#define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
-
-/* flags that prevent us from merging requests: */
-#define RQF_NOMERGE_FLAGS \
-	(RQF_STARTED | RQF_SOFTBARRIER | RQF_FLUSH_SEQ | RQF_SPECIAL_PAYLOAD)
-
-/*
- * Request state for blk-mq.
- */
-enum mq_rq_state {
-	MQ_RQ_IDLE		= 0,
-	MQ_RQ_IN_FLIGHT		= 1,
-	MQ_RQ_COMPLETE		= 2,
-};
-
-/*
- * Try to put the fields that are referenced together in the same cacheline.
- *
- * If you modify this structure, make sure to update blk_rq_init() and
- * especially blk_mq_rq_ctx_init() to take care of the added fields.
- */
-struct request {
-	struct request_queue *q;
-	struct blk_mq_ctx *mq_ctx;
-	struct blk_mq_hw_ctx *mq_hctx;
-
-	unsigned int cmd_flags;		/* op and common flags */
-	req_flags_t rq_flags;
-
-	int tag;
-	int internal_tag;
-
-	/* the following two fields are internal, NEVER access directly */
-	unsigned int __data_len;	/* total data len */
-	sector_t __sector;		/* sector cursor */
-
-	struct bio *bio;
-	struct bio *biotail;
-
-	struct list_head queuelist;
-
-	/*
-	 * The hash is used inside the scheduler, and killed once the
-	 * request reaches the dispatch list. The ipi_list is only used
-	 * to queue the request for softirq completion, which is long
-	 * after the request has been unhashed (and even removed from
-	 * the dispatch list).
-	 */
-	union {
-		struct hlist_node hash;	/* merge hash */
-		struct llist_node ipi_list;
-	};
-
-	/*
-	 * The rb_node is only used inside the io scheduler, requests
-	 * are pruned when moved to the dispatch queue. So let the
-	 * completion_data share space with the rb_node.
-	 */
-	union {
-		struct rb_node rb_node;	/* sort/lookup */
-		struct bio_vec special_vec;
-		void *completion_data;
-		int error_count; /* for legacy drivers, don't use */
-	};
-
-	/*
-	 * Three pointers are available for the IO schedulers, if they need
-	 * more they have to dynamically allocate it.  Flush requests are
-	 * never put on the IO scheduler. So let the flush fields share
-	 * space with the elevator data.
-	 */
-	union {
-		struct {
-			struct io_cq		*icq;
-			void			*priv[2];
-		} elv;
-
-		struct {
-			unsigned int		seq;
-			struct list_head	list;
-			rq_end_io_fn		*saved_end_io;
-		} flush;
-	};
-
-	struct gendisk *rq_disk;
-	struct block_device *part;
-#ifdef CONFIG_BLK_RQ_ALLOC_TIME
-	/* Time that the first bio started allocating this request. */
-	u64 alloc_time_ns;
-#endif
-	/* Time that this request was allocated for this IO. */
-	u64 start_time_ns;
-	/* Time that I/O was submitted to the device. */
-	u64 io_start_time_ns;
-
-#ifdef CONFIG_BLK_WBT
-	unsigned short wbt_flags;
-#endif
-	/*
-	 * rq sectors used for blk stats. It has the same value
-	 * with blk_rq_sectors(rq), except that it never be zeroed
-	 * by completion.
-	 */
-	unsigned short stats_sectors;
-
-	/*
-	 * Number of scatter-gather DMA addr+len pairs after
-	 * physical address coalescing is performed.
-	 */
-	unsigned short nr_phys_segments;
-
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	unsigned short nr_integrity_segments;
-#endif
-
-#ifdef CONFIG_BLK_INLINE_ENCRYPTION
-	struct bio_crypt_ctx *crypt_ctx;
-	struct blk_ksm_keyslot *crypt_keyslot;
-#endif
-
-	unsigned short write_hint;
-	unsigned short ioprio;
-
-	enum mq_rq_state state;
-	refcount_t ref;
-
-	unsigned int timeout;
-	unsigned long deadline;
-
-	union {
-		struct __call_single_data csd;
-		u64 fifo_time;
-	};
-
-	/*
-	 * completion callback.
-	 */
-	rq_end_io_fn *end_io;
-	void *end_io_data;
-};
-
 static inline bool blk_op_is_passthrough(unsigned int op)
 {
 	op &= REQ_OP_MASK;
 	return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT;
 }
 
-static inline bool blk_rq_is_passthrough(struct request *rq)
-{
-	return blk_op_is_passthrough(req_op(rq));
-}
-
-static inline unsigned short req_get_ioprio(struct request *req)
-{
-	return req->ioprio;
-}
-
-struct bio_vec;
-
-enum blk_eh_timer_return {
-	BLK_EH_DONE,		/* drivers has completed the command */
-	BLK_EH_RESET_TIMER,	/* reset timer and try again */
-};
-
-#define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */
-#define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */
-
 /*
  * Zoned block device models (zoned limit).
  *
@@ -620,11 +415,6 @@ extern void blk_clear_pm_only(struct request_queue *q);
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
-#define rq_data_dir(rq)		(op_is_write(req_op(rq)) ? WRITE : READ)
-
-#define rq_dma_dir(rq) \
-	(op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE)
-
 #define dma_map_bvec(dev, bv, dir, attrs) \
 	dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \
 	(dir), (attrs))
@@ -740,11 +530,6 @@ static inline unsigned int queue_max_active_zones(const struct request_queue *q)
 }
 #endif /* CONFIG_BLK_DEV_ZONED */
 
-static inline bool rq_is_sync(struct request *rq)
-{
-	return op_is_sync(rq->cmd_flags);
-}
-
 static inline unsigned int blk_queue_depth(struct request_queue *q)
 {
 	if (q->queue_depth)
@@ -759,83 +544,20 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
 #define BLK_DEFAULT_SG_TIMEOUT	(60 * HZ)
 #define BLK_MIN_SG_TIMEOUT	(7 * HZ)
 
-struct rq_map_data {
-	struct page **pages;
-	int page_order;
-	int nr_entries;
-	unsigned long offset;
-	int null_mapped;
-	int from_user;
-};
-
-struct req_iterator {
-	struct bvec_iter iter;
-	struct bio *bio;
-};
-
 /* This should not be used directly - use rq_for_each_segment */
 #define for_each_bio(_bio)		\
 	for (; _bio; _bio = _bio->bi_next)
-#define __rq_for_each_bio(_bio, rq)	\
-	if ((rq->bio))			\
-		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
-
-#define rq_for_each_segment(bvl, _rq, _iter)			\
-	__rq_for_each_bio(_iter.bio, _rq)			\
-		bio_for_each_segment(bvl, _iter.bio, _iter.iter)
 
-#define rq_for_each_bvec(bvl, _rq, _iter)			\
-	__rq_for_each_bio(_iter.bio, _rq)			\
-		bio_for_each_bvec(bvl, _iter.bio, _iter.iter)
-
-#define rq_iter_last(bvec, _iter)				\
-		(_iter.bio->bi_next == NULL &&			\
-		 bio_iter_last(bvec, _iter.iter))
-
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-# error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
-#endif
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
-extern void rq_flush_dcache_pages(struct request *rq);
-#else
-static inline void rq_flush_dcache_pages(struct request *rq)
-{
-}
-#endif
 
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
 blk_qc_t submit_bio_noacct(struct bio *bio);
-extern void blk_rq_init(struct request_queue *q, struct request *rq);
-extern void blk_put_request(struct request *);
-extern struct request *blk_get_request(struct request_queue *, unsigned int op,
-				       blk_mq_req_flags_t flags);
+
 extern int blk_lld_busy(struct request_queue *q);
-extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
-			     struct bio_set *bs, gfp_t gfp_mask,
-			     int (*bio_ctr)(struct bio *, struct bio *, void *),
-			     void *data);
-extern void blk_rq_unprep_clone(struct request *rq);
-extern blk_status_t blk_insert_cloned_request(struct request_queue *q,
-				     struct request *rq);
-int blk_rq_append_bio(struct request *rq, struct bio *bio);
 extern void blk_queue_split(struct bio **);
 extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags);
 extern void blk_queue_exit(struct request_queue *q);
 extern void blk_sync_queue(struct request_queue *q);
-extern int blk_rq_map_user(struct request_queue *, struct request *,
-			   struct rq_map_data *, void __user *, unsigned long,
-			   gfp_t);
-extern int blk_rq_unmap_user(struct bio *);
-extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
-extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
-			       struct rq_map_data *, const struct iov_iter *,
-			       gfp_t);
-extern void blk_execute_rq_nowait(struct gendisk *,
-				  struct request *, int, rq_end_io_fn *);
-
-blk_status_t blk_execute_rq(struct gendisk *bd_disk, struct request *rq,
-			    int at_head);
 
 /* Helper to convert REQ_OP_XXX to its string format XXX */
 extern const char *blk_op_str(unsigned int op);
@@ -867,47 +589,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK		(PAGE_SECTORS - 1)
 
-/*
- * blk_rq_pos()			: the current sector
- * blk_rq_bytes()		: bytes left in the entire request
- * blk_rq_cur_bytes()		: bytes left in the current segment
- * blk_rq_err_bytes()		: bytes left till the next error boundary
- * blk_rq_sectors()		: sectors left in the entire request
- * blk_rq_cur_sectors()		: sectors left in the current segment
- * blk_rq_stats_sectors()	: sectors of the entire request used for stats
- */
-static inline sector_t blk_rq_pos(const struct request *rq)
-{
-	return rq->__sector;
-}
-
-static inline unsigned int blk_rq_bytes(const struct request *rq)
-{
-	return rq->__data_len;
-}
-
-static inline int blk_rq_cur_bytes(const struct request *rq)
-{
-	return rq->bio ? bio_cur_bytes(rq->bio) : 0;
-}
-
-extern unsigned int blk_rq_err_bytes(const struct request *rq);
-
-static inline unsigned int blk_rq_sectors(const struct request *rq)
-{
-	return blk_rq_bytes(rq) >> SECTOR_SHIFT;
-}
-
-static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
-{
-	return blk_rq_cur_bytes(rq) >> SECTOR_SHIFT;
-}
-
-static inline unsigned int blk_rq_stats_sectors(const struct request *rq)
-{
-	return rq->stats_sectors;
-}
-
 #ifdef CONFIG_BLK_DEV_ZONED
 
 /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */
@@ -924,42 +605,8 @@ static inline unsigned int bio_zone_is_seq(struct bio *bio)
 	return blk_queue_zone_is_seq(bdev_get_queue(bio->bi_bdev),
 				     bio->bi_iter.bi_sector);
 }
-
-static inline unsigned int blk_rq_zone_no(struct request *rq)
-{
-	return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
-}
-
-static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
-{
-	return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
-}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
-/*
- * Some commands like WRITE SAME have a payload or data transfer size which
- * is different from the size of the request.  Any driver that supports such
- * commands using the RQF_SPECIAL_PAYLOAD flag needs to use this helper to
- * calculate the data transfer size.
- */
-static inline unsigned int blk_rq_payload_bytes(struct request *rq)
-{
-	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
-		return rq->special_vec.bv_len;
-	return blk_rq_bytes(rq);
-}
-
-/*
- * Return the first full biovec in the request.  The caller needs to check that
- * there are any bvecs before calling this helper.
- */
-static inline struct bio_vec req_bvec(struct request *rq)
-{
-	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
-		return rq->special_vec;
-	return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
-}
-
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 						     int op)
 {
@@ -999,30 +646,6 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q,
 	return min(q->limits.max_sectors, chunk_sectors);
 }
 
-static inline unsigned int blk_rq_count_bios(struct request *rq)
-{
-	unsigned int nr_bios = 0;
-	struct bio *bio;
-
-	__rq_for_each_bio(bio, rq)
-		nr_bios++;
-
-	return nr_bios;
-}
-
-void blk_steal_bios(struct bio_list *list, struct request *rq);
-
-/*
- * Request completion related functions.
- *
- * blk_update_request() completes given number of bytes and updates
- * the request without completing it.
- */
-extern bool blk_update_request(struct request *rq, blk_status_t error,
-			       unsigned int nr_bytes);
-
-extern void blk_abort_request(struct request *);
-
 /*
  * Access functions for manipulating queue properties
  */
@@ -1081,42 +704,6 @@ extern void blk_queue_required_elevator_features(struct request_queue *q,
 extern bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
 					      struct device *dev);
 
-/*
- * Number of physical segments as sent to the device.
- *
- * Normally this is the number of discontiguous data segments sent by the
- * submitter.  But for data-less command like discard we might have no
- * actual data segments submitted, but the driver might have to add it's
- * own special payload.  In that case we still return 1 here so that this
- * special payload will be mapped.
- */
-static inline unsigned short blk_rq_nr_phys_segments(struct request *rq)
-{
-	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
-		return 1;
-	return rq->nr_phys_segments;
-}
-
-/*
- * Number of discard segments (or ranges) the driver needs to fill in.
- * Each discard bio merged into a request is counted as one segment.
- */
-static inline unsigned short blk_rq_nr_discard_segments(struct request *rq)
-{
-	return max_t(unsigned short, rq->nr_phys_segments, 1);
-}
-
-int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
-		struct scatterlist *sglist, struct scatterlist **last_sg);
-static inline int blk_rq_map_sg(struct request_queue *q, struct request *rq,
-		struct scatterlist *sglist)
-{
-	struct scatterlist *last_sg = NULL;
-
-	return __blk_rq_map_sg(q, rq, sglist, &last_sg);
-}
-extern void blk_dump_rq_flags(struct request *, char *);
-
 bool __must_check blk_get_queue(struct request_queue *);
 extern void blk_put_queue(struct request_queue *);
 extern void blk_set_queue_dying(struct request_queue *);
@@ -1613,60 +1200,6 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *);
 extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
 
-#ifdef CONFIG_BLK_DEV_ZONED
-bool blk_req_needs_zone_write_lock(struct request *rq);
-bool blk_req_zone_write_trylock(struct request *rq);
-void __blk_req_zone_write_lock(struct request *rq);
-void __blk_req_zone_write_unlock(struct request *rq);
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
-	if (blk_req_needs_zone_write_lock(rq))
-		__blk_req_zone_write_lock(rq);
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
-	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
-		__blk_req_zone_write_unlock(rq);
-}
-
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
-	return rq->q->seq_zones_wlock &&
-		test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
-	if (!blk_req_needs_zone_write_lock(rq))
-		return true;
-	return !blk_req_zone_is_write_locked(rq);
-}
-#else
-static inline bool blk_req_needs_zone_write_lock(struct request *rq)
-{
-	return false;
-}
-
-static inline void blk_req_zone_write_lock(struct request *rq)
-{
-}
-
-static inline void blk_req_zone_write_unlock(struct request *rq)
-{
-}
-static inline bool blk_req_zone_is_write_locked(struct request *rq)
-{
-	return false;
-}
-
-static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
-{
-	return true;
-}
-#endif /* CONFIG_BLK_DEV_ZONED */
-
 static inline void blk_wake_io_task(struct task_struct *waiter)
 {
 	/*
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index a083e15df608..22501a293fa5 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -2,7 +2,7 @@
 #ifndef BLKTRACE_H
 #define BLKTRACE_H
 
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/relay.h>
 #include <linux/compat.h>
 #include <uapi/linux/blktrace_api.h>
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 96305a64a5a7..c635c2e014e3 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -3,7 +3,7 @@
 #define _LINUX_T10_PI_H
 
 #include <linux/types.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 
 /*
  * A T10 PI-capable target device can be formatted with different
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index b97e142a7ca9..430b73bd02ac 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -5,7 +5,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <scsi/scsi.h>
 #include <linux/atomic.h>
 #include <linux/sbitmap.h>
-- 
cgit v1.2.3


From d2a27964e60ff18e637334864042af54de383902 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 5 Oct 2021 18:23:27 +0800
Subject: block: Rename BLKDEV_MAX_RQ -> BLKDEV_DEFAULT_RQ

It is a bit confusing that there is BLKDEV_MAX_RQ and MAX_SCHED_RQ, as
the name BLKDEV_MAX_RQ would imply the max requests always, which it is
not.

Rename to BLKDEV_MAX_RQ to BLKDEV_DEFAULT_RQ, matching its usage - that being
the default number of requests assigned when allocating a request queue.

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/1633429419-228500-3-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 2 +-
 block/blk-mq-sched.c   | 2 +-
 block/blk-mq-sched.h   | 2 +-
 drivers/block/rbd.c    | 2 +-
 include/linux/blk-mq.h | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 469550845244..2590e7724990 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -587,7 +587,7 @@ struct request_queue *blk_alloc_queue(int node_id)
 
 	blk_queue_dma_alignment(q, 511);
 	blk_set_default_limits(&q->limits);
-	q->nr_requests = BLKDEV_MAX_RQ;
+	q->nr_requests = BLKDEV_DEFAULT_RQ;
 
 	return q;
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 0f006cabfd91..2231fb0d4c35 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -606,7 +606,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	 * Additionally, this is a per-hw queue depth.
 	 */
 	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
-				   BLKDEV_MAX_RQ);
+				   BLKDEV_DEFAULT_RQ);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_sched_alloc_tags(q, hctx, i);
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 5181487db792..f6a1ce9e31f3 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -6,7 +6,7 @@
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
-#define MAX_SCHED_RQ (16 * BLKDEV_MAX_RQ)
+#define MAX_SCHED_RQ (16 * BLKDEV_DEFAULT_RQ)
 
 void blk_mq_sched_assign_ioc(struct request *rq);
 
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index e65c9d706f6f..bf60aebd0cfb 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -836,7 +836,7 @@ struct rbd_options {
 	u32 alloc_hint_flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
 };
 
-#define RBD_QUEUE_DEPTH_DEFAULT	BLKDEV_MAX_RQ
+#define RBD_QUEUE_DEPTH_DEFAULT	BLKDEV_DEFAULT_RQ
 #define RBD_ALLOC_SIZE_DEFAULT	(64 * 1024)
 #define RBD_LOCK_TIMEOUT_DEFAULT 0  /* no timeout */
 #define RBD_READ_ONLY_DEFAULT	false
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index bd4086a6f28e..31cc41dfef6b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -12,7 +12,7 @@ struct blk_mq_tags;
 struct blk_flush_queue;
 
 #define BLKDEV_MIN_RQ	4
-#define BLKDEV_MAX_RQ	128	/* Default maximum */
+#define BLKDEV_DEFAULT_RQ	128
 
 typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
-- 
cgit v1.2.3


From e155b0c238b20f0a866f4334d292656665836c8a Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 5 Oct 2021 18:23:37 +0800
Subject: blk-mq: Use shared tags for shared sbitmap support

Currently we use separate sbitmap pairs and active_queues atomic_t for
shared sbitmap support.

However a full sets of static requests are used per HW queue, which is
quite wasteful, considering that the total number of requests usable at
any given time across all HW queues is limited by the shared sbitmap depth.

As such, it is considerably more memory efficient in the case of shared
sbitmap to allocate a set of static rqs per tag set or request queue, and
not per HW queue.

So replace the sbitmap pairs and active_queues atomic_t with a shared
tags per tagset and request queue, which will hold a set of shared static
rqs.

Since there is now no valid HW queue index to be passed to the blk_mq_ops
.init and .exit_request callbacks, pass an invalid index token. This
changes the semantics of the APIs, such that the callback would need to
validate the HW queue index before using it. Currently no user of shared
sbitmap actually uses the HW queue index (as would be expected).

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/1633429419-228500-13-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.c   |  82 +++++++++++++++++++--------------------
 block/blk-mq-tag.c     |  63 ++++++++++--------------------
 block/blk-mq-tag.h     |   6 +--
 block/blk-mq.c         | 101 +++++++++++++++++++++++++------------------------
 block/blk-mq.h         |   7 ++--
 include/linux/blk-mq.h |  15 ++++----
 include/linux/blkdev.h |   3 +-
 7 files changed, 125 insertions(+), 152 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index d1b56bb9ac64..428da4949d80 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -519,6 +519,11 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
 					  struct blk_mq_hw_ctx *hctx,
 					  unsigned int hctx_idx)
 {
+	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
+		hctx->sched_tags = q->shared_sbitmap_tags;
+		return 0;
+	}
+
 	hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
 						    q->nr_requests);
 
@@ -527,61 +532,54 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
 	return 0;
 }
 
+static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
+{
+	blk_mq_free_rq_map(queue->shared_sbitmap_tags);
+	queue->shared_sbitmap_tags = NULL;
+}
+
 /* called in queue's release handler, tagset has gone away */
-static void blk_mq_sched_tags_teardown(struct request_queue *q)
+static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->sched_tags) {
-			blk_mq_free_rq_map(hctx->sched_tags, hctx->flags);
+			if (!blk_mq_is_sbitmap_shared(q->tag_set->flags))
+				blk_mq_free_rq_map(hctx->sched_tags);
 			hctx->sched_tags = NULL;
 		}
 	}
+
+	if (blk_mq_is_sbitmap_shared(flags))
+		blk_mq_exit_sched_shared_sbitmap(q);
 }
 
 static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
 {
 	struct blk_mq_tag_set *set = queue->tag_set;
-	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
-	struct blk_mq_hw_ctx *hctx;
-	int ret, i;
 
 	/*
 	 * Set initial depth at max so that we don't need to reallocate for
 	 * updating nr_requests.
 	 */
-	ret = blk_mq_init_bitmaps(&queue->sched_bitmap_tags,
-				  &queue->sched_breserved_tags,
-				  MAX_SCHED_RQ, set->reserved_tags,
-				  set->numa_node, alloc_policy);
-	if (ret)
-		return ret;
-
-	queue_for_each_hw_ctx(queue, hctx, i) {
-		hctx->sched_tags->bitmap_tags =
-					&queue->sched_bitmap_tags;
-		hctx->sched_tags->breserved_tags =
-					&queue->sched_breserved_tags;
-	}
+	queue->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
+						BLK_MQ_NO_HCTX_IDX,
+						MAX_SCHED_RQ);
+	if (!queue->shared_sbitmap_tags)
+		return -ENOMEM;
 
 	blk_mq_tag_update_sched_shared_sbitmap(queue);
 
 	return 0;
 }
 
-static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
-{
-	sbitmap_queue_free(&queue->sched_bitmap_tags);
-	sbitmap_queue_free(&queue->sched_breserved_tags);
-}
-
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
+	unsigned int i, flags = q->tag_set->flags;
 	struct blk_mq_hw_ctx *hctx;
 	struct elevator_queue *eq;
-	unsigned int i;
 	int ret;
 
 	if (!e) {
@@ -598,21 +596,21 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
 				   BLKDEV_DEFAULT_RQ);
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
+	if (blk_mq_is_sbitmap_shared(flags)) {
+		ret = blk_mq_init_sched_shared_sbitmap(q);
 		if (ret)
-			goto err_free_map_and_rqs;
+			return ret;
 	}
 
-	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
-		ret = blk_mq_init_sched_shared_sbitmap(q);
+	queue_for_each_hw_ctx(q, hctx, i) {
+		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
 		if (ret)
 			goto err_free_map_and_rqs;
 	}
 
 	ret = e->ops.init_sched(q, e);
 	if (ret)
-		goto err_free_sbitmap;
+		goto err_free_map_and_rqs;
 
 	blk_mq_debugfs_register_sched(q);
 
@@ -632,12 +630,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	return 0;
 
-err_free_sbitmap:
-	if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
-		blk_mq_exit_sched_shared_sbitmap(q);
 err_free_map_and_rqs:
 	blk_mq_sched_free_rqs(q);
-	blk_mq_sched_tags_teardown(q);
+	blk_mq_sched_tags_teardown(q, flags);
+
 	q->elevator = NULL;
 	return ret;
 }
@@ -651,9 +647,15 @@ void blk_mq_sched_free_rqs(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		if (hctx->sched_tags)
-			blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
+	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
+		blk_mq_free_rqs(q->tag_set, q->shared_sbitmap_tags,
+				BLK_MQ_NO_HCTX_IDX);
+	} else {
+		queue_for_each_hw_ctx(q, hctx, i) {
+			if (hctx->sched_tags)
+				blk_mq_free_rqs(q->tag_set,
+						hctx->sched_tags, i);
+		}
 	}
 }
 
@@ -674,8 +676,6 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 	blk_mq_debugfs_unregister_sched(q);
 	if (e->type->ops.exit_sched)
 		e->type->ops.exit_sched(e);
-	blk_mq_sched_tags_teardown(q);
-	if (blk_mq_is_sbitmap_shared(flags))
-		blk_mq_exit_sched_shared_sbitmap(q);
+	blk_mq_sched_tags_teardown(q, flags);
 	q->elevator = NULL;
 }
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index a0ecc6d88f84..0e10e8404bf0 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -26,11 +26,10 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
-		struct blk_mq_tag_set *set = q->tag_set;
 
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
 		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-			atomic_inc(&set->active_queues_shared_sbitmap);
+			atomic_inc(&hctx->tags->active_queues);
 	} else {
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
 		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
@@ -57,14 +56,14 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
-	struct request_queue *q = hctx->queue;
-	struct blk_mq_tag_set *set = q->tag_set;
 
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+		struct request_queue *q = hctx->queue;
+
 		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
 					&q->queue_flags))
 			return;
-		atomic_dec(&set->active_queues_shared_sbitmap);
+		atomic_dec(&tags->active_queues);
 	} else {
 		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return;
@@ -510,38 +509,10 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
 	return 0;
 }
 
-int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set)
-{
-	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
-	int i, ret;
-
-	ret = blk_mq_init_bitmaps(&set->__bitmap_tags, &set->__breserved_tags,
-				  set->queue_depth, set->reserved_tags,
-				  set->numa_node, alloc_policy);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		struct blk_mq_tags *tags = set->tags[i];
-
-		tags->bitmap_tags = &set->__bitmap_tags;
-		tags->breserved_tags = &set->__breserved_tags;
-	}
-
-	return 0;
-}
-
-void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
-{
-	sbitmap_queue_free(&set->__bitmap_tags);
-	sbitmap_queue_free(&set->__breserved_tags);
-}
-
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 				     unsigned int reserved_tags,
-				     int node, unsigned int flags)
+				     int node, int alloc_policy)
 {
-	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags);
 	struct blk_mq_tags *tags;
 
 	if (total_tags > BLK_MQ_TAG_MAX) {
@@ -557,9 +528,6 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_reserved_tags = reserved_tags;
 	spin_lock_init(&tags->lock);
 
-	if (blk_mq_is_sbitmap_shared(flags))
-		return tags;
-
 	if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) {
 		kfree(tags);
 		return NULL;
@@ -567,12 +535,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	return tags;
 }
 
-void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags)
+void blk_mq_free_tags(struct blk_mq_tags *tags)
 {
-	if (!blk_mq_is_sbitmap_shared(flags)) {
-		sbitmap_queue_free(tags->bitmap_tags);
-		sbitmap_queue_free(tags->breserved_tags);
-	}
+	sbitmap_queue_free(tags->bitmap_tags);
+	sbitmap_queue_free(tags->breserved_tags);
 	kfree(tags);
 }
 
@@ -603,6 +569,13 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 		if (tdepth > MAX_SCHED_RQ)
 			return -EINVAL;
 
+		/*
+		 * Only the sbitmap needs resizing since we allocated the max
+		 * initially.
+		 */
+		if (blk_mq_is_sbitmap_shared(set->flags))
+			return 0;
+
 		new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth);
 		if (!new)
 			return -ENOMEM;
@@ -623,12 +596,14 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 
 void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size)
 {
-	sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags);
+	struct blk_mq_tags *tags = set->shared_sbitmap_tags;
+
+	sbitmap_queue_resize(&tags->__bitmap_tags, size - set->reserved_tags);
 }
 
 void blk_mq_tag_update_sched_shared_sbitmap(struct request_queue *q)
 {
-	sbitmap_queue_resize(&q->sched_bitmap_tags,
+	sbitmap_queue_resize(q->shared_sbitmap_tags->bitmap_tags,
 			     q->nr_requests - q->tag_set->reserved_tags);
 }
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index a9f5f1824819..e43f7589b96c 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -32,16 +32,14 @@ struct blk_mq_tags {
 
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
 					unsigned int reserved_tags,
-					int node, unsigned int flags);
-extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags);
+					int node, int alloc_policy);
+extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
 			       struct sbitmap_queue *breserved_tags,
 			       unsigned int queue_depth,
 			       unsigned int reserved,
 			       int node, int alloc_policy);
 
-extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set);
-extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set);
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
 extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 			   unsigned int tag);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c27591a04c4f..5537375f6400 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2353,7 +2353,10 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	struct blk_mq_tags *drv_tags;
 	struct page *page;
 
-	drv_tags = set->tags[hctx_idx];
+	if (blk_mq_is_sbitmap_shared(set->flags))
+		drv_tags = set->shared_sbitmap_tags;
+	else
+		drv_tags = set->tags[hctx_idx];
 
 	if (tags->static_rqs && set->ops->exit_request) {
 		int i;
@@ -2382,21 +2385,20 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	}
 }
 
-void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags)
+void blk_mq_free_rq_map(struct blk_mq_tags *tags)
 {
 	kfree(tags->rqs);
 	tags->rqs = NULL;
 	kfree(tags->static_rqs);
 	tags->static_rqs = NULL;
 
-	blk_mq_free_tags(tags, flags);
+	blk_mq_free_tags(tags);
 }
 
 static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 					       unsigned int hctx_idx,
 					       unsigned int nr_tags,
-					       unsigned int reserved_tags,
-					       unsigned int flags)
+					       unsigned int reserved_tags)
 {
 	struct blk_mq_tags *tags;
 	int node;
@@ -2405,7 +2407,8 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 	if (node == NUMA_NO_NODE)
 		node = set->numa_node;
 
-	tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags);
+	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
+				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
 	if (!tags)
 		return NULL;
 
@@ -2413,7 +2416,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
 				 node);
 	if (!tags->rqs) {
-		blk_mq_free_tags(tags, flags);
+		blk_mq_free_tags(tags);
 		return NULL;
 	}
 
@@ -2422,7 +2425,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 					node);
 	if (!tags->static_rqs) {
 		kfree(tags->rqs);
-		blk_mq_free_tags(tags, flags);
+		blk_mq_free_tags(tags);
 		return NULL;
 	}
 
@@ -2864,14 +2867,13 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 	struct blk_mq_tags *tags;
 	int ret;
 
-	tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags,
-				   set->flags);
+	tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags);
 	if (!tags)
 		return NULL;
 
 	ret = blk_mq_alloc_rqs(set, tags, hctx_idx, depth);
 	if (ret) {
-		blk_mq_free_rq_map(tags, set->flags);
+		blk_mq_free_rq_map(tags);
 		return NULL;
 	}
 
@@ -2881,6 +2883,12 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 static bool __blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 				       int hctx_idx)
 {
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		set->tags[hctx_idx] = set->shared_sbitmap_tags;
+
+		return true;
+	}
+
 	set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx,
 						       set->queue_depth);
 
@@ -2891,14 +2899,21 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
 			     struct blk_mq_tags *tags,
 			     unsigned int hctx_idx)
 {
-	unsigned int flags = set->flags;
-
 	if (tags) {
 		blk_mq_free_rqs(set, tags, hctx_idx);
-		blk_mq_free_rq_map(tags, flags);
+		blk_mq_free_rq_map(tags);
 	}
 }
 
+static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
+				      unsigned int hctx_idx)
+{
+	if (!blk_mq_is_sbitmap_shared(set->flags))
+		blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx);
+
+	set->tags[hctx_idx] = NULL;
+}
+
 static void blk_mq_map_swqueue(struct request_queue *q)
 {
 	unsigned int i, j, hctx_idx;
@@ -2976,10 +2991,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 			 * fallback in case of a new remap fails
 			 * allocation
 			 */
-			if (i && set->tags[i]) {
-				blk_mq_free_map_and_rqs(set, set->tags[i], i);
-				set->tags[i] = NULL;
-			}
+			if (i)
+				__blk_mq_free_map_and_rqs(set, i);
 
 			hctx->tags = NULL;
 			continue;
@@ -3275,8 +3288,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx = hctxs[j];
 
 		if (hctx) {
-			blk_mq_free_map_and_rqs(set, set->tags[j], j);
-			set->tags[j] = NULL;
+			__blk_mq_free_map_and_rqs(set, j);
 			blk_mq_exit_hctx(q, set, hctx, j);
 			hctxs[j] = NULL;
 		}
@@ -3363,6 +3375,14 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
 
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		set->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
+						BLK_MQ_NO_HCTX_IDX,
+						set->queue_depth);
+		if (!set->shared_sbitmap_tags)
+			return -ENOMEM;
+	}
+
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		if (!__blk_mq_alloc_map_and_rqs(set, i))
 			goto out_unwind;
@@ -3372,9 +3392,12 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 	return 0;
 
 out_unwind:
-	while (--i >= 0) {
-		blk_mq_free_map_and_rqs(set, set->tags[i], i);
-		set->tags[i] = NULL;
+	while (--i >= 0)
+		__blk_mq_free_map_and_rqs(set, i);
+
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
+					BLK_MQ_NO_HCTX_IDX);
 	}
 
 	return -ENOMEM;
@@ -3555,25 +3578,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (ret)
 		goto out_free_mq_map;
 
-	if (blk_mq_is_sbitmap_shared(set->flags)) {
-		atomic_set(&set->active_queues_shared_sbitmap, 0);
-
-		if (blk_mq_init_shared_sbitmap(set)) {
-			ret = -ENOMEM;
-			goto out_free_mq_rq_maps;
-		}
-	}
-
 	mutex_init(&set->tag_list_lock);
 	INIT_LIST_HEAD(&set->tag_list);
 
 	return 0;
 
-out_free_mq_rq_maps:
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		blk_mq_free_map_and_rqs(set, set->tags[i], i);
-		set->tags[i] = NULL;
-	}
 out_free_mq_map:
 	for (i = 0; i < set->nr_maps; i++) {
 		kfree(set->map[i].mq_map);
@@ -3605,13 +3614,13 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
 	int i, j;
 
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		blk_mq_free_map_and_rqs(set, set->tags[i], i);
-		set->tags[i] = NULL;
-	}
+	for (i = 0; i < set->nr_hw_queues; i++)
+		__blk_mq_free_map_and_rqs(set, i);
 
-	if (blk_mq_is_sbitmap_shared(set->flags))
-		blk_mq_exit_shared_sbitmap(set);
+	if (blk_mq_is_sbitmap_shared(set->flags)) {
+		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
+					BLK_MQ_NO_HCTX_IDX);
+	}
 
 	for (j = 0; j < set->nr_maps; j++) {
 		kfree(set->map[j].mq_map);
@@ -3649,12 +3658,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 		if (hctx->sched_tags) {
 			ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
 						      nr, true);
-			if (blk_mq_is_sbitmap_shared(set->flags)) {
-				hctx->sched_tags->bitmap_tags =
-					&q->sched_bitmap_tags;
-				hctx->sched_tags->breserved_tags =
-					&q->sched_breserved_tags;
-			}
 		} else {
 			ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
 						      false);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index bcb0ca89d37a..8824ae03215a 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -54,7 +54,7 @@ void blk_mq_put_rq_ref(struct request *rq);
  */
 void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 		     unsigned int hctx_idx);
-void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags);
+void blk_mq_free_rq_map(struct blk_mq_tags *tags);
 struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 				unsigned int hctx_idx, unsigned int depth);
 void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
@@ -330,17 +330,16 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 
 	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
-		struct blk_mq_tag_set *set = q->tag_set;
 
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return true;
-		users = atomic_read(&set->active_queues_shared_sbitmap);
 	} else {
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return true;
-		users = atomic_read(&hctx->tags->active_queues);
 	}
 
+	users = atomic_read(&hctx->tags->active_queues);
+
 	if (!users)
 		return true;
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 31cc41dfef6b..faa20a19bfcc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -440,13 +440,11 @@ enum hctx_type {
  * @flags:	   Zero or more BLK_MQ_F_* flags.
  * @driver_data:   Pointer to data owned by the block driver that created this
  *		   tag set.
- * @active_queues_shared_sbitmap:
- * 		   number of active request queues per tag set.
- * @__bitmap_tags: A shared tags sbitmap, used over all hctx's
- * @__breserved_tags:
- *		   A shared reserved tags sbitmap, used over all hctx's
  * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
  *		   elements.
+ * @shared_sbitmap_tags:
+ *		   Shared sbitmap set of tags. Has @nr_hw_queues elements. If
+ *		   set, shared by all @tags.
  * @tag_list_lock: Serializes tag_list accesses.
  * @tag_list:	   List of the request queues that use this tag set. See also
  *		   request_queue.tag_set_list.
@@ -463,12 +461,11 @@ struct blk_mq_tag_set {
 	unsigned int		timeout;
 	unsigned int		flags;
 	void			*driver_data;
-	atomic_t		active_queues_shared_sbitmap;
 
-	struct sbitmap_queue	__bitmap_tags;
-	struct sbitmap_queue	__breserved_tags;
 	struct blk_mq_tags	**tags;
 
+	struct blk_mq_tags	*shared_sbitmap_tags;
+
 	struct mutex		tag_list_lock;
 	struct list_head	tag_list;
 };
@@ -640,6 +637,8 @@ enum {
 	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
 		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
 
+#define BLK_MQ_NO_HCTX_IDX	(-1U)
+
 struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
 		struct lock_class_key *lkclass);
 #define blk_mq_alloc_disk(set, queuedata)				\
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0e960d74615e..cf92c13eb80e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -238,8 +238,7 @@ struct request_queue {
 
 	atomic_t		nr_active_requests_shared_sbitmap;
 
-	struct sbitmap_queue	sched_bitmap_tags;
-	struct sbitmap_queue	sched_breserved_tags;
+	struct blk_mq_tags	*shared_sbitmap_tags;
 
 	struct list_head	icq_list;
 #ifdef CONFIG_BLK_CGROUP
-- 
cgit v1.2.3


From 079a2e3e862548087041a1873bbffceb41a72a33 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 5 Oct 2021 18:23:39 +0800
Subject: blk-mq: Change shared sbitmap naming to shared tags

Now that shared sbitmap support really means shared tags, rename symbols
to match that.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1633429419-228500-15-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  2 +-
 block/blk-mq-sched.c   | 32 ++++++++++++++++----------------
 block/blk-mq-tag.c     | 18 +++++++++---------
 block/blk-mq-tag.h     |  4 ++--
 block/blk-mq.c         | 32 ++++++++++++++++----------------
 block/blk-mq.h         | 16 ++++++++--------
 block/elevator.c       |  2 +-
 include/linux/blk-mq.h |  8 ++++----
 include/linux/blkdev.h |  4 ++--
 9 files changed, 59 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 0ed17547bfed..b08aed26e6bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -554,7 +554,7 @@ struct request_queue *blk_alloc_queue(int node_id)
 
 	q->node = node_id;
 
-	atomic_set(&q->nr_active_requests_shared_sbitmap, 0);
+	atomic_set(&q->nr_active_requests_shared_tags, 0);
 
 	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
 	INIT_WORK(&q->timeout_work, blk_timeout_work);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 428da4949d80..27312da7d638 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -519,8 +519,8 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
 					  struct blk_mq_hw_ctx *hctx,
 					  unsigned int hctx_idx)
 {
-	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
-		hctx->sched_tags = q->shared_sbitmap_tags;
+	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
+		hctx->sched_tags = q->sched_shared_tags;
 		return 0;
 	}
 
@@ -532,10 +532,10 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
 	return 0;
 }
 
-static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
+static void blk_mq_exit_sched_shared_tags(struct request_queue *queue)
 {
-	blk_mq_free_rq_map(queue->shared_sbitmap_tags);
-	queue->shared_sbitmap_tags = NULL;
+	blk_mq_free_rq_map(queue->sched_shared_tags);
+	queue->sched_shared_tags = NULL;
 }
 
 /* called in queue's release handler, tagset has gone away */
@@ -546,17 +546,17 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->sched_tags) {
-			if (!blk_mq_is_sbitmap_shared(q->tag_set->flags))
+			if (!blk_mq_is_shared_tags(q->tag_set->flags))
 				blk_mq_free_rq_map(hctx->sched_tags);
 			hctx->sched_tags = NULL;
 		}
 	}
 
-	if (blk_mq_is_sbitmap_shared(flags))
-		blk_mq_exit_sched_shared_sbitmap(q);
+	if (blk_mq_is_shared_tags(flags))
+		blk_mq_exit_sched_shared_tags(q);
 }
 
-static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
+static int blk_mq_init_sched_shared_tags(struct request_queue *queue)
 {
 	struct blk_mq_tag_set *set = queue->tag_set;
 
@@ -564,13 +564,13 @@ static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
 	 * Set initial depth at max so that we don't need to reallocate for
 	 * updating nr_requests.
 	 */
-	queue->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
+	queue->sched_shared_tags = blk_mq_alloc_map_and_rqs(set,
 						BLK_MQ_NO_HCTX_IDX,
 						MAX_SCHED_RQ);
-	if (!queue->shared_sbitmap_tags)
+	if (!queue->sched_shared_tags)
 		return -ENOMEM;
 
-	blk_mq_tag_update_sched_shared_sbitmap(queue);
+	blk_mq_tag_update_sched_shared_tags(queue);
 
 	return 0;
 }
@@ -596,8 +596,8 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
 				   BLKDEV_DEFAULT_RQ);
 
-	if (blk_mq_is_sbitmap_shared(flags)) {
-		ret = blk_mq_init_sched_shared_sbitmap(q);
+	if (blk_mq_is_shared_tags(flags)) {
+		ret = blk_mq_init_sched_shared_tags(q);
 		if (ret)
 			return ret;
 	}
@@ -647,8 +647,8 @@ void blk_mq_sched_free_rqs(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
-		blk_mq_free_rqs(q->tag_set, q->shared_sbitmap_tags,
+	if (blk_mq_is_shared_tags(q->tag_set->flags)) {
+		blk_mq_free_rqs(q->tag_set, q->sched_shared_tags,
 				BLK_MQ_NO_HCTX_IDX);
 	} else {
 		queue_for_each_hw_ctx(q, hctx, i) {
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 211068a5f676..72a2724a4eee 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -24,7 +24,7 @@
  */
 bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
-	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
 
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
@@ -57,19 +57,19 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
 
-	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
 
 		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
 					&q->queue_flags))
 			return;
-		atomic_dec(&tags->active_queues);
 	} else {
 		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return;
-		atomic_dec(&tags->active_queues);
 	}
 
+	atomic_dec(&tags->active_queues);
+
 	blk_mq_tag_wakeup_all(tags, false);
 }
 
@@ -557,7 +557,7 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 		 * Only the sbitmap needs resizing since we allocated the max
 		 * initially.
 		 */
-		if (blk_mq_is_sbitmap_shared(set->flags))
+		if (blk_mq_is_shared_tags(set->flags))
 			return 0;
 
 		new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth);
@@ -578,16 +578,16 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 	return 0;
 }
 
-void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size)
+void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set, unsigned int size)
 {
-	struct blk_mq_tags *tags = set->shared_sbitmap_tags;
+	struct blk_mq_tags *tags = set->shared_tags;
 
 	sbitmap_queue_resize(&tags->bitmap_tags, size - set->reserved_tags);
 }
 
-void blk_mq_tag_update_sched_shared_sbitmap(struct request_queue *q)
+void blk_mq_tag_update_sched_shared_tags(struct request_queue *q)
 {
-	sbitmap_queue_resize(&q->shared_sbitmap_tags->bitmap_tags,
+	sbitmap_queue_resize(&q->sched_shared_tags->bitmap_tags,
 			     q->nr_requests - q->tag_set->reserved_tags);
 }
 
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 1052d69147ba..d8ce89fa1686 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -43,9 +43,9 @@ extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_tags **tags,
 					unsigned int depth, bool can_grow);
-extern void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set,
+extern void blk_mq_tag_resize_shared_tags(struct blk_mq_tag_set *set,
 					     unsigned int size);
-extern void blk_mq_tag_update_sched_shared_sbitmap(struct request_queue *q);
+extern void blk_mq_tag_update_sched_shared_tags(struct request_queue *q);
 
 extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e026e5ebe2c9..f7428e11b109 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2235,7 +2235,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		blk_insert_flush(rq);
 		blk_mq_run_hw_queue(data.hctx, true);
 	} else if (plug && (q->nr_hw_queues == 1 ||
-		   blk_mq_is_sbitmap_shared(rq->mq_hctx->flags) ||
+		   blk_mq_is_shared_tags(rq->mq_hctx->flags) ||
 		   q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
 		/*
 		 * Use plugging if we have a ->commit_rqs() hook as well, as
@@ -2353,8 +2353,8 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 	struct blk_mq_tags *drv_tags;
 	struct page *page;
 
-	if (blk_mq_is_sbitmap_shared(set->flags))
-		drv_tags = set->shared_sbitmap_tags;
+	if (blk_mq_is_shared_tags(set->flags))
+		drv_tags = set->shared_tags;
 	else
 		drv_tags = set->tags[hctx_idx];
 
@@ -2883,8 +2883,8 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 static bool __blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
 				       int hctx_idx)
 {
-	if (blk_mq_is_sbitmap_shared(set->flags)) {
-		set->tags[hctx_idx] = set->shared_sbitmap_tags;
+	if (blk_mq_is_shared_tags(set->flags)) {
+		set->tags[hctx_idx] = set->shared_tags;
 
 		return true;
 	}
@@ -2908,7 +2908,7 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
 static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
 				      unsigned int hctx_idx)
 {
-	if (!blk_mq_is_sbitmap_shared(set->flags))
+	if (!blk_mq_is_shared_tags(set->flags))
 		blk_mq_free_map_and_rqs(set, set->tags[hctx_idx], hctx_idx);
 
 	set->tags[hctx_idx] = NULL;
@@ -3375,11 +3375,11 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
 {
 	int i;
 
-	if (blk_mq_is_sbitmap_shared(set->flags)) {
-		set->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
+	if (blk_mq_is_shared_tags(set->flags)) {
+		set->shared_tags = blk_mq_alloc_map_and_rqs(set,
 						BLK_MQ_NO_HCTX_IDX,
 						set->queue_depth);
-		if (!set->shared_sbitmap_tags)
+		if (!set->shared_tags)
 			return -ENOMEM;
 	}
 
@@ -3395,8 +3395,8 @@ out_unwind:
 	while (--i >= 0)
 		__blk_mq_free_map_and_rqs(set, i);
 
-	if (blk_mq_is_sbitmap_shared(set->flags)) {
-		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
+	if (blk_mq_is_shared_tags(set->flags)) {
+		blk_mq_free_map_and_rqs(set, set->shared_tags,
 					BLK_MQ_NO_HCTX_IDX);
 	}
 
@@ -3617,8 +3617,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 	for (i = 0; i < set->nr_hw_queues; i++)
 		__blk_mq_free_map_and_rqs(set, i);
 
-	if (blk_mq_is_sbitmap_shared(set->flags)) {
-		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
+	if (blk_mq_is_shared_tags(set->flags)) {
+		blk_mq_free_map_and_rqs(set, set->shared_tags,
 					BLK_MQ_NO_HCTX_IDX);
 	}
 
@@ -3669,11 +3669,11 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 	}
 	if (!ret) {
 		q->nr_requests = nr;
-		if (blk_mq_is_sbitmap_shared(set->flags)) {
+		if (blk_mq_is_shared_tags(set->flags)) {
 			if (q->elevator)
-				blk_mq_tag_update_sched_shared_sbitmap(q);
+				blk_mq_tag_update_sched_shared_tags(q);
 			else
-				blk_mq_tag_resize_shared_sbitmap(set, nr);
+				blk_mq_tag_resize_shared_tags(set, nr);
 		}
 	}
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 8824ae03215a..171e8cdcff54 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -157,7 +157,7 @@ struct blk_mq_alloc_data {
 	struct blk_mq_hw_ctx *hctx;
 };
 
-static inline bool blk_mq_is_sbitmap_shared(unsigned int flags)
+static inline bool blk_mq_is_shared_tags(unsigned int flags)
 {
 	return flags & BLK_MQ_F_TAG_HCTX_SHARED;
 }
@@ -217,24 +217,24 @@ static inline int blk_mq_get_rq_budget_token(struct request *rq)
 
 static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
 {
-	if (blk_mq_is_sbitmap_shared(hctx->flags))
-		atomic_inc(&hctx->queue->nr_active_requests_shared_sbitmap);
+	if (blk_mq_is_shared_tags(hctx->flags))
+		atomic_inc(&hctx->queue->nr_active_requests_shared_tags);
 	else
 		atomic_inc(&hctx->nr_active);
 }
 
 static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
 {
-	if (blk_mq_is_sbitmap_shared(hctx->flags))
-		atomic_dec(&hctx->queue->nr_active_requests_shared_sbitmap);
+	if (blk_mq_is_shared_tags(hctx->flags))
+		atomic_dec(&hctx->queue->nr_active_requests_shared_tags);
 	else
 		atomic_dec(&hctx->nr_active);
 }
 
 static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
 {
-	if (blk_mq_is_sbitmap_shared(hctx->flags))
-		return atomic_read(&hctx->queue->nr_active_requests_shared_sbitmap);
+	if (blk_mq_is_shared_tags(hctx->flags))
+		return atomic_read(&hctx->queue->nr_active_requests_shared_tags);
 	return atomic_read(&hctx->nr_active);
 }
 static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
@@ -328,7 +328,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 	if (bt->sb.depth == 1)
 		return true;
 
-	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
 
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
diff --git a/block/elevator.c b/block/elevator.c
index 57be09cd7f6d..1f39f6e8ebb9 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -637,7 +637,7 @@ static struct elevator_type *elevator_get_default(struct request_queue *q)
 		return NULL;
 
 	if (q->nr_hw_queues != 1 &&
-			!blk_mq_is_sbitmap_shared(q->tag_set->flags))
+	    !blk_mq_is_shared_tags(q->tag_set->flags))
 		return NULL;
 
 	return elevator_get(q, "mq-deadline", false);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index faa20a19bfcc..75d75657df21 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -442,9 +442,9 @@ enum hctx_type {
  *		   tag set.
  * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
  *		   elements.
- * @shared_sbitmap_tags:
- *		   Shared sbitmap set of tags. Has @nr_hw_queues elements. If
- *		   set, shared by all @tags.
+ * @shared_tags:
+ *		   Shared set of tags. Has @nr_hw_queues elements. If set,
+ *		   shared by all @tags.
  * @tag_list_lock: Serializes tag_list accesses.
  * @tag_list:	   List of the request queues that use this tag set. See also
  *		   request_queue.tag_set_list.
@@ -464,7 +464,7 @@ struct blk_mq_tag_set {
 
 	struct blk_mq_tags	**tags;
 
-	struct blk_mq_tags	*shared_sbitmap_tags;
+	struct blk_mq_tags	*shared_tags;
 
 	struct mutex		tag_list_lock;
 	struct list_head	tag_list;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cf92c13eb80e..b19172db7eef 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -236,9 +236,9 @@ struct request_queue {
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
 
-	atomic_t		nr_active_requests_shared_sbitmap;
+	atomic_t		nr_active_requests_shared_tags;
 
-	struct blk_mq_tags	*shared_sbitmap_tags;
+	struct blk_mq_tags	*sched_shared_tags;
 
 	struct list_head	icq_list;
 #ifdef CONFIG_BLK_CGROUP
-- 
cgit v1.2.3


From ba0ffdd8ce48ad7f7e85191cd29f9674caca3745 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 6 Oct 2021 12:01:07 -0600
Subject: block: bump max plugged deferred size from 16 to 32

Particularly for NVMe with efficient deferred submission for many
requests, there are nice benefits to be seen by bumping the default max
plug count from 16 to 32. This is especially true for virtualized setups,
where the submit part is more expensive. But can be noticed even on
native hardware.

Reduce the multiple queue factor from 4 to 2, since we're changing the
default size.

While changing it, move the defines into the block layer private header.
These aren't values that anyone outside of the block layer uses, or
should use.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 4 ++--
 block/blk.h            | 6 ++++++
 include/linux/blkdev.h | 2 --
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8e41d88fcb8a..d861a969b2e0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2152,14 +2152,14 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 }
 
 /*
- * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
  * queues. This is important for md arrays to benefit from merging
  * requests.
  */
 static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
 {
 	if (plug->multiple_queues)
-		return BLK_MAX_REQUEST_COUNT * 4;
+		return BLK_MAX_REQUEST_COUNT * 2;
 	return BLK_MAX_REQUEST_COUNT;
 }
 
diff --git a/block/blk.h b/block/blk.h
index 7aaccbc26db4..019c50a140ba 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -224,6 +224,12 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 void blk_account_io_start(struct request *req);
 void blk_account_io_done(struct request *req, u64 now);
 
+/*
+ * Plug flush limits
+ */
+#define BLK_MAX_REQUEST_COUNT	32
+#define BLK_PLUG_FLUSH_SIZE	(128 * 1024)
+
 /*
  * Internal elevator interface
  */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b19172db7eef..472b4ab007c6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -727,8 +727,6 @@ struct blk_plug {
 	bool multiple_queues;
 	bool nowait;
 };
-#define BLK_MAX_REQUEST_COUNT 16
-#define BLK_PLUG_FLUSH_SIZE (128 * 1024)
 
 struct blk_plug_cb;
 typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool);
-- 
cgit v1.2.3


From 47c122e35d7e43b14129ceb9ed3a7e67599978fa Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 6 Oct 2021 06:34:11 -0600
Subject: block: pre-allocate requests if plug is started and is a batch

The caller typically has a good (or even exact) idea of how many requests
it needs to submit. We can make the request/tag allocation a lot more
efficient if we just allocate N requests/tags upfront when we queue the
first bio from the batch.

Provide a new plug start helper that allows the caller to specify how many
IOs are expected. This sets plug->nr_ios, and we can use that for smarter
request allocation. The plug provides a holding spot for requests, and
request allocation will check it before calling into the normal request
allocation path.

The blk_finish_plug() is called, check if there are unused requests and
free them. This should not happen in normal operations. The exception is
if we get merging, then we may be left with requests that need freeing
when done.

This raises the per-core performance on my setup from ~5.8M to ~6.1M
IOPS.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 47 +++++++++++++++++++--------------
 block/blk-mq.c         | 70 +++++++++++++++++++++++++++++++++++++++++---------
 block/blk-mq.h         |  5 ++++
 include/linux/blk-mq.h |  5 +++-
 include/linux/blkdev.h | 15 ++++++++++-
 5 files changed, 109 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3b5ee3f7cc1e..e25a1c3f8b76 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1632,6 +1632,31 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
 
+void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
+{
+	struct task_struct *tsk = current;
+
+	/*
+	 * If this is a nested plug, don't actually assign it.
+	 */
+	if (tsk->plug)
+		return;
+
+	INIT_LIST_HEAD(&plug->mq_list);
+	plug->cached_rq = NULL;
+	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
+	plug->rq_count = 0;
+	plug->multiple_queues = false;
+	plug->nowait = false;
+	INIT_LIST_HEAD(&plug->cb_list);
+
+	/*
+	 * Store ordering should not be needed here, since a potential
+	 * preempt will imply a full memory barrier
+	 */
+	tsk->plug = plug;
+}
+
 /**
  * blk_start_plug - initialize blk_plug and track it inside the task_struct
  * @plug:	The &struct blk_plug that needs to be initialized
@@ -1657,25 +1682,7 @@ EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
  */
 void blk_start_plug(struct blk_plug *plug)
 {
-	struct task_struct *tsk = current;
-
-	/*
-	 * If this is a nested plug, don't actually assign it.
-	 */
-	if (tsk->plug)
-		return;
-
-	INIT_LIST_HEAD(&plug->mq_list);
-	INIT_LIST_HEAD(&plug->cb_list);
-	plug->rq_count = 0;
-	plug->multiple_queues = false;
-	plug->nowait = false;
-
-	/*
-	 * Store ordering should not be needed here, since a potential
-	 * preempt will imply a full memory barrier
-	 */
-	tsk->plug = plug;
+	blk_start_plug_nr_ios(plug, 1);
 }
 EXPORT_SYMBOL(blk_start_plug);
 
@@ -1727,6 +1734,8 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 
 	if (!list_empty(&plug->mq_list))
 		blk_mq_flush_plug_list(plug, from_schedule);
+	if (unlikely(!from_schedule && plug->cached_rq))
+		blk_mq_free_plug_rqs(plug);
 }
 
 /**
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d861a969b2e0..d9f14d3c2b8c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -359,6 +359,7 @@ static struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data)
 	struct request_queue *q = data->q;
 	struct elevator_queue *e = q->elevator;
 	u64 alloc_time_ns = 0;
+	struct request *rq;
 	unsigned int tag;
 
 	/* alloc_time includes depth and tag waits */
@@ -392,10 +393,21 @@ retry:
 	 * case just retry the hctx assignment and tag allocation as CPU hotplug
 	 * should have migrated us to an online CPU by now.
 	 */
-	tag = blk_mq_get_tag(data);
-	if (tag == BLK_MQ_NO_TAG) {
+	do {
+		tag = blk_mq_get_tag(data);
+		if (tag != BLK_MQ_NO_TAG) {
+			rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+			if (!--data->nr_tags)
+				return rq;
+			if (e || data->hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+				return rq;
+			rq->rq_next = *data->cached_rq;
+			*data->cached_rq = rq;
+			data->flags |= BLK_MQ_REQ_NOWAIT;
+			continue;
+		}
 		if (data->flags & BLK_MQ_REQ_NOWAIT)
-			return NULL;
+			break;
 
 		/*
 		 * Give up the CPU and sleep for a random short time to ensure
@@ -404,8 +416,15 @@ retry:
 		 */
 		msleep(3);
 		goto retry;
+	} while (1);
+
+	if (data->cached_rq) {
+		rq = *data->cached_rq;
+		*data->cached_rq = rq->rq_next;
+		return rq;
 	}
-	return blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
+
+	return NULL;
 }
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
@@ -415,6 +434,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 		.q		= q,
 		.flags		= flags,
 		.cmd_flags	= op,
+		.nr_tags	= 1,
 	};
 	struct request *rq;
 	int ret;
@@ -443,6 +463,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 		.q		= q,
 		.flags		= flags,
 		.cmd_flags	= op,
+		.nr_tags	= 1,
 	};
 	u64 alloc_time_ns = 0;
 	unsigned int cpu;
@@ -544,6 +565,18 @@ void blk_mq_free_request(struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
+void blk_mq_free_plug_rqs(struct blk_plug *plug)
+{
+	while (plug->cached_rq) {
+		struct request *rq;
+
+		rq = plug->cached_rq;
+		plug->cached_rq = rq->rq_next;
+		percpu_ref_get(&rq->q->q_usage_counter);
+		blk_mq_free_request(rq);
+	}
+}
+
 inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 {
 	u64 now = 0;
@@ -2185,6 +2218,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 	const int is_flush_fua = op_is_flush(bio->bi_opf);
 	struct blk_mq_alloc_data data = {
 		.q		= q,
+		.nr_tags	= 1,
 	};
 	struct request *rq;
 	struct blk_plug *plug;
@@ -2211,13 +2245,26 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 
 	hipri = bio->bi_opf & REQ_HIPRI;
 
-	data.cmd_flags = bio->bi_opf;
-	rq = __blk_mq_alloc_request(&data);
-	if (unlikely(!rq)) {
-		rq_qos_cleanup(q, bio);
-		if (bio->bi_opf & REQ_NOWAIT)
-			bio_wouldblock_error(bio);
-		goto queue_exit;
+	plug = blk_mq_plug(q, bio);
+	if (plug && plug->cached_rq) {
+		rq = plug->cached_rq;
+		plug->cached_rq = rq->rq_next;
+		INIT_LIST_HEAD(&rq->queuelist);
+		data.hctx = rq->mq_hctx;
+	} else {
+		data.cmd_flags = bio->bi_opf;
+		if (plug) {
+			data.nr_tags = plug->nr_ios;
+			plug->nr_ios = 1;
+			data.cached_rq = &plug->cached_rq;
+		}
+		rq = __blk_mq_alloc_request(&data);
+		if (unlikely(!rq)) {
+			rq_qos_cleanup(q, bio);
+			if (bio->bi_opf & REQ_NOWAIT)
+				bio_wouldblock_error(bio);
+			goto queue_exit;
+		}
 	}
 
 	trace_block_getrq(bio);
@@ -2236,7 +2283,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		return BLK_QC_T_NONE;
 	}
 
-	plug = blk_mq_plug(q, bio);
 	if (unlikely(is_flush_fua)) {
 		/* Bypass scheduler for flush requests */
 		blk_insert_flush(rq);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 171e8cdcff54..5da970bb8865 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -125,6 +125,7 @@ extern int __blk_mq_register_dev(struct device *dev, struct request_queue *q);
 extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
+void blk_mq_free_plug_rqs(struct blk_plug *plug);
 
 void blk_mq_release(struct request_queue *q);
 
@@ -152,6 +153,10 @@ struct blk_mq_alloc_data {
 	unsigned int shallow_depth;
 	unsigned int cmd_flags;
 
+	/* allocate multiple requests/tags in one go */
+	unsigned int nr_tags;
+	struct request **cached_rq;
+
 	/* input & output parameter */
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_hw_ctx *hctx;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 75d75657df21..0e941f217578 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -90,7 +90,10 @@ struct request {
 	struct bio *bio;
 	struct bio *biotail;
 
-	struct list_head queuelist;
+	union {
+		struct list_head queuelist;
+		struct request *rq_next;
+	};
 
 	/*
 	 * The hash is used inside the scheduler, and killed once the
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 472b4ab007c6..17705c970d7e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -722,10 +722,17 @@ extern void blk_set_queue_dying(struct request_queue *);
  */
 struct blk_plug {
 	struct list_head mq_list; /* blk-mq requests */
-	struct list_head cb_list; /* md requires an unplug callback */
+
+	/* if ios_left is > 1, we can batch tag/rq allocations */
+	struct request *cached_rq;
+	unsigned short nr_ios;
+
 	unsigned short rq_count;
+
 	bool multiple_queues;
 	bool nowait;
+
+	struct list_head cb_list; /* md requires an unplug callback */
 };
 
 struct blk_plug_cb;
@@ -738,6 +745,7 @@ struct blk_plug_cb {
 extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
 					     void *data, int size);
 extern void blk_start_plug(struct blk_plug *);
+extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
 extern void blk_finish_plug(struct blk_plug *);
 extern void blk_flush_plug_list(struct blk_plug *, bool);
 
@@ -772,6 +780,11 @@ long nr_blockdev_pages(void);
 struct blk_plug {
 };
 
+static inline void blk_start_plug_nr_ios(struct blk_plug *plug,
+					 unsigned short nr_ios)
+{
+}
+
 static inline void blk_start_plug(struct blk_plug *plug)
 {
 }
-- 
cgit v1.2.3


From 84b8514b46b417e299746b1297d3d0899e8539f3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 12:44:49 +0200
Subject: block: move the *blkdev_ioctl declarations out of blkdev.h

These are only used inside of block/.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012104450.659013-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h           | 4 ++++
 include/linux/genhd.h | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index 019c50a140ba..eadb2a640a69 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -405,6 +405,10 @@ static inline void bio_clear_hipri(struct bio *bio)
 	bio->bi_opf &= ~REQ_HIPRI;
 }
 
+int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
+		unsigned long arg);
+long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
+
 extern const struct address_space_operations def_blk_aops;
 
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0b48a0cf4262..cd4038fd5743 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -281,10 +281,6 @@ bool bdev_check_media_change(struct block_device *bdev);
 int __invalidate_device(struct block_device *bdev, bool kill_dirty);
 void set_capacity(struct gendisk *disk, sector_t size);
 
-/* for drivers/char/raw.c: */
-int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
-long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-
 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
 int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
 void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
-- 
cgit v1.2.3


From 9e8c0d0d4d21d2c2c60132e2c18a73d08a230b42 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:17:57 +0200
Subject: block: remove BIO_BUG_ON

BIO_DEBUG is always defined, so just switch the two instances to use
BUG_ON directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 4 ++--
 include/linux/bio.h | 8 --------
 2 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index a6fb6a0b4295..35b875563c8b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -156,7 +156,7 @@ out:
 
 void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
 {
-	BIO_BUG_ON(nr_vecs > BIO_MAX_VECS);
+	BUG_ON(nr_vecs > BIO_MAX_VECS);
 
 	if (nr_vecs == BIO_MAX_VECS)
 		mempool_free(bv, pool);
@@ -677,7 +677,7 @@ static void bio_alloc_cache_destroy(struct bio_set *bs)
 void bio_put(struct bio *bio)
 {
 	if (unlikely(bio_flagged(bio, BIO_REFFED))) {
-		BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
+		BUG_ON(!atomic_read(&bio->__bi_cnt));
 		if (!atomic_dec_and_test(&bio->__bi_cnt))
 			return;
 	}
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 00952e92eae1..65a356fa7110 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -11,14 +11,6 @@
 #include <linux/blk_types.h>
 #include <linux/uio.h>
 
-#define BIO_DEBUG
-
-#ifdef BIO_DEBUG
-#define BIO_BUG_ON	BUG_ON
-#else
-#define BIO_BUG_ON
-#endif
-
 #define BIO_MAX_VECS		256U
 
 static inline unsigned int bio_max_segs(unsigned int nr_segs)
-- 
cgit v1.2.3


From 11d9cab1ca6ed08747c6c5a274c6a8e8307aefbc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:17:58 +0200
Subject: block: don't include <linux/ioprio.h> in <linux/bio.h>

bio.h doesn't need any of the definitions from ioprio.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 65a356fa7110..ae94794d07c9 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -6,7 +6,6 @@
 #define __LINUX_BIO_H
 
 #include <linux/mempool.h>
-#include <linux/ioprio.h>
 /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
 #include <linux/blk_types.h>
 #include <linux/uio.h>
-- 
cgit v1.2.3


From 8addffd657a956944c1b8a74a1c9aabcfc5b4530 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:17:59 +0200
Subject: block: move bio_mergeable out of bio.h

bio_mergeable is only needed by I/O schedulers, so move it to
blk-mq-sched.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.h | 5 +++++
 include/linux/bio.h  | 8 --------
 2 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index c97b816c3800..fe252278ed9a 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -37,6 +37,11 @@ static inline void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 		__blk_mq_sched_restart(hctx);
 }
 
+static inline bool bio_mergeable(struct bio *bio)
+{
+	return !(bio->bi_opf & REQ_NOMERGE_FLAGS);
+}
+
 static inline bool
 blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index ae94794d07c9..ec255f282994 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -69,14 +69,6 @@ static inline bool bio_no_advance_iter(const struct bio *bio)
 	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
 }
 
-static inline bool bio_mergeable(struct bio *bio)
-{
-	if (bio->bi_opf & REQ_NOMERGE_FLAGS)
-		return false;
-
-	return true;
-}
-
 static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
 	if (bio_has_data(bio))
-- 
cgit v1.2.3


From b6559d8f9fdd7f0e139161cffea2645bd8d084c6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:18:00 +0200
Subject: block: fold bio_cur_bytes into blk_rq_cur_bytes

Fold bio_cur_bytes into the only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h    | 8 --------
 include/linux/blk-mq.h | 6 +++++-
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index ec255f282994..cffd5eba401e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -69,14 +69,6 @@ static inline bool bio_no_advance_iter(const struct bio *bio)
 	       bio_op(bio) == REQ_OP_WRITE_ZEROES;
 }
 
-static inline unsigned int bio_cur_bytes(struct bio *bio)
-{
-	if (bio_has_data(bio))
-		return bio_iovec(bio).bv_len;
-	else /* dataless requests such as discard */
-		return bio->bi_iter.bi_size;
-}
-
 static inline void *bio_data(struct bio *bio)
 {
 	if (bio_has_data(bio))
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 0e941f217578..2219e9277118 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -927,7 +927,11 @@ static inline unsigned int blk_rq_bytes(const struct request *rq)
 
 static inline int blk_rq_cur_bytes(const struct request *rq)
 {
-	return rq->bio ? bio_cur_bytes(rq->bio) : 0;
+	if (!rq->bio)
+		return 0;
+	if (!bio_has_data(rq->bio))	/* dataless requests such as discard */
+		return rq->bio->bi_iter.bi_size;
+	return bio_iovec(rq->bio).bv_len;
 }
 
 unsigned int blk_rq_err_bytes(const struct request *rq);
-- 
cgit v1.2.3


From 9a6083becbe113ed1e28059ce659dc8ae71b33c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:18:01 +0200
Subject: block: move bio_full out of bio.h

bio_full is only used in bio.c, so move it there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 17 +++++++++++++++++
 include/linux/bio.h | 19 -------------------
 2 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 35b875563c8b..7e2899071de2 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -772,6 +772,23 @@ const char *bio_devname(struct bio *bio, char *buf)
 }
 EXPORT_SYMBOL(bio_devname);
 
+/**
+ * bio_full - check if the bio is full
+ * @bio:	bio to check
+ * @len:	length of one segment to be added
+ *
+ * Return true if @bio is full and one segment with @len bytes can't be
+ * added to the bio, otherwise return false
+ */
+static inline bool bio_full(struct bio *bio, unsigned len)
+{
+	if (bio->bi_vcnt >= bio->bi_max_vecs)
+		return true;
+	if (bio->bi_iter.bi_size > UINT_MAX - len)
+		return true;
+	return false;
+}
+
 static inline bool page_is_mergeable(const struct bio_vec *bv,
 		struct page *page, unsigned int len, unsigned int off,
 		bool *same_page)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index cffd5eba401e..2ffc7c768091 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -77,25 +77,6 @@ static inline void *bio_data(struct bio *bio)
 	return NULL;
 }
 
-/**
- * bio_full - check if the bio is full
- * @bio:	bio to check
- * @len:	length of one segment to be added
- *
- * Return true if @bio is full and one segment with @len bytes can't be
- * added to the bio, otherwise return false
- */
-static inline bool bio_full(struct bio *bio, unsigned len)
-{
-	if (bio->bi_vcnt >= bio->bi_max_vecs)
-		return true;
-
-	if (bio->bi_iter.bi_size > UINT_MAX - len)
-		return true;
-
-	return false;
-}
-
 static inline bool bio_next_segment(const struct bio *bio,
 				    struct bvec_iter_all *iter)
 {
-- 
cgit v1.2.3


From 9774b39175fe3606ce2a836a47134c53b75681c9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:18:02 +0200
Subject: block: mark __bio_try_merge_page static

Mark __bio_try_merge_page static and move it up a bit to avoid the need
for a forward declaration.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 77 ++++++++++++++++++++++++++---------------------------
 include/linux/bio.h |  2 --
 2 files changed, 38 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 7e2899071de2..c89e402356a5 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -808,6 +808,44 @@ static inline bool page_is_mergeable(const struct bio_vec *bv,
 	return (bv->bv_page + bv_end / PAGE_SIZE) == (page + off / PAGE_SIZE);
 }
 
+/**
+ * __bio_try_merge_page - try appending data to an existing bvec.
+ * @bio: destination bio
+ * @page: start page to add
+ * @len: length of the data to add
+ * @off: offset of the data relative to @page
+ * @same_page: return if the segment has been merged inside the same page
+ *
+ * Try to add the data at @page + @off to the last bvec of @bio.  This is a
+ * useful optimisation for file systems with a block size smaller than the
+ * page size.
+ *
+ * Warn if (@len, @off) crosses pages in case that @same_page is true.
+ *
+ * Return %true on success or %false on failure.
+ */
+static bool __bio_try_merge_page(struct bio *bio, struct page *page,
+		unsigned int len, unsigned int off, bool *same_page)
+{
+	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
+		return false;
+
+	if (bio->bi_vcnt > 0) {
+		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
+
+		if (page_is_mergeable(bv, page, len, off, same_page)) {
+			if (bio->bi_iter.bi_size > UINT_MAX - len) {
+				*same_page = false;
+				return false;
+			}
+			bv->bv_len += len;
+			bio->bi_iter.bi_size += len;
+			return true;
+		}
+	}
+	return false;
+}
+
 /*
  * Try to merge a page into a segment, while obeying the hardware segment
  * size limit.  This is not for normal read/write bios, but for passthrough
@@ -939,45 +977,6 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL_GPL(bio_add_zone_append_page);
 
-/**
- * __bio_try_merge_page - try appending data to an existing bvec.
- * @bio: destination bio
- * @page: start page to add
- * @len: length of the data to add
- * @off: offset of the data relative to @page
- * @same_page: return if the segment has been merged inside the same page
- *
- * Try to add the data at @page + @off to the last bvec of @bio.  This is a
- * useful optimisation for file systems with a block size smaller than the
- * page size.
- *
- * Warn if (@len, @off) crosses pages in case that @same_page is true.
- *
- * Return %true on success or %false on failure.
- */
-bool __bio_try_merge_page(struct bio *bio, struct page *page,
-		unsigned int len, unsigned int off, bool *same_page)
-{
-	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
-		return false;
-
-	if (bio->bi_vcnt > 0) {
-		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
-
-		if (page_is_mergeable(bv, page, len, off, same_page)) {
-			if (bio->bi_iter.bi_size > UINT_MAX - len) {
-				*same_page = false;
-				return false;
-			}
-			bv->bv_len += len;
-			bio->bi_iter.bi_size += len;
-			return true;
-		}
-	}
-	return false;
-}
-EXPORT_SYMBOL_GPL(__bio_try_merge_page);
-
 /**
  * __bio_add_page - add page(s) to a bio in a new segment
  * @bio: destination bio
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2ffc7c768091..faa0a2fabaf0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -425,8 +425,6 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
 int bio_add_zone_append_page(struct bio *bio, struct page *page,
 			     unsigned int len, unsigned int offset);
-bool __bio_try_merge_page(struct bio *bio, struct page *page,
-		unsigned int len, unsigned int off, bool *same_page);
 void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
-- 
cgit v1.2.3


From ff18d77b5f0cf3e25aa13741eed4d788a13c04d7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:18:03 +0200
Subject: block: move bio_get_{first,last}_bvec out of bio.h

bio_get_first_bvec and bio_get_last_bvec are only used in blk-merge.c,
so move them there.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c   | 31 +++++++++++++++++++++++++++++++
 include/linux/bio.h | 31 -------------------------------
 2 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index bf7aedaad8f8..14ce19607cd8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -15,6 +15,37 @@
 #include "blk-rq-qos.h"
 #include "blk-throttle.h"
 
+static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
+}
+
+static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
+{
+	struct bvec_iter iter = bio->bi_iter;
+	int idx;
+
+	bio_get_first_bvec(bio, bv);
+	if (bv->bv_len == bio->bi_iter.bi_size)
+		return;		/* this bio only has a single bvec */
+
+	bio_advance_iter(bio, &iter, iter.bi_size);
+
+	if (!iter.bi_bvec_done)
+		idx = iter.bi_idx - 1;
+	else	/* in the middle of bvec */
+		idx = iter.bi_idx;
+
+	*bv = bio->bi_io_vec[idx];
+
+	/*
+	 * iter.bi_bvec_done records actual length of the last bvec
+	 * if this bio ends in the middle of one io vector
+	 */
+	if (iter.bi_bvec_done)
+		bv->bv_len = iter.bi_bvec_done;
+}
+
 static inline bool bio_will_gap(struct request_queue *q,
 		struct request *prev_rq, struct bio *prev, struct bio *next)
 {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index faa0a2fabaf0..4c5106f2ed57 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -221,37 +221,6 @@ static inline void bio_clear_flag(struct bio *bio, unsigned int bit)
 	bio->bi_flags &= ~(1U << bit);
 }
 
-static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
-{
-	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
-}
-
-static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
-{
-	struct bvec_iter iter = bio->bi_iter;
-	int idx;
-
-	bio_get_first_bvec(bio, bv);
-	if (bv->bv_len == bio->bi_iter.bi_size)
-		return;		/* this bio only has a single bvec */
-
-	bio_advance_iter(bio, &iter, iter.bi_size);
-
-	if (!iter.bi_bvec_done)
-		idx = iter.bi_idx - 1;
-	else	/* in the middle of bvec */
-		idx = iter.bi_idx;
-
-	*bv = bio->bi_io_vec[idx];
-
-	/*
-	 * iter.bi_bvec_done records actual length of the last bvec
-	 * if this bio ends in the middle of one io vector
-	 */
-	if (iter.bi_bvec_done)
-		bv->bv_len = iter.bi_bvec_done;
-}
-
 static inline struct bio_vec *bio_first_bvec_all(struct bio *bio)
 {
 	WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
-- 
cgit v1.2.3


From 4f7ab09a1ca0bc955635705c359ab3021b405fd0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 18:18:04 +0200
Subject: block: mark bio_truncate static

bio_truncate is only used in bio.c, so mark it static.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211012161804.991559-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 2 +-
 include/linux/bio.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index c89e402356a5..d5120451c36a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -546,7 +546,7 @@ EXPORT_SYMBOL(zero_fill_bio);
  *   REQ_OP_READ, zero the truncated part. This function should only
  *   be used for handling corner cases, such as bio eod.
  */
-void bio_truncate(struct bio *bio, unsigned new_size)
+static void bio_truncate(struct bio *bio, unsigned new_size)
 {
 	struct bio_vec bv;
 	struct bvec_iter iter;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4c5106f2ed57..d8d27742a75f 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -405,7 +405,6 @@ extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
 			       struct bio *src, struct bvec_iter *src_iter);
 extern void bio_copy_data(struct bio *dst, struct bio *src);
 extern void bio_free_pages(struct bio *bio);
-void bio_truncate(struct bio *bio, unsigned new_size);
 void guard_bio_eod(struct bio *bio);
 void zero_fill_bio(struct bio *bio);
 
-- 
cgit v1.2.3


From 9672b0d43782047b1825a96bafee1b6aefa35bc2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 9 Oct 2021 13:02:23 -0600
Subject: sbitmap: add __sbitmap_queue_get_batch()

The block layer tag allocation batching still calls into sbitmap to get
each tag, but we can improve on that. Add __sbitmap_queue_get_batch(),
which returns a mask of tags all at once, along with an offset for
those tags.

An example return would be 0xff, where bits 0..7 are set, with
tag_offset == 128. The valid tags in this case would be 128..135.

A batch is specific to an individual sbitmap_map, hence it cannot be
larger than that. The requested number of tags is automatically reduced
to the max that can be satisfied with a single map.

On failure, 0 is returned. Caller should fall back to single tag
allocation at that point/

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sbitmap.h | 13 +++++++++++++
 lib/sbitmap.c           | 51 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 2713e689ad66..e30b56023ead 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -426,6 +426,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth);
  */
 int __sbitmap_queue_get(struct sbitmap_queue *sbq);
 
+/**
+ * __sbitmap_queue_get_batch() - Try to allocate a batch of free bits
+ * @sbq: Bitmap queue to allocate from.
+ * @nr_tags: number of tags requested
+ * @offset: offset to add to returned bits
+ *
+ * Return: Mask of allocated tags, 0 if none are found. Each tag allocated is
+ * a bit in the mask returned, and the caller must add @offset to the value to
+ * get the absolute tag value.
+ */
+unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
+					unsigned int *offset);
+
 /**
  * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct
  * sbitmap_queue, limiting the depth used from each word, with preemption
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index b25db9be938a..f398e0ae548e 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -489,6 +489,57 @@ int __sbitmap_queue_get(struct sbitmap_queue *sbq)
 }
 EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
 
+unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
+					unsigned int *offset)
+{
+	struct sbitmap *sb = &sbq->sb;
+	unsigned int hint, depth;
+	unsigned long index, nr;
+	int i;
+
+	if (unlikely(sb->round_robin))
+		return 0;
+
+	depth = READ_ONCE(sb->depth);
+	hint = update_alloc_hint_before_get(sb, depth);
+
+	index = SB_NR_TO_INDEX(sb, hint);
+
+	for (i = 0; i < sb->map_nr; i++) {
+		struct sbitmap_word *map = &sb->map[index];
+		unsigned long get_mask;
+
+		sbitmap_deferred_clear(map);
+		if (map->word == (1UL << (map->depth - 1)) - 1)
+			continue;
+
+		nr = find_first_zero_bit(&map->word, map->depth);
+		if (nr + nr_tags <= map->depth) {
+			atomic_long_t *ptr = (atomic_long_t *) &map->word;
+			int map_tags = min_t(int, nr_tags, map->depth);
+			unsigned long val, ret;
+
+			get_mask = ((1UL << map_tags) - 1) << nr;
+			do {
+				val = READ_ONCE(map->word);
+				ret = atomic_long_cmpxchg(ptr, val, get_mask | val);
+			} while (ret != val);
+			get_mask = (get_mask & ~ret) >> nr;
+			if (get_mask) {
+				*offset = nr + (index << sb->shift);
+				update_alloc_hint_after_get(sb, depth, hint,
+							*offset + map_tags - 1);
+				return get_mask;
+			}
+		}
+		/* Jump to next index. */
+		if (++index >= sb->map_nr)
+			index = 0;
+	}
+
+	return 0;
+}
+
 int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq,
 				unsigned int shallow_depth)
 {
-- 
cgit v1.2.3


From f70299f0d58e0e21f7f5f5ab27e601e8d3f0373e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 13:12:15 +0200
Subject: blk-mq: factor out a blk_qc_to_hctx helper

Add a helper to get the hctx from a request_queue and cookie, and fold
the blk_qc_t_to_queue_num helper into it as no other callers are left.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-6-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c            | 8 +++++++-
 include/linux/blk_types.h | 5 -----
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 97b911866de1..35b2ab0a373a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -65,6 +65,12 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
 	return bucket;
 }
 
+static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
+		blk_qc_t qc)
+{
+	return q->queue_hw_ctx[(qc & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT];
+}
+
 /*
  * Check if any of the ctx, dispatch list or elevator
  * have pending work in this hardware queue.
@@ -4071,7 +4077,7 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 	if (current->plug)
 		blk_flush_plug_list(current->plug, false);
 
-	hctx = q->queue_hw_ctx[blk_qc_t_to_queue_num(cookie)];
+	hctx = blk_qc_to_hctx(q, cookie);
 
 	/*
 	 * If we sleep, have the caller restart the poll loop to reset
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 3b967053e9f5..000351c5312a 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -505,11 +505,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie)
 	return cookie != BLK_QC_T_NONE;
 }
 
-static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
-{
-	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
-}
-
 static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
 {
 	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
-- 
cgit v1.2.3


From efbabbe121f96d4b1a98a2c2ef5d2e8f7fb41c87 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 13:12:17 +0200
Subject: blk-mq: remove blk_qc_t_to_tag and blk_qc_t_is_internal

Merge both functions into their only caller to keep the blk-mq tag to
blk_qc_t mapping as private as possible in blk-mq.c.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c            |  8 +++++---
 include/linux/blk_types.h | 10 ----------
 2 files changed, 5 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 636773056874..24a10c973f79 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -74,9 +74,11 @@ static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
 static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
 		blk_qc_t qc)
 {
-	if (blk_qc_t_is_internal(qc))
-		return blk_mq_tag_to_rq(hctx->sched_tags, blk_qc_t_to_tag(qc));
-	return blk_mq_tag_to_rq(hctx->tags, blk_qc_t_to_tag(qc));
+	unsigned int tag = qc & ((1U << BLK_QC_T_SHIFT) - 1);
+
+	if (qc & BLK_QC_T_INTERNAL)
+		return blk_mq_tag_to_rq(hctx->sched_tags, tag);
+	return blk_mq_tag_to_rq(hctx->tags, tag);
 }
 
 /*
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 000351c5312a..fb7c1477617b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -505,16 +505,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie)
 	return cookie != BLK_QC_T_NONE;
 }
 
-static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
-{
-	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
-}
-
-static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
-{
-	return (cookie & BLK_QC_T_INTERNAL) != 0;
-}
-
 struct blk_rq_stat {
 	u64 mean;
 	u64 min;
-- 
cgit v1.2.3


From 28a1ae6b9daba6ac65700eeb38479bd6fadec089 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 13:12:18 +0200
Subject: blk-mq: remove blk_qc_t_valid

Move the trivial check into the only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-9-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c            | 2 +-
 include/linux/blk_types.h | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 24a10c973f79..7d0d947921a6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4098,7 +4098,7 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
  */
 int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 {
-	if (!blk_qc_t_valid(cookie) ||
+	if (cookie == BLK_QC_T_NONE ||
 	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
 		return 0;
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fb7c1477617b..5017ba8fc539 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -500,11 +500,6 @@ typedef unsigned int blk_qc_t;
 #define BLK_QC_T_SHIFT		16
 #define BLK_QC_T_INTERNAL	(1U << 31)
 
-static inline bool blk_qc_t_valid(blk_qc_t cookie)
-{
-	return cookie != BLK_QC_T_NONE;
-}
-
 struct blk_rq_stat {
 	u64 mean;
 	u64 min;
-- 
cgit v1.2.3


From ef99b2d37666b7a600baab9e1c4944436652b0a2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 13:12:19 +0200
Subject: block: replace the spin argument to blk_iopoll with a flags argument

Switch the boolean spin argument to blk_poll to passing a set of flags
instead.  This will allow to control polling behavior in a more fine
grained way.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-10-hch@lst.de
[axboe: adapt to changed io_uring iopoll]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-exec.c       |  2 +-
 block/blk-mq.c         | 17 +++++++----------
 block/fops.c           |  8 ++++----
 fs/io_uring.c          |  9 +++++----
 fs/iomap/direct-io.c   |  6 +++---
 include/linux/blkdev.h |  4 +++-
 include/linux/fs.h     |  2 +-
 include/linux/iomap.h  |  2 +-
 mm/page_io.c           |  2 +-
 9 files changed, 26 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-exec.c b/block/blk-exec.c
index d6cd501c0d34..1fa7f25e5726 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -71,7 +71,7 @@ static bool blk_rq_is_poll(struct request *rq)
 static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
 {
 	do {
-		blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), true);
+		blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), 0);
 		cond_resched();
 	} while (!completion_done(wait));
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7d0d947921a6..6609e10657a8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4052,7 +4052,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
 }
 
 static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
-		bool spin)
+		unsigned int flags)
 {
 	struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
 	long state = get_current_state();
@@ -4075,7 +4075,7 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
 		if (task_is_running(current))
 			return 1;
 
-		if (ret < 0 || !spin)
+		if (ret < 0 || (flags & BLK_POLL_ONESHOT))
 			break;
 		cpu_relax();
 	} while (!need_resched());
@@ -4088,15 +4088,13 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
  * blk_poll - poll for IO completions
  * @q:  the queue
  * @cookie: cookie passed back at IO submission time
- * @spin: whether to spin for completions
+ * @flags: BLK_POLL_* flags that control the behavior
  *
  * Description:
  *    Poll for completions on the passed in queue. Returns number of
- *    completed entries found. If @spin is true, then blk_poll will continue
- *    looping until at least one completion is found, unless the task is
- *    otherwise marked running (or we need to reschedule).
+ *    completed entries found.
  */
-int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
+int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
 {
 	if (cookie == BLK_QC_T_NONE ||
 	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
@@ -4105,12 +4103,11 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin)
 	if (current->plug)
 		blk_flush_plug_list(current->plug, false);
 
-	/* If specified not to spin, we also should not sleep. */
-	if (spin && q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
+	if (q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
 		if (blk_mq_poll_hybrid(q, cookie))
 			return 1;
 	}
-	return blk_mq_poll_classic(q, cookie, spin);
+	return blk_mq_poll_classic(q, cookie, flags);
 }
 EXPORT_SYMBOL_GPL(blk_poll);
 
diff --git a/block/fops.c b/block/fops.c
index 15324f2e5a91..db8f2fe68dd2 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -108,7 +108,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 		if (!READ_ONCE(bio.bi_private))
 			break;
 		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc, true))
+		    !blk_poll(bdev_get_queue(bdev), qc, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -141,12 +141,12 @@ struct blkdev_dio {
 
 static struct bio_set blkdev_dio_pool;
 
-static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
+static int blkdev_iopoll(struct kiocb *kiocb, unsigned int flags)
 {
 	struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
 	struct request_queue *q = bdev_get_queue(bdev);
 
-	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
+	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags);
 }
 
 static void blkdev_bio_end_io(struct bio *bio)
@@ -297,7 +297,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		if (!READ_ONCE(dio->waiter))
 			break;
 
-		if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, true))
+		if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d2e86788c872..541fec2bd49a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2457,14 +2457,15 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			long min)
 {
 	struct io_kiocb *req, *tmp;
+	unsigned int poll_flags = 0;
 	LIST_HEAD(done);
-	bool spin;
 
 	/*
 	 * Only spin for completions if we don't have multiple devices hanging
 	 * off our complete list, and we're under the requested amount.
 	 */
-	spin = !ctx->poll_multi_queue && *nr_events < min;
+	if (ctx->poll_multi_queue || *nr_events >= min)
+		poll_flags |= BLK_POLL_ONESHOT;
 
 	list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) {
 		struct kiocb *kiocb = &req->rw.kiocb;
@@ -2482,11 +2483,11 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		if (!list_empty(&done))
 			break;
 
-		ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
+		ret = kiocb->ki_filp->f_op->iopoll(kiocb, poll_flags);
 		if (unlikely(ret < 0))
 			return ret;
 		else if (ret)
-			spin = false;
+			poll_flags |= BLK_POLL_ONESHOT;
 
 		/* iopoll may have completed current req */
 		if (READ_ONCE(req->iopoll_completed))
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 560ae967f70e..236aba256cd1 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -49,13 +49,13 @@ struct iomap_dio {
 	};
 };
 
-int iomap_dio_iopoll(struct kiocb *kiocb, bool spin)
+int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags)
 {
 	struct request_queue *q = READ_ONCE(kiocb->private);
 
 	if (!q)
 		return 0;
-	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), spin);
+	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags);
 }
 EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
 
@@ -642,7 +642,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			if (!(iocb->ki_flags & IOCB_HIPRI) ||
 			    !dio->submit.last_queue ||
 			    !blk_poll(dio->submit.last_queue,
-					 dio->submit.cookie, true))
+					 dio->submit.cookie, 0))
 				blk_io_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 17705c970d7e..e177346bc020 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -564,7 +564,9 @@ extern const char *blk_op_str(unsigned int op);
 int blk_status_to_errno(blk_status_t status);
 blk_status_t errno_to_blk_status(int errno);
 
-int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin);
+/* only poll the hardware once, don't continue until a completion was found */
+#define BLK_POLL_ONESHOT		(1 << 0)
+int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e7a633353fd2..c443cddf414f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2075,7 +2075,7 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
-	int (*iopoll)(struct kiocb *kiocb, bool spin);
+	int (*iopoll)(struct kiocb *kiocb, unsigned int flags);
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
 	__poll_t (*poll) (struct file *, struct poll_table_struct *);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 24f8489583ca..1e86b65567c2 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -337,7 +337,7 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		unsigned int dio_flags);
 ssize_t iomap_dio_complete(struct iomap_dio *dio);
-int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
+int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags);
 
 #ifdef CONFIG_SWAP
 struct file;
diff --git a/mm/page_io.c b/mm/page_io.c
index c493ce9ebcf5..5d5543fcefa4 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -428,7 +428,7 @@ int swap_readpage(struct page *page, bool synchronous)
 		if (!READ_ONCE(bio->bi_private))
 			break;
 
-		if (!blk_poll(disk->queue, qc, true))
+		if (!blk_poll(disk->queue, qc, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
-- 
cgit v1.2.3


From d729cf9acb9311956c8a37113dcfa0160a2d9665 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 13:12:20 +0200
Subject: io_uring: don't sleep when polling for I/O

There is no point in sleeping for the expected I/O completion timeout
in the io_uring async polling model as we never poll for a specific
I/O.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-11-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 3 ++-
 fs/io_uring.c          | 2 +-
 include/linux/blkdev.h | 2 ++
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6609e10657a8..97c24e461d0a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4103,7 +4103,8 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
 	if (current->plug)
 		blk_flush_plug_list(current->plug, false);
 
-	if (q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
+	if (!(flags & BLK_POLL_NOSLEEP) &&
+	    q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
 		if (blk_mq_poll_hybrid(q, cookie))
 			return 1;
 	}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 541fec2bd49a..c5066146b8de 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2457,7 +2457,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			long min)
 {
 	struct io_kiocb *req, *tmp;
-	unsigned int poll_flags = 0;
+	unsigned int poll_flags = BLK_POLL_NOSLEEP;
 	LIST_HEAD(done);
 
 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e177346bc020..2b80c98fc373 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -566,6 +566,8 @@ blk_status_t errno_to_blk_status(int errno);
 
 /* only poll the hardware once, don't continue until a completion was found */
 #define BLK_POLL_ONESHOT		(1 << 0)
+/* do not sleep to wait for the expected completion time */
+#define BLK_POLL_NOSLEEP		(1 << 1)
 int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
-- 
cgit v1.2.3


From 6ce913fe3eee14f40f778e85999c9e599dda8c6b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 13:12:21 +0200
Subject: block: rename REQ_HIPRI to REQ_POLLED

Unlike the RWF_HIPRI userspace ABI which is intentionally kept vague,
the bio flag is specific to the polling implementation, so rename and
document it properly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-12-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c          |  2 +-
 block/blk-merge.c         |  3 +--
 block/blk-mq-debugfs.c    |  2 +-
 block/blk-mq.c            |  4 ++--
 block/blk-mq.h            |  4 ++--
 block/blk.h               |  4 ++--
 drivers/nvme/host/core.c  |  2 +-
 drivers/scsi/scsi_debug.c | 10 +++++-----
 include/linux/bio.h       |  2 +-
 include/linux/blk_types.h |  4 ++--
 mm/page_io.c              |  2 +-
 11 files changed, 19 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 80fa7aa394c7..8eb0e08d5395 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -842,7 +842,7 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio)
 	}
 
 	if (!test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-		bio_clear_hipri(bio);
+		bio_clear_polled(bio);
 
 	switch (bio_op(bio)) {
 	case REQ_OP_DISCARD:
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 9b77b4d6c2a1..f88d7863f997 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -318,8 +318,7 @@ split:
 	 * iopoll in direct IO routine. Given performance gain of iopoll for
 	 * big IO can be trival, disable iopoll when split needed.
 	 */
-	bio_clear_hipri(bio);
-
+	bio_clear_polled(bio);
 	return bio_split(bio, sectors, GFP_NOIO, bs);
 }
 
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 3daea160d670..409a8256d9ff 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -287,7 +287,7 @@ static const char *const cmd_flag_name[] = {
 	CMD_FLAG_NAME(BACKGROUND),
 	CMD_FLAG_NAME(NOWAIT),
 	CMD_FLAG_NAME(NOUNMAP),
-	CMD_FLAG_NAME(HIPRI),
+	CMD_FLAG_NAME(POLLED),
 };
 #undef CMD_FLAG_NAME
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 97c24e461d0a..a34ffcf861c3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -732,7 +732,7 @@ bool blk_mq_complete_request_remote(struct request *rq)
 	 * For a polled request, always complete locallly, it's pointless
 	 * to redirect the completion.
 	 */
-	if (rq->cmd_flags & REQ_HIPRI)
+	if (rq->cmd_flags & REQ_POLLED)
 		return false;
 
 	if (blk_mq_complete_need_ipi(rq)) {
@@ -2278,7 +2278,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 
 	rq_qos_throttle(q, bio);
 
-	hipri = bio->bi_opf & REQ_HIPRI;
+	hipri = bio->bi_opf & REQ_POLLED;
 
 	plug = blk_mq_plug(q, bio);
 	if (plug && plug->cached_rq) {
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5da970bb8865..a9fe01e14951 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -106,9 +106,9 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
 	enum hctx_type type = HCTX_TYPE_DEFAULT;
 
 	/*
-	 * The caller ensure that if REQ_HIPRI, poll must be enabled.
+	 * The caller ensure that if REQ_POLLED, poll must be enabled.
 	 */
-	if (flags & REQ_HIPRI)
+	if (flags & REQ_POLLED)
 		type = HCTX_TYPE_POLL;
 	else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
 		type = HCTX_TYPE_READ;
diff --git a/block/blk.h b/block/blk.h
index cab8d659d8a6..fa05d3f07976 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -416,11 +416,11 @@ extern struct device_attribute dev_attr_events;
 extern struct device_attribute dev_attr_events_async;
 extern struct device_attribute dev_attr_events_poll_msecs;
 
-static inline void bio_clear_hipri(struct bio *bio)
+static inline void bio_clear_polled(struct bio *bio)
 {
 	/* can't support alloc cache if we turn off polling */
 	bio_clear_flag(bio, BIO_PERCPU_CACHE);
-	bio->bi_opf &= ~REQ_HIPRI;
+	bio->bi_opf &= ~REQ_POLLED;
 }
 
 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3d444b13cd69..ae15cb714596 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -632,7 +632,7 @@ static inline void nvme_init_request(struct request *req,
 
 	req->cmd_flags |= REQ_FAILFAST_DRIVER;
 	if (req->mq_hctx->type == HCTX_TYPE_POLL)
-		req->cmd_flags |= REQ_HIPRI;
+		req->cmd_flags |= REQ_POLLED;
 	nvme_clear_nvme_request(req);
 	memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd));
 }
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 66f507469a31..40b473eea357 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -5384,7 +5384,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 {
 	bool new_sd_dp;
 	bool inject = false;
-	bool hipri = scsi_cmd_to_rq(cmnd)->cmd_flags & REQ_HIPRI;
+	bool polled = scsi_cmd_to_rq(cmnd)->cmd_flags & REQ_POLLED;
 	int k, num_in_q, qdepth;
 	unsigned long iflags;
 	u64 ns_from_boot = 0;
@@ -5471,7 +5471,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 	if (sdebug_host_max_queue)
 		sd_dp->hc_idx = get_tag(cmnd);
 
-	if (hipri)
+	if (polled)
 		ns_from_boot = ktime_get_boottime_ns();
 
 	/* one of the resp_*() response functions is called here */
@@ -5531,7 +5531,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 				kt -= d;
 			}
 		}
-		if (hipri) {
+		if (polled) {
 			sd_dp->cmpl_ts = ktime_add(ns_to_ktime(ns_from_boot), kt);
 			spin_lock_irqsave(&sqp->qc_lock, iflags);
 			if (!sd_dp->init_poll) {
@@ -5562,7 +5562,7 @@ static int schedule_resp(struct scsi_cmnd *cmnd, struct sdebug_dev_info *devip,
 		if (unlikely((sdebug_opts & SDEBUG_OPT_CMD_ABORT) &&
 			     atomic_read(&sdeb_inject_pending)))
 			sd_dp->aborted = true;
-		if (hipri) {
+		if (polled) {
 			sd_dp->cmpl_ts = ns_to_ktime(ns_from_boot);
 			spin_lock_irqsave(&sqp->qc_lock, iflags);
 			if (!sd_dp->init_poll) {
@@ -7331,7 +7331,7 @@ static int sdebug_blk_mq_poll(struct Scsi_Host *shost, unsigned int queue_num)
 			if (kt_from_boot < sd_dp->cmpl_ts)
 				continue;
 
-		} else		/* ignoring non REQ_HIPRI requests */
+		} else		/* ignoring non REQ_POLLED requests */
 			continue;
 		devip = (struct sdebug_dev_info *)scp->device->hostdata;
 		if (likely(devip))
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d8d27742a75f..c7a2d880e927 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -706,7 +706,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
  */
 static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 {
-	bio->bi_opf |= REQ_HIPRI;
+	bio->bi_opf |= REQ_POLLED;
 	if (!is_sync_kiocb(kiocb))
 		bio->bi_opf |= REQ_NOWAIT;
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5017ba8fc539..f8b9fce68834 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -384,7 +384,7 @@ enum req_flag_bits {
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
 
-	__REQ_HIPRI,
+	__REQ_POLLED,		/* caller polls for completion using blk_poll */
 
 	/* for driver use */
 	__REQ_DRV,
@@ -409,7 +409,7 @@ enum req_flag_bits {
 #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
-#define REQ_HIPRI		(1ULL << __REQ_HIPRI)
+#define REQ_POLLED		(1ULL << __REQ_POLLED)
 
 #define REQ_DRV			(1ULL << __REQ_DRV)
 #define REQ_SWAP		(1ULL << __REQ_SWAP)
diff --git a/mm/page_io.c b/mm/page_io.c
index 5d5543fcefa4..ed2eded74f3a 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -416,7 +416,7 @@ int swap_readpage(struct page *page, bool synchronous)
 	 * attempt to access it in the page fault retry time check.
 	 */
 	if (synchronous) {
-		bio->bi_opf |= REQ_HIPRI;
+		bio->bi_opf |= REQ_POLLED;
 		get_task_struct(current);
 		bio->bi_private = current;
 	}
-- 
cgit v1.2.3


From 19416123ab3e1348b3532347af221d8f60838431 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 12 Oct 2021 13:12:23 +0200
Subject: block: define 'struct bvec_iter' as packed

'struct bvec_iter' is embedded into 'struct bio', define it as packed
so that we can get one extra 4bytes for other uses without expanding
bio.

'struct bvec_iter' is often allocated on stack, so making it packed
doesn't affect performance. Also I have run io_uring on both
nvme/null_blk, and not observe performance effect in this way.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-14-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bvec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 0e9bdd42dafb..35c25dff651a 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -44,7 +44,7 @@ struct bvec_iter {
 
 	unsigned int            bi_bvec_done;	/* number of bytes completed in
 						   current bvec */
-};
+} __packed;
 
 struct bvec_iter_all {
 	struct bio_vec	bv;
-- 
cgit v1.2.3


From 3e08773c3841e9db7a520908cc2b136a77d275ff Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Oct 2021 13:12:24 +0200
Subject: block: switch polling to be bio based

Replace the blk_poll interface that requires the caller to keep a queue
and cookie from the submissions with polling based on the bio.

Polling for the bio itself leads to a few advantages:

 - the cookie construction can made entirely private in blk-mq.c
 - the caller does not need to remember the request_queue and cookie
   separately and thus sidesteps their lifetime issues
 - keeping the device and the cookie inside the bio allows to trivially
   support polling BIOs remapping by stacking drivers
 - a lot of code to propagate the cookie back up the submission path can
   be removed entirely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Mark Wunderlich <mark.wunderlich@intel.com>
Link: https://lore.kernel.org/r/20211012111226.760968-15-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/m68k/emu/nfblock.c             |   3 +-
 arch/xtensa/platforms/iss/simdisk.c |   3 +-
 block/bio.c                         |   1 +
 block/blk-core.c                    | 127 +++++++++++++++++++++++++++---------
 block/blk-exec.c                    |  10 ++-
 block/blk-mq.c                      |  72 +++++++-------------
 block/blk-mq.h                      |   2 +
 block/fops.c                        |  25 +++----
 drivers/block/brd.c                 |  12 ++--
 drivers/block/drbd/drbd_int.h       |   2 +-
 drivers/block/drbd/drbd_req.c       |   3 +-
 drivers/block/n64cart.c             |  12 ++--
 drivers/block/null_blk/main.c       |   3 +-
 drivers/block/pktcdvd.c             |   7 +-
 drivers/block/ps3vram.c             |   6 +-
 drivers/block/rsxx/dev.c            |   7 +-
 drivers/block/zram/zram_drv.c       |  10 +--
 drivers/md/bcache/request.c         |  13 ++--
 drivers/md/bcache/request.h         |   4 +-
 drivers/md/dm.c                     |  28 +++-----
 drivers/md/md.c                     |  10 ++-
 drivers/nvdimm/blk.c                |   5 +-
 drivers/nvdimm/btt.c                |   5 +-
 drivers/nvdimm/pmem.c               |   3 +-
 drivers/nvme/host/multipath.c       |   6 +-
 drivers/s390/block/dcssblk.c        |   7 +-
 fs/btrfs/inode.c                    |   8 +--
 fs/ext4/file.c                      |   2 +-
 fs/gfs2/file.c                      |   4 +-
 fs/iomap/direct-io.c                |  36 ++++------
 fs/xfs/xfs_file.c                   |   2 +-
 fs/zonefs/super.c                   |   2 +-
 include/linux/bio.h                 |   2 +-
 include/linux/blk-mq.h              |  15 +----
 include/linux/blk_types.h           |  12 ++--
 include/linux/blkdev.h              |   8 ++-
 include/linux/fs.h                  |   6 +-
 include/linux/iomap.h               |   5 +-
 mm/page_io.c                        |   8 +--
 39 files changed, 232 insertions(+), 264 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 9a8394e96388..4ef457ba5220 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -58,7 +58,7 @@ struct nfhd_device {
 	struct gendisk *disk;
 };
 
-static blk_qc_t nfhd_submit_bio(struct bio *bio)
+static void nfhd_submit_bio(struct bio *bio)
 {
 	struct nfhd_device *dev = bio->bi_bdev->bd_disk->private_data;
 	struct bio_vec bvec;
@@ -76,7 +76,6 @@ static blk_qc_t nfhd_submit_bio(struct bio *bio)
 		sec += len;
 	}
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
 }
 
 static int nfhd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 3cdfa00738e0..ddd1fe3db474 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -100,7 +100,7 @@ static void simdisk_transfer(struct simdisk *dev, unsigned long sector,
 	spin_unlock(&dev->lock);
 }
 
-static blk_qc_t simdisk_submit_bio(struct bio *bio)
+static void simdisk_submit_bio(struct bio *bio)
 {
 	struct simdisk *dev = bio->bi_bdev->bd_disk->private_data;
 	struct bio_vec bvec;
@@ -118,7 +118,6 @@ static blk_qc_t simdisk_submit_bio(struct bio *bio)
 	}
 
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
 }
 
 static int simdisk_open(struct block_device *bdev, fmode_t mode)
diff --git a/block/bio.c b/block/bio.c
index df45f4b996ac..a3c9ff23a036 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -282,6 +282,7 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 
 	atomic_set(&bio->__bi_remaining, 1);
 	atomic_set(&bio->__bi_cnt, 1);
+	bio->bi_cookie = BLK_QC_T_NONE;
 
 	bio->bi_max_vecs = max_vecs;
 	bio->bi_io_vec = table;
diff --git a/block/blk-core.c b/block/blk-core.c
index 8eb0e08d5395..f008c38ae967 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -915,25 +915,22 @@ end_io:
 	return false;
 }
 
-static blk_qc_t __submit_bio(struct bio *bio)
+static void __submit_bio(struct bio *bio)
 {
 	struct gendisk *disk = bio->bi_bdev->bd_disk;
-	blk_qc_t ret = BLK_QC_T_NONE;
 
 	if (unlikely(bio_queue_enter(bio) != 0))
-		return BLK_QC_T_NONE;
+		return;
 
 	if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio))
 		goto queue_exit;
-	if (disk->fops->submit_bio) {
-		ret = disk->fops->submit_bio(bio);
-		goto queue_exit;
+	if (!disk->fops->submit_bio) {
+		blk_mq_submit_bio(bio);
+		return;
 	}
-	return blk_mq_submit_bio(bio);
-
+	disk->fops->submit_bio(bio);
 queue_exit:
 	blk_queue_exit(disk->queue);
-	return ret;
 }
 
 /*
@@ -955,10 +952,9 @@ queue_exit:
  * bio_list_on_stack[1] contains bios that were submitted before the current
  *	->submit_bio_bio, but that haven't been processed yet.
  */
-static blk_qc_t __submit_bio_noacct(struct bio *bio)
+static void __submit_bio_noacct(struct bio *bio)
 {
 	struct bio_list bio_list_on_stack[2];
-	blk_qc_t ret = BLK_QC_T_NONE;
 
 	BUG_ON(bio->bi_next);
 
@@ -975,7 +971,7 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
 		bio_list_on_stack[1] = bio_list_on_stack[0];
 		bio_list_init(&bio_list_on_stack[0]);
 
-		ret = __submit_bio(bio);
+		__submit_bio(bio);
 
 		/*
 		 * Sort new bios into those for a lower level and those for the
@@ -998,22 +994,19 @@ static blk_qc_t __submit_bio_noacct(struct bio *bio)
 	} while ((bio = bio_list_pop(&bio_list_on_stack[0])));
 
 	current->bio_list = NULL;
-	return ret;
 }
 
-static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
+static void __submit_bio_noacct_mq(struct bio *bio)
 {
 	struct bio_list bio_list[2] = { };
-	blk_qc_t ret;
 
 	current->bio_list = bio_list;
 
 	do {
-		ret = __submit_bio(bio);
+		__submit_bio(bio);
 	} while ((bio = bio_list_pop(&bio_list[0])));
 
 	current->bio_list = NULL;
-	return ret;
 }
 
 /**
@@ -1025,7 +1018,7 @@ static blk_qc_t __submit_bio_noacct_mq(struct bio *bio)
  * systems and other upper level users of the block layer should use
  * submit_bio() instead.
  */
-blk_qc_t submit_bio_noacct(struct bio *bio)
+void submit_bio_noacct(struct bio *bio)
 {
 	/*
 	 * We only want one ->submit_bio to be active at a time, else stack
@@ -1033,14 +1026,12 @@ blk_qc_t submit_bio_noacct(struct bio *bio)
 	 * to collect a list of requests submited by a ->submit_bio method while
 	 * it is active, and then process them after it returned.
 	 */
-	if (current->bio_list) {
+	if (current->bio_list)
 		bio_list_add(&current->bio_list[0], bio);
-		return BLK_QC_T_NONE;
-	}
-
-	if (!bio->bi_bdev->bd_disk->fops->submit_bio)
-		return __submit_bio_noacct_mq(bio);
-	return __submit_bio_noacct(bio);
+	else if (!bio->bi_bdev->bd_disk->fops->submit_bio)
+		__submit_bio_noacct_mq(bio);
+	else
+		__submit_bio_noacct(bio);
 }
 EXPORT_SYMBOL(submit_bio_noacct);
 
@@ -1057,10 +1048,10 @@ EXPORT_SYMBOL(submit_bio_noacct);
  * in @bio.  The bio must NOT be touched by thecaller until ->bi_end_io() has
  * been called.
  */
-blk_qc_t submit_bio(struct bio *bio)
+void submit_bio(struct bio *bio)
 {
 	if (blkcg_punt_bio_submit(bio))
-		return BLK_QC_T_NONE;
+		return;
 
 	/*
 	 * If it's a regular read/write or a barrier with data attached,
@@ -1092,19 +1083,91 @@ blk_qc_t submit_bio(struct bio *bio)
 	if (unlikely(bio_op(bio) == REQ_OP_READ &&
 	    bio_flagged(bio, BIO_WORKINGSET))) {
 		unsigned long pflags;
-		blk_qc_t ret;
 
 		psi_memstall_enter(&pflags);
-		ret = submit_bio_noacct(bio);
+		submit_bio_noacct(bio);
 		psi_memstall_leave(&pflags);
-
-		return ret;
+		return;
 	}
 
-	return submit_bio_noacct(bio);
+	submit_bio_noacct(bio);
 }
 EXPORT_SYMBOL(submit_bio);
 
+/**
+ * bio_poll - poll for BIO completions
+ * @bio: bio to poll for
+ * @flags: BLK_POLL_* flags that control the behavior
+ *
+ * Poll for completions on queue associated with the bio. Returns number of
+ * completed entries found.
+ *
+ * Note: the caller must either be the context that submitted @bio, or
+ * be in a RCU critical section to prevent freeing of @bio.
+ */
+int bio_poll(struct bio *bio, unsigned int flags)
+{
+	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+	blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
+	int ret;
+
+	if (cookie == BLK_QC_T_NONE ||
+	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
+		return 0;
+
+	if (current->plug)
+		blk_flush_plug_list(current->plug, false);
+
+	if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
+		return 0;
+	if (WARN_ON_ONCE(!queue_is_mq(q)))
+		ret = 0;	/* not yet implemented, should not happen */
+	else
+		ret = blk_mq_poll(q, cookie, flags);
+	blk_queue_exit(q);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bio_poll);
+
+/*
+ * Helper to implement file_operations.iopoll.  Requires the bio to be stored
+ * in iocb->private, and cleared before freeing the bio.
+ */
+int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags)
+{
+	struct bio *bio;
+	int ret = 0;
+
+	/*
+	 * Note: the bio cache only uses SLAB_TYPESAFE_BY_RCU, so bio can
+	 * point to a freshly allocated bio at this point.  If that happens
+	 * we have a few cases to consider:
+	 *
+	 *  1) the bio is beeing initialized and bi_bdev is NULL.  We can just
+	 *     simply nothing in this case
+	 *  2) the bio points to a not poll enabled device.  bio_poll will catch
+	 *     this and return 0
+	 *  3) the bio points to a poll capable device, including but not
+	 *     limited to the one that the original bio pointed to.  In this
+	 *     case we will call into the actual poll method and poll for I/O,
+	 *     even if we don't need to, but it won't cause harm either.
+	 *
+	 * For cases 2) and 3) above the RCU grace period ensures that bi_bdev
+	 * is still allocated. Because partitions hold a reference to the whole
+	 * device bdev and thus disk, the disk is also still valid.  Grabbing
+	 * a reference to the queue in bio_poll() ensures the hctxs and requests
+	 * are still valid as well.
+	 */
+	rcu_read_lock();
+	bio = READ_ONCE(kiocb->private);
+	if (bio && bio->bi_bdev)
+		ret = bio_poll(bio, flags);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iocb_bio_iopoll);
+
 /**
  * blk_cloned_rq_check_limits - Helper function to check a cloned request
  *                              for the new queue limits
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 1fa7f25e5726..55f0cd34b37b 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -65,13 +65,19 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
 
 static bool blk_rq_is_poll(struct request *rq)
 {
-	return rq->mq_hctx && rq->mq_hctx->type == HCTX_TYPE_POLL;
+	if (!rq->mq_hctx)
+		return false;
+	if (rq->mq_hctx->type != HCTX_TYPE_POLL)
+		return false;
+	if (WARN_ON_ONCE(!rq->bio))
+		return false;
+	return true;
 }
 
 static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
 {
 	do {
-		blk_poll(rq->q, request_to_qc_t(rq->mq_hctx, rq), 0);
+		bio_poll(rq->bio, 0);
 		cond_resched();
 	} while (!completion_done(wait));
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a34ffcf861c3..0860f622099f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -65,6 +65,9 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
 	return bucket;
 }
 
+#define BLK_QC_T_SHIFT		16
+#define BLK_QC_T_INTERNAL	(1U << 31)
+
 static inline struct blk_mq_hw_ctx *blk_qc_to_hctx(struct request_queue *q,
 		blk_qc_t qc)
 {
@@ -81,6 +84,13 @@ static inline struct request *blk_qc_to_rq(struct blk_mq_hw_ctx *hctx,
 	return blk_mq_tag_to_rq(hctx->tags, tag);
 }
 
+static inline blk_qc_t blk_rq_to_qc(struct request *rq)
+{
+	return (rq->mq_hctx->queue_num << BLK_QC_T_SHIFT) |
+		(rq->tag != -1 ?
+		 rq->tag : (rq->internal_tag | BLK_QC_T_INTERNAL));
+}
+
 /*
  * Check if any of the ctx, dispatch list or elevator
  * have pending work in this hardware queue.
@@ -819,6 +829,8 @@ void blk_mq_start_request(struct request *rq)
 	if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE)
 		q->integrity.profile->prepare_fn(rq);
 #endif
+	if (rq->bio && rq->bio->bi_opf & REQ_POLLED)
+	        WRITE_ONCE(rq->bio->bi_cookie, blk_rq_to_qc(rq));
 }
 EXPORT_SYMBOL(blk_mq_start_request);
 
@@ -2045,19 +2057,15 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
 }
 
 static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
-					    struct request *rq,
-					    blk_qc_t *cookie, bool last)
+					    struct request *rq, bool last)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_queue_data bd = {
 		.rq = rq,
 		.last = last,
 	};
-	blk_qc_t new_cookie;
 	blk_status_t ret;
 
-	new_cookie = request_to_qc_t(hctx, rq);
-
 	/*
 	 * For OK queue, we are done. For error, caller may kill it.
 	 * Any other error (busy), just add it to our list as we
@@ -2067,7 +2075,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	switch (ret) {
 	case BLK_STS_OK:
 		blk_mq_update_dispatch_busy(hctx, false);
-		*cookie = new_cookie;
 		break;
 	case BLK_STS_RESOURCE:
 	case BLK_STS_DEV_RESOURCE:
@@ -2076,7 +2083,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 		break;
 	default:
 		blk_mq_update_dispatch_busy(hctx, false);
-		*cookie = BLK_QC_T_NONE;
 		break;
 	}
 
@@ -2085,7 +2091,6 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 						struct request *rq,
-						blk_qc_t *cookie,
 						bool bypass_insert, bool last)
 {
 	struct request_queue *q = rq->q;
@@ -2119,7 +2124,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}
 
-	return __blk_mq_issue_directly(hctx, rq, cookie, last);
+	return __blk_mq_issue_directly(hctx, rq, last);
 insert:
 	if (bypass_insert)
 		return BLK_STS_RESOURCE;
@@ -2133,7 +2138,6 @@ insert:
  * blk_mq_try_issue_directly - Try to send a request directly to device driver.
  * @hctx: Pointer of the associated hardware queue.
  * @rq: Pointer to request to be sent.
- * @cookie: Request queue cookie.
  *
  * If the device has enough resources to accept a new request now, send the
  * request directly to device driver. Else, insert at hctx->dispatch queue, so
@@ -2141,7 +2145,7 @@ insert:
  * queue have higher priority.
  */
 static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
-		struct request *rq, blk_qc_t *cookie)
+		struct request *rq)
 {
 	blk_status_t ret;
 	int srcu_idx;
@@ -2150,7 +2154,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	hctx_lock(hctx, &srcu_idx);
 
-	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false, true);
+	ret = __blk_mq_try_issue_directly(hctx, rq, false, true);
 	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE)
 		blk_mq_request_bypass_insert(rq, false, true);
 	else if (ret != BLK_STS_OK)
@@ -2163,11 +2167,10 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
 {
 	blk_status_t ret;
 	int srcu_idx;
-	blk_qc_t unused_cookie;
 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
 	hctx_lock(hctx, &srcu_idx);
-	ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true, last);
+	ret = __blk_mq_try_issue_directly(hctx, rq, true, last);
 	hctx_unlock(hctx, srcu_idx);
 
 	return ret;
@@ -2247,10 +2250,8 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
  *
  * It will not queue the request if there is an error with the bio, or at the
  * request creation.
- *
- * Returns: Request queue cookie.
  */
-blk_qc_t blk_mq_submit_bio(struct bio *bio)
+void blk_mq_submit_bio(struct bio *bio)
 {
 	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
 	const int is_sync = op_is_sync(bio->bi_opf);
@@ -2259,9 +2260,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 	struct blk_plug *plug;
 	struct request *same_queue_rq = NULL;
 	unsigned int nr_segs;
-	blk_qc_t cookie;
 	blk_status_t ret;
-	bool hipri;
 
 	blk_queue_bounce(q, &bio);
 	__blk_queue_split(&bio, &nr_segs);
@@ -2278,8 +2277,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 
 	rq_qos_throttle(q, bio);
 
-	hipri = bio->bi_opf & REQ_POLLED;
-
 	plug = blk_mq_plug(q, bio);
 	if (plug && plug->cached_rq) {
 		rq = plug->cached_rq;
@@ -2310,8 +2307,6 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 
 	rq_qos_track(q, rq, bio);
 
-	cookie = request_to_qc_t(rq->mq_hctx, rq);
-
 	blk_mq_bio_to_request(rq, bio, nr_segs);
 
 	ret = blk_crypto_init_request(rq);
@@ -2319,7 +2314,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		bio->bi_status = ret;
 		bio_endio(bio);
 		blk_mq_free_request(rq);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	if (unlikely(is_flush_fua)) {
@@ -2375,7 +2370,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		if (same_queue_rq) {
 			trace_block_unplug(q, 1, true);
 			blk_mq_try_issue_directly(same_queue_rq->mq_hctx,
-						  same_queue_rq, &cookie);
+						  same_queue_rq);
 		}
 	} else if ((q->nr_hw_queues > 1 && is_sync) ||
 		   !rq->mq_hctx->dispatch_busy) {
@@ -2383,18 +2378,15 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
 		 * There is no scheduler and we can try to send directly
 		 * to the hardware.
 		 */
-		blk_mq_try_issue_directly(rq->mq_hctx, rq, &cookie);
+		blk_mq_try_issue_directly(rq->mq_hctx, rq);
 	} else {
 		/* Default case. */
 		blk_mq_sched_insert_request(rq, false, true, true);
 	}
 
-	if (!hipri)
-		return BLK_QC_T_NONE;
-	return cookie;
+	return;
 queue_exit:
 	blk_queue_exit(q);
-	return BLK_QC_T_NONE;
 }
 
 static size_t order_to_size(unsigned int order)
@@ -4084,25 +4076,8 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
 	return 0;
 }
 
-/**
- * blk_poll - poll for IO completions
- * @q:  the queue
- * @cookie: cookie passed back at IO submission time
- * @flags: BLK_POLL_* flags that control the behavior
- *
- * Description:
- *    Poll for completions on the passed in queue. Returns number of
- *    completed entries found.
- */
-int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
 {
-	if (cookie == BLK_QC_T_NONE ||
-	    !test_bit(QUEUE_FLAG_POLL, &q->queue_flags))
-		return 0;
-
-	if (current->plug)
-		blk_flush_plug_list(current->plug, false);
-
 	if (!(flags & BLK_POLL_NOSLEEP) &&
 	    q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
 		if (blk_mq_poll_hybrid(q, cookie))
@@ -4110,7 +4085,6 @@ int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
 	}
 	return blk_mq_poll_classic(q, cookie, flags);
 }
-EXPORT_SYMBOL_GPL(blk_poll);
 
 unsigned int blk_mq_rq_cpu(struct request *rq)
 {
diff --git a/block/blk-mq.h b/block/blk-mq.h
index a9fe01e14951..8be447995106 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -37,6 +37,8 @@ struct blk_mq_ctx {
 	struct kobject		kobj;
 } ____cacheline_aligned_in_smp;
 
+void blk_mq_submit_bio(struct bio *bio);
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags);
 void blk_mq_exit_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
diff --git a/block/fops.c b/block/fops.c
index db8f2fe68dd2..ce1255529ba2 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -61,7 +61,6 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	bool should_dirty = false;
 	struct bio bio;
 	ssize_t ret;
-	blk_qc_t qc;
 
 	if ((pos | iov_iter_alignment(iter)) &
 	    (bdev_logical_block_size(bdev) - 1))
@@ -102,13 +101,12 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	if (iocb->ki_flags & IOCB_HIPRI)
 		bio_set_polled(&bio, iocb);
 
-	qc = submit_bio(&bio);
+	submit_bio(&bio);
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!READ_ONCE(bio.bi_private))
 			break;
-		if (!(iocb->ki_flags & IOCB_HIPRI) ||
-		    !blk_poll(bdev_get_queue(bdev), qc, 0))
+		if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -141,14 +139,6 @@ struct blkdev_dio {
 
 static struct bio_set blkdev_dio_pool;
 
-static int blkdev_iopoll(struct kiocb *kiocb, unsigned int flags)
-{
-	struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags);
-}
-
 static void blkdev_bio_end_io(struct bio *bio)
 {
 	struct blkdev_dio *dio = bio->bi_private;
@@ -162,6 +152,8 @@ static void blkdev_bio_end_io(struct bio *bio)
 			struct kiocb *iocb = dio->iocb;
 			ssize_t ret;
 
+			WRITE_ONCE(iocb->private, NULL);
+
 			if (likely(!dio->bio.bi_status)) {
 				ret = dio->size;
 				iocb->ki_pos += ret;
@@ -200,7 +192,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	bool do_poll = (iocb->ki_flags & IOCB_HIPRI);
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
 	loff_t pos = iocb->ki_pos;
-	blk_qc_t qc = BLK_QC_T_NONE;
 	int ret = 0;
 
 	if ((pos | iov_iter_alignment(iter)) &
@@ -262,9 +253,9 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		if (!nr_pages) {
 			if (do_poll)
 				bio_set_polled(bio, iocb);
-			qc = submit_bio(bio);
+			submit_bio(bio);
 			if (do_poll)
-				WRITE_ONCE(iocb->ki_cookie, qc);
+				WRITE_ONCE(iocb->private, bio);
 			break;
 		}
 		if (!dio->multi_bio) {
@@ -297,7 +288,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		if (!READ_ONCE(dio->waiter))
 			break;
 
-		if (!do_poll || !blk_poll(bdev_get_queue(bdev), qc, 0))
+		if (!do_poll || !bio_poll(bio, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -594,7 +585,7 @@ const struct file_operations def_blk_fops = {
 	.llseek		= blkdev_llseek,
 	.read_iter	= blkdev_read_iter,
 	.write_iter	= blkdev_write_iter,
-	.iopoll		= blkdev_iopoll,
+	.iopoll		= iocb_bio_iopoll,
 	.mmap		= generic_file_mmap,
 	.fsync		= blkdev_fsync,
 	.unlocked_ioctl	= blkdev_ioctl,
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 530b31240203..aa0472718dce 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -282,7 +282,7 @@ out:
 	return err;
 }
 
-static blk_qc_t brd_submit_bio(struct bio *bio)
+static void brd_submit_bio(struct bio *bio)
 {
 	struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
 	sector_t sector = bio->bi_iter.bi_sector;
@@ -299,16 +299,14 @@ static blk_qc_t brd_submit_bio(struct bio *bio)
 
 		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
 				  bio_op(bio), sector);
-		if (err)
-			goto io_error;
+		if (err) {
+			bio_io_error(bio);
+			return;
+		}
 		sector += len >> SECTOR_SHIFT;
 	}
 
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
-io_error:
-	bio_io_error(bio);
-	return BLK_QC_T_NONE;
 }
 
 static int brd_rw_page(struct block_device *bdev, sector_t sector,
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 5d9181382ce1..6674a0b88341 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1448,7 +1448,7 @@ extern void conn_free_crypto(struct drbd_connection *connection);
 /* drbd_req */
 extern void do_submit(struct work_struct *ws);
 extern void __drbd_make_request(struct drbd_device *, struct bio *);
-extern blk_qc_t drbd_submit_bio(struct bio *bio);
+void drbd_submit_bio(struct bio *bio);
 extern int drbd_read_remote(struct drbd_device *device, struct drbd_request *req);
 extern int is_valid_ar_handle(struct drbd_request *, sector_t);
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 5ca233644d70..3235532ae077 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1596,7 +1596,7 @@ void do_submit(struct work_struct *ws)
 	}
 }
 
-blk_qc_t drbd_submit_bio(struct bio *bio)
+void drbd_submit_bio(struct bio *bio)
 {
 	struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
 
@@ -1609,7 +1609,6 @@ blk_qc_t drbd_submit_bio(struct bio *bio)
 
 	inc_ap_bio(device);
 	__drbd_make_request(device, bio);
-	return BLK_QC_T_NONE;
 }
 
 static bool net_timeout_reached(struct drbd_request *net_req,
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
index 26798da661bd..b168ca25b6c9 100644
--- a/drivers/block/n64cart.c
+++ b/drivers/block/n64cart.c
@@ -84,7 +84,7 @@ static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos)
 	return true;
 }
 
-static blk_qc_t n64cart_submit_bio(struct bio *bio)
+static void n64cart_submit_bio(struct bio *bio)
 {
 	struct bio_vec bvec;
 	struct bvec_iter iter;
@@ -92,16 +92,14 @@ static blk_qc_t n64cart_submit_bio(struct bio *bio)
 	u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT;
 
 	bio_for_each_segment(bvec, bio, iter) {
-		if (!n64cart_do_bvec(dev, &bvec, pos))
-			goto io_error;
+		if (!n64cart_do_bvec(dev, &bvec, pos)) {
+			bio_io_error(bio);
+			return;
+		}
 		pos += bvec.bv_len;
 	}
 
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
-io_error:
-	bio_io_error(bio);
-	return BLK_QC_T_NONE;
 }
 
 static const struct block_device_operations n64cart_fops = {
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 187d779c8ca0..e5cbcf582233 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1422,7 +1422,7 @@ static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
 	return &nullb->queues[index];
 }
 
-static blk_qc_t null_submit_bio(struct bio *bio)
+static void null_submit_bio(struct bio *bio)
 {
 	sector_t sector = bio->bi_iter.bi_sector;
 	sector_t nr_sectors = bio_sectors(bio);
@@ -1434,7 +1434,6 @@ static blk_qc_t null_submit_bio(struct bio *bio)
 	cmd->bio = bio;
 
 	null_handle_cmd(cmd, sector, nr_sectors, bio_op(bio));
-	return BLK_QC_T_NONE;
 }
 
 static bool should_timeout_request(struct request *rq)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 0f26b2510a75..cb52cce6fb03 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2400,7 +2400,7 @@ static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
 	}
 }
 
-static blk_qc_t pkt_submit_bio(struct bio *bio)
+static void pkt_submit_bio(struct bio *bio)
 {
 	struct pktcdvd_device *pd;
 	char b[BDEVNAME_SIZE];
@@ -2423,7 +2423,7 @@ static blk_qc_t pkt_submit_bio(struct bio *bio)
 	 */
 	if (bio_data_dir(bio) == READ) {
 		pkt_make_request_read(pd, bio);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
@@ -2455,10 +2455,9 @@ static blk_qc_t pkt_submit_bio(struct bio *bio)
 		pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
 	} while (split != bio);
 
-	return BLK_QC_T_NONE;
+	return;
 end_io:
 	bio_io_error(bio);
-	return BLK_QC_T_NONE;
 }
 
 static void pkt_init_queue(struct pktcdvd_device *pd)
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index c7b19e128b03..d1ebf193cb9a 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -578,7 +578,7 @@ out:
 	return next;
 }
 
-static blk_qc_t ps3vram_submit_bio(struct bio *bio)
+static void ps3vram_submit_bio(struct bio *bio)
 {
 	struct ps3_system_bus_device *dev = bio->bi_bdev->bd_disk->private_data;
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
@@ -594,13 +594,11 @@ static blk_qc_t ps3vram_submit_bio(struct bio *bio)
 	spin_unlock_irq(&priv->lock);
 
 	if (busy)
-		return BLK_QC_T_NONE;
+		return;
 
 	do {
 		bio = ps3vram_do_bio(dev, bio);
 	} while (bio);
-
-	return BLK_QC_T_NONE;
 }
 
 static const struct block_device_operations ps3vram_fops = {
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 1cc40b0ea761..268252380e88 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -50,7 +50,7 @@ struct rsxx_bio_meta {
 
 static struct kmem_cache *bio_meta_pool;
 
-static blk_qc_t rsxx_submit_bio(struct bio *bio);
+static void rsxx_submit_bio(struct bio *bio);
 
 /*----------------- Block Device Operations -----------------*/
 static int rsxx_blkdev_ioctl(struct block_device *bdev,
@@ -120,7 +120,7 @@ static void bio_dma_done_cb(struct rsxx_cardinfo *card,
 	}
 }
 
-static blk_qc_t rsxx_submit_bio(struct bio *bio)
+static void rsxx_submit_bio(struct bio *bio)
 {
 	struct rsxx_cardinfo *card = bio->bi_bdev->bd_disk->private_data;
 	struct rsxx_bio_meta *bio_meta;
@@ -169,7 +169,7 @@ static blk_qc_t rsxx_submit_bio(struct bio *bio)
 	if (st)
 		goto queue_err;
 
-	return BLK_QC_T_NONE;
+	return;
 
 queue_err:
 	kmem_cache_free(bio_meta_pool, bio_meta);
@@ -177,7 +177,6 @@ req_err:
 	if (st)
 		bio->bi_status = st;
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
 }
 
 /*----------------- Device Setup -------------------*/
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fcaf2750f68f..a68297fb51a2 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1598,22 +1598,18 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
 /*
  * Handler function for all zram I/O requests.
  */
-static blk_qc_t zram_submit_bio(struct bio *bio)
+static void zram_submit_bio(struct bio *bio)
 {
 	struct zram *zram = bio->bi_bdev->bd_disk->private_data;
 
 	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
 					bio->bi_iter.bi_size)) {
 		atomic64_inc(&zram->stats.invalid_io);
-		goto error;
+		bio_io_error(bio);
+		return;
 	}
 
 	__zram_make_request(zram, bio);
-	return BLK_QC_T_NONE;
-
-error:
-	bio_io_error(bio);
-	return BLK_QC_T_NONE;
 }
 
 static void zram_slot_free_notify(struct block_device *bdev,
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6d1de889baeb..23b28edae90f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -1163,7 +1163,7 @@ static void quit_max_writeback_rate(struct cache_set *c,
 
 /* Cached devices - read & write stuff */
 
-blk_qc_t cached_dev_submit_bio(struct bio *bio)
+void cached_dev_submit_bio(struct bio *bio)
 {
 	struct search *s;
 	struct block_device *orig_bdev = bio->bi_bdev;
@@ -1176,7 +1176,7 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
 		     dc->io_disable)) {
 		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	if (likely(d->c)) {
@@ -1222,8 +1222,6 @@ blk_qc_t cached_dev_submit_bio(struct bio *bio)
 	} else
 		/* I/O request sent to backing device */
 		detached_dev_do_request(d, bio, orig_bdev, start_time);
-
-	return BLK_QC_T_NONE;
 }
 
 static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode,
@@ -1273,7 +1271,7 @@ static void flash_dev_nodata(struct closure *cl)
 	continue_at(cl, search_free, NULL);
 }
 
-blk_qc_t flash_dev_submit_bio(struct bio *bio)
+void flash_dev_submit_bio(struct bio *bio)
 {
 	struct search *s;
 	struct closure *cl;
@@ -1282,7 +1280,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
 	if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
 		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	s = search_alloc(bio, d, bio->bi_bdev, bio_start_io_acct(bio));
@@ -1298,7 +1296,7 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
 		continue_at_nobarrier(&s->cl,
 				      flash_dev_nodata,
 				      bcache_wq);
-		return BLK_QC_T_NONE;
+		return;
 	} else if (bio_data_dir(bio)) {
 		bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
 					&KEY(d->id, bio->bi_iter.bi_sector, 0),
@@ -1314,7 +1312,6 @@ blk_qc_t flash_dev_submit_bio(struct bio *bio)
 	}
 
 	continue_at(cl, search_free, NULL);
-	return BLK_QC_T_NONE;
 }
 
 static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode,
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 82b38366a95d..38ab4856eaab 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -37,10 +37,10 @@ unsigned int bch_get_congested(const struct cache_set *c);
 void bch_data_insert(struct closure *cl);
 
 void bch_cached_dev_request_init(struct cached_dev *dc);
-blk_qc_t cached_dev_submit_bio(struct bio *bio);
+void cached_dev_submit_bio(struct bio *bio);
 
 void bch_flash_dev_request_init(struct bcache_device *d);
-blk_qc_t flash_dev_submit_bio(struct bio *bio);
+void flash_dev_submit_bio(struct bio *bio);
 
 extern struct kmem_cache *bch_search_cache;
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 76d9da49fda7..7870e6460633 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1183,14 +1183,13 @@ static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
 	mutex_unlock(&md->swap_bios_lock);
 }
 
-static blk_qc_t __map_bio(struct dm_target_io *tio)
+static void __map_bio(struct dm_target_io *tio)
 {
 	int r;
 	sector_t sector;
 	struct bio *clone = &tio->clone;
 	struct dm_io *io = tio->io;
 	struct dm_target *ti = tio->ti;
-	blk_qc_t ret = BLK_QC_T_NONE;
 
 	clone->bi_end_io = clone_endio;
 
@@ -1226,7 +1225,7 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
 	case DM_MAPIO_REMAPPED:
 		/* the bio has been remapped so dispatch it */
 		trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector);
-		ret = submit_bio_noacct(clone);
+		submit_bio_noacct(clone);
 		break;
 	case DM_MAPIO_KILL:
 		if (unlikely(swap_bios_limit(ti, clone))) {
@@ -1248,8 +1247,6 @@ static blk_qc_t __map_bio(struct dm_target_io *tio)
 		DMWARN("unimplemented target map return value: %d", r);
 		BUG();
 	}
-
-	return ret;
 }
 
 static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
@@ -1336,7 +1333,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
 	}
 }
 
-static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci,
+static void __clone_and_map_simple_bio(struct clone_info *ci,
 					   struct dm_target_io *tio, unsigned *len)
 {
 	struct bio *clone = &tio->clone;
@@ -1346,8 +1343,7 @@ static blk_qc_t __clone_and_map_simple_bio(struct clone_info *ci,
 	__bio_clone_fast(clone, ci->bio);
 	if (len)
 		bio_setup_sector(clone, ci->sector, *len);
-
-	return __map_bio(tio);
+	__map_bio(tio);
 }
 
 static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
@@ -1361,7 +1357,7 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
 
 	while ((bio = bio_list_pop(&blist))) {
 		tio = container_of(bio, struct dm_target_io, clone);
-		(void) __clone_and_map_simple_bio(ci, tio, len);
+		__clone_and_map_simple_bio(ci, tio, len);
 	}
 }
 
@@ -1405,7 +1401,7 @@ static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 		free_tio(tio);
 		return r;
 	}
-	(void) __map_bio(tio);
+	__map_bio(tio);
 
 	return 0;
 }
@@ -1520,11 +1516,10 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md,
 /*
  * Entry point to split a bio into clones and submit them to the targets.
  */
-static blk_qc_t __split_and_process_bio(struct mapped_device *md,
+static void __split_and_process_bio(struct mapped_device *md,
 					struct dm_table *map, struct bio *bio)
 {
 	struct clone_info ci;
-	blk_qc_t ret = BLK_QC_T_NONE;
 	int error = 0;
 
 	init_clone_info(&ci, md, map, bio);
@@ -1567,19 +1562,17 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 
 			bio_chain(b, bio);
 			trace_block_split(b, bio->bi_iter.bi_sector);
-			ret = submit_bio_noacct(bio);
+			submit_bio_noacct(bio);
 		}
 	}
 
 	/* drop the extra reference count */
 	dm_io_dec_pending(ci.io, errno_to_blk_status(error));
-	return ret;
 }
 
-static blk_qc_t dm_submit_bio(struct bio *bio)
+static void dm_submit_bio(struct bio *bio)
 {
 	struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
-	blk_qc_t ret = BLK_QC_T_NONE;
 	int srcu_idx;
 	struct dm_table *map;
 
@@ -1609,10 +1602,9 @@ static blk_qc_t dm_submit_bio(struct bio *bio)
 	if (is_abnormal_io(bio))
 		blk_queue_split(&bio);
 
-	ret = __split_and_process_bio(md, map, bio);
+	__split_and_process_bio(md, map, bio);
 out:
 	dm_put_live_table(md, srcu_idx);
-	return ret;
 }
 
 /*-----------------------------------------------------------------
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ec09083ff0ef..22310d5d8d41 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -443,19 +443,19 @@ check_suspended:
 }
 EXPORT_SYMBOL(md_handle_request);
 
-static blk_qc_t md_submit_bio(struct bio *bio)
+static void md_submit_bio(struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
 	struct mddev *mddev = bio->bi_bdev->bd_disk->private_data;
 
 	if (mddev == NULL || mddev->pers == NULL) {
 		bio_io_error(bio);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
 		bio_io_error(bio);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	blk_queue_split(&bio);
@@ -464,15 +464,13 @@ static blk_qc_t md_submit_bio(struct bio *bio)
 		if (bio_sectors(bio) != 0)
 			bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	/* bio could be mergeable after passing to underlayer */
 	bio->bi_opf &= ~REQ_NOMERGE;
 
 	md_handle_request(mddev, bio);
-
-	return BLK_QC_T_NONE;
 }
 
 /* mddev_suspend makes sure no new requests are submitted
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 088d3dd6f6fa..b6c6866f9259 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -162,7 +162,7 @@ static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
 	return err;
 }
 
-static blk_qc_t nd_blk_submit_bio(struct bio *bio)
+static void nd_blk_submit_bio(struct bio *bio)
 {
 	struct bio_integrity_payload *bip;
 	struct nd_namespace_blk *nsblk = bio->bi_bdev->bd_disk->private_data;
@@ -173,7 +173,7 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio)
 	bool do_acct;
 
 	if (!bio_integrity_prep(bio))
-		return BLK_QC_T_NONE;
+		return;
 
 	bip = bio_integrity(bio);
 	rw = bio_data_dir(bio);
@@ -199,7 +199,6 @@ static blk_qc_t nd_blk_submit_bio(struct bio *bio)
 		bio_end_io_acct(bio, start);
 
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
 }
 
 static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 92dec4952297..4295fa809420 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1440,7 +1440,7 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
 	return ret;
 }
 
-static blk_qc_t btt_submit_bio(struct bio *bio)
+static void btt_submit_bio(struct bio *bio)
 {
 	struct bio_integrity_payload *bip = bio_integrity(bio);
 	struct btt *btt = bio->bi_bdev->bd_disk->private_data;
@@ -1451,7 +1451,7 @@ static blk_qc_t btt_submit_bio(struct bio *bio)
 	bool do_acct;
 
 	if (!bio_integrity_prep(bio))
-		return BLK_QC_T_NONE;
+		return;
 
 	do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
 	if (do_acct)
@@ -1483,7 +1483,6 @@ static blk_qc_t btt_submit_bio(struct bio *bio)
 		bio_end_io_acct(bio, start);
 
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
 }
 
 static int btt_rw_page(struct block_device *bdev, sector_t sector,
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ef4950f80832..a67a3ad1d413 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -190,7 +190,7 @@ static blk_status_t pmem_do_write(struct pmem_device *pmem,
 	return rc;
 }
 
-static blk_qc_t pmem_submit_bio(struct bio *bio)
+static void pmem_submit_bio(struct bio *bio)
 {
 	int ret = 0;
 	blk_status_t rc = 0;
@@ -229,7 +229,6 @@ static blk_qc_t pmem_submit_bio(struct bio *bio)
 		bio->bi_status = errno_to_blk_status(ret);
 
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fba06618c6c2..ab78aa5d28c6 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -312,12 +312,11 @@ static bool nvme_available_path(struct nvme_ns_head *head)
 	return false;
 }
 
-static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
+static void nvme_ns_head_submit_bio(struct bio *bio)
 {
 	struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data;
 	struct device *dev = disk_to_dev(head->disk);
 	struct nvme_ns *ns;
-	blk_qc_t ret = BLK_QC_T_NONE;
 	int srcu_idx;
 
 	/*
@@ -334,7 +333,7 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
 		bio->bi_opf |= REQ_NVME_MPATH;
 		trace_block_bio_remap(bio, disk_devt(ns->head->disk),
 				      bio->bi_iter.bi_sector);
-		ret = submit_bio_noacct(bio);
+		submit_bio_noacct(bio);
 	} else if (nvme_available_path(head)) {
 		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
 
@@ -349,7 +348,6 @@ static blk_qc_t nvme_ns_head_submit_bio(struct bio *bio)
 	}
 
 	srcu_read_unlock(&head->srcu, srcu_idx);
-	return ret;
 }
 
 static int nvme_ns_head_open(struct block_device *bdev, fmode_t mode)
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 5be3d1c39a78..59e513d34b0f 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -30,7 +30,7 @@
 
 static int dcssblk_open(struct block_device *bdev, fmode_t mode);
 static void dcssblk_release(struct gendisk *disk, fmode_t mode);
-static blk_qc_t dcssblk_submit_bio(struct bio *bio);
+static void dcssblk_submit_bio(struct bio *bio);
 static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn);
 
@@ -854,7 +854,7 @@ dcssblk_release(struct gendisk *disk, fmode_t mode)
 	up_write(&dcssblk_devices_sem);
 }
 
-static blk_qc_t
+static void
 dcssblk_submit_bio(struct bio *bio)
 {
 	struct dcssblk_dev_info *dev_info;
@@ -907,10 +907,9 @@ dcssblk_submit_bio(struct bio *bio)
 		bytes_done += bvec.bv_len;
 	}
 	bio_endio(bio);
-	return BLK_QC_T_NONE;
+	return;
 fail:
 	bio_io_error(bio);
-	return BLK_QC_T_NONE;
 }
 
 static long
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4a9077c52444..04090ba0ef73 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8248,7 +8248,7 @@ static struct btrfs_dio_private *btrfs_create_dio_private(struct bio *dio_bio,
 	return dip;
 }
 
-static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
+static void btrfs_submit_direct(const struct iomap_iter *iter,
 		struct bio *dio_bio, loff_t file_offset)
 {
 	struct inode *inode = iter->inode;
@@ -8278,7 +8278,7 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
 		}
 		dio_bio->bi_status = BLK_STS_RESOURCE;
 		bio_endio(dio_bio);
-		return BLK_QC_T_NONE;
+		return;
 	}
 
 	if (!write) {
@@ -8372,15 +8372,13 @@ static blk_qc_t btrfs_submit_direct(const struct iomap_iter *iter,
 
 		free_extent_map(em);
 	} while (submit_len > 0);
-	return BLK_QC_T_NONE;
+	return;
 
 out_err_em:
 	free_extent_map(em);
 out_err:
 	dip->dio_bio->bi_status = status;
 	btrfs_dio_private_put(dip);
-
-	return BLK_QC_T_NONE;
 }
 
 const struct iomap_ops btrfs_dio_iomap_ops = {
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac0e11bbb445..9c5559faacda 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -915,7 +915,7 @@ const struct file_operations ext4_file_operations = {
 	.llseek		= ext4_llseek,
 	.read_iter	= ext4_file_read_iter,
 	.write_iter	= ext4_file_write_iter,
-	.iopoll		= iomap_dio_iopoll,
+	.iopoll		= iocb_bio_iopoll,
 	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ext4_compat_ioctl,
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c559827cb6f9..635f0e3f10ec 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1353,7 +1353,7 @@ const struct file_operations gfs2_file_fops = {
 	.llseek		= gfs2_llseek,
 	.read_iter	= gfs2_file_read_iter,
 	.write_iter	= gfs2_file_write_iter,
-	.iopoll		= iomap_dio_iopoll,
+	.iopoll		= iocb_bio_iopoll,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.compat_ioctl	= gfs2_compat_ioctl,
 	.mmap		= gfs2_mmap,
@@ -1386,7 +1386,7 @@ const struct file_operations gfs2_file_fops_nolock = {
 	.llseek		= gfs2_llseek,
 	.read_iter	= gfs2_file_read_iter,
 	.write_iter	= gfs2_file_write_iter,
-	.iopoll		= iomap_dio_iopoll,
+	.iopoll		= iocb_bio_iopoll,
 	.unlocked_ioctl	= gfs2_ioctl,
 	.compat_ioctl	= gfs2_compat_ioctl,
 	.mmap		= gfs2_mmap,
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 236aba256cd1..8efab177011d 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -38,8 +38,7 @@ struct iomap_dio {
 		struct {
 			struct iov_iter		*iter;
 			struct task_struct	*waiter;
-			struct request_queue	*last_queue;
-			blk_qc_t		cookie;
+			struct bio		*poll_bio;
 		} submit;
 
 		/* used for aio completion: */
@@ -49,29 +48,20 @@ struct iomap_dio {
 	};
 };
 
-int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags)
-{
-	struct request_queue *q = READ_ONCE(kiocb->private);
-
-	if (!q)
-		return 0;
-	return blk_poll(q, READ_ONCE(kiocb->ki_cookie), flags);
-}
-EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
-
 static void iomap_dio_submit_bio(const struct iomap_iter *iter,
 		struct iomap_dio *dio, struct bio *bio, loff_t pos)
 {
 	atomic_inc(&dio->ref);
 
-	if (dio->iocb->ki_flags & IOCB_HIPRI)
+	if (dio->iocb->ki_flags & IOCB_HIPRI) {
 		bio_set_polled(bio, dio->iocb);
+		dio->submit.poll_bio = bio;
+	}
 
-	dio->submit.last_queue = bdev_get_queue(iter->iomap.bdev);
 	if (dio->dops && dio->dops->submit_io)
-		dio->submit.cookie = dio->dops->submit_io(iter, bio, pos);
+		dio->dops->submit_io(iter, bio, pos);
 	else
-		dio->submit.cookie = submit_bio(bio);
+		submit_bio(bio);
 }
 
 ssize_t iomap_dio_complete(struct iomap_dio *dio)
@@ -164,9 +154,11 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 		} else if (dio->flags & IOMAP_DIO_WRITE) {
 			struct inode *inode = file_inode(dio->iocb->ki_filp);
 
+			WRITE_ONCE(dio->iocb->private, NULL);
 			INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
 			queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
 		} else {
+			WRITE_ONCE(dio->iocb->private, NULL);
 			iomap_dio_complete_work(&dio->aio.work);
 		}
 	}
@@ -497,8 +489,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
 	dio->submit.iter = iter;
 	dio->submit.waiter = current;
-	dio->submit.cookie = BLK_QC_T_NONE;
-	dio->submit.last_queue = NULL;
+	dio->submit.poll_bio = NULL;
 
 	if (iov_iter_rw(iter) == READ) {
 		if (iomi.pos >= dio->i_size)
@@ -611,8 +602,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (dio->flags & IOMAP_DIO_WRITE_FUA)
 		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
-	WRITE_ONCE(iocb->ki_cookie, dio->submit.cookie);
-	WRITE_ONCE(iocb->private, dio->submit.last_queue);
+	WRITE_ONCE(iocb->private, dio->submit.poll_bio);
 
 	/*
 	 * We are about to drop our additional submission reference, which
@@ -639,10 +629,8 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			if (!READ_ONCE(dio->submit.waiter))
 				break;
 
-			if (!(iocb->ki_flags & IOCB_HIPRI) ||
-			    !dio->submit.last_queue ||
-			    !blk_poll(dio->submit.last_queue,
-					 dio->submit.cookie, 0))
+			if (!dio->submit.poll_bio ||
+			    !bio_poll(dio->submit.poll_bio, 0))
 				blk_io_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7aa943edfc02..62e7fbe4e54c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1452,7 +1452,7 @@ const struct file_operations xfs_file_operations = {
 	.write_iter	= xfs_file_write_iter,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
-	.iopoll		= iomap_dio_iopoll,
+	.iopoll		= iocb_bio_iopoll,
 	.unlocked_ioctl	= xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= xfs_file_compat_ioctl,
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index ddc346a9df9b..3ce5f47338cb 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -1128,7 +1128,7 @@ static const struct file_operations zonefs_file_operations = {
 	.write_iter	= zonefs_file_write_iter,
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
-	.iopoll		= iomap_dio_iopoll,
+	.iopoll		= iocb_bio_iopoll,
 };
 
 static struct kmem_cache *zonefs_inode_cachep;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c7a2d880e927..62d684b7dd4c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -349,7 +349,7 @@ static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned short nr_iovecs)
 	return bio_alloc_bioset(gfp_mask, nr_iovecs, &fs_bio_set);
 }
 
-extern blk_qc_t submit_bio(struct bio *);
+void submit_bio(struct bio *bio);
 
 extern void bio_endio(struct bio *);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2219e9277118..a9c1d0882550 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -359,9 +359,9 @@ struct blk_mq_hw_ctx {
 	/** @kobj: Kernel object for sysfs. */
 	struct kobject		kobj;
 
-	/** @poll_considered: Count times blk_poll() was called. */
+	/** @poll_considered: Count times blk_mq_poll() was called. */
 	unsigned long		poll_considered;
-	/** @poll_invoked: Count how many requests blk_poll() polled. */
+	/** @poll_invoked: Count how many requests blk_mq_poll() polled. */
 	unsigned long		poll_invoked;
 	/** @poll_success: Count how many polled requests were completed. */
 	unsigned long		poll_success;
@@ -815,16 +815,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
 	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
 	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
 
-static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx,
-		struct request *rq)
-{
-	if (rq->tag != -1)
-		return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT);
-
-	return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) |
-			BLK_QC_T_INTERNAL;
-}
-
 static inline void blk_mq_cleanup_rq(struct request *rq)
 {
 	if (rq->q->mq_ops->cleanup_rq)
@@ -843,7 +833,6 @@ static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio,
 		rq->rq_disk = bio->bi_bdev->bd_disk;
 }
 
-blk_qc_t blk_mq_submit_bio(struct bio *bio);
 void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx,
 		struct lock_class_key *key);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f8b9fce68834..72736b4c057c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -208,6 +208,9 @@ static inline void bio_issue_init(struct bio_issue *issue,
 			((u64)size << BIO_ISSUE_SIZE_SHIFT));
 }
 
+typedef unsigned int blk_qc_t;
+#define BLK_QC_T_NONE		-1U
+
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
  * stacking drivers)
@@ -227,8 +230,8 @@ struct bio {
 
 	struct bvec_iter	bi_iter;
 
+	blk_qc_t		bi_cookie;
 	bio_end_io_t		*bi_end_io;
-
 	void			*bi_private;
 #ifdef CONFIG_BLK_CGROUP
 	/*
@@ -384,7 +387,7 @@ enum req_flag_bits {
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
 
-	__REQ_POLLED,		/* caller polls for completion using blk_poll */
+	__REQ_POLLED,		/* caller polls for completion using bio_poll */
 
 	/* for driver use */
 	__REQ_DRV,
@@ -495,11 +498,6 @@ static inline int op_stat_group(unsigned int op)
 	return op_is_write(op);
 }
 
-typedef unsigned int blk_qc_t;
-#define BLK_QC_T_NONE		-1U
-#define BLK_QC_T_SHIFT		16
-#define BLK_QC_T_INTERNAL	(1U << 31)
-
 struct blk_rq_stat {
 	u64 mean;
 	u64 min;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2b80c98fc373..2a8689e949b4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@ struct request;
 struct sg_io_hdr;
 struct blkcg_gq;
 struct blk_flush_queue;
+struct kiocb;
 struct pr_ops;
 struct rq_qos;
 struct blk_queue_stats;
@@ -550,7 +551,7 @@ static inline unsigned int blk_queue_depth(struct request_queue *q)
 
 extern int blk_register_queue(struct gendisk *disk);
 extern void blk_unregister_queue(struct gendisk *disk);
-blk_qc_t submit_bio_noacct(struct bio *bio);
+void submit_bio_noacct(struct bio *bio);
 
 extern int blk_lld_busy(struct request_queue *q);
 extern void blk_queue_split(struct bio **);
@@ -568,7 +569,8 @@ blk_status_t errno_to_blk_status(int errno);
 #define BLK_POLL_ONESHOT		(1 << 0)
 /* do not sleep to wait for the expected completion time */
 #define BLK_POLL_NOSLEEP		(1 << 1)
-int blk_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags);
+int bio_poll(struct bio *bio, unsigned int flags);
+int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
@@ -1176,7 +1178,7 @@ static inline void blk_ksm_unregister(struct request_queue *q) { }
 
 
 struct block_device_operations {
-	blk_qc_t (*submit_bio) (struct bio *bio);
+	void (*submit_bio)(struct bio *bio);
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
 	int (*rw_page)(struct block_device *, sector_t, struct page *, unsigned int);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c443cddf414f..f595f4097cb7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -334,11 +334,7 @@ struct kiocb {
 	int			ki_flags;
 	u16			ki_hint;
 	u16			ki_ioprio; /* See linux/ioprio.h */
-	union {
-		unsigned int		ki_cookie; /* for ->iopoll */
-		struct wait_page_queue	*ki_waitq; /* for async buffered IO */
-	};
-
+	struct wait_page_queue	*ki_waitq; /* for async buffered IO */
 	randomized_struct_fields_end
 };
 
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 1e86b65567c2..63f4ea4dac9b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -313,8 +313,8 @@ int iomap_writepages(struct address_space *mapping,
 struct iomap_dio_ops {
 	int (*end_io)(struct kiocb *iocb, ssize_t size, int error,
 		      unsigned flags);
-	blk_qc_t (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
-			      loff_t file_offset);
+	void (*submit_io)(const struct iomap_iter *iter, struct bio *bio,
+		          loff_t file_offset);
 };
 
 /*
@@ -337,7 +337,6 @@ struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		unsigned int dio_flags);
 ssize_t iomap_dio_complete(struct iomap_dio *dio);
-int iomap_dio_iopoll(struct kiocb *kiocb, unsigned int flags);
 
 #ifdef CONFIG_SWAP
 struct file;
diff --git a/mm/page_io.c b/mm/page_io.c
index ed2eded74f3a..a68faab5b310 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -358,8 +358,6 @@ int swap_readpage(struct page *page, bool synchronous)
 	struct bio *bio;
 	int ret = 0;
 	struct swap_info_struct *sis = page_swap_info(page);
-	blk_qc_t qc;
-	struct gendisk *disk;
 	unsigned long pflags;
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page) && !synchronous, page);
@@ -409,8 +407,6 @@ int swap_readpage(struct page *page, bool synchronous)
 	bio->bi_iter.bi_sector = swap_page_sector(page);
 	bio->bi_end_io = end_swap_bio_read;
 	bio_add_page(bio, page, thp_size(page), 0);
-
-	disk = bio->bi_bdev->bd_disk;
 	/*
 	 * Keep this task valid during swap readpage because the oom killer may
 	 * attempt to access it in the page fault retry time check.
@@ -422,13 +418,13 @@ int swap_readpage(struct page *page, bool synchronous)
 	}
 	count_vm_event(PSWPIN);
 	bio_get(bio);
-	qc = submit_bio(bio);
+	submit_bio(bio);
 	while (synchronous) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!READ_ONCE(bio->bi_private))
 			break;
 
-		if (!blk_poll(disk->queue, qc, 0))
+		if (!bio_poll(bio, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
-- 
cgit v1.2.3


From 17220ca5ce9606c1b015c4316fca18734c2df0bb Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 14 Oct 2021 15:03:26 +0100
Subject: block: cache request queue in bdev

There are tons of places where we need to get a request_queue only
having bdev, which turns into bdev->bd_disk->queue. There are probably a
hundred of such places considering inline helpers, and enough of them
are in hot paths.

Cache queue pointer in struct block_device and make use of it in
bdev_get_queue().

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a3bfaecdd28956f03629d0ca5c63ebc096e1c809.1634219547.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bdev.c              | 1 +
 block/genhd.c             | 4 +++-
 include/linux/blk_types.h | 1 +
 include/linux/blkdev.h    | 2 +-
 4 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/bdev.c b/block/bdev.c
index 93b1188d7e58..fed8d0c041c7 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -493,6 +493,7 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	spin_lock_init(&bdev->bd_size_lock);
 	bdev->bd_partno = partno;
 	bdev->bd_inode = inode;
+	bdev->bd_queue = disk->queue;
 	bdev->bd_stats = alloc_percpu(struct disk_stats);
 	if (!bdev->bd_stats) {
 		iput(inode);
diff --git a/block/genhd.c b/block/genhd.c
index ffbdb9b24555..d148c38450d7 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1267,6 +1267,9 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 	if (!disk->bdi)
 		goto out_free_disk;
 
+	/* bdev_alloc() might need the queue, set before the first call */
+	disk->queue = q;
+
 	disk->part0 = bdev_alloc(disk, 0);
 	if (!disk->part0)
 		goto out_free_bdi;
@@ -1282,7 +1285,6 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 	disk_to_dev(disk)->type = &disk_type;
 	device_initialize(disk_to_dev(disk));
 	inc_diskseq(disk);
-	disk->queue = q;
 	q->disk = disk;
 	lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 72736b4c057c..1e370929c89e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -38,6 +38,7 @@ struct block_device {
 	u8			bd_partno;
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
 	struct gendisk *	bd_disk;
+	struct request_queue *	bd_queue;
 
 	/* The counter of freeze processes */
 	int			bd_fsfreeze_count;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2a8689e949b4..d5b21fc8f49e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -574,7 +574,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
-	return bdev->bd_disk->queue;	/* this is never NULL */
+	return bdev->bd_queue;	/* this is never NULL */
 }
 
 /*
-- 
cgit v1.2.3


From b5f9c644eb1baafcd349ad134e2110773f8d0a38 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Mon, 4 Oct 2021 14:59:35 +0200
Subject: PCI: Remove struct pci_dev->driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are no remaining uses of the struct pci_dev->driver pointer, so
remove it.

Link: https://lore.kernel.org/r/20211004125935.2300113-12-u.kleine-koenig@pengutronix.de
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-driver.c | 4 ----
 include/linux/pci.h      | 1 -
 2 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index b919c5361694..3da121cb8cbf 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -319,12 +319,10 @@ static long local_pci_probe(void *_ddi)
 	 * its remove routine.
 	 */
 	pm_runtime_get_sync(dev);
-	pci_dev->driver = pci_drv;
 	rc = pci_drv->probe(pci_dev, ddi->id);
 	if (!rc)
 		return rc;
 	if (rc < 0) {
-		pci_dev->driver = NULL;
 		pm_runtime_put_sync(dev);
 		return rc;
 	}
@@ -390,7 +388,6 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
  * @pci_dev: PCI device being probed
  *
  * returns 0 on success, else error.
- * side-effect: pci_dev->driver is set to drv when drv claims pci_dev.
  */
 static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
 {
@@ -465,7 +462,6 @@ static void pci_device_remove(struct device *dev)
 		pm_runtime_put_noidle(dev);
 	}
 	pcibios_free_irq(pci_dev);
-	pci_dev->driver = NULL;
 	pci_iov_remove(pci_dev);
 
 	/* Undo the runtime PM settings in local_pci_probe() */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index a2d62165a7f7..03bfdb25a55c 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -342,7 +342,6 @@ struct pci_dev {
 	u16		pcie_flags_reg;	/* Cached PCIe Capabilities Register */
 	unsigned long	*dma_alias_mask;/* Mask of enabled devfn aliases */
 
-	struct pci_driver *driver;	/* Driver bound to this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
 					   device implements.  Normally this is
 					   0xffffffff.  You only need to change
-- 
cgit v1.2.3


From bb523b406c849eef8f265a07cd7f320f1f177743 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 2 Aug 2021 13:44:20 +0200
Subject: gup: Turn fault_in_pages_{readable,writeable} into
 fault_in_{readable,writeable}

Turn fault_in_pages_{readable,writeable} into versions that return the
number of bytes not faulted in, similar to copy_to_user, instead of
returning a non-zero value when any of the requested pages couldn't be
faulted in.  This supports the existing users that require all pages to
be faulted in as well as new users that are happy if any pages can be
faulted in.

Rename the functions to fault_in_{readable,writeable} to make sure
this change doesn't silently break things.

Neither of these functions is entirely trivial and it doesn't seem
useful to inline them, so move them to mm/gup.c.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 arch/powerpc/kernel/kvm.c           |  3 +-
 arch/powerpc/kernel/signal_32.c     |  4 +--
 arch/powerpc/kernel/signal_64.c     |  2 +-
 arch/x86/kernel/fpu/signal.c        |  7 ++--
 drivers/gpu/drm/armada/armada_gem.c |  7 ++--
 fs/btrfs/ioctl.c                    |  5 ++-
 include/linux/pagemap.h             | 57 ++---------------------------
 lib/iov_iter.c                      | 10 +++---
 mm/filemap.c                        |  2 +-
 mm/gup.c                            | 72 +++++++++++++++++++++++++++++++++++++
 10 files changed, 93 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index d89cf802d9aa..6568823cf306 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -669,7 +669,8 @@ static void __init kvm_use_magic_page(void)
 	on_each_cpu(kvm_map_magic_page, &features, 1);
 
 	/* Quick self-test to see if the mapping works */
-	if (fault_in_pages_readable((const char *)KVM_MAGIC_PAGE, sizeof(u32))) {
+	if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
+			      sizeof(u32))) {
 		kvm_patching_worked = false;
 		return;
 	}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 0608581967f0..38c3eae40c14 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1048,7 +1048,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	if (new_ctx == NULL)
 		return 0;
 	if (!access_ok(new_ctx, ctx_size) ||
-	    fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
+	    fault_in_readable((char __user *)new_ctx, ctx_size))
 		return -EFAULT;
 
 	/*
@@ -1237,7 +1237,7 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
 #endif
 
 	if (!access_ok(ctx, sizeof(*ctx)) ||
-	    fault_in_pages_readable((u8 __user *)ctx, sizeof(*ctx)))
+	    fault_in_readable((char __user *)ctx, sizeof(*ctx)))
 		return -EFAULT;
 
 	/*
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 1831bba0582e..9f471b4a11e3 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -688,7 +688,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	if (new_ctx == NULL)
 		return 0;
 	if (!access_ok(new_ctx, ctx_size) ||
-	    fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
+	    fault_in_readable((char __user *)new_ctx, ctx_size))
 		return -EFAULT;
 
 	/*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index fa17a27390ab..164c96434704 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -205,7 +205,7 @@ retry:
 	fpregs_unlock();
 
 	if (ret) {
-		if (!fault_in_pages_writeable(buf_fx, fpu_user_xstate_size))
+		if (!fault_in_writeable(buf_fx, fpu_user_xstate_size))
 			goto retry;
 		return -EFAULT;
 	}
@@ -278,10 +278,9 @@ retry:
 		if (ret != -EFAULT)
 			return -EINVAL;
 
-		ret = fault_in_pages_readable(buf, size);
-		if (!ret)
+		if (!fault_in_readable(buf, size))
 			goto retry;
-		return ret;
+		return -EFAULT;
 	}
 
 	/*
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 21909642ee4c..8fbb25913327 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -336,7 +336,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	struct drm_armada_gem_pwrite *args = data;
 	struct armada_gem_object *dobj;
 	char __user *ptr;
-	int ret;
+	int ret = 0;
 
 	DRM_DEBUG_DRIVER("handle %u off %u size %u ptr 0x%llx\n",
 		args->handle, args->offset, args->size, args->ptr);
@@ -349,9 +349,8 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (!access_ok(ptr, args->size))
 		return -EFAULT;
 
-	ret = fault_in_pages_readable(ptr, args->size);
-	if (ret)
-		return ret;
+	if (fault_in_readable(ptr, args->size))
+		return -EFAULT;
 
 	dobj = armada_gem_object_lookup(file, args->handle);
 	if (dobj == NULL)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cc61813213d8..c0739f0af634 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2261,9 +2261,8 @@ static noinline int search_ioctl(struct inode *inode,
 	key.offset = sk->min_offset;
 
 	while (1) {
-		ret = fault_in_pages_writeable(ubuf + sk_offset,
-					       *buf_size - sk_offset);
-		if (ret)
+		ret = -EFAULT;
+		if (fault_in_writeable(ubuf + sk_offset, *buf_size - sk_offset))
 			break;
 
 		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 62db6b0176b9..9fe94f7a4f7e 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -733,61 +733,10 @@ int wait_on_page_private_2_killable(struct page *page);
 extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
 
 /*
- * Fault everything in given userspace address range in.
+ * Fault in userspace address range.
  */
-static inline int fault_in_pages_writeable(char __user *uaddr, size_t size)
-{
-	char __user *end = uaddr + size - 1;
-
-	if (unlikely(size == 0))
-		return 0;
-
-	if (unlikely(uaddr > end))
-		return -EFAULT;
-	/*
-	 * Writing zeroes into userspace here is OK, because we know that if
-	 * the zero gets there, we'll be overwriting it.
-	 */
-	do {
-		if (unlikely(__put_user(0, uaddr) != 0))
-			return -EFAULT;
-		uaddr += PAGE_SIZE;
-	} while (uaddr <= end);
-
-	/* Check whether the range spilled into the next page. */
-	if (((unsigned long)uaddr & PAGE_MASK) ==
-			((unsigned long)end & PAGE_MASK))
-		return __put_user(0, end);
-
-	return 0;
-}
-
-static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
-{
-	volatile char c;
-	const char __user *end = uaddr + size - 1;
-
-	if (unlikely(size == 0))
-		return 0;
-
-	if (unlikely(uaddr > end))
-		return -EFAULT;
-
-	do {
-		if (unlikely(__get_user(c, uaddr) != 0))
-			return -EFAULT;
-		uaddr += PAGE_SIZE;
-	} while (uaddr <= end);
-
-	/* Check whether the range spilled into the next page. */
-	if (((unsigned long)uaddr & PAGE_MASK) ==
-			((unsigned long)end & PAGE_MASK)) {
-		return __get_user(c, end);
-	}
-
-	(void)c;
-	return 0;
-}
+size_t fault_in_writeable(char __user *uaddr, size_t size);
+size_t fault_in_readable(const char __user *uaddr, size_t size);
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
 				pgoff_t index, gfp_t gfp_mask);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 60b5e6edfbaa..c88908f0f138 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -191,7 +191,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b
 	buf = iov->iov_base + skip;
 	copy = min(bytes, iov->iov_len - skip);
 
-	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
+	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
 		kaddr = kmap_atomic(page);
 		from = kaddr + offset;
 
@@ -275,7 +275,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t
 	buf = iov->iov_base + skip;
 	copy = min(bytes, iov->iov_len - skip);
 
-	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
+	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
 		kaddr = kmap_atomic(page);
 		to = kaddr + offset;
 
@@ -446,13 +446,11 @@ int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
 			bytes = i->count;
 		for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
 			size_t len = min(bytes, p->iov_len - skip);
-			int err;
 
 			if (unlikely(!len))
 				continue;
-			err = fault_in_pages_readable(p->iov_base + skip, len);
-			if (unlikely(err))
-				return err;
+			if (fault_in_readable(p->iov_base + skip, len))
+				return -EFAULT;
 			bytes -= len;
 		}
 	}
diff --git a/mm/filemap.c b/mm/filemap.c
index dae481293b5d..ff34f4087f87 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -90,7 +90,7 @@
  *      ->lock_page		(filemap_fault, access_process_vm)
  *
  *  ->i_rwsem			(generic_perform_write)
- *    ->mmap_lock		(fault_in_pages_readable->do_page_fault)
+ *    ->mmap_lock		(fault_in_readable->do_page_fault)
  *
  *  bdi->wb.list_lock
  *    sb_lock			(fs/fs-writeback.c)
diff --git a/mm/gup.c b/mm/gup.c
index 886d6148d3d0..a7efb027d6cf 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1656,6 +1656,78 @@ finish_or_fault:
 }
 #endif /* !CONFIG_MMU */
 
+/**
+ * fault_in_writeable - fault in userspace address range for writing
+ * @uaddr: start of address range
+ * @size: size of address range
+ *
+ * Returns the number of bytes not faulted in (like copy_to_user() and
+ * copy_from_user()).
+ */
+size_t fault_in_writeable(char __user *uaddr, size_t size)
+{
+	char __user *start = uaddr, *end;
+
+	if (unlikely(size == 0))
+		return 0;
+	if (!PAGE_ALIGNED(uaddr)) {
+		if (unlikely(__put_user(0, uaddr) != 0))
+			return size;
+		uaddr = (char __user *)PAGE_ALIGN((unsigned long)uaddr);
+	}
+	end = (char __user *)PAGE_ALIGN((unsigned long)start + size);
+	if (unlikely(end < start))
+		end = NULL;
+	while (uaddr != end) {
+		if (unlikely(__put_user(0, uaddr) != 0))
+			goto out;
+		uaddr += PAGE_SIZE;
+	}
+
+out:
+	if (size > uaddr - start)
+		return size - (uaddr - start);
+	return 0;
+}
+EXPORT_SYMBOL(fault_in_writeable);
+
+/**
+ * fault_in_readable - fault in userspace address range for reading
+ * @uaddr: start of user address range
+ * @size: size of user address range
+ *
+ * Returns the number of bytes not faulted in (like copy_to_user() and
+ * copy_from_user()).
+ */
+size_t fault_in_readable(const char __user *uaddr, size_t size)
+{
+	const char __user *start = uaddr, *end;
+	volatile char c;
+
+	if (unlikely(size == 0))
+		return 0;
+	if (!PAGE_ALIGNED(uaddr)) {
+		if (unlikely(__get_user(c, uaddr) != 0))
+			return size;
+		uaddr = (const char __user *)PAGE_ALIGN((unsigned long)uaddr);
+	}
+	end = (const char __user *)PAGE_ALIGN((unsigned long)start + size);
+	if (unlikely(end < start))
+		end = NULL;
+	while (uaddr != end) {
+		if (unlikely(__get_user(c, uaddr) != 0))
+			goto out;
+		uaddr += PAGE_SIZE;
+	}
+
+out:
+	(void)c;
+	if (size > uaddr - start)
+		return size - (uaddr - start);
+	return 0;
+}
+EXPORT_SYMBOL(fault_in_readable);
+
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address
-- 
cgit v1.2.3


From a6294593e8a1290091d0b078d5d33da5e0cd3dfe Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 2 Aug 2021 14:54:16 +0200
Subject: iov_iter: Turn iov_iter_fault_in_readable into
 fault_in_iov_iter_readable

Turn iov_iter_fault_in_readable into a function that returns the number
of bytes not faulted in, similar to copy_to_user, instead of returning a
non-zero value when any of the requested pages couldn't be faulted in.
This supports the existing users that require all pages to be faulted in
as well as new users that are happy if any pages can be faulted in.

Rename iov_iter_fault_in_readable to fault_in_iov_iter_readable to make
sure this change doesn't silently break things.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/btrfs/file.c        |  2 +-
 fs/f2fs/file.c         |  2 +-
 fs/fuse/file.c         |  2 +-
 fs/iomap/buffered-io.c |  2 +-
 fs/ntfs/file.c         |  2 +-
 fs/ntfs3/file.c        |  2 +-
 include/linux/uio.h    |  2 +-
 lib/iov_iter.c         | 33 +++++++++++++++++++++------------
 mm/filemap.c           |  2 +-
 9 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 7ff577005d0f..f37211d3bb69 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1710,7 +1710,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
 		 * Fault pages before locking them in prepare_pages
 		 * to avoid recursive lock
 		 */
-		if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
+		if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
 			ret = -EFAULT;
 			break;
 		}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 9c8ef33bd8d3..eb971e1e7227 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -4276,7 +4276,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		size_t target_size = 0;
 		int err;
 
-		if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
+		if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
 			set_inode_flag(inode, FI_NO_PREALLOC);
 
 		if ((iocb->ki_flags & IOCB_NOWAIT)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 11404f8c21c7..4b6d8e13322d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1164,7 +1164,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
 
  again:
 		err = -EFAULT;
-		if (iov_iter_fault_in_readable(ii, bytes))
+		if (fault_in_iov_iter_readable(ii, bytes))
 			break;
 
 		err = -ENOMEM;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 9cc5798423d1..1753c26c8e76 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -750,7 +750,7 @@ again:
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
 		 */
-		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+		if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
 			status = -EFAULT;
 			break;
 		}
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index ab4f3362466d..a43adeacd930 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1829,7 +1829,7 @@ again:
 		 * pages being swapped out between us bringing them into memory
 		 * and doing the actual copying.
 		 */
-		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+		if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
 			status = -EFAULT;
 			break;
 		}
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 424450e77ad5..a52388387175 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -987,7 +987,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
 		frame_vbo = pos & ~(frame_size - 1);
 		index = frame_vbo >> PAGE_SHIFT;
 
-		if (unlikely(iov_iter_fault_in_readable(from, bytes))) {
+		if (unlikely(fault_in_iov_iter_readable(from, bytes))) {
 			err = -EFAULT;
 			goto out;
 		}
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 207101a9c5c3..d18458af6681 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -133,7 +133,7 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
 				  size_t bytes, struct iov_iter *i);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
-int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes);
+size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index c88908f0f138..ce3d4f610626 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -430,33 +430,42 @@ out:
 }
 
 /*
+ * fault_in_iov_iter_readable - fault in iov iterator for reading
+ * @i: iterator
+ * @size: maximum length
+ *
  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
- * bytes.  For each iovec, fault in each page that constitutes the iovec.
+ * @size.  For each iovec, fault in each page that constitutes the iovec.
+ *
+ * Returns the number of bytes not faulted in (like copy_to_user() and
+ * copy_from_user()).
  *
- * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
- * because it is an invalid address).
+ * Always returns 0 for non-userspace iterators.
  */
-int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
+size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
 {
 	if (iter_is_iovec(i)) {
+		size_t count = min(size, iov_iter_count(i));
 		const struct iovec *p;
 		size_t skip;
 
-		if (bytes > i->count)
-			bytes = i->count;
-		for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
-			size_t len = min(bytes, p->iov_len - skip);
+		size -= count;
+		for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
+			size_t len = min(count, p->iov_len - skip);
+			size_t ret;
 
 			if (unlikely(!len))
 				continue;
-			if (fault_in_readable(p->iov_base + skip, len))
-				return -EFAULT;
-			bytes -= len;
+			ret = fault_in_readable(p->iov_base + skip, len);
+			count -= len - ret;
+			if (ret)
+				break;
 		}
+		return count + size;
 	}
 	return 0;
 }
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
+EXPORT_SYMBOL(fault_in_iov_iter_readable);
 
 void iov_iter_init(struct iov_iter *i, unsigned int direction,
 			const struct iovec *iov, unsigned long nr_segs,
diff --git a/mm/filemap.c b/mm/filemap.c
index ff34f4087f87..4dd5edcd39fd 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3757,7 +3757,7 @@ again:
 		 * same page as we're writing to, without it being marked
 		 * up-to-date.
 		 */
-		if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+		if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
 			status = -EFAULT;
 			break;
 		}
-- 
cgit v1.2.3


From d4aa57a1cac3c99ffd641f7c8e0a7aff5656de0d Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 13 Oct 2021 09:01:43 -0600
Subject: block: don't bother iter advancing a fully done bio

If we're completing nbytes and nbytes is the size of the bio, don't bother
with calling into the iterator increment helpers. Just clear the bio
size and we're done.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 15 ++-------------
 include/linux/bio.h | 24 ++++++++++++++++++++++--
 2 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 5fb8092577bf..4f397ba47db5 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1278,18 +1278,7 @@ int submit_bio_wait(struct bio *bio)
 }
 EXPORT_SYMBOL(submit_bio_wait);
 
-/**
- * bio_advance - increment/complete a bio by some number of bytes
- * @bio:	bio to advance
- * @bytes:	number of bytes to complete
- *
- * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
- * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
- * be updated on the last bvec as well.
- *
- * @bio will then represent the remaining, uncompleted portion of the io.
- */
-void bio_advance(struct bio *bio, unsigned bytes)
+void __bio_advance(struct bio *bio, unsigned bytes)
 {
 	if (bio_integrity(bio))
 		bio_integrity_advance(bio, bytes);
@@ -1297,7 +1286,7 @@ void bio_advance(struct bio *bio, unsigned bytes)
 	bio_crypt_advance(bio, bytes);
 	bio_advance_iter(bio, &bio->bi_iter, bytes);
 }
-EXPORT_SYMBOL(bio_advance);
+EXPORT_SYMBOL(__bio_advance);
 
 void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
 			struct bio *src, struct bvec_iter *src_iter)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 62d684b7dd4c..9538f20ffaa5 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -119,6 +119,28 @@ static inline void bio_advance_iter_single(const struct bio *bio,
 		bvec_iter_advance_single(bio->bi_io_vec, iter, bytes);
 }
 
+void __bio_advance(struct bio *, unsigned bytes);
+
+/**
+ * bio_advance - increment/complete a bio by some number of bytes
+ * @bio:	bio to advance
+ * @bytes:	number of bytes to complete
+ *
+ * This updates bi_sector, bi_size and bi_idx; if the number of bytes to
+ * complete doesn't align with a bvec boundary, then bv_len and bv_offset will
+ * be updated on the last bvec as well.
+ *
+ * @bio will then represent the remaining, uncompleted portion of the io.
+ */
+static inline void bio_advance(struct bio *bio, unsigned int nbytes)
+{
+	if (nbytes == bio->bi_iter.bi_size) {
+		bio->bi_iter.bi_size = 0;
+		return;
+	}
+	__bio_advance(bio, nbytes);
+}
+
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
@@ -381,8 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs)
 struct request_queue;
 
 extern int submit_bio_wait(struct bio *bio);
-extern void bio_advance(struct bio *, unsigned);
-
 extern void bio_init(struct bio *bio, struct bio_vec *table,
 		     unsigned short max_vecs);
 extern void bio_uninit(struct bio *);
-- 
cgit v1.2.3


From b60876296847e6cd7f1da4b8b7f0f31399d59aa1 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 15 Oct 2021 15:03:52 -0600
Subject: block: improve layout of struct request

It's been a while since this was analyzed, move some members around to
better flow with the use case. Initial state up top, and queued state
after that. This improves my peak case by about 1.5%, from 7750K to
7900K IOPS.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-mq.h | 90 ++++++++++++++++++++++++++------------------------
 1 file changed, 46 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a9c1d0882550..8ca9728cc7f2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -83,6 +83,8 @@ struct request {
 	int tag;
 	int internal_tag;
 
+	unsigned int timeout;
+
 	/* the following two fields are internal, NEVER access directly */
 	unsigned int __data_len;	/* total data len */
 	sector_t __sector;		/* sector cursor */
@@ -95,49 +97,6 @@ struct request {
 		struct request *rq_next;
 	};
 
-	/*
-	 * The hash is used inside the scheduler, and killed once the
-	 * request reaches the dispatch list. The ipi_list is only used
-	 * to queue the request for softirq completion, which is long
-	 * after the request has been unhashed (and even removed from
-	 * the dispatch list).
-	 */
-	union {
-		struct hlist_node hash;	/* merge hash */
-		struct llist_node ipi_list;
-	};
-
-	/*
-	 * The rb_node is only used inside the io scheduler, requests
-	 * are pruned when moved to the dispatch queue. So let the
-	 * completion_data share space with the rb_node.
-	 */
-	union {
-		struct rb_node rb_node;	/* sort/lookup */
-		struct bio_vec special_vec;
-		void *completion_data;
-		int error_count; /* for legacy drivers, don't use */
-	};
-
-	/*
-	 * Three pointers are available for the IO schedulers, if they need
-	 * more they have to dynamically allocate it.  Flush requests are
-	 * never put on the IO scheduler. So let the flush fields share
-	 * space with the elevator data.
-	 */
-	union {
-		struct {
-			struct io_cq		*icq;
-			void			*priv[2];
-		} elv;
-
-		struct {
-			unsigned int		seq;
-			struct list_head	list;
-			rq_end_io_fn		*saved_end_io;
-		} flush;
-	};
-
 	struct gendisk *rq_disk;
 	struct block_device *part;
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
@@ -180,9 +139,52 @@ struct request {
 	enum mq_rq_state state;
 	refcount_t ref;
 
-	unsigned int timeout;
 	unsigned long deadline;
 
+	/*
+	 * The hash is used inside the scheduler, and killed once the
+	 * request reaches the dispatch list. The ipi_list is only used
+	 * to queue the request for softirq completion, which is long
+	 * after the request has been unhashed (and even removed from
+	 * the dispatch list).
+	 */
+	union {
+		struct hlist_node hash;	/* merge hash */
+		struct llist_node ipi_list;
+	};
+
+	/*
+	 * The rb_node is only used inside the io scheduler, requests
+	 * are pruned when moved to the dispatch queue. So let the
+	 * completion_data share space with the rb_node.
+	 */
+	union {
+		struct rb_node rb_node;	/* sort/lookup */
+		struct bio_vec special_vec;
+		void *completion_data;
+		int error_count; /* for legacy drivers, don't use */
+	};
+
+
+	/*
+	 * Three pointers are available for the IO schedulers, if they need
+	 * more they have to dynamically allocate it.  Flush requests are
+	 * never put on the IO scheduler. So let the flush fields share
+	 * space with the elevator data.
+	 */
+	union {
+		struct {
+			struct io_cq		*icq;
+			void			*priv[2];
+		} elv;
+
+		struct {
+			unsigned int		seq;
+			struct list_head	list;
+			rq_end_io_fn		*saved_end_io;
+		} flush;
+	};
+
 	union {
 		struct __call_single_data csd;
 		u64 fifo_time;
-- 
cgit v1.2.3


From 2ff0682da6e09c1e0db63a2d2abcd4efb531c8db Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 15 Oct 2021 09:44:38 -0600
Subject: block: store elevator state in request

Add an rq private RQF_ELV flag, which tells the block layer that this
request was initialized on a queue that has an IO scheduler attached.
This allows for faster checking in the fast path, rather than having to
deference rq->q later on.

Elevator switching does full quiesce of the queue before detaching an
IO scheduler, so it's safe to cache this in the request itself.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.h   | 27 ++++++++++++++++-----------
 block/blk-mq.c         | 20 +++++++++++---------
 include/linux/blk-mq.h |  2 ++
 3 files changed, 29 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index fe252278ed9a..98836106b25f 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -56,29 +56,34 @@ static inline bool
 blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 			 struct bio *bio)
 {
-	struct elevator_queue *e = q->elevator;
-
-	if (e && e->type->ops.allow_merge)
-		return e->type->ops.allow_merge(q, rq, bio);
+	if (rq->rq_flags & RQF_ELV) {
+		struct elevator_queue *e = q->elevator;
 
+		if (e->type->ops.allow_merge)
+			return e->type->ops.allow_merge(q, rq, bio);
+	}
 	return true;
 }
 
 static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 {
-	struct elevator_queue *e = rq->q->elevator;
+	if (rq->rq_flags & RQF_ELV) {
+		struct elevator_queue *e = rq->q->elevator;
 
-	if (e && e->type->ops.completed_request)
-		e->type->ops.completed_request(rq, now);
+		if (e->type->ops.completed_request)
+			e->type->ops.completed_request(rq, now);
+	}
 }
 
 static inline void blk_mq_sched_requeue_request(struct request *rq)
 {
-	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
+	if (rq->rq_flags & RQF_ELV) {
+		struct request_queue *q = rq->q;
+		struct elevator_queue *e = q->elevator;
 
-	if ((rq->rq_flags & RQF_ELVPRIV) && e && e->type->ops.requeue_request)
-		e->type->ops.requeue_request(rq);
+		if ((rq->rq_flags & RQF_ELVPRIV) && e->type->ops.requeue_request)
+			e->type->ops.requeue_request(rq);
+	}
 }
 
 static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9cff9e8eada4..28eb1f3c6f76 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -306,7 +306,7 @@ void blk_mq_wake_waiters(struct request_queue *q)
  */
 static inline bool blk_mq_need_time_stamp(struct request *rq)
 {
-	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS)) || rq->q->elevator;
+	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
 }
 
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
@@ -316,9 +316,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	struct request *rq = tags->static_rqs[tag];
 
 	if (data->q->elevator) {
+		rq->rq_flags = RQF_ELV;
 		rq->tag = BLK_MQ_NO_TAG;
 		rq->internal_tag = tag;
 	} else {
+		rq->rq_flags = 0;
 		rq->tag = tag;
 		rq->internal_tag = BLK_MQ_NO_TAG;
 	}
@@ -327,7 +329,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	rq->q = data->q;
 	rq->mq_ctx = data->ctx;
 	rq->mq_hctx = data->hctx;
-	rq->rq_flags = 0;
 	rq->cmd_flags = data->cmd_flags;
 	if (data->flags & BLK_MQ_REQ_PM)
 		rq->rq_flags |= RQF_PM;
@@ -363,11 +364,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	data->ctx->rq_dispatched[op_is_sync(data->cmd_flags)]++;
 	refcount_set(&rq->ref, 1);
 
-	if (!op_is_flush(data->cmd_flags)) {
+	if (!op_is_flush(data->cmd_flags) && (rq->rq_flags & RQF_ELV)) {
 		struct elevator_queue *e = data->q->elevator;
 
 		rq->elv.icq = NULL;
-		if (e && e->type->ops.prepare_request) {
+		if (e->type->ops.prepare_request) {
 			if (e->type->icq_cache)
 				blk_mq_sched_assign_ioc(rq);
 
@@ -588,12 +589,13 @@ static void __blk_mq_free_request(struct request *rq)
 void blk_mq_free_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
-	struct elevator_queue *e = q->elevator;
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
 
-	if (rq->rq_flags & RQF_ELVPRIV) {
-		if (e && e->type->ops.finish_request)
+	if (rq->rq_flags & (RQF_ELVPRIV | RQF_ELV)) {
+		struct elevator_queue *e = q->elevator;
+
+		if (e->type->ops.finish_request)
 			e->type->ops.finish_request(rq);
 		if (rq->elv.icq) {
 			put_io_context(rq->elv.icq->ioc);
@@ -2254,7 +2256,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}
 
-	if (q->elevator && !bypass_insert)
+	if ((rq->rq_flags & RQF_ELV) && !bypass_insert)
 		goto insert;
 
 	budget_token = blk_mq_get_dispatch_budget(q);
@@ -2492,7 +2494,7 @@ void blk_mq_submit_bio(struct bio *bio)
 		}
 
 		blk_add_rq_to_plug(plug, rq);
-	} else if (q->elevator) {
+	} else if (rq->rq_flags & RQF_ELV) {
 		/* Insert the request at the IO scheduler queue */
 		blk_mq_sched_insert_request(rq, false, true, true);
 	} else if (plug && !blk_queue_nomerges(q)) {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8ca9728cc7f2..95c3bd3a008e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -55,6 +55,8 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 20))
 /* ->timeout has been called, don't expire again */
 #define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
+/* queue has elevator attached */
+#define RQF_ELV			((__force req_flags_t)(1 << 22))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
-- 
cgit v1.2.3


From 0d20e9fb1262b1f9ac895b287db892bc75b05b84 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Mon, 18 Oct 2021 17:19:31 +0200
Subject: rtc: add BSM parameter

BSM or Backup Switch Mode is a common feature on RTCs, allowing to select
how the RTC will decide when to switch from its primary power supply to the
backup power supply. It is necessary to be able to set it from userspace as
there are uses cases where it has to be done dynamically.

Supported values are:
  RTC_BSM_DISABLED: disabled
  RTC_BSM_DIRECT: switching will happen as soon as Vbackup > Vdd
  RTC_BSM_LEVEL: switching will happen around a threshold, usually with an
  hysteresis
  RTC_BSM_STANDBY: switching will not happen until Vdd > Vbackup, this is
  useful to ensure the RTC doesn't draw any power until the device is first
  powered on.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20211018151933.76865-6-alexandre.belloni@bootlin.com
---
 drivers/rtc/dev.c        | 10 ++++++++--
 include/linux/rtc.h      |  2 ++
 include/uapi/linux/rtc.h |  9 ++++++++-
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/dev.c b/drivers/rtc/dev.c
index abee1fc4705e..e104972a28fd 100644
--- a/drivers/rtc/dev.c
+++ b/drivers/rtc/dev.c
@@ -409,7 +409,10 @@ static long rtc_dev_ioctl(struct file *file,
 			break;
 
 		default:
-			err = -EINVAL;
+			if (rtc->ops->param_get)
+				err = rtc->ops->param_get(rtc->dev.parent, &param);
+			else
+				err = -EINVAL;
 		}
 
 		if (!err)
@@ -436,7 +439,10 @@ static long rtc_dev_ioctl(struct file *file,
 			return rtc_set_offset(rtc, param.svalue);
 
 		default:
-			err = -EINVAL;
+			if (rtc->ops->param_set)
+				err = rtc->ops->param_set(rtc->dev.parent, &param);
+			else
+				err = -EINVAL;
 		}
 
 		break;
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 354e0843ab17..47fd1c2d3a57 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -66,6 +66,8 @@ struct rtc_class_ops {
 	int (*alarm_irq_enable)(struct device *, unsigned int enabled);
 	int (*read_offset)(struct device *, long *offset);
 	int (*set_offset)(struct device *, long offset);
+	int (*param_get)(struct device *, struct rtc_param *param);
+	int (*param_set)(struct device *, struct rtc_param *param);
 };
 
 struct rtc_device;
diff --git a/include/uapi/linux/rtc.h b/include/uapi/linux/rtc.h
index 5debe82439c2..03e5b776e597 100644
--- a/include/uapi/linux/rtc.h
+++ b/include/uapi/linux/rtc.h
@@ -132,11 +132,18 @@ struct rtc_param {
 #define RTC_FEATURE_ALARM_RES_2S	3
 #define RTC_FEATURE_UPDATE_INTERRUPT	4
 #define RTC_FEATURE_CORRECTION		5
-#define RTC_FEATURE_CNT			6
+#define RTC_FEATURE_BACKUP_SWITCH_MODE	6
+#define RTC_FEATURE_CNT			7
 
 /* parameter list */
 #define RTC_PARAM_FEATURES		0
 #define RTC_PARAM_CORRECTION		1
+#define RTC_PARAM_BACKUP_SWITCH_MODE	2
+
+#define RTC_BSM_DISABLED	0
+#define RTC_BSM_DIRECT		1
+#define RTC_BSM_LEVEL		2
+#define RTC_BSM_STANDBY		3
 
 #define RTC_MAX_FREQ	8192
 
-- 
cgit v1.2.3


From 88dee3b0efe4b769fb22ce2e963aabae403e6801 Mon Sep 17 00:00:00 2001
From: Cai Huoqing <caihuoqing@baidu.com>
Date: Mon, 18 Oct 2021 20:41:09 +0800
Subject: PCI: Remove unused pci_pool wrappers

The pci_pool users have been converted to dma_pool.  Remove the unused
pci_pool wrappers.

Link: https://lore.kernel.org/r/20211018124110.214-1-caihuoqing@baidu.com
Signed-off-by: Cai Huoqing <caihuoqing@baidu.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..2ae9dd7f975f 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1498,19 +1498,8 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 #define PCI_IRQ_ALL_TYPES \
 	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
 
-/* kmem_cache style wrapper around pci_alloc_consistent() */
-
 #include <linux/dmapool.h>
 
-#define	pci_pool dma_pool
-#define pci_pool_create(name, pdev, size, align, allocation) \
-		dma_pool_create(name, &pdev->dev, size, align, allocation)
-#define	pci_pool_destroy(pool) dma_pool_destroy(pool)
-#define	pci_pool_alloc(pool, flags, handle) dma_pool_alloc(pool, flags, handle)
-#define	pci_pool_zalloc(pool, flags, handle) \
-		dma_pool_zalloc(pool, flags, handle)
-#define	pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr)
-
 struct msix_entry {
 	u32	vector;	/* Kernel uses to write allocated vector */
 	u16	entry;	/* Driver uses to specify entry, OS writes */
-- 
cgit v1.2.3


From 4797632f4f1d8af4e0670adcb97bf9800dc3beca Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 17 May 2021 20:16:57 -0700
Subject: string.h: Introduce memset_after() for wiping trailing
 members/padding

A common idiom in kernel code is to wipe the contents of a structure
after a given member. This is especially useful in places where there is
trailing padding. These open-coded cases are usually difficult to read
and very sensitive to struct layout changes. Introduce a new helper,
memset_after() that takes the target struct instance, the byte to write,
and the member name after which the zeroing should start.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Francis Laniel <laniel_francis@privacyrequired.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Daniel Axtens <dja@axtens.net>
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/string.h | 17 +++++++++++++++++
 lib/memcpy_kunit.c     | 13 +++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index ac1c769a5a80..da490c2154a9 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -271,6 +271,23 @@ static inline void memcpy_and_pad(void *dest, size_t dest_len,
 		memcpy(dest, src, dest_len);
 }
 
+/**
+ * memset_after - Set a value after a struct member to the end of a struct
+ *
+ * @obj: Address of target struct instance
+ * @v: Byte value to repeatedly write
+ * @member: after which struct member to start writing bytes
+ *
+ * This is good for clearing padding following the given member.
+ */
+#define memset_after(obj, v, member)					\
+({									\
+	u8 *__ptr = (u8 *)(obj);					\
+	typeof(v) __val = (v);						\
+	memset(__ptr + offsetofend(typeof(*(obj)), member), __val,	\
+	       sizeof(*(obj)) - offsetofend(typeof(*(obj)), member));	\
+})
+
 /**
  * str_has_prefix - Test if a string has a given prefix
  * @str: The string to test
diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
index 8b2109bb62df..5c5b4f3221d9 100644
--- a/lib/memcpy_kunit.c
+++ b/lib/memcpy_kunit.c
@@ -215,6 +215,13 @@ static void memset_test(struct kunit *test)
 			  0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
 			},
 	};
+	struct some_bytes after = {
+		.data = { 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x72,
+			  0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72,
+			  0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72,
+			  0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72,
+			},
+	};
 	struct some_bytes dest = { };
 	int count, value;
 	u8 *ptr;
@@ -245,6 +252,12 @@ static void memset_test(struct kunit *test)
 	ptr += 8;
 	memset(ptr++, value++, count++);
 	compare("argument side-effects", dest, three);
+
+	/* Verify memset_after() */
+	dest = control;
+	memset_after(&dest, 0x72, three);
+	compare("memset_after()", dest, after);
+
 #undef TEST_OP
 }
 
-- 
cgit v1.2.3


From 6dbefad40815a61aecbcf9b552e87ef57ab8cc7d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 17 May 2021 20:16:57 -0700
Subject: string.h: Introduce memset_startat() for wiping trailing members and
 padding

A common idiom in kernel code is to wipe the contents of a structure
starting from a given member. These open-coded cases are usually difficult
to read and very sensitive to struct layout changes. Like memset_after(),
introduce a new helper, memset_startat() that takes the target struct
instance, the byte to write, and the member name where zeroing should
start.

Note that this doesn't zero padding preceding the target member. For
those cases, memset_after() should be used on the preceding member.

Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Francis Laniel <laniel_francis@privacyrequired.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Daniel Axtens <dja@axtens.net>
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/string.h | 18 ++++++++++++++++++
 lib/memcpy_kunit.c     | 11 +++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index da490c2154a9..5a36608144a9 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -288,6 +288,24 @@ static inline void memcpy_and_pad(void *dest, size_t dest_len,
 	       sizeof(*(obj)) - offsetofend(typeof(*(obj)), member));	\
 })
 
+/**
+ * memset_startat - Set a value starting at a member to the end of a struct
+ *
+ * @obj: Address of target struct instance
+ * @v: Byte value to repeatedly write
+ * @member: struct member to start writing at
+ *
+ * Note that if there is padding between the prior member and the target
+ * member, memset_after() should be used to clear the prior padding.
+ */
+#define memset_startat(obj, v, member)					\
+({									\
+	u8 *__ptr = (u8 *)(obj);					\
+	typeof(v) __val = (v);						\
+	memset(__ptr + offsetof(typeof(*(obj)), member), __val,		\
+	       sizeof(*(obj)) - offsetof(typeof(*(obj)), member));	\
+})
+
 /**
  * str_has_prefix - Test if a string has a given prefix
  * @str: The string to test
diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
index 5c5b4f3221d9..62f8ffcbbaa3 100644
--- a/lib/memcpy_kunit.c
+++ b/lib/memcpy_kunit.c
@@ -222,6 +222,13 @@ static void memset_test(struct kunit *test)
 			  0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72,
 			},
 	};
+	struct some_bytes startat = {
+		.data = { 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30,
+			  0x79, 0x79, 0x79, 0x79, 0x79, 0x79, 0x79, 0x79,
+			  0x79, 0x79, 0x79, 0x79, 0x79, 0x79, 0x79, 0x79,
+			  0x79, 0x79, 0x79, 0x79, 0x79, 0x79, 0x79, 0x79,
+			},
+	};
 	struct some_bytes dest = { };
 	int count, value;
 	u8 *ptr;
@@ -258,6 +265,10 @@ static void memset_test(struct kunit *test)
 	memset_after(&dest, 0x72, three);
 	compare("memset_after()", dest, after);
 
+	/* Verify memset_startat() */
+	dest = control;
+	memset_startat(&dest, 0x79, four);
+	compare("memset_startat()", dest, startat);
 #undef TEST_OP
 }
 
-- 
cgit v1.2.3


From 3080ea5553cc909b000d1f1d964a9041962f2c5b Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 9 Aug 2021 11:21:23 -0700
Subject: stddef: Introduce DECLARE_FLEX_ARRAY() helper

There are many places where kernel code wants to have several different
typed trailing flexible arrays. This would normally be done with multiple
flexible arrays in a union, but since GCC and Clang don't (on the surface)
allow this, there have been many open-coded workarounds, usually involving
neighboring 0-element arrays at the end of a structure. For example,
instead of something like this:

struct thing {
	...
	union {
		struct type1 foo[];
		struct type2 bar[];
	};
};

code works around the compiler with:

struct thing {
	...
	struct type1 foo[0];
	struct type2 bar[];
};

Another case is when a flexible array is wanted as the single member
within a struct (which itself is usually in a union). For example, this
would be worked around as:

union many {
	...
	struct {
		struct type3 baz[0];
	};
};

These kinds of work-arounds cause problems with size checks against such
zero-element arrays (for example when building with -Warray-bounds and
-Wzero-length-bounds, and with the coming FORTIFY_SOURCE improvements),
so they must all be converted to "real" flexible arrays, avoiding warnings
like this:

fs/hpfs/anode.c: In function 'hpfs_add_sector_to_btree':
fs/hpfs/anode.c:209:27: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct bplus_internal_node[0]' [-Wzero-length-bounds]
  209 |    anode->btree.u.internal[0].down = cpu_to_le32(a);
      |    ~~~~~~~~~~~~~~~~~~~~~~~^~~
In file included from fs/hpfs/hpfs_fn.h:26,
                 from fs/hpfs/anode.c:10:
fs/hpfs/hpfs.h:412:32: note: while referencing 'internal'
  412 |     struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
      |                                ^~~~~~~~

drivers/net/can/usb/etas_es58x/es58x_fd.c: In function 'es58x_fd_tx_can_msg':
drivers/net/can/usb/etas_es58x/es58x_fd.c:360:35: warning: array subscript 65535 is outside the bounds of an interior zero-length array 'u8[0]' {aka 'unsigned char[]'} [-Wzero-length-bounds]
  360 |  tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len];
      |                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/net/can/usb/etas_es58x/es58x_core.h:22,
                 from drivers/net/can/usb/etas_es58x/es58x_fd.c:17:
drivers/net/can/usb/etas_es58x/es58x_fd.h:231:6: note: while referencing 'raw_msg'
  231 |   u8 raw_msg[0];
      |      ^~~~~~~

However, it _is_ entirely possible to have one or more flexible arrays
in a struct or union: it just has to be in another struct. And since it
cannot be alone in a struct, such a struct must have at least 1 other
named member -- but that member can be zero sized. Wrap all this nonsense
into the new DECLARE_FLEX_ARRAY() in support of having flexible arrays
in unions (or alone in a struct).

As with struct_group(), since this is needed in UAPI headers as well,
implement the core there, with a non-UAPI wrapper.

Additionally update kernel-doc to understand its existence.

https://github.com/KSPP/linux/issues/137

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/stddef.h      | 13 +++++++++++++
 include/uapi/linux/stddef.h | 16 ++++++++++++++++
 scripts/kernel-doc          |  2 ++
 3 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 8b103a53b000..ca507bd5f808 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -84,4 +84,17 @@ enum {
 #define struct_group_tagged(TAG, NAME, MEMBERS...) \
 	__struct_group(TAG, NAME, /* no attrs */, MEMBERS)
 
+/**
+ * DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
+ *
+ * @TYPE: The type of each flexible array element
+ * @NAME: The name of the flexible array member
+ *
+ * In order to have a flexible array member in a union or alone in a
+ * struct, it needs to be wrapped in an anonymous struct with at least 1
+ * named member, but that member can be empty.
+ */
+#define DECLARE_FLEX_ARRAY(TYPE, NAME) \
+	__DECLARE_FLEX_ARRAY(TYPE, NAME)
+
 #endif
diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h
index 610204f7c275..3021ea25a284 100644
--- a/include/uapi/linux/stddef.h
+++ b/include/uapi/linux/stddef.h
@@ -25,3 +25,19 @@
 		struct { MEMBERS } ATTRS; \
 		struct TAG { MEMBERS } ATTRS NAME; \
 	}
+
+/**
+ * __DECLARE_FLEX_ARRAY() - Declare a flexible array usable in a union
+ *
+ * @TYPE: The type of each flexible array element
+ * @NAME: The name of the flexible array member
+ *
+ * In order to have a flexible array member in a union or alone in a
+ * struct, it needs to be wrapped in an anonymous struct with at least 1
+ * named member, but that member can be empty.
+ */
+#define __DECLARE_FLEX_ARRAY(TYPE, NAME)	\
+	struct { \
+		struct { } __empty_ ## NAME; \
+		TYPE NAME[]; \
+	}
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 38aa799a776c..5d54b57ff90c 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1263,6 +1263,8 @@ sub dump_struct($$) {
 	$members =~ s/DECLARE_KFIFO\s*\($args,\s*$args,\s*$args\)/$2 \*$1/gos;
 	# replace DECLARE_KFIFO_PTR
 	$members =~ s/DECLARE_KFIFO_PTR\s*\($args,\s*$args\)/$2 \*$1/gos;
+	# replace DECLARE_FLEX_ARRAY
+	$members =~ s/(?:__)?DECLARE_FLEX_ARRAY\s*\($args,\s*$args\)/$1 $2\[\]/gos;
 	my $declaration = $members;
 
 	# Split nested struct/union elements as newer ones
-- 
cgit v1.2.3


From fa7845cfd53f3b1d3f60efa55db89805595bc045 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 9 Aug 2021 11:29:33 -0700
Subject: treewide: Replace open-coded flex arrays in unions

In support of enabling -Warray-bounds and -Wzero-length-bounds and
correctly handling run-time memcpy() bounds checking, replace all
open-coded flexible arrays (i.e. 0-element arrays) in unions with the
DECLARE_FLEX_ARRAY() helper macro.

This fixes warnings such as:

fs/hpfs/anode.c: In function 'hpfs_add_sector_to_btree':
fs/hpfs/anode.c:209:27: warning: array subscript 0 is outside the bounds of an interior zero-length array 'struct bplus_internal_node[0]' [-Wzero-length-bounds]
  209 |    anode->btree.u.internal[0].down = cpu_to_le32(a);
      |    ~~~~~~~~~~~~~~~~~~~~~~~^~~
In file included from fs/hpfs/hpfs_fn.h:26,
                 from fs/hpfs/anode.c:10:
fs/hpfs/hpfs.h:412:32: note: while referencing 'internal'
  412 |     struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
      |                                ^~~~~~~~

drivers/net/can/usb/etas_es58x/es58x_fd.c: In function 'es58x_fd_tx_can_msg':
drivers/net/can/usb/etas_es58x/es58x_fd.c:360:35: warning: array subscript 65535 is outside the bounds of an interior zero-length array 'u8[0]' {aka 'unsigned char[]'} [-Wzero-length-bounds]
  360 |  tx_can_msg = (typeof(tx_can_msg))&es58x_fd_urb_cmd->raw_msg[msg_len];
      |                                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/net/can/usb/etas_es58x/es58x_core.h:22,
                 from drivers/net/can/usb/etas_es58x/es58x_fd.c:17:
drivers/net/can/usb/etas_es58x/es58x_fd.h:231:6: note: while referencing 'raw_msg'
  231 |   u8 raw_msg[0];
      |      ^~~~~~~

Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Ayush Sawal <ayush.sawal@chelsio.com>
Cc: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
Cc: Rohit Maheshwari <rohitm@chelsio.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Stanislaw Gruszka <stf_xl@wp.pl>
Cc: Luca Coelho <luciano.coelho@intel.com>
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: Song Liu <songliubraving@fb.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Johannes Berg <johannes.berg@intel.com>
Cc: Mordechay Goodstein <mordechay.goodstein@intel.com>
Cc: Lee Jones <lee.jones@linaro.org>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: Marc Kleine-Budde <mkl@pengutronix.de>
Cc: Arunachalam Santhanam <arunachalam.santhanam@in.bosch.com>
Cc: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
Cc: linux-crypto@vger.kernel.org
Cc: ath10k@lists.infradead.org
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Cc: linux-can@vger.kernel.org
Cc: bpf@vger.kernel.org
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de> # drivers/net/can/usb/etas_es58x/*
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/crypto/chelsio/chcr_crypto.h              | 14 +++++++++-----
 drivers/net/can/usb/etas_es58x/es581_4.h          |  2 +-
 drivers/net/can/usb/etas_es58x/es58x_fd.h         |  2 +-
 drivers/net/wireless/ath/ath10k/htt.h             |  7 +++++--
 drivers/net/wireless/intel/iwlegacy/commands.h    |  6 ++++--
 drivers/net/wireless/intel/iwlwifi/dvm/commands.h |  6 ++++--
 drivers/net/wireless/intel/iwlwifi/fw/api/tx.h    | 12 ++++++++----
 drivers/scsi/aic94xx/aic94xx_sds.c                |  6 ++++--
 fs/hpfs/hpfs.h                                    |  8 ++++----
 include/linux/filter.h                            |  6 ++++--
 include/scsi/sas.h                                | 12 ++++++++----
 include/uapi/rdma/rdma_user_rxe.h                 |  4 ++--
 include/uapi/sound/asoc.h                         |  4 ++--
 13 files changed, 56 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index e89f9e0094b4..c7816c83e324 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -222,8 +222,10 @@ struct chcr_authenc_ctx {
 };
 
 struct __aead_ctx {
-	struct chcr_gcm_ctx gcm[0];
-	struct chcr_authenc_ctx authenc[];
+	union {
+		DECLARE_FLEX_ARRAY(struct chcr_gcm_ctx, gcm);
+		DECLARE_FLEX_ARRAY(struct chcr_authenc_ctx, authenc);
+	};
 };
 
 struct chcr_aead_ctx {
@@ -245,9 +247,11 @@ struct hmac_ctx {
 };
 
 struct __crypto_ctx {
-	struct hmac_ctx hmacctx[0];
-	struct ablk_ctx ablkctx[0];
-	struct chcr_aead_ctx aeadctx[];
+	union {
+		DECLARE_FLEX_ARRAY(struct hmac_ctx, hmacctx);
+		DECLARE_FLEX_ARRAY(struct ablk_ctx, ablkctx);
+		DECLARE_FLEX_ARRAY(struct chcr_aead_ctx, aeadctx);
+	};
 };
 
 struct chcr_context {
diff --git a/drivers/net/can/usb/etas_es58x/es581_4.h b/drivers/net/can/usb/etas_es58x/es581_4.h
index 4bc60a6df697..667ecb77168c 100644
--- a/drivers/net/can/usb/etas_es58x/es581_4.h
+++ b/drivers/net/can/usb/etas_es58x/es581_4.h
@@ -192,7 +192,7 @@ struct es581_4_urb_cmd {
 		struct es581_4_rx_cmd_ret rx_cmd_ret;
 		__le64 timestamp;
 		u8 rx_cmd_ret_u8;
-		u8 raw_msg[0];
+		DECLARE_FLEX_ARRAY(u8, raw_msg);
 	} __packed;
 
 	__le16 reserved_for_crc16_do_not_use;
diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.h b/drivers/net/can/usb/etas_es58x/es58x_fd.h
index a191891b8777..c4b19a6a33ae 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_fd.h
+++ b/drivers/net/can/usb/etas_es58x/es58x_fd.h
@@ -219,7 +219,7 @@ struct es58x_fd_urb_cmd {
 		struct es58x_fd_tx_ack_msg tx_ack_msg;
 		__le64 timestamp;
 		__le32 rx_cmd_ret_le32;
-		u8 raw_msg[0];
+		DECLARE_FLEX_ARRAY(u8, raw_msg);
 	} __packed;
 
 	__le16 reserved_for_crc16_do_not_use;
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index ec689e3ce48a..a6de08d3bf4a 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -1674,8 +1674,11 @@ struct htt_tx_fetch_ind {
 	__le32 token;
 	__le16 num_resp_ids;
 	__le16 num_records;
-	__le32 resp_ids[0]; /* ath10k_htt_get_tx_fetch_ind_resp_ids() */
-	struct htt_tx_fetch_record records[];
+	union {
+		/* ath10k_htt_get_tx_fetch_ind_resp_ids() */
+		DECLARE_FLEX_ARRAY(__le32, resp_ids);
+		DECLARE_FLEX_ARRAY(struct htt_tx_fetch_record, records);
+	};
 } __packed;
 
 static inline void *
diff --git a/drivers/net/wireless/intel/iwlegacy/commands.h b/drivers/net/wireless/intel/iwlegacy/commands.h
index 89c6671b32bc..4a97310f8fee 100644
--- a/drivers/net/wireless/intel/iwlegacy/commands.h
+++ b/drivers/net/wireless/intel/iwlegacy/commands.h
@@ -1408,8 +1408,10 @@ struct il3945_tx_cmd {
 	 * MAC header goes here, followed by 2 bytes padding if MAC header
 	 * length is 26 or 30 bytes, followed by payload data
 	 */
-	u8 payload[0];
-	struct ieee80211_hdr hdr[];
+	union {
+		DECLARE_FLEX_ARRAY(u8, payload);
+		DECLARE_FLEX_ARRAY(struct ieee80211_hdr, hdr);
+	};
 } __packed;
 
 /*
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/commands.h b/drivers/net/wireless/intel/iwlwifi/dvm/commands.h
index 235c7a2e3483..75a4b8e26232 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/commands.h
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/commands.h
@@ -1251,8 +1251,10 @@ struct iwl_tx_cmd {
 	 * MAC header goes here, followed by 2 bytes padding if MAC header
 	 * length is 26 or 30 bytes, followed by payload data
 	 */
-	u8 payload[0];
-	struct ieee80211_hdr hdr[];
+	union {
+		DECLARE_FLEX_ARRAY(u8, payload);
+		DECLARE_FLEX_ARRAY(struct ieee80211_hdr, hdr);
+	};
 } __packed;
 
 /*
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
index 24e4a82a55da..5fddfd391941 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h
@@ -239,8 +239,10 @@ struct iwl_tx_cmd {
 	u8 tid_tspec;
 	__le16 pm_frame_timeout;
 	__le16 reserved4;
-	u8 payload[0];
-	struct ieee80211_hdr hdr[0];
+	union {
+		DECLARE_FLEX_ARRAY(u8, payload);
+		DECLARE_FLEX_ARRAY(struct ieee80211_hdr, hdr);
+	};
 } __packed; /* TX_CMD_API_S_VER_6 */
 
 struct iwl_dram_sec_info {
@@ -713,8 +715,10 @@ struct iwl_mvm_compressed_ba_notif {
 	__le32 tx_rate;
 	__le16 tfd_cnt;
 	__le16 ra_tid_cnt;
-	struct iwl_mvm_compressed_ba_ratid ra_tid[0];
-	struct iwl_mvm_compressed_ba_tfd tfd[];
+	union {
+		DECLARE_FLEX_ARRAY(struct iwl_mvm_compressed_ba_ratid, ra_tid);
+		DECLARE_FLEX_ARRAY(struct iwl_mvm_compressed_ba_tfd, tfd);
+	};
 } __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */
 
 /**
diff --git a/drivers/scsi/aic94xx/aic94xx_sds.c b/drivers/scsi/aic94xx/aic94xx_sds.c
index 46815e65f7a4..5def83c88f13 100644
--- a/drivers/scsi/aic94xx/aic94xx_sds.c
+++ b/drivers/scsi/aic94xx/aic94xx_sds.c
@@ -517,8 +517,10 @@ struct asd_ms_conn_map {
 	u8    num_nodes;
 	u8    usage_model_id;
 	u32   _resvd;
-	struct asd_ms_conn_desc conn_desc[0];
-	struct asd_ms_node_desc node_desc[];
+	union {
+		DECLARE_FLEX_ARRAY(struct asd_ms_conn_desc, conn_desc);
+		DECLARE_FLEX_ARRAY(struct asd_ms_node_desc, node_desc);
+	};
 } __attribute__ ((packed));
 
 struct asd_ctrla_phy_entry {
diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h
index d92c4af3e1b4..281dec8f636b 100644
--- a/fs/hpfs/hpfs.h
+++ b/fs/hpfs/hpfs.h
@@ -409,10 +409,10 @@ struct bplus_header
   __le16 first_free;			/* offset from start of header to
 					   first free node in array */
   union {
-    struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving
-					       subtree pointers */
-    struct bplus_leaf_node external[0];	    /* (external) 3-word entries giving
-					       sector runs */
+	/* (internal) 2-word entries giving subtree pointers */
+	DECLARE_FLEX_ARRAY(struct bplus_internal_node, internal);
+	/* (external) 3-word entries giving sector runs */
+	DECLARE_FLEX_ARRAY(struct bplus_leaf_node, external);
   } u;
 };
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4a93c12543ee..4298c5e428a3 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -586,8 +586,10 @@ struct bpf_prog {
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 	/* Instructions for interpreter */
-	struct sock_filter	insns[0];
-	struct bpf_insn		insnsi[];
+	union {
+		DECLARE_FLEX_ARRAY(struct sock_filter, insns);
+		DECLARE_FLEX_ARRAY(struct bpf_insn, insnsi);
+	};
 };
 
 struct sk_filter {
diff --git a/include/scsi/sas.h b/include/scsi/sas.h
index 4726c1bbec65..64154c1fed02 100644
--- a/include/scsi/sas.h
+++ b/include/scsi/sas.h
@@ -323,8 +323,10 @@ struct ssp_response_iu {
 	__be32 sense_data_len;
 	__be32 response_data_len;
 
-	u8     resp_data[0];
-	u8     sense_data[];
+	union {
+		DECLARE_FLEX_ARRAY(u8, resp_data);
+		DECLARE_FLEX_ARRAY(u8, sense_data);
+	};
 } __attribute__ ((packed));
 
 struct ssp_command_iu {
@@ -554,8 +556,10 @@ struct ssp_response_iu {
 	__be32 sense_data_len;
 	__be32 response_data_len;
 
-	u8     resp_data[0];
-	u8     sense_data[];
+	union {
+		DECLARE_FLEX_ARRAY(u8, resp_data);
+		DECLARE_FLEX_ARRAY(u8, sense_data);
+	};
 } __attribute__ ((packed));
 
 struct ssp_command_iu {
diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index e283c2220aba..7f44d54bb0ab 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -141,8 +141,8 @@ struct rxe_dma_info {
 	__u32			sge_offset;
 	__u32			reserved;
 	union {
-		__u8		inline_data[0];
-		struct rxe_sge	sge[0];
+		__DECLARE_FLEX_ARRAY(__u8, inline_data);
+		__DECLARE_FLEX_ARRAY(struct rxe_sge, sge);
 	};
 };
 
diff --git a/include/uapi/sound/asoc.h b/include/uapi/sound/asoc.h
index da61398b1f8f..053949287ce8 100644
--- a/include/uapi/sound/asoc.h
+++ b/include/uapi/sound/asoc.h
@@ -240,8 +240,8 @@ struct snd_soc_tplg_vendor_array {
 struct snd_soc_tplg_private {
 	__le32 size;	/* in bytes of private data */
 	union {
-		char data[0];
-		struct snd_soc_tplg_vendor_array array[0];
+		__DECLARE_FLEX_ARRAY(char, data);
+		__DECLARE_FLEX_ARRAY(struct snd_soc_tplg_vendor_array, array);
 	};
 } __attribute__((packed));
 
-- 
cgit v1.2.3


From 47c662486cccf03e7062139d069b07ab0126ef59 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 13 Aug 2021 12:19:24 -0700
Subject: treewide: Replace 0-element memcpy() destinations with flexible
 arrays

The 0-element arrays that are used as memcpy() destinations are actually
flexible arrays. Adjust their structures accordingly so that memcpy()
can better reason able their destination size (i.e. they need to be seen
as "unknown" length rather than "zero").

In some cases, use of the DECLARE_FLEX_ARRAY() helper is needed when a
flexible array is alone in a struct.

Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Nilesh Javali <njavali@marvell.com>
Cc: Manish Rangankar <mrangankar@marvell.com>
Cc: GR-QLogic-Storage-Upstream@marvell.com
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Phillip Potter <phil@philpotter.co.uk>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Florian Schilhabel <florian.c.schilhabel@googlemail.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Fabio Aiuto <fabioaiuto83@gmail.com>
Cc: Ross Schmidt <ross.schm.dev@gmail.com>
Cc: Marco Cesati <marcocesati@gmail.com>
Cc: ath10k@lists.infradead.org
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Cc: linux-staging@lists.linux.dev
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 drivers/net/wireless/ath/ath10k/bmi.h         | 10 ++++-----
 drivers/scsi/qla4xxx/ql4_def.h                |  4 ++--
 drivers/staging/r8188eu/include/ieee80211.h   |  6 +++---
 drivers/staging/rtl8712/ieee80211.h           |  4 ++--
 drivers/staging/rtl8723bs/include/ieee80211.h |  6 +++---
 include/linux/ieee80211.h                     | 30 +++++++++++++--------------
 include/uapi/linux/dlm_device.h               |  4 ++--
 7 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath10k/bmi.h b/drivers/net/wireless/ath/ath10k/bmi.h
index f6fadcbdd86e..0685c0d2d4ea 100644
--- a/drivers/net/wireless/ath/ath10k/bmi.h
+++ b/drivers/net/wireless/ath/ath10k/bmi.h
@@ -109,7 +109,7 @@ struct bmi_cmd {
 		struct {
 			__le32 addr;
 			__le32 len;
-			u8 payload[0];
+			u8 payload[];
 		} write_mem;
 		struct {
 			__le32 addr;
@@ -138,18 +138,18 @@ struct bmi_cmd {
 		} rompatch_uninstall;
 		struct {
 			__le32 count;
-			__le32 patch_ids[0]; /* length of @count */
+			__le32 patch_ids[]; /* length of @count */
 		} rompatch_activate;
 		struct {
 			__le32 count;
-			__le32 patch_ids[0]; /* length of @count */
+			__le32 patch_ids[]; /* length of @count */
 		} rompatch_deactivate;
 		struct {
 			__le32 addr;
 		} lz_start;
 		struct {
 			__le32 len; /* max BMI_MAX_DATA_SIZE */
-			u8 payload[0]; /* length of @len */
+			u8 payload[]; /* length of @len */
 		} lz_data;
 		struct {
 			u8 name[BMI_NVRAM_SEG_NAME_SZ];
@@ -160,7 +160,7 @@ struct bmi_cmd {
 
 union bmi_resp {
 	struct {
-		u8 payload[0];
+		DECLARE_FLEX_ARRAY(u8, payload);
 	} read_mem;
 	struct {
 		__le32 result;
diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h
index 031569c496e5..69a590546bf9 100644
--- a/drivers/scsi/qla4xxx/ql4_def.h
+++ b/drivers/scsi/qla4xxx/ql4_def.h
@@ -366,13 +366,13 @@ struct qla4_work_evt {
 		struct {
 			enum iscsi_host_event_code code;
 			uint32_t data_size;
-			uint8_t data[0];
+			uint8_t data[];
 		} aen;
 		struct {
 			uint32_t status;
 			uint32_t pid;
 			uint32_t data_size;
-			uint8_t data[0];
+			uint8_t data[];
 		} ping;
 	} u;
 };
diff --git a/drivers/staging/r8188eu/include/ieee80211.h b/drivers/staging/r8188eu/include/ieee80211.h
index bc5b030e9c40..9204dd42f319 100644
--- a/drivers/staging/r8188eu/include/ieee80211.h
+++ b/drivers/staging/r8188eu/include/ieee80211.h
@@ -185,7 +185,7 @@ struct ieee_param {
 		struct {
 			u32 len;
 			u8 reserved[32];
-			u8 data[0];
+			u8 data[];
 		} wpa_ie;
 		struct {
 			int command;
@@ -198,7 +198,7 @@ struct ieee_param {
 			u8 idx;
 			u8 seq[8]; /* sequence counter (set: RX, get: TX) */
 			u16 key_len;
-			u8 key[0];
+			u8 key[];
 		} crypt;
 #ifdef CONFIG_88EU_AP_MODE
 		struct {
@@ -210,7 +210,7 @@ struct ieee_param {
 		} add_sta;
 		struct {
 			u8	reserved[2];/* for set max_num_sta */
-			u8	buf[0];
+			u8	buf[];
 		} bcn_ie;
 #endif
 
diff --git a/drivers/staging/rtl8712/ieee80211.h b/drivers/staging/rtl8712/ieee80211.h
index 61eff7c5746b..65ceaca9b51e 100644
--- a/drivers/staging/rtl8712/ieee80211.h
+++ b/drivers/staging/rtl8712/ieee80211.h
@@ -78,7 +78,7 @@ struct ieee_param {
 		struct {
 			u32 len;
 			u8 reserved[32];
-			u8 data[0];
+			u8 data[];
 		} wpa_ie;
 		struct {
 			int command;
@@ -91,7 +91,7 @@ struct ieee_param {
 			u8 idx;
 			u8 seq[8]; /* sequence counter (set: RX, get: TX) */
 			u16 key_len;
-			u8 key[0];
+			u8 key[];
 		} crypt;
 	} u;
 };
diff --git a/drivers/staging/rtl8723bs/include/ieee80211.h b/drivers/staging/rtl8723bs/include/ieee80211.h
index d6236f5b069d..c11d7e2d2347 100644
--- a/drivers/staging/rtl8723bs/include/ieee80211.h
+++ b/drivers/staging/rtl8723bs/include/ieee80211.h
@@ -172,7 +172,7 @@ struct ieee_param {
 		struct {
 			u32 len;
 			u8 reserved[32];
-			u8 data[0];
+			u8 data[];
 		} wpa_ie;
 	        struct{
 			int command;
@@ -185,7 +185,7 @@ struct ieee_param {
 			u8 idx;
 			u8 seq[8]; /* sequence counter (set: RX, get: TX) */
 			u16 key_len;
-			u8 key[0];
+			u8 key[];
 		} crypt;
 		struct {
 			u16 aid;
@@ -196,7 +196,7 @@ struct ieee_param {
 		} add_sta;
 		struct {
 			u8 reserved[2];/* for set max_num_sta */
-			u8 buf[0];
+			u8 buf[];
 		} bcn_ie;
 	} u;
 };
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 694264503119..ada3dd79cd08 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1143,7 +1143,7 @@ struct ieee80211_mgmt {
 			__le16 auth_transaction;
 			__le16 status_code;
 			/* possibly followed by Challenge text */
-			u8 variable[0];
+			u8 variable[];
 		} __packed auth;
 		struct {
 			__le16 reason_code;
@@ -1152,26 +1152,26 @@ struct ieee80211_mgmt {
 			__le16 capab_info;
 			__le16 listen_interval;
 			/* followed by SSID and Supported rates */
-			u8 variable[0];
+			u8 variable[];
 		} __packed assoc_req;
 		struct {
 			__le16 capab_info;
 			__le16 status_code;
 			__le16 aid;
 			/* followed by Supported rates */
-			u8 variable[0];
+			u8 variable[];
 		} __packed assoc_resp, reassoc_resp;
 		struct {
 			__le16 capab_info;
 			__le16 status_code;
-			u8 variable[0];
+			u8 variable[];
 		} __packed s1g_assoc_resp, s1g_reassoc_resp;
 		struct {
 			__le16 capab_info;
 			__le16 listen_interval;
 			u8 current_ap[ETH_ALEN];
 			/* followed by SSID and Supported rates */
-			u8 variable[0];
+			u8 variable[];
 		} __packed reassoc_req;
 		struct {
 			__le16 reason_code;
@@ -1182,11 +1182,11 @@ struct ieee80211_mgmt {
 			__le16 capab_info;
 			/* followed by some of SSID, Supported rates,
 			 * FH Params, DS Params, CF Params, IBSS Params, TIM */
-			u8 variable[0];
+			u8 variable[];
 		} __packed beacon;
 		struct {
 			/* only variable items: SSID, Supported rates */
-			u8 variable[0];
+			DECLARE_FLEX_ARRAY(u8, variable);
 		} __packed probe_req;
 		struct {
 			__le64 timestamp;
@@ -1194,7 +1194,7 @@ struct ieee80211_mgmt {
 			__le16 capab_info;
 			/* followed by some of SSID, Supported rates,
 			 * FH Params, DS Params, CF Params, IBSS Params */
-			u8 variable[0];
+			u8 variable[];
 		} __packed probe_resp;
 		struct {
 			u8 category;
@@ -1203,16 +1203,16 @@ struct ieee80211_mgmt {
 					u8 action_code;
 					u8 dialog_token;
 					u8 status_code;
-					u8 variable[0];
+					u8 variable[];
 				} __packed wme_action;
 				struct{
 					u8 action_code;
-					u8 variable[0];
+					u8 variable[];
 				} __packed chan_switch;
 				struct{
 					u8 action_code;
 					struct ieee80211_ext_chansw_ie data;
-					u8 variable[0];
+					u8 variable[];
 				} __packed ext_chan_switch;
 				struct{
 					u8 action_code;
@@ -1228,7 +1228,7 @@ struct ieee80211_mgmt {
 					__le16 timeout;
 					__le16 start_seq_num;
 					/* followed by BA Extension */
-					u8 variable[0];
+					u8 variable[];
 				} __packed addba_req;
 				struct{
 					u8 action_code;
@@ -1244,11 +1244,11 @@ struct ieee80211_mgmt {
 				} __packed delba;
 				struct {
 					u8 action_code;
-					u8 variable[0];
+					u8 variable[];
 				} __packed self_prot;
 				struct{
 					u8 action_code;
-					u8 variable[0];
+					u8 variable[];
 				} __packed mesh_action;
 				struct {
 					u8 action;
@@ -1292,7 +1292,7 @@ struct ieee80211_mgmt {
 					u8 toa[6];
 					__le16 tod_error;
 					__le16 toa_error;
-					u8 variable[0];
+					u8 variable[];
 				} __packed ftm;
 				struct {
 					u8 action_code;
diff --git a/include/uapi/linux/dlm_device.h b/include/uapi/linux/dlm_device.h
index f880d2831160..e83954c69fff 100644
--- a/include/uapi/linux/dlm_device.h
+++ b/include/uapi/linux/dlm_device.h
@@ -45,13 +45,13 @@ struct dlm_lock_params {
 	void __user *bastaddr;
 	struct dlm_lksb __user *lksb;
 	char lvb[DLM_USER_LVB_LEN];
-	char name[0];
+	char name[];
 };
 
 struct dlm_lspace_params {
 	__u32 flags;
 	__u32 minor;
-	char name[0];
+	char name[];
 };
 
 struct dlm_purge_params {
-- 
cgit v1.2.3


From afd7de03c5268f74202c1dd4780a8532a11f4c6b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 18 Oct 2021 08:53:19 -0600
Subject: block: remove some blk_mq_hw_ctx debugfs entries

Just like the blk_mq_ctx counterparts, we've got a bunch of counters
in here that are only for debugfs and are of questionnable value. They
are:

- dispatched, index of how many requests were dispatched in one go

- poll_{considered,invoked,success}, which track poll sucess rates. We're
  confident in the iopoll implementation at this point, don't bother
  tracking these.

As a bonus, this shrinks each hardware queue from 576 bytes to 512 bytes,
dropping a whole cacheline.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c | 67 --------------------------------------------------
 block/blk-mq.c         | 16 ------------
 include/linux/blk-mq.h | 10 --------
 3 files changed, 93 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 928a16af9175..68ca5d21cda7 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -529,70 +529,6 @@ out:
 	return res;
 }
 
-static int hctx_io_poll_show(void *data, struct seq_file *m)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-
-	seq_printf(m, "considered=%lu\n", hctx->poll_considered);
-	seq_printf(m, "invoked=%lu\n", hctx->poll_invoked);
-	seq_printf(m, "success=%lu\n", hctx->poll_success);
-	return 0;
-}
-
-static ssize_t hctx_io_poll_write(void *data, const char __user *buf,
-				  size_t count, loff_t *ppos)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-
-	hctx->poll_considered = hctx->poll_invoked = hctx->poll_success = 0;
-	return count;
-}
-
-static int hctx_dispatched_show(void *data, struct seq_file *m)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-	int i;
-
-	seq_printf(m, "%8u\t%lu\n", 0U, hctx->dispatched[0]);
-
-	for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER - 1; i++) {
-		unsigned int d = 1U << (i - 1);
-
-		seq_printf(m, "%8u\t%lu\n", d, hctx->dispatched[i]);
-	}
-
-	seq_printf(m, "%8u+\t%lu\n", 1U << (i - 1), hctx->dispatched[i]);
-	return 0;
-}
-
-static ssize_t hctx_dispatched_write(void *data, const char __user *buf,
-				     size_t count, loff_t *ppos)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-	int i;
-
-	for (i = 0; i < BLK_MQ_MAX_DISPATCH_ORDER; i++)
-		hctx->dispatched[i] = 0;
-	return count;
-}
-
-static int hctx_queued_show(void *data, struct seq_file *m)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-
-	seq_printf(m, "%lu\n", hctx->queued);
-	return 0;
-}
-
-static ssize_t hctx_queued_write(void *data, const char __user *buf,
-				 size_t count, loff_t *ppos)
-{
-	struct blk_mq_hw_ctx *hctx = data;
-
-	hctx->queued = 0;
-	return count;
-}
-
 static int hctx_run_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_hw_ctx *hctx = data;
@@ -738,9 +674,6 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 	{"tags_bitmap", 0400, hctx_tags_bitmap_show},
 	{"sched_tags", 0400, hctx_sched_tags_show},
 	{"sched_tags_bitmap", 0400, hctx_sched_tags_bitmap_show},
-	{"io_poll", 0600, hctx_io_poll_show, hctx_io_poll_write},
-	{"dispatched", 0600, hctx_dispatched_show, hctx_dispatched_write},
-	{"queued", 0600, hctx_queued_show, hctx_queued_write},
 	{"run", 0600, hctx_run_show, hctx_run_write},
 	{"active", 0400, hctx_active_show},
 	{"dispatch_busy", 0400, hctx_dispatch_busy_show},
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 990d214a7658..bd241fd7ee49 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -382,7 +382,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 		}
 	}
 
-	data->hctx->queued++;
 	return rq;
 }
 
@@ -1301,14 +1300,6 @@ struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
 	return data.rq;
 }
 
-static inline unsigned int queued_to_index(unsigned int queued)
-{
-	if (!queued)
-		return 0;
-
-	return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
-}
-
 static bool __blk_mq_get_driver_tag(struct request *rq)
 {
 	struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
@@ -1632,8 +1623,6 @@ out:
 	if (!list_empty(&zone_list))
 		list_splice_tail_init(&zone_list, list);
 
-	hctx->dispatched[queued_to_index(queued)]++;
-
 	/* If we didn't flush the entire list, we could have told the driver
 	 * there was more coming, but that turned out to be a lie.
 	 */
@@ -4200,14 +4189,9 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
 	long state = get_current_state();
 	int ret;
 
-	hctx->poll_considered++;
-
 	do {
-		hctx->poll_invoked++;
-
 		ret = q->mq_ops->poll(hctx);
 		if (ret > 0) {
-			hctx->poll_success++;
 			__set_current_state(TASK_RUNNING);
 			return ret;
 		}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95c3bd3a008e..9fb8618fb957 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -341,9 +341,6 @@ struct blk_mq_hw_ctx {
 	unsigned long		queued;
 	/** @run: Number of dispatched requests. */
 	unsigned long		run;
-#define BLK_MQ_MAX_DISPATCH_ORDER	7
-	/** @dispatched: Number of dispatch requests by queue. */
-	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
 	/** @numa_node: NUMA node the storage adapter has been connected to. */
 	unsigned int		numa_node;
@@ -363,13 +360,6 @@ struct blk_mq_hw_ctx {
 	/** @kobj: Kernel object for sysfs. */
 	struct kobject		kobj;
 
-	/** @poll_considered: Count times blk_mq_poll() was called. */
-	unsigned long		poll_considered;
-	/** @poll_invoked: Count how many requests blk_mq_poll() polled. */
-	unsigned long		poll_invoked;
-	/** @poll_success: Count how many polled requests were completed. */
-	unsigned long		poll_success;
-
 #ifdef CONFIG_BLK_DEBUG_FS
 	/**
 	 * @debugfs_dir: debugfs directory for this hardware queue. Named
-- 
cgit v1.2.3


From 013a7f95438144f4ab39a1017a0bff2765d2551a Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 13 Oct 2021 07:58:52 -0600
Subject: block: provide helpers for rq_list manipulation

Instead of open-coding the list additions, traversal, and removal,
provide a basic set of helpers.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 19 +++++--------------
 include/linux/blkdev.h | 29 +++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index bd241fd7ee49..74505b545dd3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -404,17 +404,11 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
 		tag = tag_offset + i;
 		tags &= ~(1UL << i);
 		rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns);
-		rq->rq_next = *data->cached_rq;
-		*data->cached_rq = rq;
+		rq_list_add(data->cached_rq, rq);
 	}
 	data->nr_tags -= nr;
 
-	if (!data->cached_rq)
-		return NULL;
-
-	rq = *data->cached_rq;
-	*data->cached_rq = rq->rq_next;
-	return rq;
+	return rq_list_pop(data->cached_rq);
 }
 
 static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
@@ -622,11 +616,9 @@ EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
 void blk_mq_free_plug_rqs(struct blk_plug *plug)
 {
-	while (plug->cached_rq) {
-		struct request *rq;
+	struct request *rq;
 
-		rq = plug->cached_rq;
-		plug->cached_rq = rq->rq_next;
+	while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) {
 		percpu_ref_get(&rq->q->q_usage_counter);
 		blk_mq_free_request(rq);
 	}
@@ -2418,8 +2410,7 @@ void blk_mq_submit_bio(struct bio *bio)
 
 	plug = blk_mq_plug(q, bio);
 	if (plug && plug->cached_rq) {
-		rq = plug->cached_rq;
-		plug->cached_rq = rq->rq_next;
+		rq = rq_list_pop(&plug->cached_rq);
 		INIT_LIST_HEAD(&rq->queuelist);
 	} else {
 		struct blk_mq_alloc_data data = {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d5b21fc8f49e..b0a322172965 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1298,4 +1298,33 @@ int fsync_bdev(struct block_device *bdev);
 int freeze_bdev(struct block_device *bdev);
 int thaw_bdev(struct block_device *bdev);
 
+#define rq_list_add(listptr, rq)	do {		\
+	(rq)->rq_next = *(listptr);			\
+	*(listptr) = rq;				\
+} while (0)
+
+#define rq_list_pop(listptr)				\
+({							\
+	struct request *__req = NULL;			\
+	if ((listptr) && *(listptr))	{		\
+		__req = *(listptr);			\
+		*(listptr) = __req->rq_next;		\
+	}						\
+	__req;						\
+})
+
+#define rq_list_peek(listptr)				\
+({							\
+	struct request *__req = NULL;			\
+	if ((listptr) && *(listptr))			\
+		__req = *(listptr);			\
+	__req;						\
+})
+
+#define rq_list_for_each(listptr, pos)			\
+	for (pos = rq_list_peek((listptr)); pos; pos = rq_list_next(pos)) \
+
+#define rq_list_next(rq)	(rq)->rq_next
+#define rq_list_empty(list)	((list) == (struct request *) NULL)
+
 #endif /* _LINUX_BLKDEV_H */
-- 
cgit v1.2.3


From 5a72e899ceb465d731c413d57c6c12cdbf88303c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 12 Oct 2021 09:24:29 -0600
Subject: block: add a struct io_comp_batch argument to fops->iopoll()

struct io_comp_batch contains a list head and a completion handler, which
will allow completions to more effciently completed batches of IO.

For now, no functional changes in this patch, we just define the
io_comp_batch structure and add the argument to the file_operations iopoll
handler.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c              |  9 +++++----
 block/blk-exec.c              |  2 +-
 block/blk-mq.c                |  9 +++++----
 block/blk-mq.h                |  3 ++-
 block/fops.c                  |  4 ++--
 drivers/block/rnbd/rnbd-clt.c |  2 +-
 drivers/nvme/host/pci.c       |  4 ++--
 drivers/nvme/host/rdma.c      |  2 +-
 drivers/nvme/host/tcp.c       |  2 +-
 drivers/scsi/scsi_lib.c       |  2 +-
 fs/io_uring.c                 |  2 +-
 fs/iomap/direct-io.c          |  2 +-
 include/linux/blk-mq.h        |  2 +-
 include/linux/blkdev.h        | 13 +++++++++++--
 include/linux/fs.h            |  4 +++-
 mm/page_io.c                  |  2 +-
 16 files changed, 39 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 20b6cc06461a..d0c2e11411d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1078,7 +1078,7 @@ EXPORT_SYMBOL(submit_bio);
  * Note: the caller must either be the context that submitted @bio, or
  * be in a RCU critical section to prevent freeing of @bio.
  */
-int bio_poll(struct bio *bio, unsigned int flags)
+int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 {
 	struct request_queue *q = bio->bi_bdev->bd_disk->queue;
 	blk_qc_t cookie = READ_ONCE(bio->bi_cookie);
@@ -1096,7 +1096,7 @@ int bio_poll(struct bio *bio, unsigned int flags)
 	if (WARN_ON_ONCE(!queue_is_mq(q)))
 		ret = 0;	/* not yet implemented, should not happen */
 	else
-		ret = blk_mq_poll(q, cookie, flags);
+		ret = blk_mq_poll(q, cookie, iob, flags);
 	blk_queue_exit(q);
 	return ret;
 }
@@ -1106,7 +1106,8 @@ EXPORT_SYMBOL_GPL(bio_poll);
  * Helper to implement file_operations.iopoll.  Requires the bio to be stored
  * in iocb->private, and cleared before freeing the bio.
  */
-int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags)
+int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
+		    unsigned int flags)
 {
 	struct bio *bio;
 	int ret = 0;
@@ -1134,7 +1135,7 @@ int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags)
 	rcu_read_lock();
 	bio = READ_ONCE(kiocb->private);
 	if (bio && bio->bi_bdev)
-		ret = bio_poll(bio, flags);
+		ret = bio_poll(bio, iob, flags);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 55f0cd34b37b..1b8b47f6e79b 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -77,7 +77,7 @@ static bool blk_rq_is_poll(struct request *rq)
 static void blk_rq_poll_completion(struct request *rq, struct completion *wait)
 {
 	do {
-		bio_poll(rq->bio, 0);
+		bio_poll(rq->bio, NULL, 0);
 		cond_resched();
 	} while (!completion_done(wait));
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 74505b545dd3..79c25b64e8b0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4174,14 +4174,14 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, blk_qc_t qc)
 }
 
 static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
-		unsigned int flags)
+			       struct io_comp_batch *iob, unsigned int flags)
 {
 	struct blk_mq_hw_ctx *hctx = blk_qc_to_hctx(q, cookie);
 	long state = get_current_state();
 	int ret;
 
 	do {
-		ret = q->mq_ops->poll(hctx);
+		ret = q->mq_ops->poll(hctx, iob);
 		if (ret > 0) {
 			__set_current_state(TASK_RUNNING);
 			return ret;
@@ -4201,14 +4201,15 @@ static int blk_mq_poll_classic(struct request_queue *q, blk_qc_t cookie,
 	return 0;
 }
 
-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags)
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
+		unsigned int flags)
 {
 	if (!(flags & BLK_POLL_NOSLEEP) &&
 	    q->poll_nsec != BLK_MQ_POLL_CLASSIC) {
 		if (blk_mq_poll_hybrid(q, cookie))
 			return 1;
 	}
-	return blk_mq_poll_classic(q, cookie, flags);
+	return blk_mq_poll_classic(q, cookie, iob, flags);
 }
 
 unsigned int blk_mq_rq_cpu(struct request *rq)
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 1b91a3fdaa01..ebf67f4d4f2e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -31,7 +31,8 @@ struct blk_mq_ctx {
 } ____cacheline_aligned_in_smp;
 
 void blk_mq_submit_bio(struct bio *bio);
-int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, unsigned int flags);
+int blk_mq_poll(struct request_queue *q, blk_qc_t cookie, struct io_comp_batch *iob,
+		unsigned int flags);
 void blk_mq_exit_queue(struct request_queue *q);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
diff --git a/block/fops.c b/block/fops.c
index 1d4f862950bb..2c43e493e37c 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -105,7 +105,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!READ_ONCE(bio.bi_private))
 			break;
-		if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, 0))
+		if (!(iocb->ki_flags & IOCB_HIPRI) || !bio_poll(&bio, NULL, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
@@ -291,7 +291,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		if (!READ_ONCE(dio->waiter))
 			break;
 
-		if (!do_poll || !bio_poll(bio, 0))
+		if (!do_poll || !bio_poll(bio, NULL, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index bd4a41afbbfc..0ec0191d4196 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1176,7 +1176,7 @@ static blk_status_t rnbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx)
+static int rnbd_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct rnbd_queue *q = hctx->driver_data;
 	struct rnbd_clt_dev *dev = q->dev;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 896328271471..bb0482dfab3c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1092,7 +1092,7 @@ static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
 	enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
 }
 
-static int nvme_poll(struct blk_mq_hw_ctx *hctx)
+static int nvme_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 	bool found;
@@ -1274,7 +1274,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * Did we miss an interrupt?
 	 */
 	if (test_bit(NVMEQ_POLLED, &nvmeq->flags))
-		nvme_poll(req->mq_hctx);
+		nvme_poll(req->mq_hctx, NULL);
 	else
 		nvme_poll_irqdisable(nvmeq);
 
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 40317e1b9183..1624da3702d4 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2106,7 +2106,7 @@ unmap_qe:
 	return ret;
 }
 
-static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
+static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct nvme_rdma_queue *queue = hctx->driver_data;
 
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3c1c29dd3020..9ce3458ee1dd 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2429,7 +2429,7 @@ static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
 	return 0;
 }
 
-static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
+static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct nvme_tcp_queue *queue = hctx->driver_data;
 	struct sock *sk = queue->sock->sk;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 33fd9a01330c..30f7d0b4eb73 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1784,7 +1784,7 @@ static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq,
 }
 
 
-static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx)
+static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
 {
 	struct Scsi_Host *shost = hctx->driver_data;
 
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c5066146b8de..cd77a137f2d8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2483,7 +2483,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 		if (!list_empty(&done))
 			break;
 
-		ret = kiocb->ki_filp->f_op->iopoll(kiocb, poll_flags);
+		ret = kiocb->ki_filp->f_op->iopoll(kiocb, NULL, poll_flags);
 		if (unlikely(ret < 0))
 			return ret;
 		else if (ret)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 8efab177011d..83ecfba53abe 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -630,7 +630,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				break;
 
 			if (!dio->submit.poll_bio ||
-			    !bio_poll(dio->submit.poll_bio, 0))
+			    !bio_poll(dio->submit.poll_bio, NULL, 0))
 				blk_io_schedule();
 		}
 		__set_current_state(TASK_RUNNING);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 9fb8618fb957..4c79439af2f2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -532,7 +532,7 @@ struct blk_mq_ops {
 	/**
 	 * @poll: Called to poll for completion of a specific tag.
 	 */
-	int (*poll)(struct blk_mq_hw_ctx *);
+	int (*poll)(struct blk_mq_hw_ctx *, struct io_comp_batch *);
 
 	/**
 	 * @complete: Mark the request as complete.
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b0a322172965..fd9771a1da09 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -569,8 +569,9 @@ blk_status_t errno_to_blk_status(int errno);
 #define BLK_POLL_ONESHOT		(1 << 0)
 /* do not sleep to wait for the expected completion time */
 #define BLK_POLL_NOSLEEP		(1 << 1)
-int bio_poll(struct bio *bio, unsigned int flags);
-int iocb_bio_iopoll(struct kiocb *kiocb, unsigned int flags);
+int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags);
+int iocb_bio_iopoll(struct kiocb *kiocb, struct io_comp_batch *iob,
+			unsigned int flags);
 
 static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 {
@@ -1298,6 +1299,14 @@ int fsync_bdev(struct block_device *bdev);
 int freeze_bdev(struct block_device *bdev);
 int thaw_bdev(struct block_device *bdev);
 
+struct io_comp_batch {
+	struct request *req_list;
+	bool need_ts;
+	void (*complete)(struct io_comp_batch *);
+};
+
+#define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
+
 #define rq_list_add(listptr, rq)	do {		\
 	(rq)->rq_next = *(listptr);			\
 	*(listptr) = rq;				\
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f595f4097cb7..31029a91f440 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -48,6 +48,7 @@
 struct backing_dev_info;
 struct bdi_writeback;
 struct bio;
+struct io_comp_batch;
 struct export_operations;
 struct fiemap_extent_info;
 struct hd_geometry;
@@ -2071,7 +2072,8 @@ struct file_operations {
 	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
 	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
 	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
-	int (*iopoll)(struct kiocb *kiocb, unsigned int flags);
+	int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
+			unsigned int flags);
 	int (*iterate) (struct file *, struct dir_context *);
 	int (*iterate_shared) (struct file *, struct dir_context *);
 	__poll_t (*poll) (struct file *, struct poll_table_struct *);
diff --git a/mm/page_io.c b/mm/page_io.c
index a68faab5b310..6010fb07f231 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -424,7 +424,7 @@ int swap_readpage(struct page *page, bool synchronous)
 		if (!READ_ONCE(bio->bi_private))
 			break;
 
-		if (!bio_poll(bio, 0))
+		if (!bio_poll(bio, NULL, 0))
 			blk_io_schedule();
 	}
 	__set_current_state(TASK_RUNNING);
-- 
cgit v1.2.3


From 1aec5e4a2962f7e0b3fb3e7308dd726be2472c26 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 8 Oct 2021 05:44:23 -0600
Subject: sbitmap: add helper to clear a batch of tags

sbitmap currently only supports clearing tags one-by-one, add a helper
that allows the caller to pass in an array of tags to clear.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sbitmap.h | 11 +++++++++++
 lib/sbitmap.c           | 44 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 52 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index e30b56023ead..4a6ff274335a 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -528,6 +528,17 @@ void sbitmap_queue_min_shallow_depth(struct sbitmap_queue *sbq,
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu);
 
+/**
+ * sbitmap_queue_clear_batch() - Free a batch of allocated bits
+ * &struct sbitmap_queue.
+ * @sbq: Bitmap to free from.
+ * @offset: offset for each tag in array
+ * @tags: array of tags
+ * @nr_tags: number of tags in array
+ */
+void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
+				int *tags, int nr_tags);
+
 static inline int sbq_index_inc(int index)
 {
 	return (index + 1) & (SBQ_WAIT_QUEUES - 1);
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index f398e0ae548e..c6e2f1f2c4d2 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -628,6 +628,46 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq)
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
 
+static inline void sbitmap_update_cpu_hint(struct sbitmap *sb, int cpu, int tag)
+{
+	if (likely(!sb->round_robin && tag < sb->depth))
+		*per_cpu_ptr(sb->alloc_hint, cpu) = tag;
+}
+
+void sbitmap_queue_clear_batch(struct sbitmap_queue *sbq, int offset,
+				int *tags, int nr_tags)
+{
+	struct sbitmap *sb = &sbq->sb;
+	unsigned long *addr = NULL;
+	unsigned long mask = 0;
+	int i;
+
+	smp_mb__before_atomic();
+	for (i = 0; i < nr_tags; i++) {
+		const int tag = tags[i] - offset;
+		unsigned long *this_addr;
+
+		/* since we're clearing a batch, skip the deferred map */
+		this_addr = &sb->map[SB_NR_TO_INDEX(sb, tag)].word;
+		if (!addr) {
+			addr = this_addr;
+		} else if (addr != this_addr) {
+			atomic_long_andnot(mask, (atomic_long_t *) addr);
+			mask = 0;
+			addr = this_addr;
+		}
+		mask |= (1UL << SB_NR_TO_BIT(sb, tag));
+	}
+
+	if (mask)
+		atomic_long_andnot(mask, (atomic_long_t *) addr);
+
+	smp_mb__after_atomic();
+	sbitmap_queue_wake_up(sbq);
+	sbitmap_update_cpu_hint(&sbq->sb, raw_smp_processor_id(),
+					tags[nr_tags - 1] - offset);
+}
+
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
@@ -652,9 +692,7 @@ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 	 */
 	smp_mb__after_atomic();
 	sbitmap_queue_wake_up(sbq);
-
-	if (likely(!sbq->sb.round_robin && nr < sbq->sb.depth))
-		*per_cpu_ptr(sbq->sb.alloc_hint, cpu) = nr;
+	sbitmap_update_cpu_hint(&sbq->sb, cpu, nr);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
 
-- 
cgit v1.2.3


From f794f3351f2672d782b8df0fa59f3cef38cffa59 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 8 Oct 2021 05:50:46 -0600
Subject: block: add support for blk_mq_end_request_batch()

Instead of calling blk_mq_end_request() on a single request, add a helper
that takes the new struct io_comp_batch and completes any request stored
in there.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.c     |  6 ++++
 block/blk-mq-tag.h     |  1 +
 block/blk-mq.c         | 82 ++++++++++++++++++++++++++++++++++++++------------
 include/linux/blk-mq.h | 29 ++++++++++++++++++
 4 files changed, 99 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index c43b97201161..b94c3e8ef392 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -207,6 +207,12 @@ void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 	}
 }
 
+void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)
+{
+	sbitmap_queue_clear_batch(&tags->bitmap_tags, tags->nr_reserved_tags,
+					tag_array, nr_tags);
+}
+
 struct bt_iter_data {
 	struct blk_mq_hw_ctx *hctx;
 	busy_iter_fn *fn;
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 71c2f7d8e9b7..78ae2fb8e2a4 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -42,6 +42,7 @@ unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
 			      unsigned int *offset);
 extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
 			   unsigned int tag);
+void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags);
 extern int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 					struct blk_mq_tags **tags,
 					unsigned int depth, bool can_grow);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 79c25b64e8b0..9248edd8a7d3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -300,15 +300,6 @@ void blk_mq_wake_waiters(struct request_queue *q)
 			blk_mq_tag_wakeup_all(hctx->tags, true);
 }
 
-/*
- * Only need start/end time stamping if we have iostat or
- * blk stats enabled, or using an IO scheduler.
- */
-static inline bool blk_mq_need_time_stamp(struct request *rq)
-{
-	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
-}
-
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 		unsigned int tag, u64 alloc_time_ns)
 {
@@ -768,19 +759,21 @@ bool blk_update_request(struct request *req, blk_status_t error,
 }
 EXPORT_SYMBOL_GPL(blk_update_request);
 
-inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
+static inline void __blk_mq_end_request_acct(struct request *rq, u64 now)
 {
-	if (blk_mq_need_time_stamp(rq)) {
-		u64 now = ktime_get_ns();
+	if (rq->rq_flags & RQF_STATS) {
+		blk_mq_poll_stats_start(rq->q);
+		blk_stat_add(rq, now);
+	}
 
-		if (rq->rq_flags & RQF_STATS) {
-			blk_mq_poll_stats_start(rq->q);
-			blk_stat_add(rq, now);
-		}
+	blk_mq_sched_completed_request(rq, now);
+	blk_account_io_done(rq, now);
+}
 
-		blk_mq_sched_completed_request(rq, now);
-		blk_account_io_done(rq, now);
-	}
+inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
+{
+	if (blk_mq_need_time_stamp(rq))
+		__blk_mq_end_request_acct(rq, ktime_get_ns());
 
 	if (rq->end_io) {
 		rq_qos_done(rq->q, rq);
@@ -799,6 +792,57 @@ void blk_mq_end_request(struct request *rq, blk_status_t error)
 }
 EXPORT_SYMBOL(blk_mq_end_request);
 
+#define TAG_COMP_BATCH		32
+
+static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
+					  int *tag_array, int nr_tags)
+{
+	struct request_queue *q = hctx->queue;
+
+	blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
+	percpu_ref_put_many(&q->q_usage_counter, nr_tags);
+}
+
+void blk_mq_end_request_batch(struct io_comp_batch *iob)
+{
+	int tags[TAG_COMP_BATCH], nr_tags = 0;
+	struct blk_mq_hw_ctx *last_hctx = NULL;
+	struct request *rq;
+	u64 now = 0;
+
+	if (iob->need_ts)
+		now = ktime_get_ns();
+
+	while ((rq = rq_list_pop(&iob->req_list)) != NULL) {
+		prefetch(rq->bio);
+		prefetch(rq->rq_next);
+
+		blk_update_request(rq, BLK_STS_OK, blk_rq_bytes(rq));
+		if (iob->need_ts)
+			__blk_mq_end_request_acct(rq, now);
+
+		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+		if (!refcount_dec_and_test(&rq->ref))
+			continue;
+
+		blk_crypto_free_request(rq);
+		blk_pm_mark_last_busy(rq);
+		rq_qos_done(rq->q, rq);
+
+		if (nr_tags == TAG_COMP_BATCH ||
+		    (last_hctx && last_hctx != rq->mq_hctx)) {
+			blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
+			nr_tags = 0;
+		}
+		tags[nr_tags++] = rq->tag;
+		last_hctx = rq->mq_hctx;
+	}
+
+	if (nr_tags)
+		blk_mq_flush_tag_batch(last_hctx, tags, nr_tags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_end_request_batch);
+
 static void blk_complete_reqs(struct llist_head *list)
 {
 	struct llist_node *entry = llist_reverse_order(llist_del_all(list));
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 4c79439af2f2..656fe34bdb6c 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -728,6 +728,35 @@ static inline void blk_mq_set_request_complete(struct request *rq)
 void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, blk_status_t error);
 void __blk_mq_end_request(struct request *rq, blk_status_t error);
+void blk_mq_end_request_batch(struct io_comp_batch *ib);
+
+/*
+ * Only need start/end time stamping if we have iostat or
+ * blk stats enabled, or using an IO scheduler.
+ */
+static inline bool blk_mq_need_time_stamp(struct request *rq)
+{
+	return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_ELV));
+}
+
+/*
+ * Batched completions only work when there is no I/O error and no special
+ * ->end_io handler.
+ */
+static inline bool blk_mq_add_to_batch(struct request *req,
+				       struct io_comp_batch *iob, int ioerror,
+				       void (*complete)(struct io_comp_batch *))
+{
+	if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror)
+		return false;
+	if (!iob->complete)
+		iob->complete = complete;
+	else if (iob->complete != complete)
+		return false;
+	iob->need_ts |= blk_mq_need_time_stamp(req);
+	rq_list_add(&iob->req_list, req);
+	return true;
+}
 
 void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
-- 
cgit v1.2.3


From 99457db8b40c66941072a383a5ab4e36bc53fd3d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 Oct 2021 12:11:01 +0200
Subject: block: move the SECTOR_SIZE related definitions to blk_types.h

Ensure these are always available for inlines in the various block layer
headers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20211018101130.1838532-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 17 +++++++++++++++++
 include/linux/blkdev.h    | 17 -----------------
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1e370929c89e..472e55e0e94f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -20,6 +20,23 @@ struct cgroup_subsys_state;
 typedef void (bio_end_io_t) (struct bio *);
 struct bio_crypt_ctx;
 
+/*
+ * The basic unit of block I/O is a sector. It is used in a number of contexts
+ * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
+ * bytes. Variables of type sector_t represent an offset or size that is a
+ * multiple of 512 bytes. Hence these two constants.
+ */
+#ifndef SECTOR_SHIFT
+#define SECTOR_SHIFT 9
+#endif
+#ifndef SECTOR_SIZE
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
+#endif
+
+#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
+#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
+#define SECTOR_MASK		(PAGE_SECTORS - 1)
+
 struct block_device {
 	sector_t		bd_start_sect;
 	struct disk_stats __percpu *bd_stats;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fd9771a1da09..abe721591e80 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -578,23 +578,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev)
 	return bdev->bd_queue;	/* this is never NULL */
 }
 
-/*
- * The basic unit of block I/O is a sector. It is used in a number of contexts
- * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
- * bytes. Variables of type sector_t represent an offset or size that is a
- * multiple of 512 bytes. Hence these two constants.
- */
-#ifndef SECTOR_SHIFT
-#define SECTOR_SHIFT 9
-#endif
-#ifndef SECTOR_SIZE
-#define SECTOR_SIZE (1 << SECTOR_SHIFT)
-#endif
-
-#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
-#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
-#define SECTOR_MASK		(PAGE_SECTORS - 1)
-
 #ifdef CONFIG_BLK_DEV_ZONED
 
 /* Helper to convert BLK_ZONE_ZONE_XXX to its string format XXX */
-- 
cgit v1.2.3


From 6436bd90f76e75d2c5786a50203b05a9b7f7100d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 Oct 2021 12:11:02 +0200
Subject: block: add a bdev_nr_bytes helper

Add a helper to query the size of a block device in bytes.  This
will be used to remove open coded access to ->bd_inode.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20211018101130.1838532-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index cd4038fd5743..01d27f3a970e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -236,9 +236,14 @@ static inline sector_t get_start_sect(struct block_device *bdev)
 	return bdev->bd_start_sect;
 }
 
+static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+{
+	return i_size_read(bdev->bd_inode);
+}
+
 static inline sector_t bdev_nr_sectors(struct block_device *bdev)
 {
-	return i_size_read(bdev->bd_inode) >> 9;
+	return bdev_nr_bytes(bdev) >> SECTOR_SHIFT;
 }
 
 static inline sector_t get_capacity(struct gendisk *disk)
-- 
cgit v1.2.3


From bcc6e2cfaa48a4ad2e17719194f6d97a8e03f6c1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 Oct 2021 12:11:25 +0200
Subject: block: add a sb_bdev_nr_blocks helper

Add a helper to return the size of sb->s_bdev in sb->s_blocksize_bits
based unites.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20211018101130.1838532-26-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 01d27f3a970e..7b0326661a1e 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -251,6 +251,12 @@ static inline sector_t get_capacity(struct gendisk *disk)
 	return bdev_nr_sectors(disk->part0);
 }
 
+static inline u64 sb_bdev_nr_blocks(struct super_block *sb)
+{
+	return bdev_nr_sectors(sb->s_bdev) >>
+		(sb->s_blocksize_bits - SECTOR_SHIFT);
+}
+
 int bdev_disk_changed(struct gendisk *disk, bool invalidate);
 void blk_drop_partitions(struct gendisk *disk);
 
-- 
cgit v1.2.3


From f09313c57a17683cbcb305989daf1d94b49fd32c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 18 Oct 2021 11:39:45 -0600
Subject: block: cache inode size in bdev

Reading the inode size brings in a new cacheline for IO submit, and
it's in the hot path being checked for every single IO. When doing
millions of IOs per core per second, this is noticeable overhead.

Cache the nr_sectors in the bdev itself.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c             | 1 +
 block/partitions/core.c   | 1 +
 include/linux/blk_types.h | 1 +
 include/linux/genhd.h     | 8 ++++----
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 759bc06810f8..53495e3391e3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -58,6 +58,7 @@ void set_capacity(struct gendisk *disk, sector_t sectors)
 
 	spin_lock(&bdev->bd_size_lock);
 	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+	bdev->bd_nr_sectors = sectors;
 	spin_unlock(&bdev->bd_size_lock);
 }
 EXPORT_SYMBOL(set_capacity);
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 9dbddc355b40..66ef9bc6d6a1 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -91,6 +91,7 @@ static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 {
 	spin_lock(&bdev->bd_size_lock);
 	i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
+	bdev->bd_nr_sectors = sectors;
 	spin_unlock(&bdev->bd_size_lock);
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 472e55e0e94f..fe065c394fff 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,6 +39,7 @@ struct bio_crypt_ctx;
 
 struct block_device {
 	sector_t		bd_start_sect;
+	sector_t		bd_nr_sectors;
 	struct disk_stats __percpu *bd_stats;
 	unsigned long		bd_stamp;
 	bool			bd_read_only;	/* read-only policy */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7b0326661a1e..900325aa5d31 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -236,14 +236,14 @@ static inline sector_t get_start_sect(struct block_device *bdev)
 	return bdev->bd_start_sect;
 }
 
-static inline loff_t bdev_nr_bytes(struct block_device *bdev)
+static inline sector_t bdev_nr_sectors(struct block_device *bdev)
 {
-	return i_size_read(bdev->bd_inode);
+	return bdev->bd_nr_sectors;
 }
 
-static inline sector_t bdev_nr_sectors(struct block_device *bdev)
+static inline loff_t bdev_nr_bytes(struct block_device *bdev)
 {
-	return bdev_nr_bytes(bdev) >> SECTOR_SHIFT;
+	return bdev_nr_sectors(bdev) << SECTOR_SHIFT;
 }
 
 static inline sector_t get_capacity(struct gendisk *disk)
-- 
cgit v1.2.3


From 15bc01effefe97757ef02ca09e9d1b927ab22725 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 16 Oct 2021 15:59:49 -0500
Subject: ucounts: Fix signal ucount refcounting

In commit fda31c50292a ("signal: avoid double atomic counter
increments for user accounting") Linus made a clever optimization to
how rlimits and the struct user_struct.  Unfortunately that
optimization does not work in the obvious way when moved to nested
rlimits.  The problem is that the last decrement of the per user
namespace per user sigpending counter might also be the last decrement
of the sigpending counter in the parent user namespace as well.  Which
means that simply freeing the leaf ucount in __free_sigqueue is not
enough.

Maintain the optimization and handle the tricky cases by introducing
inc_rlimit_get_ucounts and dec_rlimit_put_ucounts.

By moving the entire optimization into functions that perform all of
the work it becomes possible to ensure that every level is handled
properly.

The new function inc_rlimit_get_ucounts returns 0 on failure to
increment the ucount.  This is different than inc_rlimit_ucounts which
increments the ucounts and returns LONG_MAX if the ucount counter has
exceeded it's maximum or it wrapped (to indicate the counter needs to
decremented).

I wish we had a single user to account all pending signals to across
all of the threads of a process so this complexity was not necessary

Cc: stable@vger.kernel.org
Fixes: d64696905554 ("Reimplement RLIMIT_SIGPENDING on top of ucounts")
v1: https://lkml.kernel.org/r/87mtnavszx.fsf_-_@disp2133
Link: https://lkml.kernel.org/r/87fssytizw.fsf_-_@disp2133
Reviewed-by: Alexey Gladkov <legion@kernel.org>
Tested-by: Rune Kleveland <rune.kleveland@infomedia.dk>
Tested-by: Yu Zhao <yuzhao@google.com>
Tested-by: Jordan Glover <Golden_Miller83@protonmail.ch>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/user_namespace.h |  2 ++
 kernel/signal.c                | 25 ++++++---------------
 kernel/ucount.c                | 49 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb70cabe6e7f..33a4240e6a6f 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -127,6 +127,8 @@ static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type t
 
 long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v);
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type);
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type);
 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max);
 
 static inline void set_rlimit_ucount_max(struct user_namespace *ns,
diff --git a/kernel/signal.c b/kernel/signal.c
index a3229add4455..13d2505a14a0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -425,22 +425,10 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 	 */
 	rcu_read_lock();
 	ucounts = task_ucounts(t);
-	sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
-	switch (sigpending) {
-	case 1:
-		if (likely(get_ucounts(ucounts)))
-			break;
-		fallthrough;
-	case LONG_MAX:
-		/*
-		 * we need to decrease the ucount in the userns tree on any
-		 * failure to avoid counts leaking.
-		 */
-		dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1);
-		rcu_read_unlock();
-		return NULL;
-	}
+	sigpending = inc_rlimit_get_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
 	rcu_read_unlock();
+	if (!sigpending)
+		return NULL;
 
 	if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
 		q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
@@ -449,8 +437,7 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
 	}
 
 	if (unlikely(q == NULL)) {
-		if (dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1))
-			put_ucounts(ucounts);
+		dec_rlimit_put_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING);
 	} else {
 		INIT_LIST_HEAD(&q->list);
 		q->flags = sigqueue_flags;
@@ -463,8 +450,8 @@ static void __sigqueue_free(struct sigqueue *q)
 {
 	if (q->flags & SIGQUEUE_PREALLOC)
 		return;
-	if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) {
-		put_ucounts(q->ucounts);
+	if (q->ucounts) {
+		dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
 		q->ucounts = NULL;
 	}
 	kmem_cache_free(sigqueue_cachep, q);
diff --git a/kernel/ucount.c b/kernel/ucount.c
index bb51849e6375..eb03f3c68375 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -284,6 +284,55 @@ bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 	return (new == 0);
 }
 
+static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts,
+				struct ucounts *last, enum ucount_type type)
+{
+	struct ucounts *iter, *next;
+	for (iter = ucounts; iter != last; iter = next) {
+		long dec = atomic_long_add_return(-1, &iter->ucount[type]);
+		WARN_ON_ONCE(dec < 0);
+		next = iter->ns->ucounts;
+		if (dec == 0)
+			put_ucounts(iter);
+	}
+}
+
+void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum ucount_type type)
+{
+	do_dec_rlimit_put_ucounts(ucounts, NULL, type);
+}
+
+long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum ucount_type type)
+{
+	/* Caller must hold a reference to ucounts */
+	struct ucounts *iter;
+	long dec, ret = 0;
+
+	for (iter = ucounts; iter; iter = iter->ns->ucounts) {
+		long max = READ_ONCE(iter->ns->ucount_max[type]);
+		long new = atomic_long_add_return(1, &iter->ucount[type]);
+		if (new < 0 || new > max)
+			goto unwind;
+		if (iter == ucounts)
+			ret = new;
+		/*
+		 * Grab an extra ucount reference for the caller when
+		 * the rlimit count was previously 0.
+		 */
+		if (new != 1)
+			continue;
+		if (!get_ucounts(iter))
+			goto dec_unwind;
+	}
+	return ret;
+dec_unwind:
+	dec = atomic_long_add_return(-1, &iter->ucount[type]);
+	WARN_ON_ONCE(dec < 0);
+unwind:
+	do_dec_rlimit_put_ucounts(ucounts, iter, type);
+	return 0;
+}
+
 bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
 {
 	struct ucounts *iter;
-- 
cgit v1.2.3


From ed65df63a39a3f6ed04f7258de8b6789e5021c18 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Mon, 18 Oct 2021 15:44:12 -0400
Subject: tracing: Have all levels of checks prevent recursion

While writing an email explaining the "bit = 0" logic for a discussion on
making ftrace_test_recursion_trylock() disable preemption, I discovered a
path that makes the "not do the logic if bit is zero" unsafe.

The recursion logic is done in hot paths like the function tracer. Thus,
any code executed causes noticeable overhead. Thus, tricks are done to try
to limit the amount of code executed. This included the recursion testing
logic.

Having recursion testing is important, as there are many paths that can
end up in an infinite recursion cycle when tracing every function in the
kernel. Thus protection is needed to prevent that from happening.

Because it is OK to recurse due to different running context levels (e.g.
an interrupt preempts a trace, and then a trace occurs in the interrupt
handler), a set of bits are used to know which context one is in (normal,
softirq, irq and NMI). If a recursion occurs in the same level, it is
prevented*.

Then there are infrastructure levels of recursion as well. When more than
one callback is attached to the same function to trace, it calls a loop
function to iterate over all the callbacks. Both the callbacks and the
loop function have recursion protection. The callbacks use the
"ftrace_test_recursion_trylock()" which has a "function" set of context
bits to test, and the loop function calls the internal
trace_test_and_set_recursion() directly, with an "internal" set of bits.

If an architecture does not implement all the features supported by ftrace
then the callbacks are never called directly, and the loop function is
called instead, which will implement the features of ftrace.

Since both the loop function and the callbacks do recursion protection, it
was seemed unnecessary to do it in both locations. Thus, a trick was made
to have the internal set of recursion bits at a more significant bit
location than the function bits. Then, if any of the higher bits were set,
the logic of the function bits could be skipped, as any new recursion
would first have to go through the loop function.

This is true for architectures that do not support all the ftrace
features, because all functions being traced must first go through the
loop function before going to the callbacks. But this is not true for
architectures that support all the ftrace features. That's because the
loop function could be called due to two callbacks attached to the same
function, but then a recursion function inside the callback could be
called that does not share any other callback, and it will be called
directly.

i.e.

 traced_function_1: [ more than one callback tracing it ]
   call loop_func

 loop_func:
   trace_recursion set internal bit
   call callback

 callback:
   trace_recursion [ skipped because internal bit is set, return 0 ]
   call traced_function_2

 traced_function_2: [ only traced by above callback ]
   call callback

 callback:
   trace_recursion [ skipped because internal bit is set, return 0 ]
   call traced_function_2

 [ wash, rinse, repeat, BOOM! out of shampoo! ]

Thus, the "bit == 0 skip" trick is not safe, unless the loop function is
call for all functions.

Since we want to encourage architectures to implement all ftrace features,
having them slow down due to this extra logic may encourage the
maintainers to update to the latest ftrace features. And because this
logic is only safe for them, remove it completely.

 [*] There is on layer of recursion that is allowed, and that is to allow
     for the transition between interrupt context (normal -> softirq ->
     irq -> NMI), because a trace may occur before the context update is
     visible to the trace recursion logic.

Link: https://lore.kernel.org/all/609b565a-ed6e-a1da-f025-166691b5d994@linux.alibaba.com/
Link: https://lkml.kernel.org/r/20211018154412.09fcad3c@gandalf.local.home

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <James.Bottomley@hansenpartnership.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Miroslav Benes <mbenes@suse.cz>
Cc: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Colin Ian King <colin.king@canonical.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Jisheng Zhang <jszhang@kernel.org>
Cc: =?utf-8?b?546L6LSH?= <yun.wang@linux.alibaba.com>
Cc: Guo Ren <guoren@kernel.org>
Cc: stable@vger.kernel.org
Fixes: edc15cafcbfa3 ("tracing: Avoid unnecessary multiple recursion checks")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_recursion.h | 49 ++++++++---------------------------------
 kernel/trace/ftrace.c           |  4 ++--
 2 files changed, 11 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c5714e65..fe95f0922526 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -16,23 +16,8 @@
  *  When function tracing occurs, the following steps are made:
  *   If arch does not support a ftrace feature:
  *    call internal function (uses INTERNAL bits) which calls...
- *   If callback is registered to the "global" list, the list
- *    function is called and recursion checks the GLOBAL bits.
- *    then this function calls...
  *   The function callback, which can use the FTRACE bits to
  *    check for recursion.
- *
- * Now if the arch does not support a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
  */
 enum {
 	/* Function recursion bits */
@@ -40,12 +25,14 @@ enum {
 	TRACE_FTRACE_NMI_BIT,
 	TRACE_FTRACE_IRQ_BIT,
 	TRACE_FTRACE_SIRQ_BIT,
+	TRACE_FTRACE_TRANSITION_BIT,
 
-	/* INTERNAL_BITs must be greater than FTRACE_BITs */
+	/* Internal use recursion bits */
 	TRACE_INTERNAL_BIT,
 	TRACE_INTERNAL_NMI_BIT,
 	TRACE_INTERNAL_IRQ_BIT,
 	TRACE_INTERNAL_SIRQ_BIT,
+	TRACE_INTERNAL_TRANSITION_BIT,
 
 	TRACE_BRANCH_BIT,
 /*
@@ -86,12 +73,6 @@ enum {
 	 */
 	TRACE_GRAPH_NOTRACE_BIT,
 
-	/*
-	 * When transitioning between context, the preempt_count() may
-	 * not be correct. Allow for a single recursion to cover this case.
-	 */
-	TRACE_TRANSITION_BIT,
-
 	/* Used to prevent recursion recording from recursing. */
 	TRACE_RECORD_RECURSION_BIT,
 };
@@ -113,12 +94,10 @@ enum {
 #define TRACE_CONTEXT_BITS	4
 
 #define TRACE_FTRACE_START	TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX	((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
 
 #define TRACE_LIST_START	TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX		((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
 
-#define TRACE_CONTEXT_MASK	TRACE_LIST_MAX
+#define TRACE_CONTEXT_MASK	((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
 
 /*
  * Used for setting context
@@ -132,6 +111,7 @@ enum {
 	TRACE_CTX_IRQ,
 	TRACE_CTX_SOFTIRQ,
 	TRACE_CTX_NORMAL,
+	TRACE_CTX_TRANSITION,
 };
 
 static __always_inline int trace_get_context_bit(void)
@@ -160,45 +140,34 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
 #endif
 
 static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
-							int start, int max)
+							int start)
 {
 	unsigned int val = READ_ONCE(current->trace_recursion);
 	int bit;
 
-	/* A previous recursion check was made */
-	if ((val & TRACE_CONTEXT_MASK) > max)
-		return 0;
-
 	bit = trace_get_context_bit() + start;
 	if (unlikely(val & (1 << bit))) {
 		/*
 		 * It could be that preempt_count has not been updated during
 		 * a switch between contexts. Allow for a single recursion.
 		 */
-		bit = TRACE_TRANSITION_BIT;
+		bit = TRACE_CTX_TRANSITION + start;
 		if (val & (1 << bit)) {
 			do_ftrace_record_recursion(ip, pip);
 			return -1;
 		}
-	} else {
-		/* Normal check passed, clear the transition to allow it again */
-		val &= ~(1 << TRACE_TRANSITION_BIT);
 	}
 
 	val |= 1 << bit;
 	current->trace_recursion = val;
 	barrier();
 
-	return bit + 1;
+	return bit;
 }
 
 static __always_inline void trace_clear_recursion(int bit)
 {
-	if (!bit)
-		return;
-
 	barrier();
-	bit--;
 	trace_recursion_clear(bit);
 }
 
@@ -214,7 +183,7 @@ static __always_inline void trace_clear_recursion(int bit)
 static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
 							 unsigned long parent_ip)
 {
-	return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+	return trace_test_and_set_recursion(ip, parent_ip, TRACE_FTRACE_START);
 }
 
 /**
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7efbc8aaf7f6..635fbdc9d589 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6977,7 +6977,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	struct ftrace_ops *op;
 	int bit;
 
-	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START);
 	if (bit < 0)
 		return;
 
@@ -7052,7 +7052,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 {
 	int bit;
 
-	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
+	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START);
 	if (bit < 0)
 		return;
 
-- 
cgit v1.2.3


From 425a563acb1d1872c0036fbcb644247640edbbc6 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Sun, 23 May 2021 13:34:28 +0300
Subject: net/mlx5: Introduce port selection namespace

Add new port selection flow steering namespace. Flow steering rules in
this namespaceare are used to determine the physical port for egress
packets.

Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 26 +++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |  7 ++++--
 drivers/net/ethernet/mellanox/mlx5/core/fw.c      |  6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c    |  1 +
 include/linux/mlx5/device.h                       | 15 +++++++++++++
 include/linux/mlx5/fs.h                           |  1 +
 include/linux/mlx5/mlx5_ifc.h                     | 25 ++++++++++++++++++++--
 8 files changed, 78 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 7db8df64a60e..caefdb7dfefe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -969,6 +969,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
 	case FS_FT_NIC_TX:
 	case FS_FT_RDMA_RX:
 	case FS_FT_RDMA_TX:
+	case FS_FT_PORT_SEL:
 		return mlx5_fs_cmd_get_fw_cmds();
 	default:
 		return mlx5_fs_cmd_get_stub_cmds();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index fe501ba88bea..8d8696f7c3f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2191,6 +2191,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 		if (steering->fdb_root_ns)
 			return &steering->fdb_root_ns->ns;
 		return NULL;
+	case MLX5_FLOW_NAMESPACE_PORT_SEL:
+		if (steering->port_sel_root_ns)
+			return &steering->port_sel_root_ns->ns;
+		return NULL;
 	case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
 		if (steering->sniffer_rx_root_ns)
 			return &steering->sniffer_rx_root_ns->ns;
@@ -2596,6 +2600,7 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 	steering->fdb_root_ns = NULL;
 	kfree(steering->fdb_sub_ns);
 	steering->fdb_sub_ns = NULL;
+	cleanup_root_ns(steering->port_sel_root_ns);
 	cleanup_root_ns(steering->sniffer_rx_root_ns);
 	cleanup_root_ns(steering->sniffer_tx_root_ns);
 	cleanup_root_ns(steering->rdma_rx_root_ns);
@@ -2634,6 +2639,21 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
 	return PTR_ERR_OR_ZERO(prio);
 }
 
+#define PORT_SEL_NUM_LEVELS 3
+static int init_port_sel_root_ns(struct mlx5_flow_steering *steering)
+{
+	struct fs_prio *prio;
+
+	steering->port_sel_root_ns = create_root_ns(steering, FS_FT_PORT_SEL);
+	if (!steering->port_sel_root_ns)
+		return -ENOMEM;
+
+	/* Create single prio */
+	prio = fs_create_prio(&steering->port_sel_root_ns->ns, 0,
+			      PORT_SEL_NUM_LEVELS);
+	return PTR_ERR_OR_ZERO(prio);
+}
+
 static int init_rdma_rx_root_ns(struct mlx5_flow_steering *steering)
 {
 	int err;
@@ -3020,6 +3040,12 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
+	if (MLX5_CAP_FLOWTABLE_PORT_SELECTION(dev, ft_support)) {
+		err = init_port_sel_root_ns(steering);
+		if (err)
+			goto err;
+	}
+
 	if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
 	    MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) {
 		err = init_rdma_rx_root_ns(steering);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 98240badc342..79d37530afb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -97,7 +97,8 @@ enum fs_flow_table_type {
 	FS_FT_SNIFFER_TX	= 0X6,
 	FS_FT_RDMA_RX		= 0X7,
 	FS_FT_RDMA_TX		= 0X8,
-	FS_FT_MAX_TYPE = FS_FT_RDMA_TX,
+	FS_FT_PORT_SEL		= 0X9,
+	FS_FT_MAX_TYPE = FS_FT_PORT_SEL,
 };
 
 enum fs_flow_table_op_mod {
@@ -129,6 +130,7 @@ struct mlx5_flow_steering {
 	struct mlx5_flow_root_namespace	*rdma_rx_root_ns;
 	struct mlx5_flow_root_namespace	*rdma_tx_root_ns;
 	struct mlx5_flow_root_namespace	*egress_root_ns;
+	struct mlx5_flow_root_namespace	*port_sel_root_ns;
 	int esw_egress_acl_vports;
 	int esw_ingress_acl_vports;
 };
@@ -341,7 +343,8 @@ struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
 	(type == FS_FT_SNIFFER_TX) ? MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) :		\
 	(type == FS_FT_RDMA_RX) ? MLX5_CAP_FLOWTABLE_RDMA_RX(mdev, cap) :		\
 	(type == FS_FT_RDMA_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) :      \
-	(BUILD_BUG_ON_ZERO(FS_FT_RDMA_TX != FS_FT_MAX_TYPE))\
+	(type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) :      \
+	(BUILD_BUG_ON_ZERO(FS_FT_PORT_SEL != FS_FT_MAX_TYPE))\
 	)
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index f4f8993eac17..1037e3629e7e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -149,6 +149,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 	if (err)
 		return err;
 
+	if (MLX5_CAP_GEN(dev, port_selection_cap)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION);
+		if (err)
+			return err;
+	}
+
 	if (MLX5_CAP_GEN(dev, hca_cap_2)) {
 		err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2);
 		if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 47d92fb459ed..f8446395163a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1416,6 +1416,7 @@ static const int types[] = {
 	MLX5_CAP_TLS,
 	MLX5_CAP_VDPA_EMULATION,
 	MLX5_CAP_IPSEC,
+	MLX5_CAP_PORT_SELECTION,
 };
 
 static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 109cc8106d16..347167c18802 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1185,6 +1185,7 @@ enum mlx5_cap_type {
 	MLX5_CAP_DEV_EVENT = 0x14,
 	MLX5_CAP_IPSEC,
 	MLX5_CAP_GENERAL_2 = 0x20,
+	MLX5_CAP_PORT_SELECTION = 0x25,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1342,6 +1343,20 @@ enum mlx5_qcam_feature_groups {
 	MLX5_GET(e_switch_cap, \
 		 mdev->caps.hca[MLX5_CAP_ESWITCH]->max, cap)
 
+#define MLX5_CAP_PORT_SELECTION(mdev, cap) \
+	MLX5_GET(port_selection_cap, \
+		 mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, cap)
+
+#define MLX5_CAP_PORT_SELECTION_MAX(mdev, cap) \
+	MLX5_GET(port_selection_cap, \
+		 mdev->caps.hca[MLX5_CAP_PORT_SELECTION]->max, cap)
+
+#define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \
+	MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap)
+
+#define MLX5_CAP_FLOWTABLE_PORT_SELECTION_MAX(mdev, cap) \
+	MLX5_CAP_PORT_SELECTION_MAX(mdev, flow_table_properties_port_selection.cap)
+
 #define MLX5_CAP_ODP(mdev, cap)\
 	MLX5_GET(odp_cap, mdev->caps.hca[MLX5_CAP_ODP]->cur, cap)
 
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0106c67e8ccb..259fcc168340 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -83,6 +83,7 @@ enum mlx5_flow_namespace_type {
 	MLX5_FLOW_NAMESPACE_RDMA_RX,
 	MLX5_FLOW_NAMESPACE_RDMA_RX_KERNEL,
 	MLX5_FLOW_NAMESPACE_RDMA_TX,
+	MLX5_FLOW_NAMESPACE_PORT_SEL,
 };
 
 enum {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c614ad1da44d..db1d9c69c1fa 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -767,6 +767,18 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	u8         reserved_at_20c0[0x5f40];
 };
 
+struct mlx5_ifc_port_selection_cap_bits {
+	u8         reserved_at_0[0x10];
+	u8         port_select_flow_table[0x1];
+	u8         reserved_at_11[0xf];
+
+	u8         reserved_at_20[0x1e0];
+
+	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_port_selection;
+
+	u8         reserved_at_400[0x7c00];
+};
+
 enum {
 	MLX5_FDB_TO_VPORT_REG_C_0 = 0x01,
 	MLX5_FDB_TO_VPORT_REG_C_1 = 0x02,
@@ -1515,7 +1527,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         uar_4k[0x1];
 	u8         reserved_at_241[0x9];
 	u8         uar_sz[0x6];
-	u8         reserved_at_248[0x2];
+	u8         port_selection_cap[0x1];
+	u8         reserved_at_248[0x1];
 	u8         umem_uid_0[0x1];
 	u8         reserved_at_250[0x5];
 	u8         log_pg_sz[0x8];
@@ -3164,6 +3177,7 @@ union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_flow_table_nic_cap_bits flow_table_nic_cap;
 	struct mlx5_ifc_flow_table_eswitch_cap_bits flow_table_eswitch_cap;
 	struct mlx5_ifc_e_switch_cap_bits e_switch_cap;
+	struct mlx5_ifc_port_selection_cap_bits port_selection_cap;
 	struct mlx5_ifc_vector_calc_cap_bits vector_calc_cap;
 	struct mlx5_ifc_qos_cap_bits qos_cap;
 	struct mlx5_ifc_debug_cap_bits debug_cap;
@@ -10434,9 +10448,16 @@ struct mlx5_ifc_dcbx_param_bits {
 	u8         reserved_at_a0[0x160];
 };
 
+enum {
+	MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY = 0,
+	MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT,
+};
+
 struct mlx5_ifc_lagc_bits {
 	u8         fdb_selection_mode[0x1];
-	u8         reserved_at_1[0x1c];
+	u8         reserved_at_1[0x14];
+	u8         port_select_mode[0x3];
+	u8         reserved_at_18[0x5];
 	u8         lag_state[0x3];
 
 	u8         reserved_at_20[0x14];
-- 
cgit v1.2.3


From e7e2519e3632396a25031b7e828ed35332e5dd07 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Tue, 6 Jul 2021 17:48:26 +0300
Subject: net/mlx5: Add support to create match definer

Introduce new APIs to create and destroy flow matcher
for given format id.

Flow match definer object is used for defining the fields and
mask used for the hash calculation. User should mask the desired
fields like done in the match criteria.

This object is assigned to flow group of type hash. In this flow
group type, packets lookup is done based on the hash result.

This patch also adds the required bits to create such flow group.

Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   |  57 +++++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h   |   4 +
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  46 ++++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |   5 +
 .../ethernet/mellanox/mlx5/core/steering/fs_dr.c   |  15 ++
 include/linux/mlx5/fs.h                            |   8 +
 include/linux/mlx5/mlx5_ifc.h                      | 272 +++++++++++++++++++--
 7 files changed, 380 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index caefdb7dfefe..2c82dc118460 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -185,6 +185,20 @@ static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master,
 	return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out));
 }
 
+static int
+mlx5_cmd_stub_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+				    int definer_id)
+{
+	return 0;
+}
+
+static int
+mlx5_cmd_stub_create_match_definer(struct mlx5_flow_root_namespace *ns,
+				   u16 format_id, u32 *match_mask)
+{
+	return 0;
+}
+
 static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 				   struct mlx5_flow_table *ft, u32 underlay_qpn,
 				   bool disconnect)
@@ -909,6 +923,45 @@ static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
 	mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in);
 }
 
+static int mlx5_cmd_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+					  int definer_id)
+{
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type,
+		 MLX5_OBJ_TYPE_MATCH_DEFINER);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id);
+
+	return mlx5_cmd_exec(ns->dev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5_cmd_create_match_definer(struct mlx5_flow_root_namespace *ns,
+					 u16 format_id, u32 *match_mask)
+{
+	u32 out[MLX5_ST_SZ_DW(create_match_definer_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {};
+	struct mlx5_core_dev *dev = ns->dev;
+	void *ptr;
+	int err;
+
+	MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(create_match_definer_in, in, general_obj_in_cmd_hdr.obj_type,
+		 MLX5_OBJ_TYPE_MATCH_DEFINER);
+
+	ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context);
+	MLX5_SET(match_definer, ptr, format_id, format_id);
+
+	ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask);
+	memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask));
+
+	err = mlx5_cmd_exec_inout(dev, create_match_definer, in, out);
+	return err ? err : MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+}
+
 static const struct mlx5_flow_cmds mlx5_flow_cmds = {
 	.create_flow_table = mlx5_cmd_create_flow_table,
 	.destroy_flow_table = mlx5_cmd_destroy_flow_table,
@@ -923,6 +976,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds = {
 	.packet_reformat_dealloc = mlx5_cmd_packet_reformat_dealloc,
 	.modify_header_alloc = mlx5_cmd_modify_header_alloc,
 	.modify_header_dealloc = mlx5_cmd_modify_header_dealloc,
+	.create_match_definer = mlx5_cmd_create_match_definer,
+	.destroy_match_definer = mlx5_cmd_destroy_match_definer,
 	.set_peer = mlx5_cmd_stub_set_peer,
 	.create_ns = mlx5_cmd_stub_create_ns,
 	.destroy_ns = mlx5_cmd_stub_destroy_ns,
@@ -942,6 +997,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmd_stubs = {
 	.packet_reformat_dealloc = mlx5_cmd_stub_packet_reformat_dealloc,
 	.modify_header_alloc = mlx5_cmd_stub_modify_header_alloc,
 	.modify_header_dealloc = mlx5_cmd_stub_modify_header_dealloc,
+	.create_match_definer = mlx5_cmd_stub_create_match_definer,
+	.destroy_match_definer = mlx5_cmd_stub_destroy_match_definer,
 	.set_peer = mlx5_cmd_stub_set_peer,
 	.create_ns = mlx5_cmd_stub_create_ns,
 	.destroy_ns = mlx5_cmd_stub_destroy_ns,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index 5ecd33cdc087..220ec632d35a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -97,6 +97,10 @@ struct mlx5_flow_cmds {
 
 	int (*create_ns)(struct mlx5_flow_root_namespace *ns);
 	int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
+	int (*create_match_definer)(struct mlx5_flow_root_namespace *ns,
+				    u16 format_id, u32 *match_mask);
+	int (*destroy_match_definer)(struct mlx5_flow_root_namespace *ns,
+				     int definer_id);
 };
 
 int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 8d8696f7c3f5..873efde0d458 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -3250,6 +3250,52 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
 
+int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer)
+{
+	return definer->id;
+}
+
+struct mlx5_flow_definer *
+mlx5_create_match_definer(struct mlx5_core_dev *dev,
+			  enum mlx5_flow_namespace_type ns_type, u16 format_id,
+			  u32 *match_mask)
+{
+	struct mlx5_flow_root_namespace *root;
+	struct mlx5_flow_definer *definer;
+	int id;
+
+	root = get_root_namespace(dev, ns_type);
+	if (!root)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	definer = kzalloc(sizeof(*definer), GFP_KERNEL);
+	if (!definer)
+		return ERR_PTR(-ENOMEM);
+
+	definer->ns_type = ns_type;
+	id = root->cmds->create_match_definer(root, format_id, match_mask);
+	if (id < 0) {
+		mlx5_core_warn(root->dev, "Failed to create match definer (%d)\n", id);
+		kfree(definer);
+		return ERR_PTR(id);
+	}
+	definer->id = id;
+	return definer;
+}
+
+void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
+				struct mlx5_flow_definer *definer)
+{
+	struct mlx5_flow_root_namespace *root;
+
+	root = get_root_namespace(dev, definer->ns_type);
+	if (WARN_ON(!root))
+		return;
+
+	root->cmds->destroy_match_definer(root, definer->id);
+	kfree(definer);
+}
+
 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
 				 struct mlx5_flow_root_namespace *peer_ns)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 79d37530afb3..7711db245c63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -49,6 +49,11 @@
 #define FDB_TC_MAX_PRIO 16
 #define FDB_TC_LEVELS_PER_PRIO 2
 
+struct mlx5_flow_definer {
+	enum mlx5_flow_namespace_type ns_type;
+	u32 id;
+};
+
 struct mlx5_modify_hdr {
 	enum mlx5_flow_namespace_type ns_type;
 	union {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 230e920e3845..2632d5ae9bc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -625,6 +625,19 @@ static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *n
 	mlx5dr_action_destroy(modify_hdr->action.dr_action);
 }
 
+static int
+mlx5_cmd_dr_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+				  int definer_id)
+{
+	return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_dr_create_match_definer(struct mlx5_flow_root_namespace *ns,
+					    u16 format_id, u32 *match_mask)
+{
+	return -EOPNOTSUPP;
+}
+
 static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
 				  struct mlx5_flow_table *ft,
 				  struct fs_fte *fte)
@@ -727,6 +740,8 @@ static const struct mlx5_flow_cmds mlx5_flow_cmds_dr = {
 	.packet_reformat_dealloc = mlx5_cmd_dr_packet_reformat_dealloc,
 	.modify_header_alloc = mlx5_cmd_dr_modify_header_alloc,
 	.modify_header_dealloc = mlx5_cmd_dr_modify_header_dealloc,
+	.create_match_definer = mlx5_cmd_dr_create_match_definer,
+	.destroy_match_definer = mlx5_cmd_dr_destroy_match_definer,
 	.set_peer = mlx5_cmd_dr_set_peer,
 	.create_ns = mlx5_cmd_dr_create_ns,
 	.destroy_ns = mlx5_cmd_dr_destroy_ns,
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 259fcc168340..7a43fec63a35 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -98,6 +98,7 @@ enum {
 
 struct mlx5_pkt_reformat;
 struct mlx5_modify_hdr;
+struct mlx5_flow_definer;
 struct mlx5_flow_table;
 struct mlx5_flow_group;
 struct mlx5_flow_namespace;
@@ -258,6 +259,13 @@ struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 						 void *modify_actions);
 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
 				struct mlx5_modify_hdr *modify_hdr);
+struct mlx5_flow_definer *
+mlx5_create_match_definer(struct mlx5_core_dev *dev,
+			  enum mlx5_flow_namespace_type ns_type, u16 format_id,
+			  u32 *match_mask);
+void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
+				struct mlx5_flow_definer *definer);
+int mlx5_get_match_definer_id(struct mlx5_flow_definer *definer);
 
 struct mlx5_pkt_reformat_params {
 	int type;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index db1d9c69c1fa..8f41145bc6ef 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -94,6 +94,7 @@ enum {
 enum {
 	MLX5_OBJ_TYPE_GENEVE_TLV_OPT = 0x000b,
 	MLX5_OBJ_TYPE_VIRTIO_NET_Q = 0x000d,
+	MLX5_OBJ_TYPE_MATCH_DEFINER = 0x0018,
 	MLX5_OBJ_TYPE_MKEY = 0xff01,
 	MLX5_OBJ_TYPE_QP = 0xff02,
 	MLX5_OBJ_TYPE_PSV = 0xff03,
@@ -1731,7 +1732,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         flex_parser_id_outer_first_mpls_over_gre[0x4];
 	u8         flex_parser_id_outer_first_mpls_over_udp_label[0x4];
 
-	u8	   reserved_at_6e0[0x10];
+	u8         max_num_match_definer[0x10];
 	u8	   sf_base_id[0x10];
 
 	u8         flex_parser_id_gtpu_dw_2[0x4];
@@ -1746,7 +1747,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8	   reserved_at_760[0x20];
 	u8	   vhca_tunnel_commands[0x40];
-	u8	   reserved_at_7c0[0x40];
+	u8         match_definer_format_supported[0x40];
 };
 
 struct mlx5_ifc_cmd_hca_cap_2_bits {
@@ -5666,6 +5667,236 @@ struct mlx5_ifc_query_fte_in_bits {
 	u8         reserved_at_120[0xe0];
 };
 
+struct mlx5_ifc_match_definer_format_0_bits {
+	u8         reserved_at_0[0x100];
+
+	u8         metadata_reg_c_0[0x20];
+
+	u8         metadata_reg_c_1[0x20];
+
+	u8         outer_dmac_47_16[0x20];
+
+	u8         outer_dmac_15_0[0x10];
+	u8         outer_ethertype[0x10];
+
+	u8         reserved_at_180[0x1];
+	u8         sx_sniffer[0x1];
+	u8         functional_lb[0x1];
+	u8         outer_ip_frag[0x1];
+	u8         outer_qp_type[0x2];
+	u8         outer_encap_type[0x2];
+	u8         port_number[0x2];
+	u8         outer_l3_type[0x2];
+	u8         outer_l4_type[0x2];
+	u8         outer_first_vlan_type[0x2];
+	u8         outer_first_vlan_prio[0x3];
+	u8         outer_first_vlan_cfi[0x1];
+	u8         outer_first_vlan_vid[0xc];
+
+	u8         outer_l4_type_ext[0x4];
+	u8         reserved_at_1a4[0x2];
+	u8         outer_ipsec_layer[0x2];
+	u8         outer_l2_type[0x2];
+	u8         force_lb[0x1];
+	u8         outer_l2_ok[0x1];
+	u8         outer_l3_ok[0x1];
+	u8         outer_l4_ok[0x1];
+	u8         outer_second_vlan_type[0x2];
+	u8         outer_second_vlan_prio[0x3];
+	u8         outer_second_vlan_cfi[0x1];
+	u8         outer_second_vlan_vid[0xc];
+
+	u8         outer_smac_47_16[0x20];
+
+	u8         outer_smac_15_0[0x10];
+	u8         inner_ipv4_checksum_ok[0x1];
+	u8         inner_l4_checksum_ok[0x1];
+	u8         outer_ipv4_checksum_ok[0x1];
+	u8         outer_l4_checksum_ok[0x1];
+	u8         inner_l3_ok[0x1];
+	u8         inner_l4_ok[0x1];
+	u8         outer_l3_ok_duplicate[0x1];
+	u8         outer_l4_ok_duplicate[0x1];
+	u8         outer_tcp_cwr[0x1];
+	u8         outer_tcp_ece[0x1];
+	u8         outer_tcp_urg[0x1];
+	u8         outer_tcp_ack[0x1];
+	u8         outer_tcp_psh[0x1];
+	u8         outer_tcp_rst[0x1];
+	u8         outer_tcp_syn[0x1];
+	u8         outer_tcp_fin[0x1];
+};
+
+struct mlx5_ifc_match_definer_format_22_bits {
+	u8         reserved_at_0[0x100];
+
+	u8         outer_ip_src_addr[0x20];
+
+	u8         outer_ip_dest_addr[0x20];
+
+	u8         outer_l4_sport[0x10];
+	u8         outer_l4_dport[0x10];
+
+	u8         reserved_at_160[0x1];
+	u8         sx_sniffer[0x1];
+	u8         functional_lb[0x1];
+	u8         outer_ip_frag[0x1];
+	u8         outer_qp_type[0x2];
+	u8         outer_encap_type[0x2];
+	u8         port_number[0x2];
+	u8         outer_l3_type[0x2];
+	u8         outer_l4_type[0x2];
+	u8         outer_first_vlan_type[0x2];
+	u8         outer_first_vlan_prio[0x3];
+	u8         outer_first_vlan_cfi[0x1];
+	u8         outer_first_vlan_vid[0xc];
+
+	u8         metadata_reg_c_0[0x20];
+
+	u8         outer_dmac_47_16[0x20];
+
+	u8         outer_smac_47_16[0x20];
+
+	u8         outer_smac_15_0[0x10];
+	u8         outer_dmac_15_0[0x10];
+};
+
+struct mlx5_ifc_match_definer_format_23_bits {
+	u8         reserved_at_0[0x100];
+
+	u8         inner_ip_src_addr[0x20];
+
+	u8         inner_ip_dest_addr[0x20];
+
+	u8         inner_l4_sport[0x10];
+	u8         inner_l4_dport[0x10];
+
+	u8         reserved_at_160[0x1];
+	u8         sx_sniffer[0x1];
+	u8         functional_lb[0x1];
+	u8         inner_ip_frag[0x1];
+	u8         inner_qp_type[0x2];
+	u8         inner_encap_type[0x2];
+	u8         port_number[0x2];
+	u8         inner_l3_type[0x2];
+	u8         inner_l4_type[0x2];
+	u8         inner_first_vlan_type[0x2];
+	u8         inner_first_vlan_prio[0x3];
+	u8         inner_first_vlan_cfi[0x1];
+	u8         inner_first_vlan_vid[0xc];
+
+	u8         tunnel_header_0[0x20];
+
+	u8         inner_dmac_47_16[0x20];
+
+	u8         inner_smac_47_16[0x20];
+
+	u8         inner_smac_15_0[0x10];
+	u8         inner_dmac_15_0[0x10];
+};
+
+struct mlx5_ifc_match_definer_format_29_bits {
+	u8         reserved_at_0[0xc0];
+
+	u8         outer_ip_dest_addr[0x80];
+
+	u8         outer_ip_src_addr[0x80];
+
+	u8         outer_l4_sport[0x10];
+	u8         outer_l4_dport[0x10];
+
+	u8         reserved_at_1e0[0x20];
+};
+
+struct mlx5_ifc_match_definer_format_30_bits {
+	u8         reserved_at_0[0xa0];
+
+	u8         outer_ip_dest_addr[0x80];
+
+	u8         outer_ip_src_addr[0x80];
+
+	u8         outer_dmac_47_16[0x20];
+
+	u8         outer_smac_47_16[0x20];
+
+	u8         outer_smac_15_0[0x10];
+	u8         outer_dmac_15_0[0x10];
+};
+
+struct mlx5_ifc_match_definer_format_31_bits {
+	u8         reserved_at_0[0xc0];
+
+	u8         inner_ip_dest_addr[0x80];
+
+	u8         inner_ip_src_addr[0x80];
+
+	u8         inner_l4_sport[0x10];
+	u8         inner_l4_dport[0x10];
+
+	u8         reserved_at_1e0[0x20];
+};
+
+struct mlx5_ifc_match_definer_format_32_bits {
+	u8         reserved_at_0[0xa0];
+
+	u8         inner_ip_dest_addr[0x80];
+
+	u8         inner_ip_src_addr[0x80];
+
+	u8         inner_dmac_47_16[0x20];
+
+	u8         inner_smac_47_16[0x20];
+
+	u8         inner_smac_15_0[0x10];
+	u8         inner_dmac_15_0[0x10];
+};
+
+struct mlx5_ifc_match_definer_bits {
+	u8         modify_field_select[0x40];
+
+	u8         reserved_at_40[0x40];
+
+	u8         reserved_at_80[0x10];
+	u8         format_id[0x10];
+
+	u8         reserved_at_a0[0x160];
+
+	u8         match_mask[16][0x20];
+};
+
+struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
+	u8         opcode[0x10];
+	u8         uid[0x10];
+
+	u8         vhca_tunnel_id[0x10];
+	u8         obj_type[0x10];
+
+	u8         obj_id[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+
+	u8         syndrome[0x20];
+
+	u8         obj_id[0x20];
+
+	u8         reserved_at_60[0x20];
+};
+
+struct mlx5_ifc_create_match_definer_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+
+	struct mlx5_ifc_match_definer_bits obj_context;
+};
+
+struct mlx5_ifc_create_match_definer_out_bits {
+	struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
+};
+
 enum {
 	MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_OUTER_HEADERS    = 0x0,
 	MLX5_QUERY_FLOW_GROUP_OUT_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS  = 0x1,
@@ -8139,6 +8370,11 @@ struct mlx5_ifc_create_flow_group_out_bits {
 	u8         reserved_at_60[0x20];
 };
 
+enum {
+	MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_TCAM_SUBTABLE  = 0x0,
+	MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT     = 0x1,
+};
+
 enum {
 	MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS     = 0x0,
 	MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS   = 0x1,
@@ -8160,7 +8396,9 @@ struct mlx5_ifc_create_flow_group_in_bits {
 	u8         reserved_at_60[0x20];
 
 	u8         table_type[0x8];
-	u8         reserved_at_88[0x18];
+	u8         reserved_at_88[0x4];
+	u8         group_type[0x4];
+	u8         reserved_at_90[0x10];
 
 	u8         reserved_at_a0[0x8];
 	u8         table_id[0x18];
@@ -8175,7 +8413,10 @@ struct mlx5_ifc_create_flow_group_in_bits {
 
 	u8         end_flow_index[0x20];
 
-	u8         reserved_at_140[0xa0];
+	u8         reserved_at_140[0x10];
+	u8         match_definer_id[0x10];
+
+	u8         reserved_at_160[0x80];
 
 	u8         reserved_at_1e0[0x18];
 	u8         match_criteria_enable[0x8];
@@ -10671,29 +10912,6 @@ struct mlx5_ifc_dealloc_memic_out_bits {
 	u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_general_obj_in_cmd_hdr_bits {
-	u8         opcode[0x10];
-	u8         uid[0x10];
-
-	u8         vhca_tunnel_id[0x10];
-	u8         obj_type[0x10];
-
-	u8         obj_id[0x20];
-
-	u8         reserved_at_60[0x20];
-};
-
-struct mlx5_ifc_general_obj_out_cmd_hdr_bits {
-	u8         status[0x8];
-	u8         reserved_at_8[0x18];
-
-	u8         syndrome[0x20];
-
-	u8         obj_id[0x20];
-
-	u8         reserved_at_60[0x20];
-};
-
 struct mlx5_ifc_umem_bits {
 	u8         reserved_at_0[0x80];
 
-- 
cgit v1.2.3


From 58a606dba708f114e7f412b5c3024027e8a73e34 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Tue, 3 Aug 2021 10:04:41 +0300
Subject: net/mlx5: Introduce new uplink destination type

The uplink destination type should be used in rules to steer the
packet to the uplink when the device is in steering based LAG mode.

Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 3 +++
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c             | 8 +++++++-
 include/linux/mlx5/mlx5_ifc.h                                | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 87d65f6b5310..7841ef6c193c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -235,6 +235,9 @@ const char *parse_fs_dst(struct trace_seq *p,
 	const char *ret = trace_seq_buffer_ptr(p);
 
 	switch (dst->type) {
+	case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
+		trace_seq_printf(p, "uplink\n");
+		break;
 	case MLX5_FLOW_DESTINATION_TYPE_VPORT:
 		trace_seq_printf(p, "vport=%u\n", dst->vport.num);
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 2c82dc118460..750b21124a1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -577,8 +577,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
 				id = dst->dest_attr.ft->id;
 				break;
+			case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
 			case MLX5_FLOW_DESTINATION_TYPE_VPORT:
-				id = dst->dest_attr.vport.num;
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id_valid,
 					 !!(dst->dest_attr.vport.flags &
@@ -586,6 +586,12 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id,
 					 dst->dest_attr.vport.vhca_id);
+				if (type == MLX5_FLOW_DESTINATION_TYPE_UPLINK) {
+					/* destination_id is reserved */
+					id = 0;
+					break;
+				}
+				id = dst->dest_attr.vport.num;
 				if (extended_dest &&
 				    dst->dest_attr.vport.pkt_reformat) {
 					MLX5_SET(dest_format_struct, in_dests,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 8f41145bc6ef..09e43019d877 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1766,6 +1766,7 @@ enum mlx5_flow_destination_type {
 	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE   = 0x1,
 	MLX5_FLOW_DESTINATION_TYPE_TIR          = 0x2,
 	MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER = 0x6,
+	MLX5_FLOW_DESTINATION_TYPE_UPLINK       = 0x8,
 
 	MLX5_FLOW_DESTINATION_TYPE_PORT         = 0x99,
 	MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
-- 
cgit v1.2.3


From 76af6a054da4055305ddb28c5eb151b9ee4f74f9 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Mon, 18 Oct 2021 15:15:32 -0700
Subject: mm/migrate: add CPU hotplug to demotion #ifdef

Once upon a time, the node demotion updates were driven solely by memory
hotplug events.  But now, there are handlers for both CPU and memory
hotplug.

However, the #ifdef around the code checks only memory hotplug.  A
system that has HOTPLUG_CPU=y but MEMORY_HOTPLUG=n would miss CPU
hotplug events.

Update the #ifdef around the common code.  Add memory and CPU-specific
#ifdefs for their handlers.  These memory/CPU #ifdefs avoid unused
function warnings when their Kconfig option is off.

[arnd@arndb.de: rework hotplug_memory_notifier() stub]
  Link: https://lkml.kernel.org/r/20211013144029.2154629-1-arnd@kernel.org

Link: https://lkml.kernel.org/r/20210924161255.E5FE8F7E@davehans-spike.ostc.intel.com
Fixes: 884a6e5d1f93 ("mm/migrate: update node demotion order on hotplug events")
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory.h |  5 ++++-
 mm/migrate.c           | 42 +++++++++++++++++++++---------------------
 mm/page_ext.c          |  4 +---
 mm/slab.c              |  4 ++--
 4 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index 7efc0a7c14c9..182c606adb06 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 #define register_hotmemory_notifier(nb)		register_memory_notifier(nb)
 #define unregister_hotmemory_notifier(nb) 	unregister_memory_notifier(nb)
 #else
-#define hotplug_memory_notifier(fn, pri)	({ 0; })
+static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
+{
+	return 0;
+}
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotmemory_notifier(nb)    ({ (void)(nb); 0; })
 #define unregister_hotmemory_notifier(nb)  ({ (void)(nb); })
diff --git a/mm/migrate.c b/mm/migrate.c
index a6311e46f605..700112c0123d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3066,7 +3066,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
 EXPORT_SYMBOL(migrate_vma_finalize);
 #endif /* CONFIG_DEVICE_PRIVATE */
 
-#if defined(CONFIG_MEMORY_HOTPLUG)
+#if defined(CONFIG_HOTPLUG_CPU)
 /* Disable reclaim-based migration. */
 static void __disable_all_migrate_targets(void)
 {
@@ -3208,25 +3208,6 @@ static void set_migration_target_nodes(void)
 	put_online_mems();
 }
 
-/*
- * React to hotplug events that might affect the migration targets
- * like events that online or offline NUMA nodes.
- *
- * The ordering is also currently dependent on which nodes have
- * CPUs.  That means we need CPU on/offline notification too.
- */
-static int migration_online_cpu(unsigned int cpu)
-{
-	set_migration_target_nodes();
-	return 0;
-}
-
-static int migration_offline_cpu(unsigned int cpu)
-{
-	set_migration_target_nodes();
-	return 0;
-}
-
 /*
  * This leaves migrate-on-reclaim transiently disabled between
  * the MEM_GOING_OFFLINE and MEM_OFFLINE events.  This runs
@@ -3284,6 +3265,25 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
 	return notifier_from_errno(0);
 }
 
+/*
+ * React to hotplug events that might affect the migration targets
+ * like events that online or offline NUMA nodes.
+ *
+ * The ordering is also currently dependent on which nodes have
+ * CPUs.  That means we need CPU on/offline notification too.
+ */
+static int migration_online_cpu(unsigned int cpu)
+{
+	set_migration_target_nodes();
+	return 0;
+}
+
+static int migration_offline_cpu(unsigned int cpu)
+{
+	set_migration_target_nodes();
+	return 0;
+}
+
 static int __init migrate_on_reclaim_init(void)
 {
 	int ret;
@@ -3303,4 +3303,4 @@ static int __init migrate_on_reclaim_init(void)
 	return 0;
 }
 late_initcall(migrate_on_reclaim_init);
-#endif /* CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/mm/page_ext.c b/mm/page_ext.c
index dfb91653d359..2a52fd9ed464 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -269,7 +269,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
 	total_usage += table_size;
 	return 0;
 }
-#ifdef CONFIG_MEMORY_HOTPLUG
+
 static void free_page_ext(void *addr)
 {
 	if (is_vmalloc_addr(addr)) {
@@ -374,8 +374,6 @@ static int __meminit page_ext_callback(struct notifier_block *self,
 	return notifier_from_errno(ret);
 }
 
-#endif
-
 void __init page_ext_init(void)
 {
 	unsigned long pfn;
diff --git a/mm/slab.c b/mm/slab.c
index d0f725637663..874b3f8fe80d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1095,7 +1095,7 @@ static int slab_offline_cpu(unsigned int cpu)
 	return 0;
 }
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+#if defined(CONFIG_NUMA)
 /*
  * Drains freelist for a node on each slab cache, used for memory hot-remove.
  * Returns -EBUSY if all objects cannot be drained so that the node is not
@@ -1157,7 +1157,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
 out:
 	return notifier_from_errno(ret);
 }
-#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+#endif /* CONFIG_NUMA */
 
 /*
  * swap the static kmem_cache_node with kmalloced memory
-- 
cgit v1.2.3


From a6a0251c6fce496744121b4e08c899f45270dbcc Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 18 Oct 2021 15:15:35 -0700
Subject: mm/migrate: fix CPUHP state to update node demotion order

The node demotion order needs to be updated during CPU hotplug.  Because
whether a NUMA node has CPU may influence the demotion order.  The
update function should be called during CPU online/offline after the
node_states[N_CPU] has been updated.  That is done in
CPUHP_AP_ONLINE_DYN during CPU online and in CPUHP_MM_VMSTAT_DEAD during
CPU offline.  But in commit 884a6e5d1f93 ("mm/migrate: update node
demotion order on hotplug events"), the function to update node demotion
order is called in CPUHP_AP_ONLINE_DYN during CPU online/offline.  This
doesn't satisfy the order requirement.

For example, there are 4 CPUs (P0, P1, P2, P3) in 2 sockets (P0, P1 in S0
and P2, P3 in S1), the demotion order is

 - S0 -> NUMA_NO_NODE
 - S1 -> NUMA_NO_NODE

After P2 and P3 is offlined, because S1 has no CPU now, the demotion
order should have been changed to

 - S0 -> S1
 - S1 -> NO_NODE

but it isn't changed, because the order updating callback for CPU
hotplug doesn't see the new nodemask.  After that, if P1 is offlined,
the demotion order is changed to the expected order as above.

So in this patch, we added CPUHP_AP_MM_DEMOTION_ONLINE and
CPUHP_MM_DEMOTION_DEAD to be called after CPUHP_AP_ONLINE_DYN and
CPUHP_MM_VMSTAT_DEAD during CPU online and offline, and register the
update function on them.

Link: https://lkml.kernel.org/r/20210929060351.7293-1-ying.huang@intel.com
Fixes: 884a6e5d1f93 ("mm/migrate: update node demotion order on hotplug events")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Keith Busch <kbusch@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuhotplug.h | 4 ++++
 mm/migrate.c               | 8 +++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 832d8a74fa59..991911048857 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -72,6 +72,8 @@ enum cpuhp_state {
 	CPUHP_SLUB_DEAD,
 	CPUHP_DEBUG_OBJ_DEAD,
 	CPUHP_MM_WRITEBACK_DEAD,
+	/* Must be after CPUHP_MM_VMSTAT_DEAD */
+	CPUHP_MM_DEMOTION_DEAD,
 	CPUHP_MM_VMSTAT_DEAD,
 	CPUHP_SOFTIRQ_DEAD,
 	CPUHP_NET_MVNETA_DEAD,
@@ -240,6 +242,8 @@ enum cpuhp_state {
 	CPUHP_AP_BASE_CACHEINFO_ONLINE,
 	CPUHP_AP_ONLINE_DYN,
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
+	/* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
+	CPUHP_AP_MM_DEMOTION_ONLINE,
 	CPUHP_AP_X86_HPET_ONLINE,
 	CPUHP_AP_X86_KVM_CLK_ONLINE,
 	CPUHP_AP_DTPM_CPU_ONLINE,
diff --git a/mm/migrate.c b/mm/migrate.c
index 700112c0123d..1852d787e6ab 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3288,9 +3288,8 @@ static int __init migrate_on_reclaim_init(void)
 {
 	int ret;
 
-	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim",
-				migration_online_cpu,
-				migration_offline_cpu);
+	ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
+					NULL, migration_offline_cpu);
 	/*
 	 * In the unlikely case that this fails, the automatic
 	 * migration targets may become suboptimal for nodes
@@ -3298,6 +3297,9 @@ static int __init migrate_on_reclaim_init(void)
 	 * rare case, do not bother trying to do anything special.
 	 */
 	WARN_ON(ret < 0);
+	ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
+				migration_online_cpu, NULL);
+	WARN_ON(ret < 0);
 
 	hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
 	return 0;
-- 
cgit v1.2.3


From b0e901280d9860a0a35055f220e8e457f300f40a Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Mon, 18 Oct 2021 15:16:09 -0700
Subject: elfcore: correct reference to CONFIG_UML

Commit 6e7b64b9dd6d ("elfcore: fix building with clang") introduces
special handling for two architectures, ia64 and User Mode Linux.
However, the wrong name, i.e., CONFIG_UM, for the intended Kconfig
symbol for User-Mode Linux was used.

Although the directory for User Mode Linux is ./arch/um; the Kconfig
symbol for this architecture is called CONFIG_UML.

Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs:

  UM
  Referencing files: include/linux/elfcore.h
  Similar symbols: UML, NUMA

Correct the name of the config to the intended one.

[akpm@linux-foundation.org: fix um/x86_64, per Catalin]
  Link: https://lkml.kernel.org/r/20211006181119.2851441-1-catalin.marinas@arm.com
  Link: https://lkml.kernel.org/r/YV6pejGzLy5ppEpt@arm.com

Link: https://lkml.kernel.org/r/20211006082209.417-1-lukas.bulwahn@gmail.com
Fixes: 6e7b64b9dd6d ("elfcore: fix building with clang")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Barret Rhoden <brho@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/elfcore.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 2aaa15779d50..957ebec35aad 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
 #endif
 }
 
-#if defined(CONFIG_UM) || defined(CONFIG_IA64)
+#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
 /*
  * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
  * extra segments containing the gate DSO contents.  Dumping its
-- 
cgit v1.2.3


From 79f9bc5843142b649575f887dccdf1c07ad75c20 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 18 Oct 2021 15:16:16 -0700
Subject: mm/secretmem: fix NULL page->mapping dereference in
 page_is_secretmem()

Check for a NULL page->mapping before dereferencing the mapping in
page_is_secretmem(), as the page's mapping can be nullified while gup()
is running, e.g.  by reclaim or truncation.

  BUG: kernel NULL pointer dereference, address: 0000000000000068
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 0 P4D 0
  Oops: 0000 [#1] PREEMPT SMP NOPTI
  CPU: 6 PID: 4173897 Comm: CPU 3/KVM Tainted: G        W
  RIP: 0010:internal_get_user_pages_fast+0x621/0x9d0
  Code: <48> 81 7a 68 80 08 04 bc 0f 85 21 ff ff 8 89 c7 be
  RSP: 0018:ffffaa90087679b0 EFLAGS: 00010046
  RAX: ffffe3f37905b900 RBX: 00007f2dd561e000 RCX: ffffe3f37905b934
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffe3f37905b900
  ...
  CR2: 0000000000000068 CR3: 00000004c5898003 CR4: 00000000001726e0
  Call Trace:
   get_user_pages_fast_only+0x13/0x20
   hva_to_pfn+0xa9/0x3e0
   try_async_pf+0xa1/0x270
   direct_page_fault+0x113/0xad0
   kvm_mmu_page_fault+0x69/0x680
   vmx_handle_exit+0xe1/0x5d0
   kvm_arch_vcpu_ioctl_run+0xd81/0x1c70
   kvm_vcpu_ioctl+0x267/0x670
   __x64_sys_ioctl+0x83/0xa0
   do_syscall_64+0x56/0x80
   entry_SYSCALL_64_after_hwframe+0x44/0xae

Link: https://lkml.kernel.org/r/20211007231502.3552715-1-seanjc@google.com
Fixes: 1507f51255c9 ("mm: introduce memfd_secret system call to create "secret" memory areas")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reported-by: Darrick J. Wong <djwong@kernel.org>
Reported-by: Stephen <stephenackerman16@gmail.com>
Tested-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/secretmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 21c3771e6a56..988528b5da43 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page)
 	mapping = (struct address_space *)
 		((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
 
-	if (mapping != page->mapping)
+	if (!mapping || mapping != page->mapping)
 		return false;
 
 	return mapping->a_ops == &secretmem_aops;
-- 
cgit v1.2.3


From 25c02edfd41f0dd7aad9115149625d7e7f441b7d Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <aardelean@deviqon.com>
Date: Fri, 3 Sep 2021 10:29:13 +0300
Subject: iio: inkern: introduce devm_iio_map_array_register() short-hand
 function

This change introduces a device-managed variant to the
iio_map_array_register() function. It's a simple implementation of calling
iio_map_array_register() and registering a callback to
iio_map_array_unregister() with the devm_add_action_or_reset().

The function uses an explicit 'dev' parameter to bind the unwinding to. It
could have been implemented to implicitly use the parent of the IIO device,
however it shouldn't be too expensive to callers to just specify to which
device object to bind this unwind call.
It would make the API a bit more flexible.

Signed-off-by: Alexandru Ardelean <aardelean@deviqon.com>
Link: https://lore.kernel.org/r/20210903072917.45769-2-aardelean@deviqon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/driver-api/driver-model/devres.rst |  1 +
 drivers/iio/inkern.c                             | 17 +++++++++++++++++
 include/linux/iio/driver.h                       | 14 ++++++++++++++
 3 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 650096523f4f..148e19381b79 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -287,6 +287,7 @@ IIO
   devm_iio_device_register()
   devm_iio_dmaengine_buffer_setup()
   devm_iio_kfifo_buffer_setup()
+  devm_iio_map_array_register()
   devm_iio_triggered_buffer_setup()
   devm_iio_trigger_alloc()
   devm_iio_trigger_register()
diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c
index 391a3380a1d1..0222885b334c 100644
--- a/drivers/iio/inkern.c
+++ b/drivers/iio/inkern.c
@@ -85,6 +85,23 @@ int iio_map_array_unregister(struct iio_dev *indio_dev)
 }
 EXPORT_SYMBOL_GPL(iio_map_array_unregister);
 
+static void iio_map_array_unregister_cb(void *indio_dev)
+{
+	iio_map_array_unregister(indio_dev);
+}
+
+int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps)
+{
+	int ret;
+
+	ret = iio_map_array_register(indio_dev, maps);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(dev, iio_map_array_unregister_cb, indio_dev);
+}
+EXPORT_SYMBOL_GPL(devm_iio_map_array_register);
+
 static const struct iio_chan_spec
 *iio_chan_spec_from_name(const struct iio_dev *indio_dev, const char *name)
 {
diff --git a/include/linux/iio/driver.h b/include/linux/iio/driver.h
index 36de60a5da7a..7a157ed218f6 100644
--- a/include/linux/iio/driver.h
+++ b/include/linux/iio/driver.h
@@ -8,6 +8,7 @@
 #ifndef _IIO_INKERN_H_
 #define _IIO_INKERN_H_
 
+struct device;
 struct iio_dev;
 struct iio_map;
 
@@ -26,4 +27,17 @@ int iio_map_array_register(struct iio_dev *indio_dev,
  */
 int iio_map_array_unregister(struct iio_dev *indio_dev);
 
+/**
+ * devm_iio_map_array_register - device-managed version of iio_map_array_register
+ * @dev:	Device object to which to bind the unwinding of this registration
+ * @indio_dev:	Pointer to the iio_dev structure
+ * @maps:	Pointer to an IIO map object which is to be registered to this IIO device
+ *
+ * This function will call iio_map_array_register() to register an IIO map object
+ * and will also hook a callback to the iio_map_array_unregister() function to
+ * handle de-registration of the IIO map object when the device's refcount goes to
+ * zero.
+ */
+int devm_iio_map_array_register(struct device *dev, struct iio_dev *indio_dev, struct iio_map *maps);
+
 #endif
-- 
cgit v1.2.3


From 31fa357ac809affd9f9a7d0b5d1991951e16beec Mon Sep 17 00:00:00 2001
From: Nuno Sá <nuno.sa@analog.com>
Date: Fri, 3 Sep 2021 16:14:20 +0200
Subject: iio: adis: handle devices that cannot unmask the drdy pin
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some devices can't mask/unmask the data ready pin and in those cases
each driver was just calling '{dis}enable_irq()' to control the trigger
state. This change, moves that handling into the library by introducing
a new boolean in the data structure that tells the library that the
device cannot unmask the pin.

On top of controlling the trigger state, we can also use this flag to
automatically request the IRQ with 'IRQF_NO_AUTOEN' in case it is set.
So far, all users of the library want to start operation with IRQs/DRDY
pin disabled so it should be fairly safe to do this inside the library.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20210903141423.517028-3-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/adis.c         | 15 ++++++++++++++-
 drivers/iio/imu/adis_trigger.c |  4 ++++
 include/linux/iio/imu/adis.h   |  2 ++
 3 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index d4e692b187cd..cb0d66bf6561 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -286,6 +286,13 @@ int adis_enable_irq(struct adis *adis, bool enable)
 	if (adis->data->enable_irq) {
 		ret = adis->data->enable_irq(adis, enable);
 		goto out_unlock;
+	} else if (adis->data->unmasked_drdy) {
+		if (enable)
+			enable_irq(adis->spi->irq);
+		else
+			disable_irq(adis->spi->irq);
+
+		goto out_unlock;
 	}
 
 	ret = __adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc);
@@ -430,7 +437,13 @@ int __adis_initial_startup(struct adis *adis)
 	if (ret)
 		return ret;
 
-	adis_enable_irq(adis, false);
+	/*
+	 * don't bother calling this if we can't unmask the IRQ as in this case
+	 * the IRQ is most likely not yet requested and we will request it
+	 * with 'IRQF_NO_AUTOEN' anyways.
+	 */
+	if (!adis->data->unmasked_drdy)
+		adis_enable_irq(adis, false);
 
 	if (!adis->data->prod_id_reg)
 		return 0;
diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c
index 48eedc29b28a..c461bd1e8e69 100644
--- a/drivers/iio/imu/adis_trigger.c
+++ b/drivers/iio/imu/adis_trigger.c
@@ -30,6 +30,10 @@ static const struct iio_trigger_ops adis_trigger_ops = {
 static int adis_validate_irq_flag(struct adis *adis)
 {
 	unsigned long direction = adis->irq_flag & IRQF_TRIGGER_MASK;
+
+	/* We cannot mask the interrupt so ensure it's not enabled at request */
+	if (adis->data->unmasked_drdy)
+		adis->irq_flag |= IRQF_NO_AUTOEN;
 	/*
 	 * Typically this devices have data ready either on the rising edge or
 	 * on the falling edge of the data ready pin. This checks enforces that
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index cf49997d5903..7c02f5292eea 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -49,6 +49,7 @@ struct adis_timeout {
  * @status_error_mask: Bitmask of errors supported by the device
  * @timeouts: Chip specific delays
  * @enable_irq: Hook for ADIS devices that have a special IRQ enable/disable
+ * @unmasked_drdy: True for devices that cannot mask/unmask the data ready pin
  * @has_paging: True if ADIS device has paged registers
  * @burst_reg_cmd:	Register command that triggers burst
  * @burst_len:		Burst size in the SPI RX buffer. If @burst_max_len is defined,
@@ -78,6 +79,7 @@ struct adis_data {
 	unsigned int status_error_mask;
 
 	int (*enable_irq)(struct adis *adis, bool enable);
+	bool unmasked_drdy;
 
 	bool has_paging;
 
-- 
cgit v1.2.3


From 95ec3fdf2b79eaff79e78688bbc2f7dbb98d68b6 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 13 Jun 2021 16:10:36 +0100
Subject: iio: core: Introduce iio_push_to_buffers_with_ts_unaligned()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Whilst it is almost always possible to arrange for scan data to be
read directly into a buffer that is suitable for passing to
iio_push_to_buffers_with_timestamp(), there are a few places where
leading data needs to be skipped over.

For these cases introduce a function that will allocate an appropriate
sized and aligned bounce buffer (if not already allocated) and copy
the unaligned data into that before calling
iio_push_to_buffers_with_timestamp() on the bounce buffer.
We tie the lifespace of this buffer to that of the iio_dev.dev
which should ensure no memory leaks occur.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20210613151039.569883-2-jic23@kernel.org
---
 drivers/iio/industrialio-buffer.c | 46 +++++++++++++++++++++++++++++++++++++++
 include/linux/iio/buffer.h        |  4 ++++
 include/linux/iio/iio-opaque.h    |  4 ++++
 3 files changed, 54 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index a95cc2da56be..4209e933ab80 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -1731,6 +1731,52 @@ int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data)
 }
 EXPORT_SYMBOL_GPL(iio_push_to_buffers);
 
+/**
+ * iio_push_to_buffers_with_ts_unaligned() - push to registered buffer,
+ *    no alignment or space requirements.
+ * @indio_dev:		iio_dev structure for device.
+ * @data:		channel data excluding the timestamp.
+ * @data_sz:		size of data.
+ * @timestamp:		timestamp for the sample data.
+ *
+ * This special variant of iio_push_to_buffers_with_timestamp() does
+ * not require space for the timestamp, or 8 byte alignment of data.
+ * It does however require an allocation on first call and additional
+ * copies on all calls, so should be avoided if possible.
+ */
+int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev,
+					  const void *data,
+					  size_t data_sz,
+					  int64_t timestamp)
+{
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
+
+	/*
+	 * Conservative estimate - we can always safely copy the minimum
+	 * of either the data provided or the length of the destination buffer.
+	 * This relaxed limit allows the calling drivers to be lax about
+	 * tracking the size of the data they are pushing, at the cost of
+	 * unnecessary copying of padding.
+	 */
+	data_sz = min_t(size_t, indio_dev->scan_bytes, data_sz);
+	if (iio_dev_opaque->bounce_buffer_size !=  indio_dev->scan_bytes) {
+		void *bb;
+
+		bb = devm_krealloc(&indio_dev->dev,
+				   iio_dev_opaque->bounce_buffer,
+				   indio_dev->scan_bytes, GFP_KERNEL);
+		if (!bb)
+			return -ENOMEM;
+		iio_dev_opaque->bounce_buffer = bb;
+		iio_dev_opaque->bounce_buffer_size = indio_dev->scan_bytes;
+	}
+	memcpy(iio_dev_opaque->bounce_buffer, data, data_sz);
+	return iio_push_to_buffers_with_timestamp(indio_dev,
+						  iio_dev_opaque->bounce_buffer,
+						  timestamp);
+}
+EXPORT_SYMBOL_GPL(iio_push_to_buffers_with_ts_unaligned);
+
 /**
  * iio_buffer_release() - Free a buffer's resources
  * @ref: Pointer to the kref embedded in the iio_buffer struct
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index b6928ac5c63d..451379a3984a 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -38,6 +38,10 @@ static inline int iio_push_to_buffers_with_timestamp(struct iio_dev *indio_dev,
 	return iio_push_to_buffers(indio_dev, data);
 }
 
+int iio_push_to_buffers_with_ts_unaligned(struct iio_dev *indio_dev,
+					  const void *data, size_t data_sz,
+					  int64_t timestamp);
+
 bool iio_validate_scan_mask_onehot(struct iio_dev *indio_dev,
 				   const unsigned long *mask);
 
diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h
index c9504e9da571..2be12b7b5dc5 100644
--- a/include/linux/iio/iio-opaque.h
+++ b/include/linux/iio/iio-opaque.h
@@ -23,6 +23,8 @@
  * @groupcounter:		index of next attribute group
  * @legacy_scan_el_group:	attribute group for legacy scan elements attribute group
  * @legacy_buffer_group:	attribute group for legacy buffer attributes group
+ * @bounce_buffer:		for devices that call iio_push_to_buffers_with_timestamp_unaligned()
+ * @bounce_buffer_size:		size of currently allocate bounce buffer
  * @scan_index_timestamp:	cache of the index to the timestamp
  * @clock_id:			timestamping clock posix identifier
  * @chrdev:			associated character device
@@ -50,6 +52,8 @@ struct iio_dev_opaque {
 	int				groupcounter;
 	struct attribute_group		legacy_scan_el_group;
 	struct attribute_group		legacy_buffer_group;
+	void				*bounce_buffer;
+	size_t				bounce_buffer_size;
 
 	unsigned int			scan_index_timestamp;
 	clockid_t			clock_id;
-- 
cgit v1.2.3


From 9eeee3b0bf190b4f677af27e24ba0cd1c030e49b Mon Sep 17 00:00:00 2001
From: Mihail Chindris <mihail.chindris@analog.com>
Date: Thu, 7 Oct 2021 08:00:30 +0000
Subject: iio: Add output buffer support

Currently IIO only supports buffer mode for capture devices like ADCs. Add
support for buffered mode for output devices like DACs.

The output buffer implementation is analogous to the input buffer
implementation. Instead of using read() to get data from the buffer write()
is used to copy data into the buffer.

poll() with POLLOUT will wakeup if there is space available.

Drivers can remove data from a buffer using iio_pop_from_buffer(), the
function can e.g. called from a trigger handler to write the data to
hardware.

A buffer can only be either a output buffer or an input, but not both. So,
for a device that has an ADC and DAC path, this will mean 2 IIO buffers
(one for each direction).

The direction of the buffer is decided by the new direction field of the
iio_buffer struct and should be set after allocating and before registering
it.

Co-developed-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Co-developed-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Signed-off-by: Mihail Chindris <mihail.chindris@analog.com>
Link: https://lore.kernel.org/r/20211007080035.2531-2-mihail.chindris@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/iio_core.h            |   4 ++
 drivers/iio/industrialio-buffer.c | 127 +++++++++++++++++++++++++++++++++++++-
 drivers/iio/industrialio-core.c   |   1 +
 include/linux/iio/buffer.h        |   7 +++
 include/linux/iio/buffer_impl.h   |  11 ++++
 5 files changed, 148 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/iio_core.h b/drivers/iio/iio_core.h
index 8f4a9b264962..61e318431de9 100644
--- a/drivers/iio/iio_core.h
+++ b/drivers/iio/iio_core.h
@@ -68,12 +68,15 @@ __poll_t iio_buffer_poll_wrapper(struct file *filp,
 				 struct poll_table_struct *wait);
 ssize_t iio_buffer_read_wrapper(struct file *filp, char __user *buf,
 				size_t n, loff_t *f_ps);
+ssize_t iio_buffer_write_wrapper(struct file *filp, const char __user *buf,
+				 size_t n, loff_t *f_ps);
 
 int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev);
 void iio_buffers_free_sysfs_and_mask(struct iio_dev *indio_dev);
 
 #define iio_buffer_poll_addr (&iio_buffer_poll_wrapper)
 #define iio_buffer_read_outer_addr (&iio_buffer_read_wrapper)
+#define iio_buffer_write_outer_addr (&iio_buffer_write_wrapper)
 
 void iio_disable_all_buffers(struct iio_dev *indio_dev);
 void iio_buffer_wakeup_poll(struct iio_dev *indio_dev);
@@ -83,6 +86,7 @@ void iio_device_detach_buffers(struct iio_dev *indio_dev);
 
 #define iio_buffer_poll_addr NULL
 #define iio_buffer_read_outer_addr NULL
+#define iio_buffer_write_outer_addr NULL
 
 static inline int iio_buffers_alloc_sysfs_and_mask(struct iio_dev *indio_dev)
 {
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 4209e933ab80..b884d78657cb 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -120,6 +120,9 @@ static ssize_t iio_buffer_read(struct file *filp, char __user *buf,
 	if (!rb || !rb->access->read)
 		return -EINVAL;
 
+	if (rb->direction != IIO_BUFFER_DIRECTION_IN)
+		return -EPERM;
+
 	datum_size = rb->bytes_per_datum;
 
 	/*
@@ -161,6 +164,65 @@ static ssize_t iio_buffer_read(struct file *filp, char __user *buf,
 	return ret;
 }
 
+static size_t iio_buffer_space_available(struct iio_buffer *buf)
+{
+	if (buf->access->space_available)
+		return buf->access->space_available(buf);
+
+	return SIZE_MAX;
+}
+
+static ssize_t iio_buffer_write(struct file *filp, const char __user *buf,
+				size_t n, loff_t *f_ps)
+{
+	struct iio_dev_buffer_pair *ib = filp->private_data;
+	struct iio_buffer *rb = ib->buffer;
+	struct iio_dev *indio_dev = ib->indio_dev;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret;
+	size_t written;
+
+	if (!indio_dev->info)
+		return -ENODEV;
+
+	if (!rb || !rb->access->write)
+		return -EINVAL;
+
+	if (rb->direction != IIO_BUFFER_DIRECTION_OUT)
+		return -EPERM;
+
+	written = 0;
+	add_wait_queue(&rb->pollq, &wait);
+	do {
+		if (indio_dev->info == NULL)
+			return -ENODEV;
+
+		if (!iio_buffer_space_available(rb)) {
+			if (signal_pending(current)) {
+				ret = -ERESTARTSYS;
+				break;
+			}
+
+			wait_woken(&wait, TASK_INTERRUPTIBLE,
+					MAX_SCHEDULE_TIMEOUT);
+			continue;
+		}
+
+		ret = rb->access->write(rb, n - written, buf + written);
+		if (ret == 0 && (filp->f_flags & O_NONBLOCK))
+			ret = -EAGAIN;
+
+		if (ret > 0) {
+			written += ret;
+			if (written != n && !(filp->f_flags & O_NONBLOCK))
+				continue;
+		}
+	} while (ret == 0);
+	remove_wait_queue(&rb->pollq, &wait);
+
+	return ret < 0 ? ret : n;
+}
+
 /**
  * iio_buffer_poll() - poll the buffer to find out if it has data
  * @filp:	File structure pointer for device access
@@ -181,8 +243,18 @@ static __poll_t iio_buffer_poll(struct file *filp,
 		return 0;
 
 	poll_wait(filp, &rb->pollq, wait);
-	if (iio_buffer_ready(indio_dev, rb, rb->watermark, 0))
-		return EPOLLIN | EPOLLRDNORM;
+
+	switch (rb->direction) {
+	case IIO_BUFFER_DIRECTION_IN:
+		if (iio_buffer_ready(indio_dev, rb, rb->watermark, 0))
+			return EPOLLIN | EPOLLRDNORM;
+		break;
+	case IIO_BUFFER_DIRECTION_OUT:
+		if (iio_buffer_space_available(rb))
+			return EPOLLOUT | EPOLLWRNORM;
+		break;
+	}
+
 	return 0;
 }
 
@@ -199,6 +271,19 @@ ssize_t iio_buffer_read_wrapper(struct file *filp, char __user *buf,
 	return iio_buffer_read(filp, buf, n, f_ps);
 }
 
+ssize_t iio_buffer_write_wrapper(struct file *filp, const char __user *buf,
+				 size_t n, loff_t *f_ps)
+{
+	struct iio_dev_buffer_pair *ib = filp->private_data;
+	struct iio_buffer *rb = ib->buffer;
+
+	/* check if buffer was opened through new API */
+	if (test_bit(IIO_BUSY_BIT_POS, &rb->flags))
+		return -EBUSY;
+
+	return iio_buffer_write(filp, buf, n, f_ps);
+}
+
 __poll_t iio_buffer_poll_wrapper(struct file *filp,
 				 struct poll_table_struct *wait)
 {
@@ -231,6 +316,15 @@ void iio_buffer_wakeup_poll(struct iio_dev *indio_dev)
 	}
 }
 
+int iio_pop_from_buffer(struct iio_buffer *buffer, void *data)
+{
+	if (!buffer || !buffer->access || !buffer->access->remove_from)
+		return -EINVAL;
+
+	return buffer->access->remove_from(buffer, data);
+}
+EXPORT_SYMBOL_GPL(iio_pop_from_buffer);
+
 void iio_buffer_init(struct iio_buffer *buffer)
 {
 	INIT_LIST_HEAD(&buffer->demux_list);
@@ -1156,6 +1250,10 @@ int iio_update_buffers(struct iio_dev *indio_dev,
 	if (insert_buffer == remove_buffer)
 		return 0;
 
+	if (insert_buffer &&
+	    (insert_buffer->direction == IIO_BUFFER_DIRECTION_OUT))
+		return -EINVAL;
+
 	mutex_lock(&iio_dev_opaque->info_exist_lock);
 	mutex_lock(&indio_dev->mlock);
 
@@ -1277,6 +1375,22 @@ static ssize_t iio_dma_show_data_available(struct device *dev,
 	return sysfs_emit(buf, "%zu\n", iio_buffer_data_available(buffer));
 }
 
+static ssize_t direction_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer;
+
+	switch (buffer->direction) {
+	case IIO_BUFFER_DIRECTION_IN:
+		return sprintf(buf, "in\n");
+	case IIO_BUFFER_DIRECTION_OUT:
+		return sprintf(buf, "out\n");
+	default:
+		return -EINVAL;
+	}
+}
+
 static DEVICE_ATTR(length, S_IRUGO | S_IWUSR, iio_buffer_read_length,
 		   iio_buffer_write_length);
 static struct device_attribute dev_attr_length_ro = __ATTR(length,
@@ -1289,12 +1403,20 @@ static struct device_attribute dev_attr_watermark_ro = __ATTR(watermark,
 	S_IRUGO, iio_buffer_show_watermark, NULL);
 static DEVICE_ATTR(data_available, S_IRUGO,
 		iio_dma_show_data_available, NULL);
+static DEVICE_ATTR_RO(direction);
 
+/*
+ * When adding new attributes here, put the at the end, at least until
+ * the code that handles the length/length_ro & watermark/watermark_ro
+ * assignments gets cleaned up. Otherwise these can create some weird
+ * duplicate attributes errors under some setups.
+ */
 static struct attribute *iio_buffer_attrs[] = {
 	&dev_attr_length.attr,
 	&dev_attr_enable.attr,
 	&dev_attr_watermark.attr,
 	&dev_attr_data_available.attr,
+	&dev_attr_direction.attr,
 };
 
 #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
@@ -1397,6 +1519,7 @@ static const struct file_operations iio_buffer_chrdev_fileops = {
 	.owner = THIS_MODULE,
 	.llseek = noop_llseek,
 	.read = iio_buffer_read,
+	.write = iio_buffer_write,
 	.poll = iio_buffer_poll,
 	.release = iio_buffer_chrdev_release,
 };
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 2dbb37e09b8c..537a08549a69 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1822,6 +1822,7 @@ static const struct file_operations iio_buffer_fileops = {
 	.owner = THIS_MODULE,
 	.llseek = noop_llseek,
 	.read = iio_buffer_read_outer_addr,
+	.write = iio_buffer_write_outer_addr,
 	.poll = iio_buffer_poll_addr,
 	.unlocked_ioctl = iio_ioctl,
 	.compat_ioctl = compat_ptr_ioctl,
diff --git a/include/linux/iio/buffer.h b/include/linux/iio/buffer.h
index 451379a3984a..418b1307d3f2 100644
--- a/include/linux/iio/buffer.h
+++ b/include/linux/iio/buffer.h
@@ -11,8 +11,15 @@
 
 struct iio_buffer;
 
+enum iio_buffer_direction {
+	IIO_BUFFER_DIRECTION_IN,
+	IIO_BUFFER_DIRECTION_OUT,
+};
+
 int iio_push_to_buffers(struct iio_dev *indio_dev, const void *data);
 
+int iio_pop_from_buffer(struct iio_buffer *buffer, void *data);
+
 /**
  * iio_push_to_buffers_with_timestamp() - push data and timestamp to buffers
  * @indio_dev:		iio_dev structure for device.
diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h
index 245b32918ae1..e2ca8ea23e19 100644
--- a/include/linux/iio/buffer_impl.h
+++ b/include/linux/iio/buffer_impl.h
@@ -7,6 +7,7 @@
 #ifdef CONFIG_IIO_BUFFER
 
 #include <uapi/linux/iio/buffer.h>
+#include <linux/iio/buffer.h>
 
 struct iio_dev;
 struct iio_buffer;
@@ -23,6 +24,10 @@ struct iio_buffer;
  * @read:		try to get a specified number of bytes (must exist)
  * @data_available:	indicates how much data is available for reading from
  *			the buffer.
+ * @remove_from:	remove scan from buffer. Drivers should calls this to
+ *			remove a scan from a buffer.
+ * @write:		try to write a number of bytes
+ * @space_available:	returns the amount of bytes available in a buffer
  * @request_update:	if a parameter change has been marked, update underlying
  *			storage.
  * @set_bytes_per_datum:set number of bytes per datum
@@ -49,6 +54,9 @@ struct iio_buffer_access_funcs {
 	int (*store_to)(struct iio_buffer *buffer, const void *data);
 	int (*read)(struct iio_buffer *buffer, size_t n, char __user *buf);
 	size_t (*data_available)(struct iio_buffer *buffer);
+	int (*remove_from)(struct iio_buffer *buffer, void *data);
+	int (*write)(struct iio_buffer *buffer, size_t n, const char __user *buf);
+	size_t (*space_available)(struct iio_buffer *buffer);
 
 	int (*request_update)(struct iio_buffer *buffer);
 
@@ -80,6 +88,9 @@ struct iio_buffer {
 	/**  @bytes_per_datum: Size of individual datum including timestamp. */
 	size_t bytes_per_datum;
 
+	/* @direction: Direction of the data stream (in/out). */
+	enum iio_buffer_direction direction;
+
 	/**
 	 * @access: Buffer access functions associated with the
 	 * implementation.
-- 
cgit v1.2.3


From c02cd5c19c17698f12b731e898127095f9bc2921 Mon Sep 17 00:00:00 2001
From: Alexandru Ardelean <alexandru.ardelean@analog.com>
Date: Thu, 7 Oct 2021 08:00:32 +0000
Subject: iio: triggered-buffer: extend support to configure output buffers

Now that output (kfifo) buffers are supported, we need to extend the
{devm_}iio_triggered_buffer_setup_ext() parameter list to take a direction
parameter.

This allows us to attach an output triggered buffer to a DAC device.
Unfortunately it's a bit difficult to add another macro to avoid changing 5
drivers where {devm_}iio_triggered_buffer_setup_ext() is used.
Well, it's doable, but may not be worth the trouble vs just updating all
these 5 drivers.

Signed-off-by: Alexandru Ardelean <alexandru.ardelean@analog.com>
Signed-off-by: Mihail Chindris <mihail.chindris@analog.com>
Link: https://lore.kernel.org/r/20211007080035.2531-4-mihail.chindris@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl372.c                         |  1 +
 drivers/iio/accel/bmc150-accel-core.c               |  1 +
 drivers/iio/adc/at91-sama5d2_adc.c                  |  4 ++--
 drivers/iio/buffer/industrialio-triggered-buffer.c  |  8 ++++++--
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c |  5 +++--
 include/linux/iio/triggered_buffer.h                | 11 +++++++++--
 6 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c
index fc9592407717..758952584f8c 100644
--- a/drivers/iio/accel/adxl372.c
+++ b/drivers/iio/accel/adxl372.c
@@ -1214,6 +1214,7 @@ int adxl372_probe(struct device *dev, struct regmap *regmap,
 	ret = devm_iio_triggered_buffer_setup_ext(dev,
 						  indio_dev, NULL,
 						  adxl372_trigger_handler,
+						  IIO_BUFFER_DIRECTION_IN,
 						  &adxl372_buffer_ops,
 						  adxl372_fifo_attributes);
 	if (ret < 0)
diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index e8693a42ad46..63216321cdb5 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -1734,6 +1734,7 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq,
 	ret = iio_triggered_buffer_setup_ext(indio_dev,
 					     &iio_pollfunc_store_time,
 					     bmc150_accel_trigger_handler,
+					     IIO_BUFFER_DIRECTION_IN,
 					     &bmc150_accel_buffer_ops,
 					     fifo_attrs);
 	if (ret < 0) {
diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
index dabe8cdcfd08..4c922ef634f8 100644
--- a/drivers/iio/adc/at91-sama5d2_adc.c
+++ b/drivers/iio/adc/at91-sama5d2_adc.c
@@ -1894,8 +1894,8 @@ static int at91_adc_buffer_and_trigger_init(struct device *dev,
 		fifo_attrs = NULL;
 
 	ret = devm_iio_triggered_buffer_setup_ext(&indio->dev, indio,
-		&iio_pollfunc_store_time,
-		&at91_adc_trigger_handler, &at91_buffer_setup_ops, fifo_attrs);
+		&iio_pollfunc_store_time, &at91_adc_trigger_handler,
+		IIO_BUFFER_DIRECTION_IN, &at91_buffer_setup_ops, fifo_attrs);
 	if (ret < 0) {
 		dev_err(dev, "couldn't initialize the buffer.\n");
 		return ret;
diff --git a/drivers/iio/buffer/industrialio-triggered-buffer.c b/drivers/iio/buffer/industrialio-triggered-buffer.c
index f77c4538141e..8d4fc97d1005 100644
--- a/drivers/iio/buffer/industrialio-triggered-buffer.c
+++ b/drivers/iio/buffer/industrialio-triggered-buffer.c
@@ -19,6 +19,7 @@
  * @indio_dev:		IIO device structure
  * @h:			Function which will be used as pollfunc top half
  * @thread:		Function which will be used as pollfunc bottom half
+ * @direction:		Direction of the data stream (in/out).
  * @setup_ops:		Buffer setup functions to use for this device.
  *			If NULL the default setup functions for triggered
  *			buffers will be used.
@@ -38,6 +39,7 @@
 int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev,
 	irqreturn_t (*h)(int irq, void *p),
 	irqreturn_t (*thread)(int irq, void *p),
+	enum iio_buffer_direction direction,
 	const struct iio_buffer_setup_ops *setup_ops,
 	const struct attribute **buffer_attrs)
 {
@@ -68,6 +70,7 @@ int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev,
 	/* Flag that polled ring buffering is possible */
 	indio_dev->modes |= INDIO_BUFFER_TRIGGERED;
 
+	buffer->direction = direction;
 	buffer->attrs = buffer_attrs;
 
 	ret = iio_device_attach_buffer(indio_dev, buffer);
@@ -105,13 +108,14 @@ int devm_iio_triggered_buffer_setup_ext(struct device *dev,
 					struct iio_dev *indio_dev,
 					irqreturn_t (*h)(int irq, void *p),
 					irqreturn_t (*thread)(int irq, void *p),
+					enum iio_buffer_direction direction,
 					const struct iio_buffer_setup_ops *ops,
 					const struct attribute **buffer_attrs)
 {
 	int ret;
 
-	ret = iio_triggered_buffer_setup_ext(indio_dev, h, thread, ops,
-					     buffer_attrs);
+	ret = iio_triggered_buffer_setup_ext(indio_dev, h, thread, direction,
+					     ops, buffer_attrs);
 	if (ret)
 		return ret;
 
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index a4ec11a3b68a..1151434038d4 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -241,8 +241,9 @@ int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
 		fifo_attrs = NULL;
 
 	ret = iio_triggered_buffer_setup_ext(indio_dev,
-					     &iio_pollfunc_store_time,
-					     NULL, NULL, fifo_attrs);
+					     &iio_pollfunc_store_time, NULL,
+					     IIO_BUFFER_DIRECTION_IN,
+					     NULL, fifo_attrs);
 	if (ret) {
 		dev_err(&indio_dev->dev, "Triggered Buffer Setup Failed\n");
 		return ret;
diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h
index 7f154d1f8739..7490b05fc5b2 100644
--- a/include/linux/iio/triggered_buffer.h
+++ b/include/linux/iio/triggered_buffer.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_IIO_TRIGGERED_BUFFER_H_
 #define _LINUX_IIO_TRIGGERED_BUFFER_H_
 
+#include <linux/iio/buffer.h>
 #include <linux/interrupt.h>
 
 struct attribute;
@@ -11,21 +12,27 @@ struct iio_buffer_setup_ops;
 int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev,
 	irqreturn_t (*h)(int irq, void *p),
 	irqreturn_t (*thread)(int irq, void *p),
+	enum iio_buffer_direction direction,
 	const struct iio_buffer_setup_ops *setup_ops,
 	const struct attribute **buffer_attrs);
 void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev);
 
 #define iio_triggered_buffer_setup(indio_dev, h, thread, setup_ops)		\
-	iio_triggered_buffer_setup_ext((indio_dev), (h), (thread), (setup_ops), NULL)
+	iio_triggered_buffer_setup_ext((indio_dev), (h), (thread),		\
+					IIO_BUFFER_DIRECTION_IN, (setup_ops),	\
+					NULL)
 
 int devm_iio_triggered_buffer_setup_ext(struct device *dev,
 					struct iio_dev *indio_dev,
 					irqreturn_t (*h)(int irq, void *p),
 					irqreturn_t (*thread)(int irq, void *p),
+					enum iio_buffer_direction direction,
 					const struct iio_buffer_setup_ops *ops,
 					const struct attribute **buffer_attrs);
 
 #define devm_iio_triggered_buffer_setup(dev, indio_dev, h, thread, setup_ops)	\
-	devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread), (setup_ops), NULL)
+	devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread),	\
+					    IIO_BUFFER_DIRECTION_IN,		\
+					    (setup_ops), NULL)
 
 #endif
-- 
cgit v1.2.3


From aafa1cafedca7c64d6a43cd21488d28d1d1be884 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Sat, 2 Oct 2021 12:32:30 +0300
Subject: platform_data/mlxreg: Add new type to support modular systems

Add new types for the Nvidia modular systems MSN4800 which could
be equipped with the different types of replaceable line cards.

Add new type to specify the kind of hotplug events for the line cards.
The line card events are generated by the programmable device located
on the main board. This device implements interrupt controller logic.
Line card interrupts are associated with different line cards states
during its initialization: insertion, security signature validation,
power good state, security validation, hardware-firmware
synchronization state, line card PHYs readiness state, firmware
availability for line card ports. Also under some circumstances
hardware can generate thermal shutdown for particular line card.

Add new type specifying the action, which should be performed when
particular hotplug event is received. This action defines in which way
hotplug event should be handled by hotplug driver. There are the next
actions types:
- Connect I2C device with empty 'platform_data' field according to the
  platform topology, if device is configured (for example, power unit
  micro-controller driver, when power unit is connected to power source
  (this is what is currently supported).
- Connect device with 'platform_data' field set according to the
  platform topology. The purpose is to pass 'platform_data' through
  hotplug driver to underlying device (for example line card driver).
- No device is associated with hotplug event - just send "udev" event
 (this is what is currently supported).

Extend structure 'mlxreg_hotplug_device' with hotplug action field.

Extend structure 'mlxreg_core_data' with:
- Registers for line card power and enabling control.
- Slot number field, to indicate at which physical slot replaceable
  line card device is located.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Michael Shych <michaelsh@nvidia.com>
Link: https://lore.kernel.org/r/20211002093238.3771419-2-vadimp@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/platform_data/mlxreg.h | 57 ++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index 101333fe2b8d..49f0e15a10dd 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -24,6 +24,51 @@ enum mlxreg_wdt_type {
 	MLX_WDT_TYPE3,
 };
 
+/**
+ * enum mlxreg_hotplug_kind - kind of hotplug entry
+ *
+ * @MLXREG_HOTPLUG_DEVICE_NA: do not care;
+ * @MLXREG_HOTPLUG_LC_PRESENT: entry for line card presence in/out events;
+ * @MLXREG_HOTPLUG_LC_VERIFIED: entry for line card verification status events
+ *				coming after line card security signature validation;
+ * @MLXREG_HOTPLUG_LC_POWERED: entry for line card power on/off events;
+ * @MLXREG_HOTPLUG_LC_SYNCED: entry for line card synchronization events, coming
+ *			      after hardware-firmware synchronization handshake;
+ * @MLXREG_HOTPLUG_LC_READY: entry for line card ready events, indicating line card
+			     PHYs ready / unready state;
+ * @MLXREG_HOTPLUG_LC_ACTIVE: entry for line card active events, indicating firmware
+ *			      availability / unavailability for the ports on line card;
+ * @MLXREG_HOTPLUG_LC_THERMAL: entry for line card thermal shutdown events, positive
+ *			       event indicates that system should power off the line
+ *			       card for which this event has been received;
+ */
+enum mlxreg_hotplug_kind {
+	MLXREG_HOTPLUG_DEVICE_NA = 0,
+	MLXREG_HOTPLUG_LC_PRESENT = 1,
+	MLXREG_HOTPLUG_LC_VERIFIED = 2,
+	MLXREG_HOTPLUG_LC_POWERED = 3,
+	MLXREG_HOTPLUG_LC_SYNCED = 4,
+	MLXREG_HOTPLUG_LC_READY = 5,
+	MLXREG_HOTPLUG_LC_ACTIVE = 6,
+	MLXREG_HOTPLUG_LC_THERMAL = 7,
+};
+
+/**
+ * enum mlxreg_hotplug_device_action - hotplug device action required for
+ *				       driver's connectivity
+ *
+ * @MLXREG_HOTPLUG_DEVICE_DEFAULT_ACTION: probe device for 'on' event, remove
+ *					  for 'off' event;
+ * @MLXREG_HOTPLUG_DEVICE_PLATFORM_ACTION: probe platform device for 'on'
+ *					   event, remove for 'off' event;
+ * @MLXREG_HOTPLUG_DEVICE_NO_ACTION: no connectivity action is required;
+ */
+enum mlxreg_hotplug_device_action {
+	MLXREG_HOTPLUG_DEVICE_DEFAULT_ACTION = 0,
+	MLXREG_HOTPLUG_DEVICE_PLATFORM_ACTION = 1,
+	MLXREG_HOTPLUG_DEVICE_NO_ACTION = 2,
+};
+
 /**
  * struct mlxreg_hotplug_device - I2C device data:
  *
@@ -31,6 +76,7 @@ enum mlxreg_wdt_type {
  * @client: I2C device client;
  * @brdinfo: device board information;
  * @nr: I2C device adapter number, to which device is to be attached;
+ * @action: action to be performed upon event receiving;
  *
  * Structure represents I2C hotplug device static data (board topology) and
  * dynamic data (related kernel objects handles).
@@ -40,6 +86,7 @@ struct mlxreg_hotplug_device {
 	struct i2c_client *client;
 	struct i2c_board_info *brdinfo;
 	int nr;
+	enum mlxreg_hotplug_device_action action;
 };
 
 /**
@@ -51,12 +98,16 @@ struct mlxreg_hotplug_device {
  * @bit: attribute effective bit;
  * @capability: attribute capability register;
  * @reg_prsnt: attribute presence register;
+ * @reg_sync: attribute synch register;
+ * @reg_pwr: attribute power register;
+ * @reg_ena: attribute enable register;
  * @mode: access mode;
  * @np - pointer to node platform associated with attribute;
  * @hpdev - hotplug device data;
  * @health_cntr: dynamic device health indication counter;
  * @attached: true if device has been attached after good health indication;
  * @regnum: number of registers occupied by multi-register attribute;
+ * @slot: slot number, at which device is located;
  */
 struct mlxreg_core_data {
 	char label[MLXREG_CORE_LABEL_MAX_SIZE];
@@ -65,18 +116,23 @@ struct mlxreg_core_data {
 	u32 bit;
 	u32 capability;
 	u32 reg_prsnt;
+	u32 reg_sync;
+	u32 reg_pwr;
+	u32 reg_ena;
 	umode_t	mode;
 	struct device_node *np;
 	struct mlxreg_hotplug_device hpdev;
 	u32 health_cntr;
 	bool attached;
 	u8 regnum;
+	u8 slot;
 };
 
 /**
  * struct mlxreg_core_item - same type components controlled by the driver:
  *
  * @data: component data;
+ * @kind: kind of hotplug attribute;
  * @aggr_mask: group aggregation mask;
  * @reg: group interrupt status register;
  * @mask: group interrupt mask;
@@ -89,6 +145,7 @@ struct mlxreg_core_data {
  */
 struct mlxreg_core_item {
 	struct mlxreg_core_data *data;
+	enum mlxreg_hotplug_kind kind;
 	u32 aggr_mask;
 	u32 reg;
 	u32 mask;
-- 
cgit v1.2.3


From bb1023b6da379985ff888dcd7bc601735d02267a Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Sat, 2 Oct 2021 12:32:32 +0300
Subject: platform/mellanox: mlxreg-hotplug: Extend logic for hotplug devices
 operations

Extend the structure 'mlxreg_hotplug_device" with platform device field
to allow transition of the register map and system interrupt line number
to underlying hotplug devices, sharing the same register map and
same interrupt line with 'mlxreg-hotplug' driver.

Extend logic for hotplug devices creation and removing according to
the action associated with the hotplug device description. Previously
hotplug driver was capable to attach / de-attach upon hotplug events
only I2C devices handled by simple I2C drivers. Now it should be able
to attach also devices handled by the platform drivers.

The motivation is to allow transition of platform data like:
- system interrupt line number, sharing with 'mlxreg-hotplug' to
  underlying hotplug devices.
- shared register map of programmable devices on main board to
  underlying hotplug devices.

Additioanlly the number of 'sysfs' attributes is increased, since
modular system defines more 'sysfs' attributes.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Michael Shych <michaelsh@nvidia.com>
Link: https://lore.kernel.org/r/20211002093238.3771419-4-vadimp@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/mellanox/mlxreg-hotplug.c | 123 +++++++++++++++++++++--------
 include/linux/platform_data/mlxreg.h       |  24 ++++++
 2 files changed, 112 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
index b013445147dd..117bc3f395fd 100644
--- a/drivers/platform/mellanox/mlxreg-hotplug.c
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -28,7 +28,7 @@
 /* ASIC good health mask. */
 #define MLXREG_HOTPLUG_GOOD_HEALTH_MASK	0x02
 
-#define MLXREG_HOTPLUG_ATTRS_MAX	24
+#define MLXREG_HOTPLUG_ATTRS_MAX	128
 #define MLXREG_HOTPLUG_NOT_ASSERT	3
 
 /**
@@ -89,9 +89,20 @@ mlxreg_hotplug_udev_event_send(struct kobject *kobj,
 	return kobject_uevent_env(kobj, KOBJ_CHANGE, mlxreg_hotplug_udev_envp);
 }
 
+static void
+mlxreg_hotplug_pdata_export(void *pdata, void *regmap)
+{
+	struct mlxreg_core_hotplug_platform_data *dev_pdata = pdata;
+
+	/* Export regmap to underlying device. */
+	dev_pdata->regmap = regmap;
+}
+
 static int mlxreg_hotplug_device_create(struct mlxreg_hotplug_priv_data *priv,
-					struct mlxreg_core_data *data)
+					struct mlxreg_core_data *data,
+					enum mlxreg_hotplug_kind kind)
 {
+	struct i2c_board_info *brdinfo = data->hpdev.brdinfo;
 	struct mlxreg_core_hotplug_platform_data *pdata;
 	struct i2c_client *client;
 
@@ -106,46 +117,88 @@ static int mlxreg_hotplug_device_create(struct mlxreg_hotplug_priv_data *priv,
 		return 0;
 
 	pdata = dev_get_platdata(&priv->pdev->dev);
-	data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr +
-					      pdata->shift_nr);
-	if (!data->hpdev.adapter) {
-		dev_err(priv->dev, "Failed to get adapter for bus %d\n",
-			data->hpdev.nr + pdata->shift_nr);
-		return -EFAULT;
-	}
+	switch (data->hpdev.action) {
+	case MLXREG_HOTPLUG_DEVICE_DEFAULT_ACTION:
+		data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr +
+						      pdata->shift_nr);
+		if (!data->hpdev.adapter) {
+			dev_err(priv->dev, "Failed to get adapter for bus %d\n",
+				data->hpdev.nr + pdata->shift_nr);
+			return -EFAULT;
+		}
 
-	client = i2c_new_client_device(data->hpdev.adapter,
-				       data->hpdev.brdinfo);
-	if (IS_ERR(client)) {
-		dev_err(priv->dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
-			data->hpdev.brdinfo->type, data->hpdev.nr +
-			pdata->shift_nr, data->hpdev.brdinfo->addr);
+		/* Export platform data to underlying device. */
+		if (brdinfo->platform_data)
+			mlxreg_hotplug_pdata_export(brdinfo->platform_data, pdata->regmap);
 
-		i2c_put_adapter(data->hpdev.adapter);
-		data->hpdev.adapter = NULL;
-		return PTR_ERR(client);
+		client = i2c_new_client_device(data->hpdev.adapter,
+					       brdinfo);
+		if (IS_ERR(client)) {
+			dev_err(priv->dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
+				brdinfo->type, data->hpdev.nr +
+				pdata->shift_nr, brdinfo->addr);
+
+			i2c_put_adapter(data->hpdev.adapter);
+			data->hpdev.adapter = NULL;
+			return PTR_ERR(client);
+		}
+
+		data->hpdev.client = client;
+		break;
+	case MLXREG_HOTPLUG_DEVICE_PLATFORM_ACTION:
+		/* Export platform data to underlying device. */
+		if (data->hpdev.brdinfo && data->hpdev.brdinfo->platform_data)
+			mlxreg_hotplug_pdata_export(data->hpdev.brdinfo->platform_data,
+						    pdata->regmap);
+		/* Pass parent hotplug device handle to underlying device. */
+		data->notifier = data->hpdev.notifier;
+		data->hpdev.pdev = platform_device_register_resndata(&priv->pdev->dev,
+								     brdinfo->type,
+								     data->hpdev.nr,
+								     NULL, 0, data,
+								     sizeof(*data));
+		if (IS_ERR(data->hpdev.pdev))
+			return PTR_ERR(data->hpdev.pdev);
+
+		break;
+	default:
+		break;
 	}
 
-	data->hpdev.client = client;
+	if (data->hpdev.notifier && data->hpdev.notifier->user_handler)
+		return data->hpdev.notifier->user_handler(data->hpdev.notifier->handle, kind, 1);
 
 	return 0;
 }
 
 static void
 mlxreg_hotplug_device_destroy(struct mlxreg_hotplug_priv_data *priv,
-			      struct mlxreg_core_data *data)
+			      struct mlxreg_core_data *data,
+			      enum mlxreg_hotplug_kind kind)
 {
 	/* Notify user by sending hwmon uevent. */
 	mlxreg_hotplug_udev_event_send(&priv->hwmon->kobj, data, false);
+	if (data->hpdev.notifier && data->hpdev.notifier->user_handler)
+		data->hpdev.notifier->user_handler(data->hpdev.notifier->handle, kind, 0);
+
+	switch (data->hpdev.action) {
+	case MLXREG_HOTPLUG_DEVICE_DEFAULT_ACTION:
+		if (data->hpdev.client) {
+			i2c_unregister_device(data->hpdev.client);
+			data->hpdev.client = NULL;
+		}
 
-	if (data->hpdev.client) {
-		i2c_unregister_device(data->hpdev.client);
-		data->hpdev.client = NULL;
-	}
-
-	if (data->hpdev.adapter) {
-		i2c_put_adapter(data->hpdev.adapter);
-		data->hpdev.adapter = NULL;
+		if (data->hpdev.adapter) {
+			i2c_put_adapter(data->hpdev.adapter);
+			data->hpdev.adapter = NULL;
+		}
+		break;
+	case MLXREG_HOTPLUG_DEVICE_PLATFORM_ACTION:
+		if (data->hpdev.pdev)
+			platform_device_unregister(data->hpdev.pdev);
+		break;
+	default:
+		break;
 	}
 }
 
@@ -317,14 +370,14 @@ mlxreg_hotplug_work_helper(struct mlxreg_hotplug_priv_data *priv,
 		data = item->data + bit;
 		if (regval & BIT(bit)) {
 			if (item->inversed)
-				mlxreg_hotplug_device_destroy(priv, data);
+				mlxreg_hotplug_device_destroy(priv, data, item->kind);
 			else
-				mlxreg_hotplug_device_create(priv, data);
+				mlxreg_hotplug_device_create(priv, data, item->kind);
 		} else {
 			if (item->inversed)
-				mlxreg_hotplug_device_create(priv, data);
+				mlxreg_hotplug_device_create(priv, data, item->kind);
 			else
-				mlxreg_hotplug_device_destroy(priv, data);
+				mlxreg_hotplug_device_destroy(priv, data, item->kind);
 		}
 	}
 
@@ -381,7 +434,7 @@ mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv,
 				 * ASIC is in steady state. Connect associated
 				 * device, if configured.
 				 */
-				mlxreg_hotplug_device_create(priv, data);
+				mlxreg_hotplug_device_create(priv, data, item->kind);
 				data->attached = true;
 			}
 		} else {
@@ -391,7 +444,7 @@ mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv,
 				 * in steady state. Disconnect associated
 				 * device, if it has been connected.
 				 */
-				mlxreg_hotplug_device_destroy(priv, data);
+				mlxreg_hotplug_device_destroy(priv, data, item->kind);
 				data->attached = false;
 				data->health_cntr = 0;
 			}
@@ -630,7 +683,7 @@ static void mlxreg_hotplug_unset_irq(struct mlxreg_hotplug_priv_data *priv)
 		/* Remove all the attached devices in group. */
 		count = item->count;
 		for (j = 0; j < count; j++, data++)
-			mlxreg_hotplug_device_destroy(priv, data);
+			mlxreg_hotplug_device_destroy(priv, data, item->kind);
 	}
 }
 
diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index 49f0e15a10dd..3122d550dc00 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -69,6 +69,19 @@ enum mlxreg_hotplug_device_action {
 	MLXREG_HOTPLUG_DEVICE_NO_ACTION = 2,
 };
 
+/**
+ * struct mlxreg_core_hotplug_notifier - hotplug notifier block:
+ *
+ * @identity: notifier identity name;
+ * @handle: user handle to be passed by user handler function;
+ * @user_handler: user handler function associated with the event;
+ */
+struct mlxreg_core_hotplug_notifier {
+	char identity[MLXREG_CORE_LABEL_MAX_SIZE];
+	void *handle;
+	int (*user_handler)(void *handle, enum mlxreg_hotplug_kind kind, u8 action);
+};
+
 /**
  * struct mlxreg_hotplug_device - I2C device data:
  *
@@ -76,7 +89,11 @@ enum mlxreg_hotplug_device_action {
  * @client: I2C device client;
  * @brdinfo: device board information;
  * @nr: I2C device adapter number, to which device is to be attached;
+ * @pdev: platform device, if device is instantiated as a platform device;
  * @action: action to be performed upon event receiving;
+ * @handle: user handle to be passed by user handler function;
+ * @user_handler: user handler function associated with the event;
+ * @notifier: pointer to event notifier block;
  *
  * Structure represents I2C hotplug device static data (board topology) and
  * dynamic data (related kernel objects handles).
@@ -86,7 +103,11 @@ struct mlxreg_hotplug_device {
 	struct i2c_client *client;
 	struct i2c_board_info *brdinfo;
 	int nr;
+	struct platform_device *pdev;
 	enum mlxreg_hotplug_device_action action;
+	void *handle;
+	int (*user_handler)(void *handle, enum mlxreg_hotplug_kind kind, u8 action);
+	struct mlxreg_core_hotplug_notifier *notifier;
 };
 
 /**
@@ -104,10 +125,12 @@ struct mlxreg_hotplug_device {
  * @mode: access mode;
  * @np - pointer to node platform associated with attribute;
  * @hpdev - hotplug device data;
+ * @notifier: pointer to event notifier block;
  * @health_cntr: dynamic device health indication counter;
  * @attached: true if device has been attached after good health indication;
  * @regnum: number of registers occupied by multi-register attribute;
  * @slot: slot number, at which device is located;
+ * @secured: if set indicates that entry access is secured;
  */
 struct mlxreg_core_data {
 	char label[MLXREG_CORE_LABEL_MAX_SIZE];
@@ -122,6 +145,7 @@ struct mlxreg_core_data {
 	umode_t	mode;
 	struct device_node *np;
 	struct mlxreg_hotplug_device hpdev;
+	struct mlxreg_core_hotplug_notifier *notifier;
 	u32 health_cntr;
 	bool attached;
 	u8 regnum;
-- 
cgit v1.2.3


From 9d93d7877c9158d059028681b66a0c192f85c64d Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Sat, 2 Oct 2021 12:32:35 +0300
Subject: platform_data/mlxreg: Add new field for secured access

Extend structure 'mlxreg_core_data' with the field "secured". The
purpose of this field is to restrict access to some attributes, if
kernel is configured with security options, like:
LOCK_DOWN_KERNEL_FORCE_CONFIDENTIALITY.
Access to some attributes, which for example, allow burning of some
hardware components, like FPGA, CPLD, SPI, etcetera can break the
system. In case user does not want to allow such access, it can disable
it by setting security options.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Michael Shych <michaelsh@nvidia.com>
Link: https://lore.kernel.org/r/20211002093238.3771419-7-vadimp@nvidia.com
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 include/linux/platform_data/mlxreg.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/mlxreg.h b/include/linux/platform_data/mlxreg.h
index 3122d550dc00..40185f9d7c14 100644
--- a/include/linux/platform_data/mlxreg.h
+++ b/include/linux/platform_data/mlxreg.h
@@ -150,6 +150,7 @@ struct mlxreg_core_data {
 	bool attached;
 	u8 regnum;
 	u8 slot;
+	u8 secured;
 };
 
 /**
-- 
cgit v1.2.3


From 54f5b3615f199673ecb23f0a6847fd9c43aaa012 Mon Sep 17 00:00:00 2001
From: Wang Kefeng <wangkefeng.wang@huawei.com>
Date: Mon, 23 Aug 2021 10:41:41 +0100
Subject: ARM: 9121/1: amba: Drop unused functions about APB/AHB devices add

No one use the following functions, kill them.

  amba_aphb_device_add()
  amba_apb_device_add()
  amba_apb_device_add_res()
  amba_ahb_device_add()
  amba_ahb_device_add_res()

Cc: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
---
 drivers/amba/bus.c       | 72 ------------------------------------------------
 include/linux/amba/bus.h | 18 ------------
 2 files changed, 90 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c
index 962041148482..2f2137518be0 100644
--- a/drivers/amba/bus.c
+++ b/drivers/amba/bus.c
@@ -579,78 +579,6 @@ int amba_device_add(struct amba_device *dev, struct resource *parent)
 }
 EXPORT_SYMBOL_GPL(amba_device_add);
 
-static struct amba_device *
-amba_aphb_device_add(struct device *parent, const char *name,
-		     resource_size_t base, size_t size, int irq1, int irq2,
-		     void *pdata, unsigned int periphid, u64 dma_mask,
-		     struct resource *resbase)
-{
-	struct amba_device *dev;
-	int ret;
-
-	dev = amba_device_alloc(name, base, size);
-	if (!dev)
-		return ERR_PTR(-ENOMEM);
-
-	dev->dev.coherent_dma_mask = dma_mask;
-	dev->irq[0] = irq1;
-	dev->irq[1] = irq2;
-	dev->periphid = periphid;
-	dev->dev.platform_data = pdata;
-	dev->dev.parent = parent;
-
-	ret = amba_device_add(dev, resbase);
-	if (ret) {
-		amba_device_put(dev);
-		return ERR_PTR(ret);
-	}
-
-	return dev;
-}
-
-struct amba_device *
-amba_apb_device_add(struct device *parent, const char *name,
-		    resource_size_t base, size_t size, int irq1, int irq2,
-		    void *pdata, unsigned int periphid)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, 0, &iomem_resource);
-}
-EXPORT_SYMBOL_GPL(amba_apb_device_add);
-
-struct amba_device *
-amba_ahb_device_add(struct device *parent, const char *name,
-		    resource_size_t base, size_t size, int irq1, int irq2,
-		    void *pdata, unsigned int periphid)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, ~0ULL, &iomem_resource);
-}
-EXPORT_SYMBOL_GPL(amba_ahb_device_add);
-
-struct amba_device *
-amba_apb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, 0, resbase);
-}
-EXPORT_SYMBOL_GPL(amba_apb_device_add_res);
-
-struct amba_device *
-amba_ahb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase)
-{
-	return amba_aphb_device_add(parent, name, base, size, irq1, irq2, pdata,
-				    periphid, ~0ULL, resbase);
-}
-EXPORT_SYMBOL_GPL(amba_ahb_device_add_res);
-
-
 static void amba_device_initialize(struct amba_device *dev, const char *name)
 {
 	device_initialize(&dev->dev);
diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h
index c68d87b87283..edfcf7a14dcd 100644
--- a/include/linux/amba/bus.h
+++ b/include/linux/amba/bus.h
@@ -122,24 +122,6 @@ struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t);
 void amba_device_put(struct amba_device *);
 int amba_device_add(struct amba_device *, struct resource *);
 int amba_device_register(struct amba_device *, struct resource *);
-struct amba_device *amba_apb_device_add(struct device *parent, const char *name,
-					resource_size_t base, size_t size,
-					int irq1, int irq2, void *pdata,
-					unsigned int periphid);
-struct amba_device *amba_ahb_device_add(struct device *parent, const char *name,
-					resource_size_t base, size_t size,
-					int irq1, int irq2, void *pdata,
-					unsigned int periphid);
-struct amba_device *
-amba_apb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase);
-struct amba_device *
-amba_ahb_device_add_res(struct device *parent, const char *name,
-			resource_size_t base, size_t size, int irq1,
-			int irq2, void *pdata, unsigned int periphid,
-			struct resource *resbase);
 void amba_device_unregister(struct amba_device *);
 struct amba_device *amba_find_device(const char *, struct device *, unsigned int, unsigned int);
 int amba_request_regions(struct amba_device *, const char *);
-- 
cgit v1.2.3


From f5245a5fdf757f50a6c905fc16cceb1a6146ccf5 Mon Sep 17 00:00:00 2001
From: David Lechner <david@lechnology.com>
Date: Sun, 17 Oct 2021 13:55:21 -0500
Subject: counter: drop chrdev_lock

This removes the chrdev_lock from the counter subsystem. This was
intended to prevent opening the chrdev more than once. However, this
doesn't work in practice since userspace can duplicate file descriptors
and pass file descriptors to other processes. Since this protection
can't be relied on, it is best to just remove it.

Suggested-by: Greg KH <gregkh@linuxfoundation.org>
Acked-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Signed-off-by: David Lechner <david@lechnology.com>
Link: https://lore.kernel.org/r/20211017185521.3468640-1-david@lechnology.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/counter/counter-chrdev.c |  6 ------
 drivers/counter/counter-sysfs.c  | 13 +++----------
 include/linux/counter.h          |  7 -------
 3 files changed, 3 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c
index 967c94ae95bb..b747dc81cfc6 100644
--- a/drivers/counter/counter-chrdev.c
+++ b/drivers/counter/counter-chrdev.c
@@ -384,10 +384,6 @@ static int counter_chrdev_open(struct inode *inode, struct file *filp)
 							    typeof(*counter),
 							    chrdev);
 
-	/* Ensure chrdev is not opened more than 1 at a time */
-	if (!atomic_add_unless(&counter->chrdev_lock, 1, 1))
-		return -EBUSY;
-
 	get_device(&counter->dev);
 	filp->private_data = counter;
 
@@ -419,7 +415,6 @@ out_unlock:
 	mutex_unlock(&counter->ops_exist_lock);
 
 	put_device(&counter->dev);
-	atomic_dec(&counter->chrdev_lock);
 
 	return ret;
 }
@@ -445,7 +440,6 @@ int counter_chrdev_add(struct counter_device *const counter)
 	mutex_init(&counter->events_lock);
 
 	/* Initialize character device */
-	atomic_set(&counter->chrdev_lock, 0);
 	cdev_init(&counter->chrdev, &counter_fops);
 
 	/* Allocate Counter events queue */
diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c
index c2fddbb0d442..8c2d7c29ea59 100644
--- a/drivers/counter/counter-sysfs.c
+++ b/drivers/counter/counter-sysfs.c
@@ -796,25 +796,18 @@ static int counter_events_queue_size_write(struct counter_device *counter,
 					   u64 val)
 {
 	DECLARE_KFIFO_PTR(events, struct counter_event);
-	int err = 0;
-
-	/* Ensure chrdev is not opened more than 1 at a time */
-	if (!atomic_add_unless(&counter->chrdev_lock, 1, 1))
-		return -EBUSY;
+	int err;
 
 	/* Allocate new events queue */
 	err = kfifo_alloc(&events, val, GFP_KERNEL);
 	if (err)
-		goto exit_early;
+		return err;
 
 	/* Swap in new events queue */
 	kfifo_free(&counter->events);
 	counter->events.kfifo = events.kfifo;
 
-exit_early:
-	atomic_dec(&counter->chrdev_lock);
-
-	return err;
+	return 0;
 }
 
 static struct counter_comp counter_num_signals_comp =
diff --git a/include/linux/counter.h b/include/linux/counter.h
index 22b14a552b1d..0fd99e255a50 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -297,7 +297,6 @@ struct counter_ops {
  * @events:		queue of detected Counter events
  * @events_wait:	wait queue to allow blocking reads of Counter events
  * @events_lock:	lock to protect Counter events queue read operations
- * @chrdev_lock:	lock to limit chrdev to a single open at a time
  * @ops_exist_lock:	lock to prevent use during removal
  */
 struct counter_device {
@@ -325,12 +324,6 @@ struct counter_device {
 	DECLARE_KFIFO_PTR(events, struct counter_event);
 	wait_queue_head_t events_wait;
 	struct mutex events_lock;
-	/*
-	 * chrdev_lock is locked by counter_chrdev_open() and unlocked by
-	 * counter_chrdev_release(), so a mutex is not possible here because
-	 * chrdev_lock will invariably be held when returning to user space
-	 */
-	atomic_t chrdev_lock;
 	struct mutex ops_exist_lock;
 };
 
-- 
cgit v1.2.3


From 5c67aa59bd8f99d70c81b5e71bd02083056a8486 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 14 Oct 2021 16:26:11 +0300
Subject: mmc: sdhci-pci: Remove dead code (struct sdhci_pci_data et al)

The last user of this struct gone a couple of releases ago.
Besides that there were not so many users of this API for
more than 10 years:

1/ The one is Intel Merrifield, that had been added 2016-08-31
   by the commit 3976b0380b31 ("x86/platform/intel-mid: Enable
   SD card detection on Merrifield") and removed 2021-02-11 by
   the commit 4590d98f5a4f ("sfi: Remove framework for deprecated
   firmware").

2/ The other is Intel Sunrisepoint related, that had been added
   2015-02-06 by the commit e1bfad6d936d ("mmc: sdhci-pci: Add
   support for drive strength selection for SPT") and removed
   2017-03-20 by the commit 51ced59cc02e ("mmc: sdhci-pci: Use
   ACPI DSM to get driver strength for some Intel devices").

Effectively this is a revert of the commit 52c506f0bc72 ("mmc:
sdhci-pci: add platform data").

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20211014132613.27861-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/Makefile          |  1 -
 drivers/mmc/host/sdhci-pci-core.c  | 31 ++++---------------------------
 drivers/mmc/host/sdhci-pci-data.c  |  6 ------
 drivers/mmc/host/sdhci-pci.h       |  1 -
 include/linux/mmc/sdhci-pci-data.h | 18 ------------------
 5 files changed, 4 insertions(+), 53 deletions(-)
 delete mode 100644 drivers/mmc/host/sdhci-pci-data.c
 delete mode 100644 include/linux/mmc/sdhci-pci-data.h

(limited to 'include/linux')

diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 14004cc09aaa..ea36d379bd3c 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_MMC_SDHCI)		+= sdhci.o
 obj-$(CONFIG_MMC_SDHCI_PCI)	+= sdhci-pci.o
 sdhci-pci-y			+= sdhci-pci-core.o sdhci-pci-o2micro.o sdhci-pci-arasan.o \
 				   sdhci-pci-dwc-mshc.o sdhci-pci-gli.o
-obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI))	+= sdhci-pci-data.o
 obj-$(CONFIG_MMC_SDHCI_ACPI)	+= sdhci-acpi.o
 obj-$(CONFIG_MMC_SDHCI_PXAV3)	+= sdhci-pxav3.o
 obj-$(CONFIG_MMC_SDHCI_PXAV2)	+= sdhci-pxav2.o
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index 19e13dfae593..8938c63b1e77 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -17,8 +17,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/device.h>
-#include <linux/mmc/host.h>
-#include <linux/mmc/mmc.h>
 #include <linux/scatterlist.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -26,11 +24,13 @@
 #include <linux/pm_runtime.h>
 #include <linux/pm_qos.h>
 #include <linux/debugfs.h>
-#include <linux/mmc/slot-gpio.h>
-#include <linux/mmc/sdhci-pci-data.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/slot-gpio.h>
+
 #ifdef CONFIG_X86
 #include <asm/iosf_mbi.h>
 #endif
@@ -2131,22 +2131,6 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
 	slot->cd_gpio = -EINVAL;
 	slot->cd_idx = -1;
 
-	/* Retrieve platform data if there is any */
-	if (*sdhci_pci_get_data)
-		slot->data = sdhci_pci_get_data(pdev, slotno);
-
-	if (slot->data) {
-		if (slot->data->setup) {
-			ret = slot->data->setup(slot->data);
-			if (ret) {
-				dev_err(&pdev->dev, "platform setup failed\n");
-				goto free;
-			}
-		}
-		slot->rst_n_gpio = slot->data->rst_n_gpio;
-		slot->cd_gpio = slot->data->cd_gpio;
-	}
-
 	host->hw_name = "PCI";
 	host->ops = chip->fixes && chip->fixes->ops ?
 		    chip->fixes->ops :
@@ -2233,10 +2217,6 @@ remove:
 		chip->fixes->remove_slot(slot, 0);
 
 cleanup:
-	if (slot->data && slot->data->cleanup)
-		slot->data->cleanup(slot->data);
-
-free:
 	sdhci_free_host(host);
 
 	return ERR_PTR(ret);
@@ -2259,9 +2239,6 @@ static void sdhci_pci_remove_slot(struct sdhci_pci_slot *slot)
 	if (slot->chip->fixes && slot->chip->fixes->remove_slot)
 		slot->chip->fixes->remove_slot(slot, dead);
 
-	if (slot->data && slot->data->cleanup)
-		slot->data->cleanup(slot->data);
-
 	sdhci_free_host(slot->host);
 }
 
diff --git a/drivers/mmc/host/sdhci-pci-data.c b/drivers/mmc/host/sdhci-pci-data.c
deleted file mode 100644
index 18638fb363d8..000000000000
--- a/drivers/mmc/host/sdhci-pci-data.c
+++ /dev/null
@@ -1,6 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/module.h>
-#include <linux/mmc/sdhci-pci-data.h>
-
-struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno);
-EXPORT_SYMBOL_GPL(sdhci_pci_get_data);
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 8f90c4163bb5..15b36cd47860 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -156,7 +156,6 @@ struct sdhci_pci_fixes {
 struct sdhci_pci_slot {
 	struct sdhci_pci_chip	*chip;
 	struct sdhci_host	*host;
-	struct sdhci_pci_data	*data;
 
 	int			rst_n_gpio;
 	int			cd_gpio;
diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h
deleted file mode 100644
index 1d42872d22f3..000000000000
--- a/include/linux/mmc/sdhci-pci-data.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LINUX_MMC_SDHCI_PCI_DATA_H
-#define LINUX_MMC_SDHCI_PCI_DATA_H
-
-struct pci_dev;
-
-struct sdhci_pci_data {
-	struct pci_dev	*pdev;
-	int		slotno;
-	int		rst_n_gpio; /* Set to -EINVAL if unused */
-	int		cd_gpio;    /* Set to -EINVAL if unused */
-	int		(*setup)(struct sdhci_pci_data *data);
-	void		(*cleanup)(struct sdhci_pci_data *data);
-};
-
-extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev,
-				int slotno);
-#endif
-- 
cgit v1.2.3


From cf6a8b1b24d675afc35a01cccd081160014a0125 Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Tue, 12 Oct 2021 13:26:30 +0300
Subject: RDMA/mlx5: Remove iova from struct mlx5_core_mkey

iova is already stored in ibmr->iova, no need to store it here.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/devx.c            |  1 -
 drivers/infiniband/hw/mlx5/mr.c              | 16 ++++++++--------
 drivers/infiniband/hw/mlx5/odp.c             |  8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/mr.c |  1 -
 drivers/vdpa/mlx5/core/resources.c           |  1 -
 include/linux/mlx5/driver.h                  |  1 -
 6 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index e95967aefe78..00ad24044c3a 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1303,7 +1303,6 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
 	mkey->key = mlx5_idx_to_mkey(
 			MLX5_GET(create_mkey_out, out, mkey_index)) | key;
 	mkey->type = MLX5_MKEY_INDIRECT_DEVX;
-	mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
 	mkey->size = MLX5_GET64(mkc, mkc, len);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 3be36ebbf67a..6815500cfdaa 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -911,12 +911,13 @@ static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
 }
 
 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
-			  u64 length, int access_flags)
+			  u64 length, int access_flags, u64 iova)
 {
 	mr->ibmr.lkey = mr->mmkey.key;
 	mr->ibmr.rkey = mr->mmkey.key;
 	mr->ibmr.length = length;
 	mr->ibmr.device = &dev->ib_dev;
+	mr->ibmr.iova = iova;
 	mr->access_flags = access_flags;
 }
 
@@ -974,11 +975,10 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 
 	mr->ibmr.pd = pd;
 	mr->umem = umem;
-	mr->mmkey.iova = iova;
 	mr->mmkey.size = umem->length;
 	mr->mmkey.pd = to_mpd(pd)->pdn;
 	mr->page_shift = order_base_2(page_size);
-	set_mr_fields(dev, mr, umem->length, access_flags);
+	set_mr_fields(dev, mr, umem->length, access_flags, iova);
 
 	return mr;
 }
@@ -1088,7 +1088,7 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
 	wr->pd = mr->ibmr.pd;
 	wr->mkey = mr->mmkey.key;
 	wr->length = mr->mmkey.size;
-	wr->virt_addr = mr->mmkey.iova;
+	wr->virt_addr = mr->ibmr.iova;
 	wr->access_flags = mr->access_flags;
 	wr->page_shift = mr->page_shift;
 	wr->xlt_size = sg->length;
@@ -1341,7 +1341,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
 	mr->mmkey.type = MLX5_MKEY_MR;
 	mr->desc_size = sizeof(struct mlx5_mtt);
 	mr->umem = umem;
-	set_mr_fields(dev, mr, umem->length, access_flags);
+	set_mr_fields(dev, mr, umem->length, access_flags, iova);
 	kvfree(in);
 
 	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
@@ -1388,7 +1388,7 @@ static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
 
 	kfree(in);
 
-	set_mr_fields(dev, mr, length, acc);
+	set_mr_fields(dev, mr, length, acc, start_addr);
 
 	return &mr->ibmr;
 
@@ -1763,7 +1763,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 	}
 
 	mr->ibmr.length = new_umem->length;
-	mr->mmkey.iova = iova;
+	mr->ibmr.iova = iova;
 	mr->mmkey.size = new_umem->length;
 	mr->page_shift = order_base_2(page_size);
 	mr->umem = new_umem;
@@ -1834,7 +1834,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
 		mr->umem = NULL;
 		atomic_sub(ib_umem_num_pages(umem), &dev->mdev->priv.reg_pages);
 
-		return create_real_mr(new_pd, umem, mr->mmkey.iova,
+		return create_real_mr(new_pd, umem, mr->ibmr.iova,
 				      new_access_flags);
 	}
 
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d0d98e584ebc..d119ba3101a3 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -430,7 +430,7 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
 	mr->umem = &odp->umem;
 	mr->ibmr.lkey = mr->mmkey.key;
 	mr->ibmr.rkey = mr->mmkey.key;
-	mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
+	mr->ibmr.iova = idx * MLX5_IMR_MTT_SIZE;
 	mr->parent = imr;
 	odp->private = mr;
 
@@ -500,7 +500,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
 	}
 
 	imr->ibmr.pd = &pd->ibpd;
-	imr->mmkey.iova = 0;
+	imr->ibmr.iova = 0;
 	imr->umem = &umem_odp->umem;
 	imr->ibmr.lkey = imr->mmkey.key;
 	imr->ibmr.rkey = imr->mmkey.key;
@@ -738,7 +738,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 {
 	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
 
-	if (unlikely(io_virt < mr->mmkey.iova))
+	if (unlikely(io_virt < mr->ibmr.iova))
 		return -EFAULT;
 
 	if (mr->umem->is_dmabuf)
@@ -747,7 +747,7 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
 	if (!odp->is_implicit_odp) {
 		u64 user_va;
 
-		if (check_add_overflow(io_virt - mr->mmkey.iova,
+		if (check_add_overflow(io_virt - mr->ibmr.iova,
 				       (u64)odp->umem.address, &user_va))
 			return -EFAULT;
 		if (unlikely(user_va >= ib_umem_end(odp) ||
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 174f71ed5280..d239d559994f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -52,7 +52,6 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-	mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
 	mkey->size = MLX5_GET64(mkc, mkc, len);
 	mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 15e266d0e27a..14d4314cdc29 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -215,7 +215,6 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-	mkey->iova = MLX5_GET64(mkc, mkc, start_addr);
 	mkey->size = MLX5_GET64(mkc, mkc, len);
 	mkey->key |= mlx5_idx_to_mkey(mkey_index);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373..669904f9986e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -364,7 +364,6 @@ enum {
 };
 
 struct mlx5_core_mkey {
-	u64			iova;
 	u64			size;
 	u32			key;
 	u32			pd;
-- 
cgit v1.2.3


From 062fd731e51ee29ba745b2fd1c7ac87dd460d4ca Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Tue, 12 Oct 2021 13:26:31 +0300
Subject: RDMA/mlx5: Remove size from struct mlx5_core_mkey

mkey->size is already stored in ibmr->length, no need to store it here.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/devx.c            | 1 -
 drivers/infiniband/hw/mlx5/mr.c              | 5 ++---
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 1 -
 drivers/vdpa/mlx5/core/resources.c           | 1 -
 include/linux/mlx5/driver.h                  | 1 -
 5 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 00ad24044c3a..8f6e1350cd37 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1303,7 +1303,6 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
 	mkey->key = mlx5_idx_to_mkey(
 			MLX5_GET(create_mkey_out, out, mkey_index)) | key;
 	mkey->type = MLX5_MKEY_INDIRECT_DEVX;
-	mkey->size = MLX5_GET64(mkc, mkc, len);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
 	init_waitqueue_head(&mkey->wait);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 6815500cfdaa..1b1367c87a6b 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -975,7 +975,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 
 	mr->ibmr.pd = pd;
 	mr->umem = umem;
-	mr->mmkey.size = umem->length;
 	mr->mmkey.pd = to_mpd(pd)->pdn;
 	mr->page_shift = order_base_2(page_size);
 	set_mr_fields(dev, mr, umem->length, access_flags, iova);
@@ -1087,7 +1086,7 @@ static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
 	wr->wr.opcode = MLX5_IB_WR_UMR;
 	wr->pd = mr->ibmr.pd;
 	wr->mkey = mr->mmkey.key;
-	wr->length = mr->mmkey.size;
+	wr->length = mr->ibmr.length;
 	wr->virt_addr = mr->ibmr.iova;
 	wr->access_flags = mr->access_flags;
 	wr->page_shift = mr->page_shift;
@@ -1764,7 +1763,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 
 	mr->ibmr.length = new_umem->length;
 	mr->ibmr.iova = iova;
-	mr->mmkey.size = new_umem->length;
+	mr->ibmr.length = new_umem->length;
 	mr->page_shift = order_base_2(page_size);
 	mr->umem = new_umem;
 	err = mlx5_ib_update_mr_pas(mr, upd_flags);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index d239d559994f..b5dd44944265 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -52,7 +52,6 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-	mkey->size = MLX5_GET64(mkc, mkc, len);
 	mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	init_waitqueue_head(&mkey->wait);
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 14d4314cdc29..d3d8b8b4e377 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -215,7 +215,6 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-	mkey->size = MLX5_GET64(mkc, mkc, len);
 	mkey->key |= mlx5_idx_to_mkey(mkey_index);
 	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	return 0;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 669904f9986e..ff1e991314e2 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -364,7 +364,6 @@ enum {
 };
 
 struct mlx5_core_mkey {
-	u64			size;
 	u32			key;
 	u32			pd;
 	u32			type;
-- 
cgit v1.2.3


From c64674168b6a2f293e92caf33c917ccf10886801 Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Tue, 12 Oct 2021 13:26:32 +0300
Subject: RDMA/mlx5: Remove pd from struct mlx5_core_mkey

There is no read of mkey->pd, only write. Remove it.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/devx.c            | 1 -
 drivers/infiniband/hw/mlx5/mr.c              | 3 ---
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 3 ---
 drivers/vdpa/mlx5/core/resources.c           | 3 ---
 include/linux/mlx5/driver.h                  | 1 -
 5 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 8f6e1350cd37..5322d787c094 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1303,7 +1303,6 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
 	mkey->key = mlx5_idx_to_mkey(
 			MLX5_GET(create_mkey_out, out, mkey_index)) | key;
 	mkey->type = MLX5_MKEY_INDIRECT_DEVX;
-	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size);
 	init_waitqueue_head(&mkey->wait);
 
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 1b1367c87a6b..9d7f1cadaa76 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -975,7 +975,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 
 	mr->ibmr.pd = pd;
 	mr->umem = umem;
-	mr->mmkey.pd = to_mpd(pd)->pdn;
 	mr->page_shift = order_base_2(page_size);
 	set_mr_fields(dev, mr, umem->length, access_flags, iova);
 
@@ -1708,7 +1707,6 @@ static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 		return err;
 
 	mr->access_flags = access_flags;
-	mr->mmkey.pd = to_mpd(pd)->pdn;
 	return 0;
 }
 
@@ -1753,7 +1751,6 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
 
 	if (flags & IB_MR_REREG_PD) {
 		mr->ibmr.pd = pd;
-		mr->mmkey.pd = to_mpd(pd)->pdn;
 		upd_flags |= MLX5_IB_UPD_XLT_PD;
 	}
 	if (flags & IB_MR_REREG_ACCESS) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index b5dd44944265..6e99fd166f98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -41,7 +41,6 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 {
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
 	u32 mkey_index;
-	void *mkc;
 	int err;
 
 	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
@@ -50,10 +49,8 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 	if (err)
 		return err;
 
-	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
 	mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
-	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	init_waitqueue_head(&mkey->wait);
 
 	mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key);
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index d3d8b8b4e377..72b2d80e75b0 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -203,7 +203,6 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk
 {
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
 	u32 mkey_index;
-	void *mkc;
 	int err;
 
 	MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY);
@@ -213,10 +212,8 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk
 	if (err)
 		return err;
 
-	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
 	mkey->key |= mlx5_idx_to_mkey(mkey_index);
-	mkey->pd = MLX5_GET(mkc, mkc, pd);
 	return 0;
 }
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ff1e991314e2..f0ce7d4dc4ff 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -365,7 +365,6 @@ enum {
 
 struct mlx5_core_mkey {
 	u32			key;
-	u32			pd;
 	u32			type;
 	struct wait_queue_head wait;
 	refcount_t usecount;
-- 
cgit v1.2.3


From 83fec3f12a5904b62330fd1a89af6d892afc387e Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Tue, 12 Oct 2021 13:26:33 +0300
Subject: RDMA/mlx5: Replace struct mlx5_core_mkey by u32 key

In mlx5_core and vdpa there is no use of mlx5_core_mkey members except
for the key itself.

As preparation for moving mlx5_core_mkey to mlx5_ib, the occurrences of
struct mlx5_core_mkey in all modules except for mlx5_ib are replaced by
a u32 key.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/mr.c                    | 21 ++++++++++++++-------
 drivers/infiniband/hw/mlx5/odp.c                   |  2 +-
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.c   |  6 +++---
 .../ethernet/mellanox/mlx5/core/diag/fw_tracer.h   |  2 +-
 .../ethernet/mellanox/mlx5/core/diag/rsc_dump.c    | 10 +++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/trap.c  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c    |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 13 ++++++-------
 .../net/ethernet/mellanox/mlx5/core/fpga/conn.c    | 10 +++++-----
 .../net/ethernet/mellanox/mlx5/core/fpga/core.h    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/mr.c       | 22 ++++++++++------------
 .../mellanox/mlx5/core/steering/dr_icm_pool.c      | 10 +++++-----
 .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 11 +++++------
 .../mellanox/mlx5/core/steering/dr_types.h         |  2 +-
 drivers/vdpa/mlx5/core/mlx5_vdpa.h                 |  8 ++++----
 drivers/vdpa/mlx5/core/mr.c                        |  8 ++++----
 drivers/vdpa/mlx5/core/resources.c                 |  8 ++++----
 drivers/vdpa/mlx5/net/mlx5_vnet.c                  |  2 +-
 include/linux/mlx5/driver.h                        | 14 ++++++--------
 21 files changed, 82 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9d7f1cadaa76..e2f020472ae2 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -104,8 +104,14 @@ static int
 mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
 		    u32 *in, int inlen)
 {
+	int ret;
+
 	assign_mkey_variant(dev, mkey, in);
-	return mlx5_core_create_mkey(dev->mdev, mkey, in, inlen);
+	ret = mlx5_core_create_mkey(dev->mdev, &mkey->key, in, inlen);
+	if (!ret)
+		init_waitqueue_head(&mkey->wait);
+
+	return ret;
 }
 
 static int
@@ -133,7 +139,7 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
 
-	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+	return mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
 }
 
 static void create_mkey_callback(int status, struct mlx5_async_work *context)
@@ -260,10 +266,11 @@ static struct mlx5_ib_mr *create_cache_mr(struct mlx5_cache_ent *ent)
 		goto free_in;
 	}
 
-	err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey, in, inlen);
+	err = mlx5_core_create_mkey(ent->dev->mdev, &mr->mmkey.key, in, inlen);
 	if (err)
 		goto free_mr;
 
+	init_waitqueue_head(&mr->mmkey.wait);
 	mr->mmkey.type = MLX5_MKEY_MR;
 	WRITE_ONCE(ent->dev->cache.last_add, jiffies);
 	spin_lock_irq(&ent->lock);
@@ -290,7 +297,7 @@ static void remove_cache_mr_locked(struct mlx5_cache_ent *ent)
 	ent->available_mrs--;
 	ent->total_mrs--;
 	spin_unlock_irq(&ent->lock);
-	mlx5_core_destroy_mkey(ent->dev->mdev, &mr->mmkey);
+	mlx5_core_destroy_mkey(ent->dev->mdev, mr->mmkey.key);
 	kfree(mr);
 	spin_lock_irq(&ent->lock);
 }
@@ -658,7 +665,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
 		ent->available_mrs--;
 		ent->total_mrs--;
 		spin_unlock_irq(&ent->lock);
-		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
+		mlx5_core_destroy_mkey(dev->mdev, mr->mmkey.key);
 	}
 
 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -2326,7 +2333,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 	return 0;
 
 free_mkey:
-	mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
+	mlx5_core_destroy_mkey(dev->mdev, mw->mmkey.key);
 free:
 	kfree(in);
 	return err;
@@ -2345,7 +2352,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw)
 		 */
 		mlx5r_deref_wait_odp_mkey(&mmw->mmkey);
 
-	return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
+	return mlx5_core_destroy_mkey(dev->mdev, mmw->mmkey.key);
 }
 
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index d119ba3101a3..a654367af056 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -909,7 +909,7 @@ next_mr:
 		pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
 						       bsf0_klm0_pas_mtt0_1);
 
-		ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
+		ret = mlx5_core_query_mkey(dev->mdev, mmkey->key, out, outlen);
 		if (ret)
 			goto end;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index f9cf9fb31547..02db3148240a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -745,7 +745,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer)
 	MLX5_SET(mtrc_conf, in, trace_mode, TRACE_TO_MEMORY);
 	MLX5_SET(mtrc_conf, in, log_trace_buffer_size,
 		 ilog2(TRACER_BUFFER_PAGE_NUM));
-	MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey.key);
+	MLX5_SET(mtrc_conf, in, trace_mkey, tracer->buff.mkey);
 
 	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
 				   MLX5_REG_MTRC_CONF, 0, 1);
@@ -1028,7 +1028,7 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer)
 
 err_notifier_unregister:
 	mlx5_eq_notifier_unregister(dev, &tracer->nb);
-	mlx5_core_destroy_mkey(dev, &tracer->buff.mkey);
+	mlx5_core_destroy_mkey(dev, tracer->buff.mkey);
 err_dealloc_pd:
 	mlx5_core_dealloc_pd(dev, tracer->buff.pdn);
 err_cancel_work:
@@ -1051,7 +1051,7 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer)
 	if (tracer->owner)
 		mlx5_fw_tracer_ownership_release(tracer);
 
-	mlx5_core_destroy_mkey(tracer->dev, &tracer->buff.mkey);
+	mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey);
 	mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
index 97252a85d65e..4762b55b0b0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
@@ -89,7 +89,7 @@ struct mlx5_fw_tracer {
 		void *log_buf;
 		dma_addr_t dma;
 		u32 size;
-		struct mlx5_core_mkey mkey;
+		u32 mkey;
 		u32 consumer_index;
 	} buff;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
index ed4fb79b4db7..538adab6878b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c
@@ -30,7 +30,7 @@ static const char *const mlx5_rsc_sgmt_name[] = {
 
 struct mlx5_rsc_dump {
 	u32 pdn;
-	struct mlx5_core_mkey mkey;
+	u32 mkey;
 	u16 fw_segment_type[MLX5_SGMT_TYPE_NUM];
 };
 
@@ -89,7 +89,7 @@ static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump
 		return -ENOMEM;
 
 	in_seq_num = MLX5_GET(resource_dump, cmd->cmd, seq_num);
-	MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey.key);
+	MLX5_SET(resource_dump, cmd->cmd, mkey, rsc_dump->mkey);
 	MLX5_SET64(resource_dump, cmd->cmd, address, dma);
 
 	err = mlx5_core_access_reg(dev, cmd->cmd, sizeof(cmd->cmd), cmd->cmd,
@@ -202,7 +202,7 @@ free_page:
 }
 
 static int mlx5_rsc_dump_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-				     struct mlx5_core_mkey *mkey)
+				     u32 *mkey)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	void *mkc;
@@ -276,7 +276,7 @@ int mlx5_rsc_dump_init(struct mlx5_core_dev *dev)
 	return err;
 
 destroy_mkey:
-	mlx5_core_destroy_mkey(dev, &rsc_dump->mkey);
+	mlx5_core_destroy_mkey(dev, rsc_dump->mkey);
 free_pd:
 	mlx5_core_dealloc_pd(dev, rsc_dump->pdn);
 	return err;
@@ -287,6 +287,6 @@ void mlx5_rsc_dump_cleanup(struct mlx5_core_dev *dev)
 	if (IS_ERR_OR_NULL(dev->rsc_dump))
 		return;
 
-	mlx5_core_destroy_mkey(dev, &dev->rsc_dump->mkey);
+	mlx5_core_destroy_mkey(dev, dev->rsc_dump->mkey);
 	mlx5_core_dealloc_pd(dev, dev->rsc_dump->pdn);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 03a7a4ce5cd5..7761daa25f63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -665,7 +665,7 @@ struct mlx5e_rq {
 	u8                     wq_type;
 	u32                    rqn;
 	struct mlx5_core_dev  *mdev;
-	struct mlx5_core_mkey  umr_mkey;
+	u32  umr_mkey;
 	struct mlx5e_dma_info  wqe_overflow;
 
 	/* XDP read-mostly */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
index 3a86f66d1295..18d542b1c5cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
@@ -682,7 +682,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 	c->tstamp   = &priv->tstamp;
 	c->pdev     = mlx5_core_dma_dev(priv->mdev);
 	c->netdev   = priv->netdev;
-	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
+	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
 	c->num_tc   = mlx5e_get_dcb_num_tc(params);
 	c->stats    = &priv->ptp_stats.ch;
 	c->lag_port = lag_port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
index d54607a42740..a55b066746cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c
@@ -137,7 +137,7 @@ static struct mlx5e_trap *mlx5e_open_trap(struct mlx5e_priv *priv)
 	t->tstamp   = &priv->tstamp;
 	t->pdev     = mlx5_core_dma_dev(priv->mdev);
 	t->netdev   = priv->netdev;
-	t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
+	t->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
 	t->stats    = &priv->trap_stats.ch;
 
 	netif_napi_add(netdev, &t->napi, mlx5e_trap_napi_poll, 64);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 84eb7201c142..c0f409c195bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -47,7 +47,7 @@ void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
 }
 
 static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-			     struct mlx5_core_mkey *mkey)
+			     u32 *mkey)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	void *mkc;
@@ -108,7 +108,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
 	return 0;
 
 err_destroy_mkey:
-	mlx5_core_destroy_mkey(mdev, &res->mkey);
+	mlx5_core_destroy_mkey(mdev, res->mkey);
 err_dealloc_transport_domain:
 	mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
 err_dealloc_pd:
@@ -121,7 +121,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 	struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs;
 
 	mlx5_free_bfreg(mdev, &res->bfreg);
-	mlx5_core_destroy_mkey(mdev, &res->mkey);
+	mlx5_core_destroy_mkey(mdev, res->mkey);
 	mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
 	mlx5_core_dealloc_pd(mdev, res->pdn);
 	memset(res, 0, sizeof(*res));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 09c8b71b186c..63076d0e8b63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -234,8 +234,7 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
 }
 
 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
-				 u64 npages, u8 page_shift,
-				 struct mlx5_core_mkey *umr_mkey,
+				 u64 npages, u8 page_shift, u32 *umr_mkey,
 				 dma_addr_t filler_addr)
 {
 	struct mlx5_mtt *mtt;
@@ -455,7 +454,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
 		err = mlx5e_create_rq_umr_mkey(mdev, rq);
 		if (err)
 			goto err_rq_drop_page;
-		rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+		rq->mkey_be = cpu_to_be32(rq->umr_mkey);
 
 		err = mlx5e_rq_alloc_mpwqe_info(rq, node);
 		if (err)
@@ -487,7 +486,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
 		if (err)
 			goto err_rq_frags;
 
-		rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
+		rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
 	}
 
 	if (xsk) {
@@ -574,7 +573,7 @@ err_free_by_rq_type:
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		kvfree(rq->mpwqe.info);
 err_rq_mkey:
-		mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+		mlx5_core_destroy_mkey(mdev, rq->umr_mkey);
 err_rq_drop_page:
 		mlx5e_free_mpwqe_rq_drop_page(rq);
 		break;
@@ -607,7 +606,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		kvfree(rq->mpwqe.info);
-		mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+		mlx5_core_destroy_mkey(rq->mdev, rq->umr_mkey);
 		mlx5e_free_mpwqe_rq_drop_page(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
@@ -1991,7 +1990,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	c->cpu      = cpu;
 	c->pdev     = mlx5_core_dma_dev(priv->mdev);
 	c->netdev   = priv->netdev;
-	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
+	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
 	c->num_tc   = mlx5e_get_dcb_num_tc(params);
 	c->xdp      = !!params->xdp_prog;
 	c->stats    = &priv->channel_stats[ix].ch;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 306279b7f9e7..12abe991583a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -115,7 +115,7 @@ static int mlx5_fpga_conn_post_recv(struct mlx5_fpga_conn *conn,
 	ix = conn->qp.rq.pc & (conn->qp.rq.size - 1);
 	data = mlx5_wq_cyc_get_wqe(&conn->qp.wq.rq, ix);
 	data->byte_count = cpu_to_be32(buf->sg[0].size);
-	data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+	data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
 	data->addr = cpu_to_be64(buf->sg[0].dma_addr);
 
 	conn->qp.rq.pc++;
@@ -155,7 +155,7 @@ static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
 		if (!buf->sg[sgi].data)
 			break;
 		data->byte_count = cpu_to_be32(buf->sg[sgi].size);
-		data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey.key);
+		data->lkey = cpu_to_be32(conn->fdev->conn_res.mkey);
 		data->addr = cpu_to_be64(buf->sg[sgi].dma_addr);
 		data++;
 		size++;
@@ -221,7 +221,7 @@ static int mlx5_fpga_conn_post_recv_buf(struct mlx5_fpga_conn *conn)
 }
 
 static int mlx5_fpga_conn_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-				      struct mlx5_core_mkey *mkey)
+				      u32 *mkey)
 {
 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	void *mkc;
@@ -978,7 +978,7 @@ int mlx5_fpga_conn_device_init(struct mlx5_fpga_device *fdev)
 		mlx5_fpga_err(fdev, "create mkey failed, %d\n", err);
 		goto err_dealloc_pd;
 	}
-	mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey.key);
+	mlx5_fpga_dbg(fdev, "Created mkey 0x%x\n", fdev->conn_res.mkey);
 
 	return 0;
 
@@ -994,7 +994,7 @@ out:
 
 void mlx5_fpga_conn_device_cleanup(struct mlx5_fpga_device *fdev)
 {
-	mlx5_core_destroy_mkey(fdev->mdev, &fdev->conn_res.mkey);
+	mlx5_core_destroy_mkey(fdev->mdev, fdev->conn_res.mkey);
 	mlx5_core_dealloc_pd(fdev->mdev, fdev->conn_res.pdn);
 	mlx5_put_uars_page(fdev->mdev, fdev->conn_res.uar);
 	mlx5_nic_vport_disable_roce(fdev->mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
index 52c9dee91ea4..2a984e82ae16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/core.h
@@ -54,7 +54,7 @@ struct mlx5_fpga_device {
 	/* QP Connection resources */
 	struct {
 		u32 pdn;
-		struct mlx5_core_mkey mkey;
+		u32 mkey;
 		struct mlx5_uars_page *uar;
 	} conn_res;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 6e99fd166f98..f099a087400e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -35,9 +35,8 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_core.h"
 
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-			  struct mlx5_core_mkey *mkey,
-			  u32 *in, int inlen)
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
+			  int inlen)
 {
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
 	u32 mkey_index;
@@ -50,33 +49,32 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 		return err;
 
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-	mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index);
-	init_waitqueue_head(&mkey->wait);
+	*mkey = MLX5_GET(create_mkey_in, in, memory_key_mkey_entry.mkey_7_0) |
+		mlx5_idx_to_mkey(mkey_index);
 
-	mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, mkey->key);
+	mlx5_core_dbg(dev, "out 0x%x, mkey 0x%x\n", mkey_index, *mkey);
 	return 0;
 }
 EXPORT_SYMBOL(mlx5_core_create_mkey);
 
-int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
-			   struct mlx5_core_mkey *mkey)
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
 
 	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
-	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
 	return mlx5_cmd_exec_in(dev, destroy_mkey, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_mkey);
 
-int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-			 u32 *out, int outlen)
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out,
+			 int outlen)
 {
 	u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {};
 
 	memset(out, 0, outlen);
 	MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
-	MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+	MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 EXPORT_SYMBOL(mlx5_core_query_mkey);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index 66c24767e3b0..7f6fd9c5e371 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -24,7 +24,7 @@ struct mlx5dr_icm_dm {
 };
 
 struct mlx5dr_icm_mr {
-	struct mlx5_core_mkey mkey;
+	u32 mkey;
 	struct mlx5dr_icm_dm dm;
 	struct mlx5dr_domain *dmn;
 	size_t length;
@@ -33,7 +33,7 @@ struct mlx5dr_icm_mr {
 
 static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
 				 u32 pd, u64 length, u64 start_addr, int mode,
-				 struct mlx5_core_mkey *mkey)
+				 u32 *mkey)
 {
 	u32 inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
 	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
@@ -116,7 +116,7 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
 	return icm_mr;
 
 free_mkey:
-	mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
+	mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
 free_dm:
 	mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
 			       icm_mr->dm.addr, icm_mr->dm.obj_id);
@@ -130,7 +130,7 @@ static void dr_icm_pool_mr_destroy(struct mlx5dr_icm_mr *icm_mr)
 	struct mlx5_core_dev *mdev = icm_mr->dmn->mdev;
 	struct mlx5dr_icm_dm *dm = &icm_mr->dm;
 
-	mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
+	mlx5_core_destroy_mkey(mdev, icm_mr->mkey);
 	mlx5_dm_sw_icm_dealloc(mdev, dm->type, dm->length, 0,
 			       dm->addr, dm->obj_id);
 	kvfree(icm_mr);
@@ -252,7 +252,7 @@ dr_icm_chunk_create(struct mlx5dr_icm_pool *pool,
 
 	offset = mlx5dr_icm_pool_dm_type_to_entry_size(pool->icm_type) * seg;
 
-	chunk->rkey = buddy_mem_pool->icm_mr->mkey.key;
+	chunk->rkey = buddy_mem_pool->icm_mr->mkey;
 	chunk->mr_addr = offset;
 	chunk->icm_addr =
 		(uintptr_t)buddy_mem_pool->icm_mr->icm_start_addr + offset;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index bfb14b4b1906..00aef47d7682 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -350,7 +350,7 @@ static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
 	send_info->read.length = send_info->write.length;
 	/* Read into the same write area */
 	send_info->read.addr = (uintptr_t)send_info->write.addr;
-	send_info->read.lkey = send_ring->mr->mkey.key;
+	send_info->read.lkey = send_ring->mr->mkey;
 
 	if (send_ring->pending_wqe % send_ring->signal_th == 0)
 		send_info->read.send_flags = IB_SEND_SIGNALED;
@@ -388,7 +388,7 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
 		       (void *)(uintptr_t)send_info->write.addr,
 		       send_info->write.length);
 		send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
-		send_info->write.lkey = send_ring->mr->mkey.key;
+		send_info->write.lkey = send_ring->mr->mkey;
 	}
 
 	send_ring->tx_head++;
@@ -848,8 +848,7 @@ static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
 	kfree(cq);
 }
 
-static int
-dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
+static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
 {
 	u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
 	void *mkc;
@@ -908,7 +907,7 @@ static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
 
 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
 {
-	mlx5_core_destroy_mkey(mdev, &mr->mkey);
+	mlx5_core_destroy_mkey(mdev, mr->mkey);
 	dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
 			 DMA_BIDIRECTIONAL);
 	kfree(mr);
@@ -1039,7 +1038,7 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
 	send_info.write.lkey = 0;
 	/* Using the sync_mr in order to write/read */
 	send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
-	send_info.rkey = send_ring->sync_mr->mkey.key;
+	send_info.rkey = send_ring->sync_mr->mkey;
 
 	for (i = 0; i < num_of_sends_req; i++) {
 		ret = dr_postsend_icm_data(dmn, &send_info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index b20e8aabb861..6c67185c05c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -1275,7 +1275,7 @@ struct mlx5dr_cq {
 
 struct mlx5dr_mr {
 	struct mlx5_core_dev *mdev;
-	struct mlx5_core_mkey mkey;
+	u32 mkey;
 	dma_addr_t dma_addr;
 	void *addr;
 	size_t size;
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 01a848adf590..3163b313a470 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -15,7 +15,7 @@ struct mlx5_vdpa_direct_mr {
 	u64 start;
 	u64 end;
 	u32 perm;
-	struct mlx5_core_mkey mr;
+	u32 mr;
 	struct sg_table sg_head;
 	int log_size;
 	int nsg;
@@ -25,7 +25,7 @@ struct mlx5_vdpa_direct_mr {
 };
 
 struct mlx5_vdpa_mr {
-	struct mlx5_core_mkey mkey;
+	u32 mkey;
 
 	/* list of direct MRs descendants of this indirect mr */
 	struct list_head head;
@@ -99,9 +99,9 @@ int mlx5_vdpa_alloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 *tdn);
 void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn);
 int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev);
 void mlx5_vdpa_free_resources(struct mlx5_vdpa_dev *mvdev);
-int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in,
+int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
 			  int inlen);
-int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey);
+int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey);
 int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
 			     bool *change_map);
 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb);
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index ff010c6d0cd3..a639b9208d41 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -88,7 +88,7 @@ static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct
 
 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
 {
-	mlx5_vdpa_destroy_mkey(mvdev, &mr->mr);
+	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
 }
 
 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
@@ -162,7 +162,7 @@ again:
 		}
 
 		if (preve == dmr->start) {
-			klm->key = cpu_to_be32(dmr->mr.key);
+			klm->key = cpu_to_be32(dmr->mr);
 			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
 			preve = dmr->end;
 		} else {
@@ -217,7 +217,7 @@ static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr
 
 static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
 {
-	mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
+	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
 }
 
 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
@@ -449,7 +449,7 @@ static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
 
 static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
 {
-	mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
+	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
 }
 
 static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
diff --git a/drivers/vdpa/mlx5/core/resources.c b/drivers/vdpa/mlx5/core/resources.c
index 72b2d80e75b0..9800f9bec225 100644
--- a/drivers/vdpa/mlx5/core/resources.c
+++ b/drivers/vdpa/mlx5/core/resources.c
@@ -198,7 +198,7 @@ void mlx5_vdpa_dealloc_transport_domain(struct mlx5_vdpa_dev *mvdev, u32 tdn)
 	mlx5_cmd_exec_in(mvdev->mdev, dealloc_transport_domain, in);
 }
 
-int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey, u32 *in,
+int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in,
 			  int inlen)
 {
 	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
@@ -213,17 +213,17 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mk
 		return err;
 
 	mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index);
-	mkey->key |= mlx5_idx_to_mkey(mkey_index);
+	*mkey |= mlx5_idx_to_mkey(mkey_index);
 	return 0;
 }
 
-int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, struct mlx5_core_mkey *mkey)
+int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
 
 	MLX5_SET(destroy_mkey_in, in, uid, mvdev->res.uid);
 	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
-	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
+	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey));
 	return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
 }
 
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index bd56de7484dc..5c7d2a953dbd 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -865,7 +865,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
 	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
 	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
 	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
-	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey.key);
+	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
 	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
 	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
 	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f0ce7d4dc4ff..bd99f713720b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -650,7 +650,7 @@ struct mlx5e_resources {
 	struct mlx5e_hw_objs {
 		u32                        pdn;
 		struct mlx5_td             td;
-		struct mlx5_core_mkey      mkey;
+		u32			   mkey;
 		struct mlx5_sq_bfreg       bfreg;
 	} hw_objs;
 	struct devlink_port dl_port;
@@ -1021,13 +1021,11 @@ struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 						      gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
 				 struct mlx5_cmd_mailbox *head);
-int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
-			  struct mlx5_core_mkey *mkey,
-			  u32 *in, int inlen);
-int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
-			   struct mlx5_core_mkey *mkey);
-int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
-			 u32 *out, int outlen);
+int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in,
+			  int inlen);
+int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, u32 mkey);
+int mlx5_core_query_mkey(struct mlx5_core_dev *dev, u32 mkey, u32 *out,
+			 int outlen);
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn);
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn);
 int mlx5_pagealloc_init(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 4123bfb0b28b77b944360be8c758b1a0974e96ad Mon Sep 17 00:00:00 2001
From: Aharon Landau <aharonl@nvidia.com>
Date: Tue, 12 Oct 2021 13:26:34 +0300
Subject: RDMA/mlx5: Move struct mlx5_core_mkey to mlx5_ib

Move mlx5_core_mkey struct to mlx5_ib, as the mlx5_core doesn't use it
at this point.

Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mlx5/devx.c    |  2 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 25 +++++++++++++++++++------
 drivers/infiniband/hw/mlx5/mr.c      | 12 +++++-------
 drivers/infiniband/hw/mlx5/odp.c     |  8 ++++----
 include/linux/mlx5/driver.h          | 13 -------------
 5 files changed, 29 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 5322d787c094..d07a21a13ac0 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -1293,7 +1293,7 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj,
 				     void *in, void *out)
 {
 	struct mlx5_ib_devx_mr *devx_mr = &obj->devx_mr;
-	struct mlx5_core_mkey *mkey;
+	struct mlx5_ib_mkey *mkey;
 	void *mkc;
 	u8 key;
 
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bf20a388eabe..8f754c52d469 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -619,6 +619,19 @@ struct mlx5_user_mmap_entry {
 	u32 page_idx;
 };
 
+enum mlx5_mkey_type {
+	MLX5_MKEY_MR = 1,
+	MLX5_MKEY_MW,
+	MLX5_MKEY_INDIRECT_DEVX,
+};
+
+struct mlx5_ib_mkey {
+	u32 key;
+	enum mlx5_mkey_type type;
+	struct wait_queue_head wait;
+	refcount_t usecount;
+};
+
 #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
 
 #define MLX5_IB_DM_MEMIC_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE   |\
@@ -637,7 +650,7 @@ struct mlx5_user_mmap_entry {
 
 struct mlx5_ib_mr {
 	struct ib_mr ibmr;
-	struct mlx5_core_mkey mmkey;
+	struct mlx5_ib_mkey mmkey;
 
 	/* User MR data */
 	struct mlx5_cache_ent *cache_ent;
@@ -713,12 +726,12 @@ static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr)
 
 struct mlx5_ib_mw {
 	struct ib_mw		ibmw;
-	struct mlx5_core_mkey	mmkey;
+	struct mlx5_ib_mkey	mmkey;
 	int			ndescs;
 };
 
 struct mlx5_ib_devx_mr {
-	struct mlx5_core_mkey	mmkey;
+	struct mlx5_ib_mkey	mmkey;
 	int			ndescs;
 };
 
@@ -1579,7 +1592,7 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
 }
 
 static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
-				       struct mlx5_core_mkey *mmkey)
+				       struct mlx5_ib_mkey *mmkey)
 {
 	refcount_set(&mmkey->usecount, 1);
 
@@ -1588,14 +1601,14 @@ static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
 }
 
 /* deref an mkey that can participate in ODP flow */
-static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey)
+static inline void mlx5r_deref_odp_mkey(struct mlx5_ib_mkey *mmkey)
 {
 	if (refcount_dec_and_test(&mmkey->usecount))
 		wake_up(&mmkey->wait);
 }
 
 /* deref an mkey that can participate in ODP flow and wait for relese */
-static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey)
+static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_ib_mkey *mmkey)
 {
 	mlx5r_deref_odp_mkey(mmkey);
 	wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index e2f020472ae2..5afc3e965c28 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -88,9 +88,8 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr,
 	MLX5_SET64(mkc, mkc, start_addr, start_addr);
 }
 
-static void
-assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
-		    u32 *in)
+static void assign_mkey_variant(struct mlx5_ib_dev *dev,
+				struct mlx5_ib_mkey *mkey, u32 *in)
 {
 	u8 key = atomic_inc_return(&dev->mkey_var);
 	void *mkc;
@@ -100,9 +99,8 @@ assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
 	mkey->key = key;
 }
 
-static int
-mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
-		    u32 *in, int inlen)
+static int mlx5_ib_create_mkey(struct mlx5_ib_dev *dev,
+			       struct mlx5_ib_mkey *mkey, u32 *in, int inlen)
 {
 	int ret;
 
@@ -116,7 +114,7 @@ mlx5_ib_create_mkey(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey,
 
 static int
 mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
-		       struct mlx5_core_mkey *mkey,
+		       struct mlx5_ib_mkey *mkey,
 		       struct mlx5_async_ctx *async_ctx,
 		       u32 *in, int inlen, u32 *out, int outlen,
 		       struct mlx5_async_work *context)
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index a654367af056..7ab0a9b752f6 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -788,7 +788,7 @@ struct pf_frame {
 	int depth;
 };
 
-static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key)
+static bool mkey_is_eq(struct mlx5_ib_mkey *mmkey, u32 key)
 {
 	if (!mmkey)
 		return false;
@@ -797,7 +797,7 @@ static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key)
 	return mmkey->key == key;
 }
 
-static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey)
+static int get_indirect_num_descs(struct mlx5_ib_mkey *mmkey)
 {
 	struct mlx5_ib_mw *mw;
 	struct mlx5_ib_devx_mr *devx_mr;
@@ -831,7 +831,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
 {
 	int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0;
 	struct pf_frame *head = NULL, *frame;
-	struct mlx5_core_mkey *mmkey;
+	struct mlx5_ib_mkey *mmkey;
 	struct mlx5_ib_mr *mr;
 	struct mlx5_klm *pklm;
 	u32 *out = NULL;
@@ -1703,8 +1703,8 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
 		    u32 lkey)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
-	struct mlx5_core_mkey *mmkey;
 	struct mlx5_ib_mr *mr = NULL;
+	struct mlx5_ib_mkey *mmkey;
 
 	xa_lock(&dev->odp_mkeys);
 	mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index bd99f713720b..70b6aff4940a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -357,19 +357,6 @@ struct mlx5_core_sig_ctx {
 	u32			sigerr_count;
 };
 
-enum {
-	MLX5_MKEY_MR = 1,
-	MLX5_MKEY_MW,
-	MLX5_MKEY_INDIRECT_DEVX,
-};
-
-struct mlx5_core_mkey {
-	u32			key;
-	u32			type;
-	struct wait_queue_head wait;
-	refcount_t usecount;
-};
-
 #define MLX5_24BIT_MASK		((1 << 24) - 1)
 
 enum mlx5_res_type {
-- 
cgit v1.2.3


From e80094a473eefad9d856ce3ab0d7afdbb64800c4 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 18 Oct 2021 14:10:02 -0700
Subject: ethernet: add a helper for assigning port addresses

We have 5 drivers which offset base MAC addr by port id.
Create a helper for them.

This helper takes care of overflows, which some drivers
did not do, please complain if that's going to break
anything!

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Shannon Nelson <snelson@pensando.io>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 23681c3d3b8a..2ad71cc90b37 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -551,6 +551,27 @@ static inline unsigned long compare_ether_header(const void *a, const void *b)
 #endif
 }
 
+/**
+ * eth_hw_addr_gen - Generate and assign Ethernet address to a port
+ * @dev: pointer to port's net_device structure
+ * @base_addr: base Ethernet address
+ * @id: offset to add to the base address
+ *
+ * Generate a MAC address using a base address and an offset and assign it
+ * to a net_device. Commonly used by switch drivers which need to compute
+ * addresses for all their ports. addr_assign_type is not changed.
+ */
+static inline void eth_hw_addr_gen(struct net_device *dev, const u8 *base_addr,
+				   unsigned int id)
+{
+	u64 u = ether_addr_to_u64(base_addr);
+	u8 addr[ETH_ALEN];
+
+	u += id;
+	u64_to_ether_addr(u, addr);
+	eth_hw_addr_set(dev, addr);
+}
+
 /**
  * eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
  * @skb: Buffer to pad
-- 
cgit v1.2.3


From db9a02baa23267c695a44234a0f2f4607992780e Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 6 Oct 2021 06:15:04 -0600
Subject: block: move bdev_read_only() into the header

This is called for every write in the fast path, move it inline next
to get_disk_ro() which is called internally.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 6 ------
 include/linux/genhd.h | 5 +++++
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 759bc06810f8..80943c123c3e 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1389,12 +1389,6 @@ void set_disk_ro(struct gendisk *disk, bool read_only)
 }
 EXPORT_SYMBOL(set_disk_ro);
 
-int bdev_read_only(struct block_device *bdev)
-{
-	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
-}
-EXPORT_SYMBOL(bdev_read_only);
-
 void inc_diskseq(struct gendisk *disk)
 {
 	disk->diskseq = atomic64_inc_return(&diskseq);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index cd4038fd5743..c70bc5fce4db 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -221,6 +221,11 @@ static inline int get_disk_ro(struct gendisk *disk)
 		test_bit(GD_READ_ONLY, &disk->state);
 }
 
+static inline int bdev_read_only(struct block_device *bdev)
+{
+	return bdev->bd_read_only || get_disk_ro(bdev->bd_disk);
+}
+
 extern void disk_block_events(struct gendisk *disk);
 extern void disk_unblock_events(struct gendisk *disk);
 extern void disk_flush_events(struct gendisk *disk, unsigned int mask);
-- 
cgit v1.2.3


From e028f167eca5fc56938e6c1680d40eed0bc39e80 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Sat, 16 Oct 2021 16:38:14 -0600
Subject: block: move blk_mq_tag_to_rq() inline

This is in the fast path of driver issue or completion, and it's a single
array index operation. Move it inline to avoid a function call for it.

This does mean making struct blk_mq_tags block layer public, but there's
not really much in there.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-tag.h     | 23 -----------------------
 block/blk-mq.c         | 11 -----------
 include/linux/blk-mq.h | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index 78ae2fb8e2a4..df787b5a23bd 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -4,29 +4,6 @@
 
 struct blk_mq_alloc_data;
 
-/*
- * Tag address space map.
- */
-struct blk_mq_tags {
-	unsigned int nr_tags;
-	unsigned int nr_reserved_tags;
-
-	atomic_t active_queues;
-
-	struct sbitmap_queue bitmap_tags;
-	struct sbitmap_queue breserved_tags;
-
-	struct request **rqs;
-	struct request **static_rqs;
-	struct list_head page_list;
-
-	/*
-	 * used to clear request reference in rqs[] before freeing one
-	 * request pool
-	 */
-	spinlock_t lock;
-};
-
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
 					unsigned int reserved_tags,
 					int node, int alloc_policy);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 104019c0ea41..8f5c1662335b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1120,17 +1120,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
 }
 EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
-{
-	if (tag < tags->nr_tags) {
-		prefetch(tags->rqs[tag]);
-		return tags->rqs[tag];
-	}
-
-	return NULL;
-}
-EXPORT_SYMBOL(blk_mq_tag_to_rq);
-
 static bool blk_mq_rq_inflight(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       void *priv, bool reserved)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 656fe34bdb6c..6cf35de151a9 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -7,6 +7,7 @@
 #include <linux/srcu.h>
 #include <linux/lockdep.h>
 #include <linux/scatterlist.h>
+#include <linux/prefetch.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -675,7 +676,40 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
 struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 		unsigned int op, blk_mq_req_flags_t flags,
 		unsigned int hctx_idx);
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+
+/*
+ * Tag address space map.
+ */
+struct blk_mq_tags {
+	unsigned int nr_tags;
+	unsigned int nr_reserved_tags;
+
+	atomic_t active_queues;
+
+	struct sbitmap_queue bitmap_tags;
+	struct sbitmap_queue breserved_tags;
+
+	struct request **rqs;
+	struct request **static_rqs;
+	struct list_head page_list;
+
+	/*
+	 * used to clear request reference in rqs[] before freeing one
+	 * request pool
+	 */
+	spinlock_t lock;
+};
+
+static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,
+					       unsigned int tag)
+{
+	if (tag < tags->nr_tags) {
+		prefetch(tags->rqs[tag]);
+		return tags->rqs[tag];
+	}
+
+	return NULL;
+}
 
 enum {
 	BLK_MQ_UNIQUE_TAG_BITS = 16,
-- 
cgit v1.2.3


From bc490f81731e181b07b8d7577425c06ae91692c8 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 18 Oct 2021 10:12:12 -0600
Subject: block: change plugging to use a singly linked list

Use a singly linked list for the blk_plug. This saves 8 bytes in the
blk_plug struct, and makes for faster list manipulations than doubly
linked lists. As we don't use the doubly linked lists for anything,
singly linked is just fine.

This yields a bump in default (merging enabled) performance from 7.0
to 7.1M IOPS, and ~7.5M IOPS with merging disabled.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  4 +--
 block/blk-merge.c      |  4 +--
 block/blk-mq.c         | 80 ++++++++++++++++++++++++++++----------------------
 include/linux/blkdev.h |  5 ++--
 4 files changed, 51 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index d0c2e11411d0..14d20909f61a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1550,7 +1550,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
 	if (tsk->plug)
 		return;
 
-	INIT_LIST_HEAD(&plug->mq_list);
+	plug->mq_list = NULL;
 	plug->cached_rq = NULL;
 	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
 	plug->rq_count = 0;
@@ -1640,7 +1640,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	flush_plug_callbacks(plug, from_schedule);
 
-	if (!list_empty(&plug->mq_list))
+	if (!rq_list_empty(plug->mq_list))
 		blk_mq_flush_plug_list(plug, from_schedule);
 	if (unlikely(!from_schedule && plug->cached_rq))
 		blk_mq_free_plug_rqs(plug);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index c273b58378ce..3e6fa449caff 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -1090,11 +1090,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	struct request *rq;
 
 	plug = blk_mq_plug(q, bio);
-	if (!plug || list_empty(&plug->mq_list))
+	if (!plug || rq_list_empty(plug->mq_list))
 		return false;
 
 	/* check the previously added entry for a quick merge attempt */
-	rq = list_last_entry(&plug->mq_list, struct request, queuelist);
+	rq = rq_list_peek(&plug->mq_list);
 	if (rq->q == q) {
 		/*
 		 * Only blk-mq multiple hardware queues case checks the rq in
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8f5c1662335b..7fa302730d4a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2151,34 +2151,46 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
+	struct blk_mq_hw_ctx *this_hctx;
+	struct blk_mq_ctx *this_ctx;
+	unsigned int depth;
 	LIST_HEAD(list);
 
-	if (list_empty(&plug->mq_list))
+	if (rq_list_empty(plug->mq_list))
 		return;
-	list_splice_init(&plug->mq_list, &list);
 	plug->rq_count = 0;
 
+	this_hctx = NULL;
+	this_ctx = NULL;
+	depth = 0;
 	do {
-		struct list_head rq_list;
-		struct request *rq, *head_rq = list_entry_rq(list.next);
-		struct list_head *pos = &head_rq->queuelist; /* skip first */
-		struct blk_mq_hw_ctx *this_hctx = head_rq->mq_hctx;
-		struct blk_mq_ctx *this_ctx = head_rq->mq_ctx;
-		unsigned int depth = 1;
-
-		list_for_each_continue(pos, &list) {
-			rq = list_entry_rq(pos);
-			BUG_ON(!rq->q);
-			if (rq->mq_hctx != this_hctx || rq->mq_ctx != this_ctx)
-				break;
-			depth++;
+		struct request *rq;
+
+		rq = rq_list_pop(&plug->mq_list);
+
+		if (!this_hctx) {
+			this_hctx = rq->mq_hctx;
+			this_ctx = rq->mq_ctx;
+		} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
+			trace_block_unplug(this_hctx->queue, depth,
+						!from_schedule);
+			blk_mq_sched_insert_requests(this_hctx, this_ctx,
+						&list, from_schedule);
+			depth = 0;
+			this_hctx = rq->mq_hctx;
+			this_ctx = rq->mq_ctx;
+
 		}
 
-		list_cut_before(&rq_list, &list, pos);
-		trace_block_unplug(head_rq->q, depth, !from_schedule);
-		blk_mq_sched_insert_requests(this_hctx, this_ctx, &rq_list,
+		list_add(&rq->queuelist, &list);
+		depth++;
+	} while (!rq_list_empty(plug->mq_list));
+
+	if (!list_empty(&list)) {
+		trace_block_unplug(this_hctx->queue, depth, !from_schedule);
+		blk_mq_sched_insert_requests(this_hctx, this_ctx, &list,
 						from_schedule);
-	} while(!list_empty(&list));
+	}
 }
 
 static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
@@ -2358,16 +2370,15 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 
 static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 {
-	list_add_tail(&rq->queuelist, &plug->mq_list);
-	plug->rq_count++;
-	if (!plug->multiple_queues && !list_is_singular(&plug->mq_list)) {
-		struct request *tmp;
+	if (!plug->multiple_queues) {
+		struct request *nxt = rq_list_peek(&plug->mq_list);
 
-		tmp = list_first_entry(&plug->mq_list, struct request,
-						queuelist);
-		if (tmp->q != rq->q)
+		if (nxt && nxt->q != rq->q)
 			plug->multiple_queues = true;
 	}
+	rq->rq_next = NULL;
+	rq_list_add(&plug->mq_list, rq);
+	plug->rq_count++;
 }
 
 /*
@@ -2479,13 +2490,15 @@ void blk_mq_submit_bio(struct bio *bio)
 		unsigned int request_count = plug->rq_count;
 		struct request *last = NULL;
 
-		if (!request_count)
+		if (!request_count) {
 			trace_block_plug(q);
-		else
-			last = list_entry_rq(plug->mq_list.prev);
+		} else if (!blk_queue_nomerges(q)) {
+			last = rq_list_peek(&plug->mq_list);
+			if (blk_rq_bytes(last) < BLK_PLUG_FLUSH_SIZE)
+				last = NULL;
+		}
 
-		if (request_count >= blk_plug_max_rq_count(plug) || (last &&
-		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
+		if (request_count >= blk_plug_max_rq_count(plug) || last) {
 			blk_flush_plug_list(plug, false);
 			trace_block_plug(q);
 		}
@@ -2505,10 +2518,7 @@ void blk_mq_submit_bio(struct bio *bio)
 		 * the plug list is empty, and same_queue_rq is invalid.
 		 */
 		if (same_queue_rq) {
-			next_rq = list_last_entry(&plug->mq_list,
-							struct request,
-							queuelist);
-			list_del_init(&next_rq->queuelist);
+			next_rq = rq_list_pop(&plug->mq_list);
 			plug->rq_count--;
 		}
 		blk_add_rq_to_plug(plug, rq);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index fd9771a1da09..4027112b9851 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -728,7 +728,7 @@ extern void blk_set_queue_dying(struct request_queue *);
  * schedule() where blk_schedule_flush_plug() is called.
  */
 struct blk_plug {
-	struct list_head mq_list; /* blk-mq requests */
+	struct request *mq_list; /* blk-mq requests */
 
 	/* if ios_left is > 1, we can batch tag/rq allocations */
 	struct request *cached_rq;
@@ -777,8 +777,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 	struct blk_plug *plug = tsk->plug;
 
 	return plug &&
-		 (!list_empty(&plug->mq_list) ||
-		 !list_empty(&plug->cb_list));
+		 (plug->mq_list || !list_empty(&plug->cb_list));
 }
 
 int blkdev_issue_flush(struct block_device *bdev);
-- 
cgit v1.2.3


From dc5fc361d891e089dfd9c0a975dc78041036b906 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 19 Oct 2021 06:02:30 -0600
Subject: block: attempt direct issue of plug list

If we have just one queue type in the plug list, then we can extend our
direct issue to cover a full plug list as well. This allows sending a
batch of requests for direct issue, which is more efficient than doing
one-at-a-time kind of issue.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       |  1 +
 block/blk-mq.c         | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h |  1 +
 3 files changed, 62 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 14d20909f61a..e6ad5b51d0c3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1555,6 +1555,7 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
 	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
 	plug->rq_count = 0;
 	plug->multiple_queues = false;
+	plug->has_elevator = false;
 	plug->nowait = false;
 	INIT_LIST_HEAD(&plug->cb_list);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 7fa302730d4a..71ab7521dd3d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2149,6 +2149,58 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	spin_unlock(&ctx->lock);
 }
 
+static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued,
+			      bool from_schedule)
+{
+	if (hctx->queue->mq_ops->commit_rqs) {
+		trace_block_unplug(hctx->queue, *queued, !from_schedule);
+		hctx->queue->mq_ops->commit_rqs(hctx);
+	}
+	*queued = 0;
+}
+
+static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
+{
+	struct blk_mq_hw_ctx *hctx = NULL;
+	struct request *rq;
+	int queued = 0;
+	int errors = 0;
+
+	while ((rq = rq_list_pop(&plug->mq_list))) {
+		bool last = rq_list_empty(plug->mq_list);
+		blk_status_t ret;
+
+		if (hctx != rq->mq_hctx) {
+			if (hctx)
+				blk_mq_commit_rqs(hctx, &queued, from_schedule);
+			hctx = rq->mq_hctx;
+		}
+
+		ret = blk_mq_request_issue_directly(rq, last);
+		switch (ret) {
+		case BLK_STS_OK:
+			queued++;
+			break;
+		case BLK_STS_RESOURCE:
+		case BLK_STS_DEV_RESOURCE:
+			blk_mq_request_bypass_insert(rq, false, last);
+			blk_mq_commit_rqs(hctx, &queued, from_schedule);
+			return;
+		default:
+			blk_mq_end_request(rq, ret);
+			errors++;
+			break;
+		}
+	}
+
+	/*
+	 * If we didn't flush the entire list, we could have told the driver
+	 * there was more coming, but that turned out to be a lie.
+	 */
+	if (errors)
+		blk_mq_commit_rqs(hctx, &queued, from_schedule);
+}
+
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
 	struct blk_mq_hw_ctx *this_hctx;
@@ -2160,6 +2212,12 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 		return;
 	plug->rq_count = 0;
 
+	if (!plug->multiple_queues && !plug->has_elevator) {
+		blk_mq_plug_issue_direct(plug, from_schedule);
+		if (rq_list_empty(plug->mq_list))
+			return;
+	}
+
 	this_hctx = NULL;
 	this_ctx = NULL;
 	depth = 0;
@@ -2376,6 +2434,8 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 		if (nxt && nxt->q != rq->q)
 			plug->multiple_queues = true;
 	}
+	if (!plug->has_elevator && (rq->rq_flags & RQF_ELV))
+		plug->has_elevator = true;
 	rq->rq_next = NULL;
 	rq_list_add(&plug->mq_list, rq);
 	plug->rq_count++;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4027112b9851..f13091d3d476 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -737,6 +737,7 @@ struct blk_plug {
 	unsigned short rq_count;
 
 	bool multiple_queues;
+	bool has_elevator;
 	bool nowait;
 
 	struct list_head cb_list; /* md requires an unplug callback */
-- 
cgit v1.2.3


From cd45c9bf8b43cd387e167cf166ae5c517f56d658 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 18 Oct 2021 16:33:22 +0200
Subject: ASoC: Intel: Move soc_intel_is_foo() helpers to a generic header

The soc_intel_is_foo() helpers from
sound/soc/intel/common/soc-intel-quirks.h are useful outside of the
sound subsystem too.

Move these to include/linux/platform_data/x86/soc.h, so that
other code can use them too.

Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Acked-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Link: https://lore.kernel.org/r/20211018143324.296961-2-hdegoede@redhat.com
---
 include/linux/platform_data/x86/soc.h     | 65 +++++++++++++++++++++++++++++++
 sound/soc/intel/common/soc-intel-quirks.h | 51 ++----------------------
 2 files changed, 68 insertions(+), 48 deletions(-)
 create mode 100644 include/linux/platform_data/x86/soc.h

(limited to 'include/linux')

diff --git a/include/linux/platform_data/x86/soc.h b/include/linux/platform_data/x86/soc.h
new file mode 100644
index 000000000000..da05f425587a
--- /dev/null
+++ b/include/linux/platform_data/x86/soc.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers for Intel SoC model detection
+ *
+ * Copyright (c) 2019, Intel Corporation.
+ */
+
+#ifndef __PLATFORM_DATA_X86_SOC_H
+#define __PLATFORM_DATA_X86_SOC_H
+
+#if IS_ENABLED(CONFIG_X86)
+
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+
+#define SOC_INTEL_IS_CPU(soc, type)				\
+static inline bool soc_intel_is_##soc(void)			\
+{								\
+	static const struct x86_cpu_id soc##_cpu_ids[] = {	\
+		X86_MATCH_INTEL_FAM6_MODEL(type, NULL),		\
+		{}						\
+	};							\
+	const struct x86_cpu_id *id;				\
+								\
+	id = x86_match_cpu(soc##_cpu_ids);			\
+	if (id)							\
+		return true;					\
+	return false;						\
+}
+
+SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT);
+SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT);
+SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT);
+SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS);
+SOC_INTEL_IS_CPU(cml, KABYLAKE_L);
+
+#else /* IS_ENABLED(CONFIG_X86) */
+
+static inline bool soc_intel_is_byt(void)
+{
+	return false;
+}
+
+static inline bool soc_intel_is_cht(void)
+{
+	return false;
+}
+
+static inline bool soc_intel_is_apl(void)
+{
+	return false;
+}
+
+static inline bool soc_intel_is_glk(void)
+{
+	return false;
+}
+
+static inline bool soc_intel_is_cml(void)
+{
+	return false;
+}
+#endif /* IS_ENABLED(CONFIG_X86) */
+
+#endif /* __PLATFORM_DATA_X86_SOC_H */
diff --git a/sound/soc/intel/common/soc-intel-quirks.h b/sound/soc/intel/common/soc-intel-quirks.h
index a93987ab7f4d..de4e550c5b34 100644
--- a/sound/soc/intel/common/soc-intel-quirks.h
+++ b/sound/soc/intel/common/soc-intel-quirks.h
@@ -9,34 +9,13 @@
 #ifndef _SND_SOC_INTEL_QUIRKS_H
 #define _SND_SOC_INTEL_QUIRKS_H
 
+#include <linux/platform_data/x86/soc.h>
+
 #if IS_ENABLED(CONFIG_X86)
 
 #include <linux/dmi.h>
-#include <asm/cpu_device_id.h>
-#include <asm/intel-family.h>
 #include <asm/iosf_mbi.h>
 
-#define SOC_INTEL_IS_CPU(soc, type)				\
-static inline bool soc_intel_is_##soc(void)			\
-{								\
-	static const struct x86_cpu_id soc##_cpu_ids[] = {	\
-		X86_MATCH_INTEL_FAM6_MODEL(type, NULL),		\
-		{}						\
-	};							\
-	const struct x86_cpu_id *id;				\
-								\
-	id = x86_match_cpu(soc##_cpu_ids);			\
-	if (id)							\
-		return true;					\
-	return false;						\
-}
-
-SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT);
-SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT);
-SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT);
-SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS);
-SOC_INTEL_IS_CPU(cml, KABYLAKE_L);
-
 static inline bool soc_intel_is_byt_cr(struct platform_device *pdev)
 {
 	/*
@@ -114,30 +93,6 @@ static inline bool soc_intel_is_byt_cr(struct platform_device *pdev)
 	return false;
 }
 
-static inline bool soc_intel_is_byt(void)
-{
-	return false;
-}
-
-static inline bool soc_intel_is_cht(void)
-{
-	return false;
-}
-
-static inline bool soc_intel_is_apl(void)
-{
-	return false;
-}
-
-static inline bool soc_intel_is_glk(void)
-{
-	return false;
-}
-
-static inline bool soc_intel_is_cml(void)
-{
-	return false;
-}
 #endif
 
- #endif /* _SND_SOC_INTEL_QUIRKS_H */
+#endif /* _SND_SOC_INTEL_QUIRKS_H */
-- 
cgit v1.2.3


From aaa2975f2b07b04ee16b2cad1072cbdea3e1c50a Mon Sep 17 00:00:00 2001
From: Lasse Collin <lasse.collin@tukaani.org>
Date: Mon, 11 Oct 2021 05:31:42 +0800
Subject: lib/xz: Add MicroLZMA decoder

MicroLZMA is a yet another header format variant where the first
byte of a raw LZMA stream (without the end of stream marker) has
been replaced with a bitwise-negation of the lc/lp/pb properties
byte. MicroLZMA was created to be used in EROFS but can be used
by other things too where wasting minimal amount of space for
headers is important.

This is implemented using most of the LZMA2 code as is so the
amount of new code is small. The API has a few extra features
compared to the XZ decoder. On the other hand, the API lacks
XZ_BUF_ERROR support which is important to take into account
when using this API.

MicroLZMA doesn't support BCJ filters. In theory they could be
added later as there are many unused/reserved values for the
first byte of the compressed stream but in practice it is
somewhat unlikely to happen due to a few implementation reasons.

Link: https://lore.kernel.org/r/20211010213145.17462-5-xiang@kernel.org
Signed-off-by: Lasse Collin <lasse.collin@tukaani.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 include/linux/xz.h    | 106 ++++++++++++++++++++++++++++++++++
 lib/xz/Kconfig        |  13 +++++
 lib/xz/xz_dec_lzma2.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/xz/xz_dec_syms.c  |   9 ++-
 lib/xz/xz_private.h   |   3 +
 5 files changed, 284 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/xz.h b/include/linux/xz.h
index 9884c8440188..7285ca5d56e9 100644
--- a/include/linux/xz.h
+++ b/include/linux/xz.h
@@ -233,6 +233,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s);
  */
 XZ_EXTERN void xz_dec_end(struct xz_dec *s);
 
+/*
+ * Decompressor for MicroLZMA, an LZMA variant with a very minimal header.
+ * See xz_dec_microlzma_alloc() below for details.
+ *
+ * These functions aren't used or available in preboot code and thus aren't
+ * marked with XZ_EXTERN. This avoids warnings about static functions that
+ * are never defined.
+ */
+/**
+ * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state
+ */
+struct xz_dec_microlzma;
+
+/**
+ * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder
+ * @mode        XZ_SINGLE or XZ_PREALLOC
+ * @dict_size   LZMA dictionary size. This must be at least 4 KiB and
+ *              at most 3 GiB.
+ *
+ * In contrast to xz_dec_init(), this function only allocates the memory
+ * and remembers the dictionary size. xz_dec_microlzma_reset() must be used
+ * before calling xz_dec_microlzma_run().
+ *
+ * The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE.
+ * With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated.
+ *
+ * On success, xz_dec_microlzma_alloc() returns a pointer to
+ * struct xz_dec_microlzma. If memory allocation fails or
+ * dict_size is invalid, NULL is returned.
+ *
+ * The compressed format supported by this decoder is a raw LZMA stream
+ * whose first byte (always 0x00) has been replaced with bitwise-negation
+ * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is
+ * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00.
+ * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream
+ * marker must not be used. The unused values are reserved for future use.
+ * This MicroLZMA header format was created for use in EROFS but may be used
+ * by others too.
+ */
+extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
+						       uint32_t dict_size);
+
+/**
+ * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state
+ * @s           Decoder state allocated using xz_dec_microlzma_alloc()
+ * @comp_size   Compressed size of the input stream
+ * @uncomp_size Uncompressed size of the input stream. A value smaller
+ *              than the real uncompressed size of the input stream can
+ *              be specified if uncomp_size_is_exact is set to false.
+ *              uncomp_size can never be set to a value larger than the
+ *              expected real uncompressed size because it would eventually
+ *              result in XZ_DATA_ERROR.
+ * @uncomp_size_is_exact  This is an int instead of bool to avoid
+ *              requiring stdbool.h. This should normally be set to true.
+ *              When this is set to false, error detection is weaker.
+ */
+extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s,
+				   uint32_t comp_size, uint32_t uncomp_size,
+				   int uncomp_size_is_exact);
+
+/**
+ * xz_dec_microlzma_run() - Run the MicroLZMA decoder
+ * @s           Decoder state initialized using xz_dec_microlzma_reset()
+ * @b:          Input and output buffers
+ *
+ * This works similarly to xz_dec_run() with a few important differences.
+ * Only the differences are documented here.
+ *
+ * The only possible return values are XZ_OK, XZ_STREAM_END, and
+ * XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress
+ * is possible due to lack of input data or output space, this function will
+ * keep returning XZ_OK. Thus, the calling code must be written so that it
+ * will eventually provide input and output space matching (or exceeding)
+ * comp_size and uncomp_size arguments given to xz_dec_microlzma_reset().
+ * If the caller cannot do this (for example, if the input file is truncated
+ * or otherwise corrupt), the caller must detect this error by itself to
+ * avoid an infinite loop.
+ *
+ * If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned.
+ * This can happen also when incorrect dictionary, uncompressed, or
+ * compressed sizes have been specified.
+ *
+ * With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over
+ * uncompressed data. This way the caller doesn't need to provide a temporary
+ * output buffer for the bytes that will be ignored.
+ *
+ * With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK
+ * is also possible and thus XZ_SINGLE is actually a limited multi-call mode.
+ * After XZ_OK the bytes decoded so far may be read from the output buffer.
+ * It is possible to continue decoding but the variables b->out and b->out_pos
+ * MUST NOT be changed by the caller. Increasing the value of b->out_size is
+ * allowed to make more output space available; one doesn't need to provide
+ * space for the whole uncompressed data on the first call. The input buffer
+ * may be changed normally like with XZ_PREALLOC. This way input data can be
+ * provided from non-contiguous memory.
+ */
+extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s,
+					struct xz_buf *b);
+
+/**
+ * xz_dec_microlzma_end() - Free the memory allocated for the decoder state
+ * @s:          Decoder state allocated using xz_dec_microlzma_alloc().
+ *              If s is NULL, this function does nothing.
+ */
+extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s);
+
 /*
  * Standalone build (userspace build or in-kernel build for boot time use)
  * needs a CRC32 implementation. For normal in-kernel use, kernel's own
diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig
index 5cb50245a878..adce22ac18d6 100644
--- a/lib/xz/Kconfig
+++ b/lib/xz/Kconfig
@@ -39,6 +39,19 @@ config XZ_DEC_SPARC
 	default y
 	select XZ_DEC_BCJ
 
+config XZ_DEC_MICROLZMA
+	bool "MicroLZMA decoder"
+	default n
+	help
+	  MicroLZMA is a header format variant where the first byte
+	  of a raw LZMA stream (without the end of stream marker) has
+	  been replaced with a bitwise-negation of the lc/lp/pb
+	  properties byte. MicroLZMA was created to be used in EROFS
+	  but can be used by other things too where wasting minimal
+	  amount of space for headers is important.
+
+	  Unless you know that you need this, say N.
+
 endif
 
 config XZ_DEC_BCJ
diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c
index 22b789645ce5..46b186d7eb45 100644
--- a/lib/xz/xz_dec_lzma2.c
+++ b/lib/xz/xz_dec_lzma2.c
@@ -248,6 +248,10 @@ struct lzma2_dec {
 	 * before the first LZMA chunk.
 	 */
 	bool need_props;
+
+#ifdef XZ_DEC_MICROLZMA
+	bool pedantic_microlzma;
+#endif
 };
 
 struct xz_dec_lzma2 {
@@ -419,6 +423,12 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b,
 	}
 }
 
+#ifdef XZ_DEC_MICROLZMA
+#	define DICT_FLUSH_SUPPORTS_SKIPPING true
+#else
+#	define DICT_FLUSH_SUPPORTS_SKIPPING false
+#endif
+
 /*
  * Flush pending data from dictionary to b->out. It is assumed that there is
  * enough space in b->out. This is guaranteed because caller uses dict_limit()
@@ -437,9 +447,14 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b)
 		 * decompression because in multi-call mode dict->buf
 		 * has been allocated by us in this file; it's not
 		 * provided by the caller like in single-call mode.
+		 *
+		 * With MicroLZMA, b->out can be NULL to skip bytes that
+		 * the caller doesn't need. This cannot be done with XZ
+		 * because it would break BCJ filters.
 		 */
-		memcpy(b->out + b->out_pos, dict->buf + dict->start,
-				copy_size);
+		if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL)
+			memcpy(b->out + b->out_pos, dict->buf + dict->start,
+					copy_size);
 	}
 
 	dict->start = dict->pos;
@@ -1190,3 +1205,140 @@ XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s)
 
 	kfree(s);
 }
+
+#ifdef XZ_DEC_MICROLZMA
+/* This is a wrapper struct to have a nice struct name in the public API. */
+struct xz_dec_microlzma {
+	struct xz_dec_lzma2 s;
+};
+
+enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr,
+				 struct xz_buf *b)
+{
+	struct xz_dec_lzma2 *s = &s_ptr->s;
+
+	/*
+	 * sequence is SEQ_PROPERTIES before the first input byte,
+	 * SEQ_LZMA_PREPARE until a total of five bytes have been read,
+	 * and SEQ_LZMA_RUN for the rest of the input stream.
+	 */
+	if (s->lzma2.sequence != SEQ_LZMA_RUN) {
+		if (s->lzma2.sequence == SEQ_PROPERTIES) {
+			/* One byte is needed for the props. */
+			if (b->in_pos >= b->in_size)
+				return XZ_OK;
+
+			/*
+			 * Don't increment b->in_pos here. The same byte is
+			 * also passed to rc_read_init() which will ignore it.
+			 */
+			if (!lzma_props(s, ~b->in[b->in_pos]))
+				return XZ_DATA_ERROR;
+
+			s->lzma2.sequence = SEQ_LZMA_PREPARE;
+		}
+
+		/*
+		 * xz_dec_microlzma_reset() doesn't validate the compressed
+		 * size so we do it here. We have to limit the maximum size
+		 * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice
+		 * round number and much more than users of this code should
+		 * ever need.
+		 */
+		if (s->lzma2.compressed < RC_INIT_BYTES
+				|| s->lzma2.compressed > (3U << 30))
+			return XZ_DATA_ERROR;
+
+		if (!rc_read_init(&s->rc, b))
+			return XZ_OK;
+
+		s->lzma2.compressed -= RC_INIT_BYTES;
+		s->lzma2.sequence = SEQ_LZMA_RUN;
+
+		dict_reset(&s->dict, b);
+	}
+
+	/* This is to allow increasing b->out_size between calls. */
+	if (DEC_IS_SINGLE(s->dict.mode))
+		s->dict.end = b->out_size - b->out_pos;
+
+	while (true) {
+		dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos,
+					   s->lzma2.uncompressed));
+
+		if (!lzma2_lzma(s, b))
+			return XZ_DATA_ERROR;
+
+		s->lzma2.uncompressed -= dict_flush(&s->dict, b);
+
+		if (s->lzma2.uncompressed == 0) {
+			if (s->lzma2.pedantic_microlzma) {
+				if (s->lzma2.compressed > 0 || s->lzma.len > 0
+						|| !rc_is_finished(&s->rc))
+					return XZ_DATA_ERROR;
+			}
+
+			return XZ_STREAM_END;
+		}
+
+		if (b->out_pos == b->out_size)
+			return XZ_OK;
+
+		if (b->in_pos == b->in_size
+				&& s->temp.size < s->lzma2.compressed)
+			return XZ_OK;
+	}
+}
+
+struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode,
+						uint32_t dict_size)
+{
+	struct xz_dec_microlzma *s;
+
+	/* Restrict dict_size to the same range as in the LZMA2 code. */
+	if (dict_size < 4096 || dict_size > (3U << 30))
+		return NULL;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (s == NULL)
+		return NULL;
+
+	s->s.dict.mode = mode;
+	s->s.dict.size = dict_size;
+
+	if (DEC_IS_MULTI(mode)) {
+		s->s.dict.end = dict_size;
+
+		s->s.dict.buf = vmalloc(dict_size);
+		if (s->s.dict.buf == NULL) {
+			kfree(s);
+			return NULL;
+		}
+	}
+
+	return s;
+}
+
+void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size,
+			    uint32_t uncomp_size, int uncomp_size_is_exact)
+{
+	/*
+	 * comp_size is validated in xz_dec_microlzma_run().
+	 * uncomp_size can safely be anything.
+	 */
+	s->s.lzma2.compressed = comp_size;
+	s->s.lzma2.uncompressed = uncomp_size;
+	s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact;
+
+	s->s.lzma2.sequence = SEQ_PROPERTIES;
+	s->s.temp.size = 0;
+}
+
+void xz_dec_microlzma_end(struct xz_dec_microlzma *s)
+{
+	if (DEC_IS_MULTI(s->s.dict.mode))
+		vfree(s->s.dict.buf);
+
+	kfree(s);
+}
+#endif
diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c
index 32eb3c03aede..61098c67a413 100644
--- a/lib/xz/xz_dec_syms.c
+++ b/lib/xz/xz_dec_syms.c
@@ -15,8 +15,15 @@ EXPORT_SYMBOL(xz_dec_reset);
 EXPORT_SYMBOL(xz_dec_run);
 EXPORT_SYMBOL(xz_dec_end);
 
+#ifdef CONFIG_XZ_DEC_MICROLZMA
+EXPORT_SYMBOL(xz_dec_microlzma_alloc);
+EXPORT_SYMBOL(xz_dec_microlzma_reset);
+EXPORT_SYMBOL(xz_dec_microlzma_run);
+EXPORT_SYMBOL(xz_dec_microlzma_end);
+#endif
+
 MODULE_DESCRIPTION("XZ decompressor");
-MODULE_VERSION("1.0");
+MODULE_VERSION("1.1");
 MODULE_AUTHOR("Lasse Collin <lasse.collin@tukaani.org> and Igor Pavlov");
 
 /*
diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h
index 09360ebb510e..bf1e94ec7873 100644
--- a/lib/xz/xz_private.h
+++ b/lib/xz/xz_private.h
@@ -37,6 +37,9 @@
 #		ifdef CONFIG_XZ_DEC_SPARC
 #			define XZ_DEC_SPARC
 #		endif
+#		ifdef CONFIG_XZ_DEC_MICROLZMA
+#			define XZ_DEC_MICROLZMA
+#		endif
 #		define memeq(a, b, size) (memcmp(a, b, size) == 0)
 #		define memzero(buf, size) memset(buf, 0, size)
 #	endif
-- 
cgit v1.2.3


From e70feb8b3e6886c525c88943b5f1508d02f5a683 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Thu, 14 Oct 2021 16:17:10 +0800
Subject: blk-mq: support concurrent queue quiesce/unquiesce

blk_mq_quiesce_queue() has been used a bit wide now, so far we don't support
concurrent/nested quiesce. One biggest issue is that unquiesce can happen
unexpectedly in case that quiesce/unquiesce are run concurrently from
more than one context.

This patch introduces q->mq_quiesce_depth to deal concurrent quiesce,
and we only unquiesce queue when it is the last/outer-most one of all
contexts.

Several kernel panic issue has been reported[1][2][3] when running stress
quiesce test. And this patch has been verified in these reports.

[1] https://lore.kernel.org/linux-block/9b21c797-e505-3821-4f5b-df7bf9380328@huawei.com/T/#m1fc52431fad7f33b1ffc3f12c4450e4238540787
[2] https://lore.kernel.org/linux-block/9b21c797-e505-3821-4f5b-df7bf9380328@huawei.com/T/#m10ad90afeb9c8cc318334190a7c24c8b5c5e0722
[3] https://listman.redhat.com/archives/dm-devel/2021-September/msg00189.html

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211014081710.1871747-7-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 22 +++++++++++++++++++---
 include/linux/blkdev.h |  2 ++
 2 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index bf5936d72de8..31d9e612d236 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -241,7 +241,12 @@ EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
  */
 void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 {
-	blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	unsigned long flags;
+
+	spin_lock_irqsave(&q->queue_lock, flags);
+	if (!q->quiesce_depth++)
+		blk_queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+	spin_unlock_irqrestore(&q->queue_lock, flags);
 }
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
 
@@ -282,10 +287,21 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
  */
 void blk_mq_unquiesce_queue(struct request_queue *q)
 {
-	blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+	unsigned long flags;
+	bool run_queue = false;
+
+	spin_lock_irqsave(&q->queue_lock, flags);
+	if (WARN_ON_ONCE(q->quiesce_depth <= 0)) {
+		;
+	} else if (!--q->quiesce_depth) {
+		blk_queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+		run_queue = true;
+	}
+	spin_unlock_irqrestore(&q->queue_lock, flags);
 
 	/* dispatch requests which are inserted during quiescing */
-	blk_mq_run_hw_queues(q, true);
+	if (run_queue)
+		blk_mq_run_hw_queues(q, true);
 }
 EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f13091d3d476..2b22fa36e568 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -315,6 +315,8 @@ struct request_queue {
 	 */
 	struct mutex		mq_freeze_lock;
 
+	int			quiesce_depth;
+
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
 	struct bio_set		bio_split;
-- 
cgit v1.2.3


From 34cdd18b8d245f3e901e5325313c27de727ab80d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 17 Jun 2020 16:56:16 -0400
Subject: tracing: Use linker magic instead of recasting ftrace_ops_list_func()

In an effort to enable -Wcast-function-type in the top-level Makefile to
support Control Flow Integrity builds, all function casts need to be
removed.

This means that ftrace_ops_list_func() can no longer be defined as
ftrace_ops_no_ops(). The reason for ftrace_ops_no_ops() is to use that when
an architecture calls ftrace_ops_list_func() with only two parameters
(called from assembly). And to make sure there's no C side-effects, those
archs call ftrace_ops_no_ops() which only has two parameters, as
ftrace_ops_list_func() has four parameters.

Instead of a typecast, use vmlinux.lds.h to define ftrace_ops_list_func() to
arch_ftrace_ops_list_func() that will define the proper set of parameters.

Link: https://lore.kernel.org/r/20200614070154.6039-1-oscar.carter@gmx.com
Link: https://lkml.kernel.org/r/20200617165616.52241bde@oasis.local.home
Link: https://lore.kernel.org/all/20211005053922.GA702049@embeddedor/

Requested-by: Oscar Carter <oscar.carter@gmx.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/asm-generic/vmlinux.lds.h | 10 ++++++++--
 include/linux/ftrace.h            | 12 ++++++++++--
 kernel/trace/ftrace.c             | 23 ++++++++++-------------
 3 files changed, 28 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index f2984af2b85b..8771c435f34b 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -164,16 +164,22 @@
  * Need to also make ftrace_stub_graph point to ftrace_stub
  * so that the same stub location may have different protocols
  * and not mess up with C verifiers.
+ *
+ * ftrace_ops_list_func will be defined as arch_ftrace_ops_list_func
+ * as some archs will have a different prototype for that function
+ * but ftrace_ops_list_func() will have a single prototype.
  */
 #define MCOUNT_REC()	. = ALIGN(8);				\
 			__start_mcount_loc = .;			\
 			KEEP(*(__mcount_loc))			\
 			KEEP(*(__patchable_function_entries))	\
 			__stop_mcount_loc = .;			\
-			ftrace_stub_graph = ftrace_stub;
+			ftrace_stub_graph = ftrace_stub;	\
+			ftrace_ops_list_func = arch_ftrace_ops_list_func;
 #else
 # ifdef CONFIG_FUNCTION_TRACER
-#  define MCOUNT_REC()	ftrace_stub_graph = ftrace_stub;
+#  define MCOUNT_REC()	ftrace_stub_graph = ftrace_stub;	\
+			ftrace_ops_list_func = arch_ftrace_ops_list_func;
 # else
 #  define MCOUNT_REC()
 # endif
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 832e65f06754..12fcfa2d23ea 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -30,16 +30,26 @@
 #define ARCH_SUPPORTS_FTRACE_OPS 0
 #endif
 
+#ifdef CONFIG_FUNCTION_TRACER
+struct ftrace_ops;
+struct ftrace_regs;
 /*
  * If the arch's mcount caller does not support all of ftrace's
  * features, then it must call an indirect function that
  * does. Or at least does enough to prevent any unwelcome side effects.
+ *
+ * Also define the function prototype that these architectures use
+ * to call the ftrace_ops_list_func().
  */
 #if !ARCH_SUPPORTS_FTRACE_OPS
 # define FTRACE_FORCE_LIST_FUNC 1
+void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
 #else
 # define FTRACE_FORCE_LIST_FUNC 0
+void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+			       struct ftrace_ops *op, struct ftrace_regs *fregs);
 #endif
+#endif /* CONFIG_FUNCTION_TRACER */
 
 /* Main tracing buffer and events set up */
 #ifdef CONFIG_TRACING
@@ -88,8 +98,6 @@ extern int
 ftrace_enable_sysctl(struct ctl_table *table, int write,
 		     void *buffer, size_t *lenp, loff_t *ppos);
 
-struct ftrace_ops;
-
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
 struct ftrace_regs {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2c3e9760df7f..8b5801881271 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -119,14 +119,9 @@ struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 struct ftrace_ops global_ops;
 
-#if ARCH_SUPPORTS_FTRACE_OPS
-static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
-				 struct ftrace_ops *op, struct ftrace_regs *fregs);
-#else
-/* See comment below, where ftrace_ops_list_func is defined */
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
-#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
-#endif
+/* Defined by vmlinux.lds.h see the commment above arch_ftrace_ops_list_func for details */
+void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+			  struct ftrace_ops *op, struct ftrace_regs *fregs);
 
 static inline void ftrace_ops_init(struct ftrace_ops *ops)
 {
@@ -7032,21 +7027,23 @@ out:
  * Note, CONFIG_DYNAMIC_FTRACE_WITH_REGS expects a full regs to be saved.
  * An architecture can pass partial regs with ftrace_ops and still
  * set the ARCH_SUPPORTS_FTRACE_OPS.
+ *
+ * In vmlinux.lds.h, ftrace_ops_list_func() is defined to be
+ * arch_ftrace_ops_list_func.
  */
 #if ARCH_SUPPORTS_FTRACE_OPS
-static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
-				 struct ftrace_ops *op, struct ftrace_regs *fregs)
+void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+			       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
 	__ftrace_ops_list_func(ip, parent_ip, NULL, fregs);
 }
-NOKPROBE_SYMBOL(ftrace_ops_list_func);
 #else
-static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
 {
 	__ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
 }
-NOKPROBE_SYMBOL(ftrace_ops_no_ops);
 #endif
+NOKPROBE_SYMBOL(arch_ftrace_ops_list_func);
 
 /*
  * If there's only one function registered but it does not support
-- 
cgit v1.2.3


From 9b84fadc444de5456ab5f5487e2108311c724c3f Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 15 Oct 2021 13:42:40 -0400
Subject: tracing: Reuse logic from perf's get_recursion_context()

Instead of having branches that adds noise to the branch prediction, use
the addition logic to set the bit for the level of interrupt context that
the state is currently in. This copies the logic from perf's
get_recursion_context() function.

Link: https://lore.kernel.org/all/20211015161702.GF174703@worktop.programming.kicks-ass.net/

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_recursion.h | 11 ++++++-----
 kernel/trace/ring_buffer.c      | 12 ++++++------
 2 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index a9f9c5714e65..f6da7a03bff0 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -137,12 +137,13 @@ enum {
 static __always_inline int trace_get_context_bit(void)
 {
 	unsigned long pc = preempt_count();
+	unsigned char bit = 0;
 
-	if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
-		return TRACE_CTX_NORMAL;
-	else
-		return pc & NMI_MASK ? TRACE_CTX_NMI :
-			pc & HARDIRQ_MASK ? TRACE_CTX_IRQ : TRACE_CTX_SOFTIRQ;
+	bit += !!(pc & (NMI_MASK));
+	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+	return TRACE_CTX_NORMAL - bit;
 }
 
 #ifdef CONFIG_FTRACE_RECORD_RECURSION
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c5a3fbf19617..15d4380006e3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3168,13 +3168,13 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	unsigned int val = cpu_buffer->current_context;
 	unsigned long pc = preempt_count();
-	int bit;
+	int bit = 0;
 
-	if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
-		bit = RB_CTX_NORMAL;
-	else
-		bit = pc & NMI_MASK ? RB_CTX_NMI :
-			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
+	bit += !!(pc & (NMI_MASK));
+	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+	bit = RB_CTX_NORMAL - bit;
 
 	if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
 		/*
-- 
cgit v1.2.3


From 91ebe8bcbff9d2ff21303e73bf7434f39a98b255 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 15 Oct 2021 15:01:19 -0400
Subject: tracing/perf: Add interrupt_context_level() helper

Now that there are three different instances of doing the addition trick
to the preempt_count() and NMI_MASK, HARDIRQ_MASK and SOFTIRQ_OFFSET
macros, it deserves a helper function defined in the preempt.h header.

Add the interrupt_context_level() helper and replace the three instances
that do that logic with it.

Link: https://lore.kernel.org/all/20211015142541.4badd8a9@gandalf.local.home/

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/preempt.h         | 21 +++++++++++++++++++++
 include/linux/trace_recursion.h |  7 +------
 kernel/events/internal.h        |  7 +------
 kernel/trace/ring_buffer.c      |  7 +------
 4 files changed, 24 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 4d244e295e85..b32e3dabe28b 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -77,6 +77,27 @@
 /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */
 #include <asm/preempt.h>
 
+/**
+ * interrupt_context_level - return interrupt context level
+ *
+ * Returns the current interrupt context level.
+ *  0 - normal context
+ *  1 - softirq context
+ *  2 - hardirq context
+ *  3 - NMI context
+ */
+static __always_inline unsigned char interrupt_context_level(void)
+{
+	unsigned long pc = preempt_count();
+	unsigned char level = 0;
+
+	level += !!(pc & (NMI_MASK));
+	level += !!(pc & (NMI_MASK | HARDIRQ_MASK));
+	level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+
+	return level;
+}
+
 #define nmi_count()	(preempt_count() & NMI_MASK)
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #ifdef CONFIG_PREEMPT_RT
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index f6da7a03bff0..1d8cce02c3fb 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -136,12 +136,7 @@ enum {
 
 static __always_inline int trace_get_context_bit(void)
 {
-	unsigned long pc = preempt_count();
-	unsigned char bit = 0;
-
-	bit += !!(pc & (NMI_MASK));
-	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
-	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+	unsigned char bit = interrupt_context_level();
 
 	return TRACE_CTX_NORMAL - bit;
 }
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 228801e20788..082832738c8f 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -205,12 +205,7 @@ DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
 
 static inline int get_recursion_context(int *recursion)
 {
-	unsigned int pc = preempt_count();
-	unsigned char rctx = 0;
-
-	rctx += !!(pc & (NMI_MASK));
-	rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK));
-	rctx += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+	unsigned char rctx = interrupt_context_level();
 
 	if (recursion[rctx])
 		return -1;
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 15d4380006e3..f6520d0a4c8c 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3167,12 +3167,7 @@ static __always_inline int
 trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	unsigned int val = cpu_buffer->current_context;
-	unsigned long pc = preempt_count();
-	int bit = 0;
-
-	bit += !!(pc & (NMI_MASK));
-	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK));
-	bit += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET));
+	int bit = interrupt_context_level();
 
 	bit = RB_CTX_NORMAL - bit;
 
-- 
cgit v1.2.3


From 15bf32398ad488c0df1cbaf16431422c87e4feea Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Tue, 12 Oct 2021 09:23:07 -0400
Subject: security: Return xattr name from security_dentry_init_security()

Right now security_dentry_init_security() only supports single security
label and is used by SELinux only. There are two users of this hook,
namely ceph and nfs.

NFS does not care about xattr name. Ceph hardcodes the xattr name to
security.selinux (XATTR_NAME_SELINUX).

I am making changes to fuse/virtiofs to send security label to virtiofsd
and I need to send xattr name as well. I also hardcoded the name of
xattr to security.selinux.

Stephen Smalley suggested that it probably is a good idea to modify
security_dentry_init_security() to also return name of xattr so that
we can avoid this hardcoding in the callers.

This patch adds a new parameter "const char **xattr_name" to
security_dentry_init_security() and LSM puts the name of xattr
too if caller asked for it (xattr_name != NULL).

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Christian Brauner <christian.brauner@ubuntu.com>
Acked-by: James Morris <jamorris@linux.microsoft.com>
[PM: fixed typos in the commit description]
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 fs/ceph/xattr.c               | 3 +--
 fs/nfs/nfs4proc.c             | 3 ++-
 include/linux/lsm_hook_defs.h | 3 ++-
 include/linux/lsm_hooks.h     | 3 +++
 include/linux/security.h      | 6 ++++--
 security/security.c           | 7 ++++---
 security/selinux/hooks.c      | 6 +++++-
 7 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 159a1ffa4f4b..fcf7dfdecf96 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1311,7 +1311,7 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
 	int err;
 
 	err = security_dentry_init_security(dentry, mode, &dentry->d_name,
-					    &as_ctx->sec_ctx,
+					    &name, &as_ctx->sec_ctx,
 					    &as_ctx->sec_ctxlen);
 	if (err < 0) {
 		WARN_ON_ONCE(err != -EOPNOTSUPP);
@@ -1335,7 +1335,6 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
 	 * It only supports single security module and only selinux has
 	 * dentry_init_security hook.
 	 */
-	name = XATTR_NAME_SELINUX;
 	name_len = strlen(name);
 	err = ceph_pagelist_reserve(pagelist,
 				    4 * 2 + name_len + as_ctx->sec_ctxlen);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e1214bb6b7ee..459860aa8fd7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -127,7 +127,8 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry,
 		return NULL;
 
 	err = security_dentry_init_security(dentry, sattr->ia_mode,
-				&dentry->d_name, (void **)&label->label, &label->len);
+				&dentry->d_name, NULL,
+				(void **)&label->label, &label->len);
 	if (err == 0)
 		return label;
 
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 4c7ed0268ce3..a9ac70ae01ab 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -83,7 +83,8 @@ LSM_HOOK(int, 0, sb_add_mnt_opt, const char *option, const char *val,
 LSM_HOOK(int, 0, move_mount, const struct path *from_path,
 	 const struct path *to_path)
 LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry,
-	 int mode, const struct qstr *name, void **ctx, u32 *ctxlen)
+	 int mode, const struct qstr *name, const char **xattr_name,
+	 void **ctx, u32 *ctxlen)
 LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode,
 	 struct qstr *name, const struct cred *old, struct cred *new)
 
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 528554e9b90c..0bada4df23fc 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -196,6 +196,9 @@
  *	@dentry dentry to use in calculating the context.
  *	@mode mode used to determine resource type.
  *	@name name of the last path component used to create file
+ *	@xattr_name pointer to place the pointer to security xattr name.
+ *		    Caller does not have to free the resulting pointer. Its
+ *		    a pointer to static string.
  *	@ctx pointer to place the pointer to the resulting context in.
  *	@ctxlen point to place the length of the resulting context.
  * @dentry_create_files_as:
diff --git a/include/linux/security.h b/include/linux/security.h
index cc6d39358336..7e0ba63b5dde 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -317,8 +317,9 @@ int security_add_mnt_opt(const char *option, const char *val,
 				int len, void **mnt_opts);
 int security_move_mount(const struct path *from_path, const struct path *to_path);
 int security_dentry_init_security(struct dentry *dentry, int mode,
-					const struct qstr *name, void **ctx,
-					u32 *ctxlen);
+				  const struct qstr *name,
+				  const char **xattr_name, void **ctx,
+				  u32 *ctxlen);
 int security_dentry_create_files_as(struct dentry *dentry, int mode,
 					struct qstr *name,
 					const struct cred *old,
@@ -739,6 +740,7 @@ static inline void security_inode_free(struct inode *inode)
 static inline int security_dentry_init_security(struct dentry *dentry,
 						 int mode,
 						 const struct qstr *name,
+						 const char **xattr_name,
 						 void **ctx,
 						 u32 *ctxlen)
 {
diff --git a/security/security.c b/security/security.c
index d9d53c1e466a..95e30fadba78 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1052,11 +1052,12 @@ void security_inode_free(struct inode *inode)
 }
 
 int security_dentry_init_security(struct dentry *dentry, int mode,
-					const struct qstr *name, void **ctx,
-					u32 *ctxlen)
+				  const struct qstr *name,
+				  const char **xattr_name, void **ctx,
+				  u32 *ctxlen)
 {
 	return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode,
-				name, ctx, ctxlen);
+				name, xattr_name, ctx, ctxlen);
 }
 EXPORT_SYMBOL(security_dentry_init_security);
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6f08cd2fc6a8..1af2fbc08588 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2927,7 +2927,8 @@ static void selinux_inode_free_security(struct inode *inode)
 }
 
 static int selinux_dentry_init_security(struct dentry *dentry, int mode,
-					const struct qstr *name, void **ctx,
+					const struct qstr *name,
+					const char **xattr_name, void **ctx,
 					u32 *ctxlen)
 {
 	u32 newsid;
@@ -2940,6 +2941,9 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode,
 	if (rc)
 		return rc;
 
+	if (xattr_name)
+		*xattr_name = XATTR_NAME_SELINUX;
+
 	return security_sid_to_context(&selinux_state, newsid, (char **)ctx,
 				       ctxlen);
 }
-- 
cgit v1.2.3


From cf6d6238cdd319eca404756dee05bf55a748b6a9 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 19 Oct 2021 22:24:10 +0100
Subject: block: turn macro helpers into inline functions

Replace bio_set_dev() with an identical inline helper and move it
further to fix a dependency problem with bio_associate_blkg(). Do the
same for bio_copy_dev().

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9538f20ffaa5..b12453d7b8a8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -430,22 +430,6 @@ void zero_fill_bio(struct bio *bio);
 
 extern const char *bio_devname(struct bio *bio, char *buffer);
 
-#define bio_set_dev(bio, bdev) 				\
-do {							\
-	bio_clear_flag(bio, BIO_REMAPPED);		\
-	if ((bio)->bi_bdev != (bdev))			\
-		bio_clear_flag(bio, BIO_THROTTLED);	\
-	(bio)->bi_bdev = (bdev);			\
-	bio_associate_blkg(bio);			\
-} while (0)
-
-#define bio_copy_dev(dst, src)			\
-do {						\
-	bio_clear_flag(dst, BIO_REMAPPED);		\
-	(dst)->bi_bdev = (src)->bi_bdev;	\
-	bio_clone_blkg_association(dst, src);	\
-} while (0)
-
 #define bio_dev(bio) \
 	disk_devt((bio)->bi_bdev->bd_disk)
 
@@ -463,6 +447,22 @@ static inline void bio_clone_blkg_association(struct bio *dst,
 					      struct bio *src) { }
 #endif	/* CONFIG_BLK_CGROUP */
 
+static inline void bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+	bio_clear_flag(bio, BIO_REMAPPED);
+	if (bio->bi_bdev != bdev)
+		bio_clear_flag(bio, BIO_THROTTLED);
+	bio->bi_bdev = bdev;
+	bio_associate_blkg(bio);
+}
+
+static inline void bio_copy_dev(struct bio *dst, struct bio *src)
+{
+	bio_clear_flag(dst, BIO_REMAPPED);
+	dst->bi_bdev = src->bi_bdev;
+	bio_clone_blkg_association(dst, src);
+}
+
 /*
  * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
  *
-- 
cgit v1.2.3


From c809084ab033a8d4ee404e2ac3c5d3dc80cb65f7 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 19 Oct 2021 22:24:14 +0100
Subject: block: inline a part of bio_release_pages()

Inline BIO_NO_PAGE_REF check of bio_release_pages() to avoid function
call.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 7 ++-----
 include/linux/bio.h | 8 +++++++-
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 4f397ba47db5..46a87c72d2b4 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1033,21 +1033,18 @@ int bio_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_add_page);
 
-void bio_release_pages(struct bio *bio, bool mark_dirty)
+void __bio_release_pages(struct bio *bio, bool mark_dirty)
 {
 	struct bvec_iter_all iter_all;
 	struct bio_vec *bvec;
 
-	if (bio_flagged(bio, BIO_NO_PAGE_REF))
-		return;
-
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		if (mark_dirty && !PageCompound(bvec->bv_page))
 			set_page_dirty_lock(bvec->bv_page);
 		put_page(bvec->bv_page);
 	}
 }
-EXPORT_SYMBOL_GPL(bio_release_pages);
+EXPORT_SYMBOL_GPL(__bio_release_pages);
 
 static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b12453d7b8a8..c88700d1bdc3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -417,7 +417,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
 void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
-void bio_release_pages(struct bio *bio, bool mark_dirty);
+void __bio_release_pages(struct bio *bio, bool mark_dirty);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
@@ -428,6 +428,12 @@ extern void bio_free_pages(struct bio *bio);
 void guard_bio_eod(struct bio *bio);
 void zero_fill_bio(struct bio *bio);
 
+static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
+{
+	if (!bio_flagged(bio, BIO_NO_PAGE_REF))
+		__bio_release_pages(bio, mark_dirty);
+}
+
 extern const char *bio_devname(struct bio *bio, char *buffer);
 
 #define bio_dev(bio) \
-- 
cgit v1.2.3


From dbb6f764a079d1dea883c6f2439d91db4f0fb2f2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 20 Oct 2021 16:41:17 +0200
Subject: blk-mq: move blk_mq_flush_plug_list to block/blk-mq.h

This helper is internal to the block layer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211020144119.142582-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.h         | 1 +
 include/linux/blk-mq.h | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.h b/block/blk-mq.h
index d8ccb341e82e..08fb5922e611 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -122,6 +122,7 @@ extern int blk_mq_sysfs_register(struct request_queue *q);
 extern void blk_mq_sysfs_unregister(struct request_queue *q);
 extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx);
 void blk_mq_free_plug_rqs(struct blk_plug *plug);
+void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 
 void blk_mq_release(struct request_queue *q);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 6cf35de151a9..e13780236550 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -656,8 +656,6 @@ int blk_mq_alloc_sq_tag_set(struct blk_mq_tag_set *set,
 		unsigned int set_flags);
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
-void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
-
 void blk_mq_free_request(struct request *rq);
 
 bool blk_mq_queue_inflight(struct request_queue *q);
-- 
cgit v1.2.3


From 008f75a20e7072d0840ec323c39b42206f3fa8a0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 20 Oct 2021 16:41:19 +0200
Subject: block: cleanup the flush plug helpers

Consolidate the various helpers into a single blk_flush_plug helper that
takes a plk_plug and the from_scheduler bool and switch all callsites to
call it directly.  Checks that the plug is non-NULL must be performed by
the caller, something that most already do anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211020144119.142582-5-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 13 ++++++-------
 fs/fs-writeback.c      |  5 +++--
 include/linux/blkdev.h | 29 ++++-------------------------
 kernel/sched/core.c    |  5 +++--
 4 files changed, 16 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index db8b2fe0ceaf..dfa199312c2f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1089,7 +1089,7 @@ int bio_poll(struct bio *bio, struct io_comp_batch *iob, unsigned int flags)
 		return 0;
 
 	if (current->plug)
-		blk_flush_plug_list(current->plug, false);
+		blk_flush_plug(current->plug, false);
 
 	if (blk_queue_enter(q, BLK_MQ_REQ_NOWAIT))
 		return 0;
@@ -1637,7 +1637,7 @@ struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug, void *data,
 }
 EXPORT_SYMBOL(blk_check_plugged);
 
-void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
+void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 {
 	if (!list_empty(&plug->cb_list))
 		flush_plug_callbacks(plug, from_schedule);
@@ -1659,11 +1659,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
  */
 void blk_finish_plug(struct blk_plug *plug)
 {
-	if (plug != current->plug)
-		return;
-	blk_flush_plug_list(plug, false);
-
-	current->plug = NULL;
+	if (plug == current->plug) {
+		blk_flush_plug(plug, false);
+		current->plug = NULL;
+	}
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 81ec192ce067..4124a89a1a5d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1893,7 +1893,8 @@ static long writeback_sb_inodes(struct super_block *sb,
 			 * unplug, so get our IOs out the door before we
 			 * give up the CPU.
 			 */
-			blk_flush_plug(current);
+			if (current->plug)
+				blk_flush_plug(current->plug, false);
 			cond_resched();
 		}
 
@@ -2291,7 +2292,7 @@ void wakeup_flusher_threads(enum wb_reason reason)
 	 * If we are expecting writeback progress we must submit plugged IO.
 	 */
 	if (blk_needs_flush_plug(current))
-		blk_schedule_flush_plug(current);
+		blk_flush_plug(current->plug, true);
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 2b22fa36e568..c7b1e9355123 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -725,9 +725,8 @@ extern void blk_set_queue_dying(struct request_queue *);
  * as the lock contention for request_queue lock is reduced.
  *
  * It is ok not to disable preemption when adding the request to the plug list
- * or when attempting a merge, because blk_schedule_flush_list() will only flush
- * the plug list when the task sleeps by itself. For details, please see
- * schedule() where blk_schedule_flush_plug() is called.
+ * or when attempting a merge. For details, please see schedule() where
+ * blk_flush_plug() is called.
  */
 struct blk_plug {
 	struct request *mq_list; /* blk-mq requests */
@@ -757,23 +756,8 @@ extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
 extern void blk_start_plug(struct blk_plug *);
 extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
 extern void blk_finish_plug(struct blk_plug *);
-extern void blk_flush_plug_list(struct blk_plug *, bool);
 
-static inline void blk_flush_plug(struct task_struct *tsk)
-{
-	struct blk_plug *plug = tsk->plug;
-
-	if (plug)
-		blk_flush_plug_list(plug, false);
-}
-
-static inline void blk_schedule_flush_plug(struct task_struct *tsk)
-{
-	struct blk_plug *plug = tsk->plug;
-
-	if (plug)
-		blk_flush_plug_list(plug, true);
-}
+void blk_flush_plug(struct blk_plug *plug, bool from_schedule);
 
 static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
@@ -802,15 +786,10 @@ static inline void blk_finish_plug(struct blk_plug *plug)
 {
 }
 
-static inline void blk_flush_plug(struct task_struct *task)
-{
-}
-
-static inline void blk_schedule_flush_plug(struct task_struct *task)
+static inline void blk_flush_plug(struct blk_plug *plug, bool async)
 {
 }
 
-
 static inline bool blk_needs_flush_plug(struct task_struct *tsk)
 {
 	return false;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 92ef7b68198c..34f37502c27e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6343,7 +6343,7 @@ static inline void sched_submit_work(struct task_struct *tsk)
 	 * make sure to submit it to avoid deadlocks.
 	 */
 	if (blk_needs_flush_plug(tsk))
-		blk_schedule_flush_plug(tsk);
+		blk_flush_plug(tsk->plug, true);
 }
 
 static void sched_update_worker(struct task_struct *tsk)
@@ -8354,7 +8354,8 @@ int io_schedule_prepare(void)
 	int old_iowait = current->in_iowait;
 
 	current->in_iowait = 1;
-	blk_schedule_flush_plug(current);
+	if (current->plug)
+		blk_flush_plug(current->plug, true);
 
 	return old_iowait;
 }
-- 
cgit v1.2.3


From 55df0933be74bd2e52aba0b67eb743ae0feabe7e Mon Sep 17 00:00:00 2001
From: Imran Khan <imran.f.khan@oracle.com>
Date: Wed, 20 Oct 2021 14:09:00 +1100
Subject: workqueue: Introduce show_one_worker_pool and show_one_workqueue.

Currently show_workqueue_state shows the state of all workqueues and of
all worker pools. In certain cases we may need to dump state of only a
specific workqueue or worker pool. For example in destroy_workqueue we
only need to show state of the workqueue which is getting destroyed.

So rename show_workqueue_state to show_all_workqueues(to signify it
dumps state of all busy workqueues) and divide it into more granular
functions (show_one_workqueue and show_one_worker_pool), that would show
states of individual workqueues and worker pools and can be used in
cases such as the one mentioned above.

Also, as mentioned earlier, make destroy_workqueue dump data pertaining
to only the workqueue that is being destroyed and make user(s) of
earlier interface(show_workqueue_state), use new interface
(show_all_workqueues).

Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 drivers/tty/sysrq.c       |   2 +-
 include/linux/workqueue.h |   3 +-
 kernel/power/process.c    |   2 +-
 kernel/workqueue.c        | 172 ++++++++++++++++++++++++++--------------------
 4 files changed, 100 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index c911196ac893..8d0f07509ca7 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -296,7 +296,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {
 static void sysrq_handle_showstate(int key)
 {
 	show_state();
-	show_workqueue_state();
+	show_all_workqueues();
 }
 static const struct sysrq_key_op sysrq_showstate_op = {
 	.handler	= sysrq_handle_showstate,
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 74d3c1efd9bb..7fee9b6cfede 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -469,7 +469,8 @@ extern bool workqueue_congested(int cpu, struct workqueue_struct *wq);
 extern unsigned int work_busy(struct work_struct *work);
 extern __printf(1, 2) void set_worker_desc(const char *fmt, ...);
 extern void print_worker_info(const char *log_lvl, struct task_struct *task);
-extern void show_workqueue_state(void);
+extern void show_all_workqueues(void);
+extern void show_one_workqueue(struct workqueue_struct *wq);
 extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);
 
 /**
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 37401c99b7d7..b7e7798637b8 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -94,7 +94,7 @@ static int try_to_freeze_tasks(bool user_only)
 		       todo - wq_busy, wq_busy);
 
 		if (wq_busy)
-			show_workqueue_state();
+			show_all_workqueues();
 
 		if (!wakeup || pm_debug_messages_on) {
 			read_lock(&tasklist_lock);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 76988f39ed5a..1a7df882f55e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -375,6 +375,7 @@ EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 static int worker_thread(void *__worker);
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
 static void show_pwq(struct pool_workqueue *pwq);
+static void show_one_worker_pool(struct worker_pool *pool);
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/workqueue.h>
@@ -4447,7 +4448,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
 			raw_spin_unlock_irq(&pwq->pool->lock);
 			mutex_unlock(&wq->mutex);
 			mutex_unlock(&wq_pool_mutex);
-			show_workqueue_state();
+			show_one_workqueue(wq);
 			return;
 		}
 		raw_spin_unlock_irq(&pwq->pool->lock);
@@ -4797,97 +4798,116 @@ static void show_pwq(struct pool_workqueue *pwq)
 }
 
 /**
- * show_workqueue_state - dump workqueue state
- *
- * Called from a sysrq handler or try_to_freeze_tasks() and prints out
- * all busy workqueues and pools.
+ * show_one_workqueue - dump state of specified workqueue
+ * @wq: workqueue whose state will be printed
  */
-void show_workqueue_state(void)
+void show_one_workqueue(struct workqueue_struct *wq)
 {
-	struct workqueue_struct *wq;
-	struct worker_pool *pool;
+	struct pool_workqueue *pwq;
+	bool idle = true;
 	unsigned long flags;
-	int pi;
-
-	rcu_read_lock();
 
-	pr_info("Showing busy workqueues and worker pools:\n");
-
-	list_for_each_entry_rcu(wq, &workqueues, list) {
-		struct pool_workqueue *pwq;
-		bool idle = true;
-
-		for_each_pwq(pwq, wq) {
-			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
-				idle = false;
-				break;
-			}
+	for_each_pwq(pwq, wq) {
+		if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
+			idle = false;
+			break;
 		}
-		if (idle)
-			continue;
+	}
+	if (idle) /* Nothing to print for idle workqueue */
+		return;
 
-		pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
+	pr_info("workqueue %s: flags=0x%x\n", wq->name, wq->flags);
 
-		for_each_pwq(pwq, wq) {
-			raw_spin_lock_irqsave(&pwq->pool->lock, flags);
-			if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
-				/*
-				 * Defer printing to avoid deadlocks in console
-				 * drivers that queue work while holding locks
-				 * also taken in their write paths.
-				 */
-				printk_deferred_enter();
-				show_pwq(pwq);
-				printk_deferred_exit();
-			}
-			raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
+	for_each_pwq(pwq, wq) {
+		raw_spin_lock_irqsave(&pwq->pool->lock, flags);
+		if (pwq->nr_active || !list_empty(&pwq->inactive_works)) {
 			/*
-			 * We could be printing a lot from atomic context, e.g.
-			 * sysrq-t -> show_workqueue_state(). Avoid triggering
-			 * hard lockup.
+			 * Defer printing to avoid deadlocks in console
+			 * drivers that queue work while holding locks
+			 * also taken in their write paths.
 			 */
-			touch_nmi_watchdog();
-		}
-	}
-
-	for_each_pool(pool, pi) {
-		struct worker *worker;
-		bool first = true;
-
-		raw_spin_lock_irqsave(&pool->lock, flags);
-		if (pool->nr_workers == pool->nr_idle)
-			goto next_pool;
-		/*
-		 * Defer printing to avoid deadlocks in console drivers that
-		 * queue work while holding locks also taken in their write
-		 * paths.
-		 */
-		printk_deferred_enter();
-		pr_info("pool %d:", pool->id);
-		pr_cont_pool_info(pool);
-		pr_cont(" hung=%us workers=%d",
-			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
-			pool->nr_workers);
-		if (pool->manager)
-			pr_cont(" manager: %d",
-				task_pid_nr(pool->manager->task));
-		list_for_each_entry(worker, &pool->idle_list, entry) {
-			pr_cont(" %s%d", first ? "idle: " : "",
-				task_pid_nr(worker->task));
-			first = false;
+			printk_deferred_enter();
+			show_pwq(pwq);
+			printk_deferred_exit();
 		}
-		pr_cont("\n");
-		printk_deferred_exit();
-	next_pool:
-		raw_spin_unlock_irqrestore(&pool->lock, flags);
+		raw_spin_unlock_irqrestore(&pwq->pool->lock, flags);
 		/*
 		 * We could be printing a lot from atomic context, e.g.
-		 * sysrq-t -> show_workqueue_state(). Avoid triggering
+		 * sysrq-t -> show_all_workqueues(). Avoid triggering
 		 * hard lockup.
 		 */
 		touch_nmi_watchdog();
 	}
 
+}
+
+/**
+ * show_one_worker_pool - dump state of specified worker pool
+ * @pool: worker pool whose state will be printed
+ */
+static void show_one_worker_pool(struct worker_pool *pool)
+{
+	struct worker *worker;
+	bool first = true;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&pool->lock, flags);
+	if (pool->nr_workers == pool->nr_idle)
+		goto next_pool;
+	/*
+	 * Defer printing to avoid deadlocks in console drivers that
+	 * queue work while holding locks also taken in their write
+	 * paths.
+	 */
+	printk_deferred_enter();
+	pr_info("pool %d:", pool->id);
+	pr_cont_pool_info(pool);
+	pr_cont(" hung=%us workers=%d",
+		jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+		pool->nr_workers);
+	if (pool->manager)
+		pr_cont(" manager: %d",
+			task_pid_nr(pool->manager->task));
+	list_for_each_entry(worker, &pool->idle_list, entry) {
+		pr_cont(" %s%d", first ? "idle: " : "",
+			task_pid_nr(worker->task));
+		first = false;
+	}
+	pr_cont("\n");
+	printk_deferred_exit();
+next_pool:
+	raw_spin_unlock_irqrestore(&pool->lock, flags);
+	/*
+	 * We could be printing a lot from atomic context, e.g.
+	 * sysrq-t -> show_all_workqueues(). Avoid triggering
+	 * hard lockup.
+	 */
+	touch_nmi_watchdog();
+
+}
+
+/**
+ * show_all_workqueues - dump workqueue state
+ *
+ * Called from a sysrq handler or try_to_freeze_tasks() and prints out
+ * all busy workqueues and pools.
+ */
+void show_all_workqueues(void)
+{
+	struct workqueue_struct *wq;
+	struct worker_pool *pool;
+	int pi;
+
+	rcu_read_lock();
+
+	pr_info("Showing busy workqueues and worker pools:\n");
+
+	list_for_each_entry_rcu(wq, &workqueues, list)
+		show_one_workqueue(wq);
+
+	for_each_pool(pool, pi)
+		show_one_worker_pool(pool);
+
 	rcu_read_unlock();
 }
 
@@ -5876,7 +5896,7 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 	rcu_read_unlock();
 
 	if (lockup_detected)
-		show_workqueue_state();
+		show_all_workqueues();
 
 	wq_watchdog_reset_touched();
 	mod_timer(&wq_watchdog_timer, jiffies + thresh);
-- 
cgit v1.2.3


From f783484381ad088490a497abb4faab47511774c5 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:35 +0200
Subject: mfd: ti_am335x_tscadc: Use driver data

So far every sub-cell parameter in this driver was hardcoded: cell name,
cell compatible, specific clock name and desired clock frequency.

As we are about to introduce support for ADC1/magnetic reader, we need a
bit of flexibility. Let's add a driver data structure which will contain
these information.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-18-miquel.raynal@bootlin.com
---
 drivers/mfd/ti_am335x_tscadc.c       | 22 ++++++++++++++++------
 include/linux/mfd/ti_am335x_tscadc.h |  9 +++++++++
 2 files changed, 25 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index ba821109e98b..69b0eff1a55e 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -137,6 +137,8 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	tscadc->data = of_device_get_match_data(&pdev->dev);
+
 	node = of_get_child_by_name(pdev->dev.of_node, "tsc");
 	of_property_read_u32(node, "ti,wires", &tsc_wires);
 	of_property_read_u32(node, "ti,coordiante-readouts", &readouts);
@@ -212,7 +214,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 		goto err_disable_clk;
 	}
 
-	tscadc->clk_div = (clk_get_rate(clk) / ADC_CLK) - 1;
+	tscadc->clk_div = (clk_get_rate(clk) / tscadc->data->target_clk_rate) - 1;
 	regmap_write(tscadc->regmap, REG_CLKDIV, tscadc->clk_div);
 
 	/* Set the control register bits */
@@ -241,8 +243,8 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	if (tsc_wires > 0) {
 		tscadc->tsc_cell = tscadc->used_cells;
 		cell = &tscadc->cells[tscadc->used_cells++];
-		cell->name = "TI-am335x-tsc";
-		cell->of_compatible = "ti,am3359-tsc";
+		cell->name = tscadc->data->secondary_feature_name;
+		cell->of_compatible = tscadc->data->secondary_feature_compatible;
 		cell->platform_data = &tscadc;
 		cell->pdata_size = sizeof(tscadc);
 	}
@@ -251,8 +253,8 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	if (adc_channels > 0) {
 		tscadc->adc_cell = tscadc->used_cells;
 		cell = &tscadc->cells[tscadc->used_cells++];
-		cell->name = "TI-am335x-adc";
-		cell->of_compatible = "ti,am3359-adc";
+		cell->name = tscadc->data->adc_feature_name;
+		cell->of_compatible = tscadc->data->adc_feature_compatible;
 		cell->platform_data = &tscadc;
 		cell->pdata_size = sizeof(tscadc);
 	}
@@ -338,8 +340,16 @@ static int __maybe_unused tscadc_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(tscadc_pm_ops, tscadc_suspend, tscadc_resume);
 
+static const struct ti_tscadc_data tscdata = {
+	.adc_feature_name = "TI-am335x-adc",
+	.adc_feature_compatible = "ti,am3359-adc",
+	.secondary_feature_name = "TI-am335x-tsc",
+	.secondary_feature_compatible = "ti,am3359-tsc",
+	.target_clk_rate = ADC_CLK,
+};
+
 static const struct of_device_id ti_tscadc_dt_ids[] = {
-	{ .compatible = "ti,am3359-tscadc", },
+	{ .compatible = "ti,am3359-tscadc", .data = &tscdata },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, ti_tscadc_dt_ids);
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index ffc091b77633..bb3b56ade3fb 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -162,11 +162,20 @@
 
 #define TSCADC_CELLS		2
 
+struct ti_tscadc_data {
+	char *adc_feature_name;
+	char *adc_feature_compatible;
+	char *secondary_feature_name;
+	char *secondary_feature_compatible;
+	unsigned int target_clk_rate;
+};
+
 struct ti_tscadc_dev {
 	struct device *dev;
 	struct regmap *regmap;
 	void __iomem *tscadc_base;
 	phys_addr_t tscadc_phys_base;
+	const struct ti_tscadc_data *data;
 	int irq;
 	int used_cells;	/* 1-2 */
 	int tsc_wires;
-- 
cgit v1.2.3


From 7c605802f33176d872ffb6a3830ba4d88d9f21f6 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:37 +0200
Subject: mfd: ti_am335x_tscadc: Drop useless variables from the driver
 structure

Keeping the count of tsc_cells and adc_cells is completely redundant, we
can derive this information from other variables. Plus, these variables
are not used anywhere else now. Let's get rid of them.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-20-miquel.raynal@bootlin.com
---
 drivers/mfd/ti_am335x_tscadc.c       | 15 ++++-----------
 include/linux/mfd/ti_am335x_tscadc.h |  3 ---
 2 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index d9c0b16d8d0d..9ce1a3e34c1e 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -122,7 +122,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	const __be32 *cur;
 	u32 val;
 	int err, ctrl;
-	int tsc_wires = 0, adc_channels = 0, total_channels;
+	int tsc_wires = 0, adc_channels = 0, cell_idx = 0, total_channels;
 	int readouts = 0;
 
 	/* Allocate memory for device */
@@ -235,14 +235,9 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	ctrl |= CNTRLREG_TSCSSENB;
 	regmap_write(tscadc->regmap, REG_CTRL, ctrl);
 
-	tscadc->used_cells = 0;
-	tscadc->tsc_cell = -1;
-	tscadc->adc_cell = -1;
-
 	/* TSC Cell */
 	if (tsc_wires > 0) {
-		tscadc->tsc_cell = tscadc->used_cells;
-		cell = &tscadc->cells[tscadc->used_cells++];
+		cell = &tscadc->cells[cell_idx++];
 		cell->name = tscadc->data->secondary_feature_name;
 		cell->of_compatible = tscadc->data->secondary_feature_compatible;
 		cell->platform_data = &tscadc;
@@ -251,8 +246,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 
 	/* ADC Cell */
 	if (adc_channels > 0) {
-		tscadc->adc_cell = tscadc->used_cells;
-		cell = &tscadc->cells[tscadc->used_cells++];
+		cell = &tscadc->cells[cell_idx++];
 		cell->name = tscadc->data->adc_feature_name;
 		cell->of_compatible = tscadc->data->adc_feature_compatible;
 		cell->platform_data = &tscadc;
@@ -260,8 +254,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	}
 
 	err = mfd_add_devices(&pdev->dev, PLATFORM_DEVID_AUTO,
-			      tscadc->cells, tscadc->used_cells, NULL,
-			      0, NULL);
+			      tscadc->cells, cell_idx, NULL, 0, NULL);
 	if (err < 0)
 		goto err_disable_clk;
 
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index bb3b56ade3fb..23442059d271 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -177,10 +177,7 @@ struct ti_tscadc_dev {
 	phys_addr_t tscadc_phys_base;
 	const struct ti_tscadc_data *data;
 	int irq;
-	int used_cells;	/* 1-2 */
 	int tsc_wires;
-	int tsc_cell;	/* -1 if not used */
-	int adc_cell;	/* -1 if not used */
 	struct mfd_cell cells[TSCADC_CELLS];
 	u32 reg_se_cache;
 	bool adc_waiting;
-- 
cgit v1.2.3


From b813f32030e2b70adf68f73e99853f91526aa3a1 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:40 +0200
Subject: mfd: ti_am335x_tscadc: Gather the ctrl register logic in one place

Instead of deriving in the probe and in the resume path the value of the
ctrl register, let's do it only once in the probe, save the value of
this register (all but the subsystem enable bit) in the driver's
structure and use it from the resume callback.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-23-miquel.raynal@bootlin.com
---
 drivers/mfd/ti_am335x_tscadc.c       | 39 +++++++++++++-----------------------
 include/linux/mfd/ti_am335x_tscadc.h |  2 +-
 2 files changed, 15 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index 01c2b7d1c18c..df4f905a7b52 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -121,7 +121,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	struct property *prop;
 	const __be32 *cur;
 	u32 val;
-	int err, ctrl;
+	int err;
 	int tsc_wires = 0, adc_channels = 0, cell_idx = 0, total_channels;
 	int readouts = 0;
 
@@ -217,22 +217,25 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	tscadc->clk_div = (clk_get_rate(clk) / tscadc->data->target_clk_rate) - 1;
 	regmap_write(tscadc->regmap, REG_CLKDIV, tscadc->clk_div);
 
-	/* Set the control register bits */
-	ctrl = CNTRLREG_STEPCONFIGWRT |	CNTRLREG_STEPID;
+	/*
+	 * Set the control register bits. tscadc->ctrl stores the configuration
+	 * of the CTRL register but not the subsystem enable bit which must be
+	 * added manually when timely.
+	 */
+	tscadc->ctrl = CNTRLREG_STEPCONFIGWRT | CNTRLREG_STEPID;
 	if (tsc_wires > 0) {
-		tscadc->tsc_wires = tsc_wires;
+		tscadc->ctrl |= CNTRLREG_TSCENB;
 		if (tsc_wires == 5)
-			ctrl |= CNTRLREG_5WIRE | CNTRLREG_TSCENB;
+			tscadc->ctrl |= CNTRLREG_5WIRE;
 		else
-			ctrl |= CNTRLREG_4WIRE | CNTRLREG_TSCENB;
+			tscadc->ctrl |= CNTRLREG_4WIRE;
 	}
-	regmap_write(tscadc->regmap, REG_CTRL, ctrl);
+	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl);
 
 	tscadc_idle_config(tscadc);
 
 	/* Enable the TSC module enable bit */
-	ctrl |= CNTRLREG_TSCSSENB;
-	regmap_write(tscadc->regmap, REG_CTRL, ctrl);
+	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl | CNTRLREG_TSCSSENB);
 
 	/* TSC Cell */
 	if (tsc_wires > 0) {
@@ -307,27 +310,13 @@ static int __maybe_unused tscadc_suspend(struct device *dev)
 static int __maybe_unused tscadc_resume(struct device *dev)
 {
 	struct ti_tscadc_dev *tscadc = dev_get_drvdata(dev);
-	u32 ctrl;
 
 	pm_runtime_get_sync(dev);
 
-	/* context restore */
 	regmap_write(tscadc->regmap, REG_CLKDIV, tscadc->clk_div);
-
-	ctrl = CNTRLREG_STEPCONFIGWRT |	CNTRLREG_STEPID;
-	if (tscadc->tsc_wires > 0) {
-		if (tscadc->tsc_wires == 5)
-			ctrl |= CNTRLREG_5WIRE | CNTRLREG_TSCENB;
-		else
-			ctrl |= CNTRLREG_4WIRE | CNTRLREG_TSCENB;
-	}
-
-	regmap_write(tscadc->regmap, REG_CTRL, ctrl);
-
+	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl);
 	tscadc_idle_config(tscadc);
-
-	ctrl |= CNTRLREG_TSCSSENB;
-	regmap_write(tscadc->regmap, REG_CTRL, ctrl);
+	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl | CNTRLREG_TSCSSENB);
 
 	return 0;
 }
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 23442059d271..c9c6f0b29181 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -177,8 +177,8 @@ struct ti_tscadc_dev {
 	phys_addr_t tscadc_phys_base;
 	const struct ti_tscadc_data *data;
 	int irq;
-	int tsc_wires;
 	struct mfd_cell cells[TSCADC_CELLS];
+	u32 ctrl;
 	u32 reg_se_cache;
 	bool adc_waiting;
 	bool adc_in_use;
-- 
cgit v1.2.3


From 36782dab984a8b4ef8f4ec8c2270c56468cbbbeb Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:41 +0200
Subject: mfd: ti_am335x_tscadc: Replace the header license text with SPDX tag

Drop the text license and replace it with an equivalent SPDX license tag
identifier.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-24-miquel.raynal@bootlin.com
---
 include/linux/mfd/ti_am335x_tscadc.h | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index c9c6f0b29181..2b8a5f58a3b6 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -1,21 +1,13 @@
-#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H
-#define __LINUX_TI_AM335X_TSCADC_MFD_H
-
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * TI Touch Screen / ADC MFD driver
  *
  * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any
- * kind, whether express or implied; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
+#ifndef __LINUX_TI_AM335X_TSCADC_MFD_H
+#define __LINUX_TI_AM335X_TSCADC_MFD_H
+
 #include <linux/mfd/core.h>
 
 #define REG_RAWIRQSTATUS	0x024
-- 
cgit v1.2.3


From 3831abe135566813341cc36b1493d75f6ac460c3 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:42 +0200
Subject: mfd: ti_am335x_tscadc: Fix header spacing

Harmonize the spacing within macro definitions.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-25-miquel.raynal@bootlin.com
---
 include/linux/mfd/ti_am335x_tscadc.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 2b8a5f58a3b6..893c474c1f8c 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -41,7 +41,7 @@
 /* Step Enable */
 #define STEPENB_MASK		(0x1FFFF << 0)
 #define STEPENB(val)		((val) << 0)
-#define ENB(val)			(1 << (val))
+#define ENB(val)		(1 << (val))
 #define STPENB_STEPENB		STEPENB(0x1FFFF)
 #define STPENB_STEPENB_TC	STEPENB(0x1FFF)
 
@@ -122,15 +122,15 @@
 #define CNTRLREG_TSCENB		BIT(7)
 
 /* FIFO READ Register */
-#define FIFOREAD_DATA_MASK (0xfff << 0)
-#define FIFOREAD_CHNLID_MASK (0xf << 16)
+#define FIFOREAD_DATA_MASK	(0xfff << 0)
+#define FIFOREAD_CHNLID_MASK	(0xf << 16)
 
 /* DMA ENABLE/CLEAR Register */
 #define DMA_FIFO0		BIT(0)
 #define DMA_FIFO1		BIT(1)
 
 /* Sequencer Status */
-#define SEQ_STATUS BIT(5)
+#define SEQ_STATUS		BIT(5)
 #define CHARGE_STEP		0x11
 
 #define ADC_CLK			3000000
@@ -150,7 +150,7 @@
  *
  * max processing time: 266431 * 308ns = 83ms(approx)
  */
-#define IDLE_TIMEOUT 83 /* milliseconds */
+#define IDLE_TIMEOUT		83 /* milliseconds */
 
 #define TSCADC_CELLS		2
 
-- 
cgit v1.2.3


From 48959fcdca8b335afa4485e71114008c81291bf9 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:43 +0200
Subject: mfd: ti_am335x_tscadc: Use the new HZ_PER_MHZ macro

Before adding another frequency with even more zeroes, use the
HZ_PER_MHZ macro to clarify the number.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-26-miquel.raynal@bootlin.com
---
 include/linux/mfd/ti_am335x_tscadc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 893c474c1f8c..a85643677bef 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -9,6 +9,7 @@
 #define __LINUX_TI_AM335X_TSCADC_MFD_H
 
 #include <linux/mfd/core.h>
+#include <linux/units.h>
 
 #define REG_RAWIRQSTATUS	0x024
 #define REG_IRQSTATUS		0x028
@@ -133,7 +134,7 @@
 #define SEQ_STATUS		BIT(5)
 #define CHARGE_STEP		0x11
 
-#define ADC_CLK			3000000
+#define ADC_CLK			(3 * HZ_PER_MHZ)
 #define TOTAL_STEPS		16
 #define TOTAL_CHANNELS		8
 #define FIFO1_THRESHOLD		19
-- 
cgit v1.2.3


From 65de5532a317b22f7fb93001cd0ed494145d3f4e Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:44 +0200
Subject: mfd: ti_am335x_tscadc: Drop unused definitions from the header

The STEP ENABLE definitions are highly unclear and not used so drop them.

Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-27-miquel.raynal@bootlin.com
---
 include/linux/mfd/ti_am335x_tscadc.h | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index a85643677bef..1cd8cd34f2b7 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -39,13 +39,6 @@
 /* IRQ wakeup enable */
 #define IRQWKUP_ENB		BIT(0)
 
-/* Step Enable */
-#define STEPENB_MASK		(0x1FFFF << 0)
-#define STEPENB(val)		((val) << 0)
-#define ENB(val)		(1 << (val))
-#define STPENB_STEPENB		STEPENB(0x1FFFF)
-#define STPENB_STEPENB_TC	STEPENB(0x1FFF)
-
 /* IRQ enable */
 #define IRQENB_HW_PEN		BIT(0)
 #define IRQENB_EOS		BIT(1)
-- 
cgit v1.2.3


From b7cb7bf11817129c623fa921a1d6cd86b3cb727b Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:45 +0200
Subject: mfd: ti_am335x_tscadc: Use BIT(), GENMASK() and FIELD_PREP() when
 relevant

Clean the ti_am335x_tscadc.h header by:
* converting masks to GENMASK()
* converting regular shifts to BIT()
* using FIELD_PREP() when relevant

Sometimes reorder the lines to be able to use the relevant bitmask.

Mind the s/%d/%ld/ change in a log due to the type change following the
use of FIELD_PREP() in the header.

Suggested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-28-miquel.raynal@bootlin.com
---
 drivers/iio/adc/ti_am335x_adc.c      |  2 +-
 include/linux/mfd/ti_am335x_tscadc.h | 61 ++++++++++++++++++------------------
 2 files changed, 32 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 855cc2d64ac8..3dec115e68ee 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -126,7 +126,7 @@ static void tiadc_step_config(struct iio_dev *indio_dev)
 		chan = adc_dev->channel_line[i];
 
 		if (adc_dev->step_avg[i] > STEPCONFIG_AVG_16) {
-			dev_warn(dev, "chan %d step_avg truncating to %d\n",
+			dev_warn(dev, "chan %d step_avg truncating to %ld\n",
 				 chan, STEPCONFIG_AVG_16);
 			adc_dev->step_avg[i] = STEPCONFIG_AVG_16;
 		}
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 1cd8cd34f2b7..ae694fa2d711 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -8,6 +8,7 @@
 #ifndef __LINUX_TI_AM335X_TSCADC_MFD_H
 #define __LINUX_TI_AM335X_TSCADC_MFD_H
 
+#include <linux/bitfield.h>
 #include <linux/mfd/core.h>
 #include <linux/units.h>
 
@@ -51,12 +52,12 @@
 #define IRQENB_PENUP		BIT(9)
 
 /* Step Configuration */
-#define STEPCONFIG_MODE_MASK	(3 << 0)
-#define STEPCONFIG_MODE(val)	((val) << 0)
+#define STEPCONFIG_MODE_MASK	GENMASK(1, 0)
+#define STEPCONFIG_MODE(val)	FIELD_PREP(STEPCONFIG_MODE_MASK, (val))
 #define STEPCONFIG_MODE_SWCNT	STEPCONFIG_MODE(1)
 #define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
-#define STEPCONFIG_AVG_MASK	(7 << 2)
-#define STEPCONFIG_AVG(val)	((val) << 2)
+#define STEPCONFIG_AVG_MASK	GENMASK(4, 2)
+#define STEPCONFIG_AVG(val)	FIELD_PREP(STEPCONFIG_AVG_MASK, (val))
 #define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
 #define STEPCONFIG_XPP		BIT(5)
 #define STEPCONFIG_XNN		BIT(6)
@@ -64,43 +65,43 @@
 #define STEPCONFIG_YNN		BIT(8)
 #define STEPCONFIG_XNP		BIT(9)
 #define STEPCONFIG_YPN		BIT(10)
-#define STEPCONFIG_RFP(val)	((val) << 12)
-#define STEPCONFIG_RFP_VREFP	(0x3 << 12)
-#define STEPCONFIG_INM_MASK	(0xF << 15)
-#define STEPCONFIG_INM(val)	((val) << 15)
+#define STEPCONFIG_RFP_VREFP	GENMASK(13, 12)
+#define STEPCONFIG_RFP(val)	FIELD_PREP(STEPCONFIG_RFP_VREFP, (val))
+#define STEPCONFIG_INM_MASK	GENMASK(18, 15)
+#define STEPCONFIG_INM(val)	FIELD_PREP(STEPCONFIG_INM_MASK, (val))
 #define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
-#define STEPCONFIG_INP_MASK	(0xF << 19)
-#define STEPCONFIG_INP(val)	((val) << 19)
+#define STEPCONFIG_INP_MASK	GENMASK(22, 19)
+#define STEPCONFIG_INP(val)	FIELD_PREP(STEPCONFIG_INP_MASK, (val))
 #define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
 #define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
 #define STEPCONFIG_FIFO1	BIT(26)
-#define STEPCONFIG_RFM(val)	((val) << 23)
-#define STEPCONFIG_RFM_VREFN	(0x3 << 23)
+#define STEPCONFIG_RFM_VREFN	GENMASK(24, 23)
+#define STEPCONFIG_RFM(val)	FIELD_PREP(STEPCONFIG_RFM_VREFN, (val))
 
 /* Delay register */
-#define STEPDELAY_OPEN_MASK	(0x3FFFF << 0)
-#define STEPDELAY_OPEN(val)	((val) << 0)
+#define STEPDELAY_OPEN_MASK	GENMASK(17, 0)
+#define STEPDELAY_OPEN(val)	FIELD_PREP(STEPDELAY_OPEN_MASK, (val))
 #define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
-#define STEPDELAY_SAMPLE_MASK	(0xFF << 24)
-#define STEPDELAY_SAMPLE(val)	((val) << 24)
+#define STEPDELAY_SAMPLE_MASK	GENMASK(31, 24)
+#define STEPDELAY_SAMPLE(val)	FIELD_PREP(STEPDELAY_SAMPLE_MASK, (val))
 #define STEPCONFIG_SAMPLEDLY	STEPDELAY_SAMPLE(0)
 
 /* Charge Config */
-#define STEPCHARGE_RFP_MASK	(7 << 12)
-#define STEPCHARGE_RFP(val)	((val) << 12)
+#define STEPCHARGE_RFP_MASK	GENMASK(14, 12)
+#define STEPCHARGE_RFP(val)	FIELD_PREP(STEPCHARGE_RFP_MASK, (val))
 #define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
-#define STEPCHARGE_INM_MASK	(0xF << 15)
-#define STEPCHARGE_INM(val)	((val) << 15)
+#define STEPCHARGE_INM_MASK	GENMASK(18, 15)
+#define STEPCHARGE_INM(val)	FIELD_PREP(STEPCHARGE_INM_MASK, (val))
 #define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
-#define STEPCHARGE_INP_MASK	(0xF << 19)
-#define STEPCHARGE_INP(val)	((val) << 19)
-#define STEPCHARGE_RFM_MASK	(3 << 23)
-#define STEPCHARGE_RFM(val)	((val) << 23)
+#define STEPCHARGE_INP_MASK	GENMASK(22, 19)
+#define STEPCHARGE_INP(val)	FIELD_PREP(STEPCHARGE_INP_MASK, (val))
+#define STEPCHARGE_RFM_MASK	GENMASK(24, 23)
+#define STEPCHARGE_RFM(val)	FIELD_PREP(STEPCHARGE_RFM_MASK, (val))
 #define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
 
 /* Charge delay */
-#define CHARGEDLY_OPEN_MASK	(0x3FFFF << 0)
-#define CHARGEDLY_OPEN(val)	((val) << 0)
+#define CHARGEDLY_OPEN_MASK	GENMASK(17, 0)
+#define CHARGEDLY_OPEN(val)	FIELD_PREP(CHARGEDLY_OPEN_MASK, (val))
 #define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(0x400)
 
 /* Control register */
@@ -108,16 +109,16 @@
 #define CNTRLREG_STEPID		BIT(1)
 #define CNTRLREG_STEPCONFIGWRT	BIT(2)
 #define CNTRLREG_POWERDOWN	BIT(4)
-#define CNTRLREG_AFE_CTRL_MASK	(3 << 5)
-#define CNTRLREG_AFE_CTRL(val)	((val) << 5)
+#define CNTRLREG_AFE_CTRL_MASK	GENMASK(6, 5)
+#define CNTRLREG_AFE_CTRL(val)	FIELD_PREP(CNTRLREG_AFE_CTRL_MASK, (val))
 #define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
 #define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
 #define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
 #define CNTRLREG_TSCENB		BIT(7)
 
 /* FIFO READ Register */
-#define FIFOREAD_DATA_MASK	(0xfff << 0)
-#define FIFOREAD_CHNLID_MASK	(0xf << 16)
+#define FIFOREAD_DATA_MASK	GENMASK(11, 0)
+#define FIFOREAD_CHNLID_MASK	GENMASK(19, 16)
 
 /* DMA ENABLE/CLEAR Register */
 #define DMA_FIFO0		BIT(0)
-- 
cgit v1.2.3


From 01d838164b4c305c1cafb0c3f71fb0027d99358b Mon Sep 17 00:00:00 2001
From: Saurav Kashyap <skashyap@marvell.com>
Date: Mon, 23 Aug 2021 05:56:48 -0700
Subject: nvme-fc: add support for ->map_queues

NVMe FC don't have support for map queues, unlike the PCI, RDMA and TCP
transports.  Add a ->map_queues callout for the LLDDs to provide such
functionality.

Signed-off-by: Saurav Kashyap <skashyap@marvell.com>
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c         | 24 ++++++++++++++++++++++++
 include/linux/nvme-fc-driver.h |  7 +++++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index aa14ad963d91..34f7a7c236bf 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -16,6 +16,7 @@
 #include <linux/nvme-fc.h>
 #include "fc.h"
 #include <scsi/scsi_transport_fc.h>
+#include <linux/blk-mq-pci.h>
 
 /* *************************** Data Structures/Defines ****************** */
 
@@ -2841,6 +2842,28 @@ nvme_fc_complete_rq(struct request *rq)
 	nvme_fc_ctrl_put(ctrl);
 }
 
+static int nvme_fc_map_queues(struct blk_mq_tag_set *set)
+{
+	struct nvme_fc_ctrl *ctrl = set->driver_data;
+	int i;
+
+	for (i = 0; i < set->nr_maps; i++) {
+		struct blk_mq_queue_map *map = &set->map[i];
+
+		if (!map->nr_queues) {
+			WARN_ON(i == HCTX_TYPE_DEFAULT);
+			continue;
+		}
+
+		/* Call LLDD map queue functionality if defined */
+		if (ctrl->lport->ops->map_queues)
+			ctrl->lport->ops->map_queues(&ctrl->lport->localport,
+						     map);
+		else
+			blk_mq_map_queues(map);
+	}
+	return 0;
+}
 
 static const struct blk_mq_ops nvme_fc_mq_ops = {
 	.queue_rq	= nvme_fc_queue_rq,
@@ -2849,6 +2872,7 @@ static const struct blk_mq_ops nvme_fc_mq_ops = {
 	.exit_request	= nvme_fc_exit_request,
 	.init_hctx	= nvme_fc_init_hctx,
 	.timeout	= nvme_fc_timeout,
+	.map_queues	= nvme_fc_map_queues,
 };
 
 static int
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 2a38f2b477a5..cb909edb76c4 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -7,6 +7,7 @@
 #define _NVME_FC_DRIVER_H 1
 
 #include <linux/scatterlist.h>
+#include <linux/blk-mq.h>
 
 
 /*
@@ -497,6 +498,8 @@ struct nvme_fc_port_template {
 	int	(*xmt_ls_rsp)(struct nvme_fc_local_port *localport,
 				struct nvme_fc_remote_port *rport,
 				struct nvmefc_ls_rsp *ls_rsp);
+	void	(*map_queues)(struct nvme_fc_local_port *localport,
+			      struct blk_mq_queue_map *map);
 
 	u32	max_hw_queues;
 	u16	max_sgl_segments;
@@ -779,6 +782,10 @@ struct nvmet_fc_target_port {
  *       LS received.
  *       Entrypoint is Mandatory.
  *
+ * @map_queues: This functions lets the driver expose the queue mapping
+ *	 to the block layer.
+ *       Entrypoint is Optional.
+ *
  * @fcp_op:  Called to perform a data transfer or transmit a response.
  *       The nvmefc_tgt_fcp_req structure is the same LLDD-supplied
  *       exchange structure specified in the nvmet_fc_rcv_fcp_req() call
-- 
cgit v1.2.3


From 44c3c6257e99c6284f312206de73783575fc8906 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Thu, 23 Sep 2021 00:55:35 +0300
Subject: nvme-rdma: limit the maximal queue size for RDMA controllers

Corrent limit of 1024 isn't valid for some of the RDMA based ctrls. In
case the target expose a cap of larger amount of entries (e.g. 1024),
the initiator may fail to create a QP with this size. Thus limit to a
value that works for all RDMA adapters.

Future general solution should use RDMA/core API to calculate this size
according to device capabilities and number of WRs needed per NVMe IO
request.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c  | 7 +++++++
 include/linux/nvme-rdma.h | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 1624da3702d4..027ee57cbdb0 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1112,6 +1112,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new)
 			ctrl->ctrl.opts->queue_size, ctrl->ctrl.sqsize + 1);
 	}
 
+	if (ctrl->ctrl.sqsize + 1 > NVME_RDMA_MAX_QUEUE_SIZE) {
+		dev_warn(ctrl->ctrl.device,
+			"ctrl sqsize %u > max queue size %u, clamping down\n",
+			ctrl->ctrl.sqsize + 1, NVME_RDMA_MAX_QUEUE_SIZE);
+		ctrl->ctrl.sqsize = NVME_RDMA_MAX_QUEUE_SIZE - 1;
+	}
+
 	if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
 		dev_warn(ctrl->ctrl.device,
 			"sqsize %u > ctrl maxcmd %u, clamping down\n",
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index 3ec8e50efa16..4dd7e6fe92fb 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -6,6 +6,8 @@
 #ifndef _LINUX_NVME_RDMA_H
 #define _LINUX_NVME_RDMA_H
 
+#define NVME_RDMA_MAX_QUEUE_SIZE	128
+
 enum nvme_rdma_cm_fmt {
 	NVME_RDMA_CM_FMT_1_0 = 0x0,
 };
-- 
cgit v1.2.3


From e15a8a9755659ff5972f30de4dd64867c97f242d Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Wed, 22 Sep 2021 08:35:20 +0200
Subject: nvme: add CNTRLTYPE definitions for 'identify controller'

Update the 'identify controller' structure to define the newly added
CNTRLTYPE field.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index b7c4c4130b65..ed2428918bca 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -31,6 +31,12 @@ enum nvme_subsys_type {
 	NVME_NQN_NVME	= 2,		/* NVME type target subsystem */
 };
 
+enum nvme_ctrl_type {
+	NVME_CTRL_IO	= 1,		/* I/O controller */
+	NVME_CTRL_DISC	= 2,		/* Discovery controller */
+	NVME_CTRL_ADMIN	= 3,		/* Administrative controller */
+};
+
 /* Address Family codes for Discovery Log Page entry ADRFAM field */
 enum {
 	NVMF_ADDR_FAMILY_PCI	= 0,	/* PCIe */
@@ -244,7 +250,9 @@ struct nvme_id_ctrl {
 	__le32			rtd3e;
 	__le32			oaes;
 	__le32			ctratt;
-	__u8			rsvd100[28];
+	__u8			rsvd100[11];
+	__u8			cntrltype;
+	__u8			fguid[16];
 	__le16			crdt1;
 	__le16			crdt2;
 	__le16			crdt3;
-- 
cgit v1.2.3


From d56ae18f063e38eba47550c632519c9fe3f76b19 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Thu, 23 Sep 2021 13:17:44 +0300
Subject: nvmet: use macro definitions for setting cmic value

This makes the code more readable.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c | 3 ++-
 include/linux/nvme.h            | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index dce53030b375..6e75890bc8d9 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -385,7 +385,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	 */
 
 	/* we support multiple ports, multiples hosts and ANA: */
-	id->cmic = (1 << 0) | (1 << 1) | (1 << 3);
+	id->cmic = NVME_CTRL_CMIC_MULTI_PORT | NVME_CTRL_CMIC_MULTI_CTRL |
+		NVME_CTRL_CMIC_ANA;
 
 	/* Limit MDTS according to transport capability */
 	if (ctrl->ops->get_mdts)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index ed2428918bca..357482dedb59 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -320,6 +320,7 @@ struct nvme_id_ctrl {
 };
 
 enum {
+	NVME_CTRL_CMIC_MULTI_PORT		= 1 << 0,
 	NVME_CTRL_CMIC_MULTI_CTRL		= 1 << 1,
 	NVME_CTRL_CMIC_ANA			= 1 << 3,
 	NVME_CTRL_ONCS_COMPARE			= 1 << 0,
-- 
cgit v1.2.3


From cdd591fc86e38ad3899196066219fbbd845f3162 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 5 Jul 2021 17:26:28 +0200
Subject: iov_iter: Introduce fault_in_iov_iter_writeable

Introduce a new fault_in_iov_iter_writeable helper for safely faulting
in an iterator for writing.  Uses get_user_pages() to fault in the pages
without actually writing to them, which would be destructive.

We'll use fault_in_iov_iter_writeable in gfs2 once we've determined that
the iterator passed to .read_iter isn't in memory.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 include/linux/pagemap.h |  1 +
 include/linux/uio.h     |  1 +
 lib/iov_iter.c          | 39 ++++++++++++++++++++++++++++++
 mm/gup.c                | 63 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9fe94f7a4f7e..2f7dd14083d9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -736,6 +736,7 @@ extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
  * Fault in userspace address range.
  */
 size_t fault_in_writeable(char __user *uaddr, size_t size);
+size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
 size_t fault_in_readable(const char __user *uaddr, size_t size);
 
 int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index d18458af6681..25d1c24fd829 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -134,6 +134,7 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t bytes);
+size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ce3d4f610626..ac9a87e727a3 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -467,6 +467,45 @@ size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
 }
 EXPORT_SYMBOL(fault_in_iov_iter_readable);
 
+/*
+ * fault_in_iov_iter_writeable - fault in iov iterator for writing
+ * @i: iterator
+ * @size: maximum length
+ *
+ * Faults in the iterator using get_user_pages(), i.e., without triggering
+ * hardware page faults.  This is primarily useful when we already know that
+ * some or all of the pages in @i aren't in memory.
+ *
+ * Returns the number of bytes not faulted in, like copy_to_user() and
+ * copy_from_user().
+ *
+ * Always returns 0 for non-user-space iterators.
+ */
+size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
+{
+	if (iter_is_iovec(i)) {
+		size_t count = min(size, iov_iter_count(i));
+		const struct iovec *p;
+		size_t skip;
+
+		size -= count;
+		for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
+			size_t len = min(count, p->iov_len - skip);
+			size_t ret;
+
+			if (unlikely(!len))
+				continue;
+			ret = fault_in_safe_writeable(p->iov_base + skip, len);
+			count -= len - ret;
+			if (ret)
+				break;
+		}
+		return count + size;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(fault_in_iov_iter_writeable);
+
 void iov_iter_init(struct iov_iter *i, unsigned int direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
diff --git a/mm/gup.c b/mm/gup.c
index a7efb027d6cf..795f15c410cc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1691,6 +1691,69 @@ out:
 }
 EXPORT_SYMBOL(fault_in_writeable);
 
+/*
+ * fault_in_safe_writeable - fault in an address range for writing
+ * @uaddr: start of address range
+ * @size: length of address range
+ *
+ * Faults in an address range using get_user_pages, i.e., without triggering
+ * hardware page faults.  This is primarily useful when we already know that
+ * some or all of the pages in the address range aren't in memory.
+ *
+ * Other than fault_in_writeable(), this function is non-destructive.
+ *
+ * Note that we don't pin or otherwise hold the pages referenced that we fault
+ * in.  There's no guarantee that they'll stay in memory for any duration of
+ * time.
+ *
+ * Returns the number of bytes not faulted in, like copy_to_user() and
+ * copy_from_user().
+ */
+size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
+{
+	unsigned long start = (unsigned long)untagged_addr(uaddr);
+	unsigned long end, nstart, nend;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma = NULL;
+	int locked = 0;
+
+	nstart = start & PAGE_MASK;
+	end = PAGE_ALIGN(start + size);
+	if (end < nstart)
+		end = 0;
+	for (; nstart != end; nstart = nend) {
+		unsigned long nr_pages;
+		long ret;
+
+		if (!locked) {
+			locked = 1;
+			mmap_read_lock(mm);
+			vma = find_vma(mm, nstart);
+		} else if (nstart >= vma->vm_end)
+			vma = vma->vm_next;
+		if (!vma || vma->vm_start >= end)
+			break;
+		nend = end ? min(end, vma->vm_end) : vma->vm_end;
+		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+			continue;
+		if (nstart < vma->vm_start)
+			nstart = vma->vm_start;
+		nr_pages = (nend - nstart) / PAGE_SIZE;
+		ret = __get_user_pages_locked(mm, nstart, nr_pages,
+					      NULL, NULL, &locked,
+					      FOLL_TOUCH | FOLL_WRITE);
+		if (ret <= 0)
+			break;
+		nend = nstart + ret * PAGE_SIZE;
+	}
+	if (locked)
+		mmap_read_unlock(mm);
+	if (nstart == end)
+		return 0;
+	return size - min_t(size_t, nstart - start, size);
+}
+EXPORT_SYMBOL(fault_in_safe_writeable);
+
 /**
  * fault_in_readable - fault in userspace address range for reading
  * @uaddr: start of user address range
-- 
cgit v1.2.3


From a164ff53cbd34479aeac3366840669b10845ce53 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 14 Oct 2021 16:47:54 +0300
Subject: driver core: Provide device_match_acpi_handle() helper

We have a couple of users of this helper, make it available for them.

The prototype for the helper is specifically crafted in order to be
easily used with bus_find_device() call. That's why its location is
in the driver core rather than ACPI.

Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20211014134756.39092-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c        | 6 ++++++
 include/linux/device/bus.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index b67ebe6a323c..fd034d742447 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -4838,6 +4838,12 @@ int device_match_acpi_dev(struct device *dev, const void *adev)
 }
 EXPORT_SYMBOL(device_match_acpi_dev);
 
+int device_match_acpi_handle(struct device *dev, const void *handle)
+{
+	return ACPI_HANDLE(dev) == handle;
+}
+EXPORT_SYMBOL(device_match_acpi_handle);
+
 int device_match_any(struct device *dev, const void *unused)
 {
 	return 1;
diff --git a/include/linux/device/bus.h b/include/linux/device/bus.h
index 062777a45a74..a039ab809753 100644
--- a/include/linux/device/bus.h
+++ b/include/linux/device/bus.h
@@ -143,6 +143,7 @@ int device_match_of_node(struct device *dev, const void *np);
 int device_match_fwnode(struct device *dev, const void *fwnode);
 int device_match_devt(struct device *dev, const void *pdevt);
 int device_match_acpi_dev(struct device *dev, const void *adev);
+int device_match_acpi_handle(struct device *dev, const void *handle);
 int device_match_any(struct device *dev, const void *unused);
 
 /* iterator helpers for buses */
-- 
cgit v1.2.3


From 14fe2471c62816ba82546fb68369d957c3a58b59 Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Thu, 7 Oct 2021 16:05:38 +0300
Subject: net/mlx5: Lag, change multipath and bonding to be mutually exclusive

Both multipath and bonding events are changing the HW LAG state
independently.
Handling one of the features events while the other is already
enabled can cause unwanted behavior, for example handling
bonding event while multipath enabled will disable the lag and
cause multipath to stop working.

Fix it by ignoring bonding event while in multipath and ignoring FIB
events while in bonding mode.

Fixes: 544fe7c2e654 ("net/mlx5e: Activate HW multipath and handle port affinity based on FIB events")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/lag.c       |  4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c    | 13 ++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h    |  2 ++
 include/linux/mlx5/driver.h                         |  1 -
 6 files changed, 18 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index b4e986818794..4a13ef561587 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -10,6 +10,8 @@
 #include "en_tc.h"
 #include "rep/tc.h"
 #include "rep/neigh.h"
+#include "lag.h"
+#include "lag_mp.h"
 
 struct mlx5e_tc_tun_route_attr {
 	struct net_device *out_dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ba8164792016..129ff7e0d65c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -67,6 +67,8 @@
 #include "lib/fs_chains.h"
 #include "diag/en_tc_tracepoint.h"
 #include <asm/div64.h>
+#include "lag.h"
+#include "lag_mp.h"
 
 #define nic_chains(priv) ((priv)->fs.tc.chains)
 #define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index ca5690b0a7ab..d2105c1635c3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -442,6 +442,10 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 	if (!mlx5_lag_is_ready(ldev)) {
 		do_bond = false;
 	} else {
+		/* VF LAG is in multipath mode, ignore bond change requests */
+		if (mlx5_lag_is_multipath(dev0))
+			return;
+
 		tracker = ldev->tracker;
 
 		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index f239b352a58a..21fdaf708f1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -9,20 +9,23 @@
 #include "eswitch.h"
 #include "lib/mlx5.h"
 
+static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
+}
+
 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
 {
 	if (!mlx5_lag_is_ready(ldev))
 		return false;
 
+	if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
+		return false;
+
 	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
 					 ldev->pf[MLX5_LAG_P2].dev);
 }
 
-static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
-{
-	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
-}
-
 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
 {
 	struct mlx5_lag *ldev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
index 729c839397a8..dea199e79bed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
@@ -24,12 +24,14 @@ struct lag_mp {
 void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
 int mlx5_lag_mp_init(struct mlx5_lag *ldev);
 void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 
 #else /* CONFIG_MLX5_ESWITCH */
 
 static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
 static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
 static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { return false; }
 
 #endif /* CONFIG_MLX5_ESWITCH */
 #endif /* __MLX5_LAG_MP_H__ */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e23417424373..f17d2101af7a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1138,7 +1138,6 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev);
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
-bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From f1985002839af80d6c84e9537834a81fb1364d6e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 20 Oct 2021 09:33:21 +0100
Subject: irqchip: Provide stronger type checking for
 IRQCHIP_MATCH/IRQCHIP_DECLARE

Both IRQCHIP_DECLARE() and IRQCHIP_MATCH() use an underlying of_device_id()
structure to encode the matching property and the init callback.
However, this callback is stored in as a void * pointer, which obviously
defeat any attempt at stronger type checking.

Work around this by providing a new macro that builds on top of the
__typecheck() primitive, and that can be used to warn when there is
a discrepency between the drivers and core code.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211020104527.3066268-1-maz@kernel.org
---
 include/linux/irqchip.h | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h
index 67351aac65ef..5de0dfc5d64d 100644
--- a/include/linux/irqchip.h
+++ b/include/linux/irqchip.h
@@ -14,8 +14,15 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 
+/* Undefined on purpose */
+extern of_irq_init_cb_t typecheck_irq_init_cb;
+
+#define typecheck_irq_init_cb(fn)					\
+	(__typecheck(typecheck_irq_init_cb, &fn) ? fn : fn)
+
 /*
  * This macro must be used by the different irqchip drivers to declare
  * the association between their DT compatible string and their
@@ -26,14 +33,16 @@
  * @compstr: compatible string of the irqchip driver
  * @fn: initialization function
  */
-#define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn)
+#define IRQCHIP_DECLARE(name, compat, fn)	\
+	OF_DECLARE_2(irqchip, name, compat, typecheck_irq_init_cb(fn))
 
 extern int platform_irqchip_probe(struct platform_device *pdev);
 
 #define IRQCHIP_PLATFORM_DRIVER_BEGIN(drv_name) \
 static const struct of_device_id drv_name##_irqchip_match_table[] = {
 
-#define IRQCHIP_MATCH(compat, fn) { .compatible = compat, .data = fn },
+#define IRQCHIP_MATCH(compat, fn) { .compatible = compat,		\
+				    .data = typecheck_irq_init_cb(fn), },
 
 #define IRQCHIP_PLATFORM_DRIVER_END(drv_name)				\
 	{},								\
-- 
cgit v1.2.3


From 795e92ec5fd79027648bd7f779d34bad5b6f2f55 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 6 Oct 2021 11:43:21 -0500
Subject: of: Add of_get_cpu_hwid() to read hardware ID from CPU nodes

There are various open coded implementions parsing the CPU node 'reg'
property which contains the CPU's hardware ID. Introduce a new function,
of_get_cpu_hwid(), to read the hardware ID.

All the callers should be DT only code, so no need for an empty
function.

Cc: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20211006164332.1981454-2-robh@kernel.org
---
 drivers/of/base.c  | 22 ++++++++++++++++++++++
 include/linux/of.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index f720c0d246f2..e587ab44be22 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -286,6 +286,28 @@ const void *of_get_property(const struct device_node *np, const char *name,
 }
 EXPORT_SYMBOL(of_get_property);
 
+/**
+ * of_get_cpu_hwid - Get the hardware ID from a CPU device node
+ *
+ * @cpun: CPU number(logical index) for which device node is required
+ * @thread: The local thread number to get the hardware ID for.
+ *
+ * Return: The hardware ID for the CPU node or ~0ULL if not found.
+ */
+u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread)
+{
+	const __be32 *cell;
+	int ac, len;
+
+	ac = of_n_addr_cells(cpun);
+	cell = of_get_property(cpun, "reg", &len);
+	if (!cell || !ac || ((sizeof(*cell) * ac * (thread + 1)) > len))
+		return ~0ULL;
+
+	cell += ac * thread;
+	return of_read_number(cell, ac);
+}
+
 /*
  * arch_match_cpu_phys_id - Match the given logical CPU and physical id
  *
diff --git a/include/linux/of.h b/include/linux/of.h
index 6f1c41f109bb..807f8168dad9 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -353,6 +353,7 @@ extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
 extern struct device_node *of_get_next_cpu_node(struct device_node *prev);
 extern struct device_node *of_get_cpu_state_node(struct device_node *cpu_node,
 						 int index);
+extern u64 of_get_cpu_hwid(struct device_node *cpun, unsigned int thread);
 
 #define for_each_property_of_node(dn, pp) \
 	for (pp = dn->properties; pp != NULL; pp = pp->next)
-- 
cgit v1.2.3


From a3c85b2ee098c4a293148fab4eb96299e92b9524 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Thu, 14 Oct 2021 12:30:55 -0500
Subject: of: make of_node_check_flag() device_node parameter const

The device_node argument isn't modified by of_node_check_flag(), so mark it
const.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Link: https://lore.kernel.org/r/20211014173055.2117872-1-nathanl@linux.ibm.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index 807f8168dad9..ff143a027abc 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -185,7 +185,7 @@ static inline bool of_node_is_root(const struct device_node *node)
 	return node && (node->parent == NULL);
 }
 
-static inline int of_node_check_flag(struct device_node *n, unsigned long flag)
+static inline int of_node_check_flag(const struct device_node *n, unsigned long flag)
 {
 	return test_bit(flag, &n->_flags);
 }
-- 
cgit v1.2.3


From b8419e7be6c6029eee3448fda45f4f9ad340c4ca Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Oct 2021 11:48:59 -0700
Subject: irqchip: Fix kernel-doc parameter typo for IRQCHIP_DECLARE

The documentation refers to "compstr" when we have the parameter named
"compat", fix the typo.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211020184859.2705451-14-f.fainelli@gmail.com
---
 include/linux/irqchip.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h
index 5de0dfc5d64d..7f007b9c23f8 100644
--- a/include/linux/irqchip.h
+++ b/include/linux/irqchip.h
@@ -30,7 +30,7 @@ extern of_irq_init_cb_t typecheck_irq_init_cb;
  *
  * @name: name that must be unique across all IRQCHIP_DECLARE of the
  * same file.
- * @compstr: compatible string of the irqchip driver
+ * @compat: compatible string of the irqchip driver
  * @fn: initialization function
  */
 #define IRQCHIP_DECLARE(name, compat, fn)	\
-- 
cgit v1.2.3


From 133a48abf6ecc535d7eddc6da1c3e4c972445882 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 4 Oct 2021 15:37:42 -0400
Subject: NFS: Fix up commit deadlocks

If O_DIRECT bumps the commit_info rpcs_out field, then that could lead
to fsync() hangs. The fix is to ensure that O_DIRECT calls
nfs_commit_end().

Fixes: 723c921e7dfc ("sched/wait, fs/nfs: Convert wait_on_atomic_t() usage to the new wait_var_event() API")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/direct.c        | 2 +-
 fs/nfs/pnfs_nfs.c      | 2 --
 fs/nfs/write.c         | 9 ++++++---
 include/linux/nfs_fs.h | 1 +
 4 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2e894fec036b..3c0335c15a73 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -620,7 +620,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
 		nfs_unlock_and_release_request(req);
 	}
 
-	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
+	if (nfs_commit_end(cinfo.mds))
 		nfs_direct_write_complete(dreq);
 }
 
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 02bd6e83961d..316f68f96e57 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -468,7 +468,6 @@ pnfs_bucket_alloc_ds_commits(struct list_head *list,
 				goto out_error;
 			data->ds_commit_index = i;
 			list_add_tail(&data->list, list);
-			atomic_inc(&cinfo->mds->rpcs_out);
 			nreq++;
 		}
 		mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
@@ -520,7 +519,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
 		data->ds_commit_index = -1;
 		list_splice_init(mds_pages, &data->pages);
 		list_add_tail(&data->list, &list);
-		atomic_inc(&cinfo->mds->rpcs_out);
 		nreq++;
 	}
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 38f181e1343a..465220f47142 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1673,10 +1673,13 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
 	atomic_inc(&cinfo->rpcs_out);
 }
 
-static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
+bool nfs_commit_end(struct nfs_mds_commit_info *cinfo)
 {
-	if (atomic_dec_and_test(&cinfo->rpcs_out))
+	if (atomic_dec_and_test(&cinfo->rpcs_out)) {
 		wake_up_var(&cinfo->rpcs_out);
+		return true;
+	}
+	return false;
 }
 
 void nfs_commitdata_release(struct nfs_commit_data *data)
@@ -1776,6 +1779,7 @@ void nfs_init_commit(struct nfs_commit_data *data,
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
 	nfs_fattr_init(&data->fattr);
+	nfs_commit_begin(cinfo->mds);
 }
 EXPORT_SYMBOL_GPL(nfs_init_commit);
 
@@ -1822,7 +1826,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
 
 	/* Set up the argument struct */
 	nfs_init_commit(data, head, NULL, cinfo);
-	atomic_inc(&cinfo->mds->rpcs_out);
 	if (NFS_SERVER(inode)->nfs_client->cl_minorversion)
 		task_flags = RPC_TASK_MOVEABLE;
 	return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index ca547cc5458c..e58b78da7a98 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -568,6 +568,7 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
 extern int  nfs_commit_inode(struct inode *, int);
 extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
 extern void nfs_commit_free(struct nfs_commit_data *data);
+bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
 
 static inline int
 nfs_have_writebacks(struct inode *inode)
-- 
cgit v1.2.3


From 0ebeebcf59601bcfa0284f4bb7abdec051eb856d Mon Sep 17 00:00:00 2001
From: Dave Wysochanski <dwysocha@redhat.com>
Date: Sun, 10 Oct 2021 18:23:13 -0400
Subject: NFS: Fix WARN_ON due to unionization of nfs_inode.nrequests

Fixes the following WARN_ON
WARNING: CPU: 2 PID: 18678 at fs/nfs/inode.c:123 nfs_clear_inode+0x3b/0x50 [nfs]
...
Call Trace:
  nfs4_evict_inode+0x57/0x70 [nfsv4]
  evict+0xd1/0x180
  dispose_list+0x48/0x60
  evict_inodes+0x156/0x190
  generic_shutdown_super+0x37/0x110
  nfs_kill_super+0x1d/0x40 [nfs]
  deactivate_locked_super+0x36/0xa0

Signed-off-by: Dave Wysochanski <dwysocha@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/nfs_fs.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index e58b78da7a98..457b866a2d9e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -573,7 +573,9 @@ bool nfs_commit_end(struct nfs_mds_commit_info *cinfo);
 static inline int
 nfs_have_writebacks(struct inode *inode)
 {
-	return atomic_long_read(&NFS_I(inode)->nrequests) != 0;
+	if (S_ISREG(inode->i_mode))
+		return atomic_long_read(&NFS_I(inode)->nrequests) != 0;
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3


From e591b298d7ecb851e200f65946e3d53fe78a3c4f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 28 Sep 2021 17:41:41 -0400
Subject: NFS: Save some space in the inode

Save some space in the nfs_inode by setting up an anonymous union with
the fields that are peculiar to a specific type of filesystem object.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/inode.c         | 26 ++++++++++++++++++--------
 include/linux/nfs_fs.h | 42 ++++++++++++++++++++++++------------------
 2 files changed, 42 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a10572f278e6..b81b2d2f47ad 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -431,6 +431,22 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
 	return inode;
 }
 
+static void nfs_inode_init_regular(struct nfs_inode *nfsi)
+{
+	atomic_long_set(&nfsi->nrequests, 0);
+	INIT_LIST_HEAD(&nfsi->commit_info.list);
+	atomic_long_set(&nfsi->commit_info.ncommit, 0);
+	atomic_set(&nfsi->commit_info.rpcs_out, 0);
+	mutex_init(&nfsi->commit_mutex);
+}
+
+static void nfs_inode_init_dir(struct nfs_inode *nfsi)
+{
+	nfsi->cache_change_attribute = 0;
+	memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+	init_rwsem(&nfsi->rmdir_sem);
+}
+
 /*
  * This is our front-end to iget that looks up inodes by file handle
  * instead of inode number.
@@ -485,10 +501,12 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		if (S_ISREG(inode->i_mode)) {
 			inode->i_fop = NFS_SB(sb)->nfs_client->rpc_ops->file_ops;
 			inode->i_data.a_ops = &nfs_file_aops;
+			nfs_inode_init_regular(nfsi);
 		} else if (S_ISDIR(inode->i_mode)) {
 			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
 			inode->i_data.a_ops = &nfs_dir_aops;
+			nfs_inode_init_dir(nfsi);
 			/* Deal with crossing mountpoints */
 			if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
 					fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
@@ -514,7 +532,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		inode->i_uid = make_kuid(&init_user_ns, -2);
 		inode->i_gid = make_kgid(&init_user_ns, -2);
 		inode->i_blocks = 0;
-		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
 		nfsi->write_io = 0;
 		nfsi->read_io = 0;
 
@@ -2259,14 +2276,7 @@ static void init_once(void *foo)
 	INIT_LIST_HEAD(&nfsi->open_files);
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
-	INIT_LIST_HEAD(&nfsi->commit_info.list);
-	atomic_long_set(&nfsi->nrequests, 0);
-	atomic_long_set(&nfsi->commit_info.ncommit, 0);
-	atomic_set(&nfsi->commit_info.rpcs_out, 0);
-	init_rwsem(&nfsi->rmdir_sem);
-	mutex_init(&nfsi->commit_mutex);
 	nfs4_init_once(nfsi);
-	nfsi->cache_change_attribute = 0;
 }
 
 static int __init nfs_init_inodecache(void)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 457b866a2d9e..739ca1ef934f 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -155,33 +155,39 @@ struct nfs_inode {
 	unsigned long		attrtimeo_timestamp;
 
 	unsigned long		attr_gencount;
-	/* "Generation counter" for the attribute cache. This is
-	 * bumped whenever we update the metadata on the
-	 * server.
-	 */
-	unsigned long		cache_change_attribute;
 
 	struct rb_root		access_cache;
 	struct list_head	access_cache_entry_lru;
 	struct list_head	access_cache_inode_lru;
 
-	/*
-	 * This is the cookie verifier used for NFSv3 readdir
-	 * operations
-	 */
-	__be32			cookieverf[NFS_DIR_VERIFIER_SIZE];
-
-	atomic_long_t		nrequests;
-	struct nfs_mds_commit_info commit_info;
+	union {
+		/* Directory */
+		struct {
+			/* "Generation counter" for the attribute cache.
+			 * This is bumped whenever we update the metadata
+			 * on the server.
+			 */
+			unsigned long	cache_change_attribute;
+			/*
+			 * This is the cookie verifier used for NFSv3 readdir
+			 * operations
+			 */
+			__be32		cookieverf[NFS_DIR_VERIFIER_SIZE];
+			/* Readers: in-flight sillydelete RPC calls */
+			/* Writers: rmdir */
+			struct rw_semaphore	rmdir_sem;
+		};
+		/* Regular file */
+		struct {
+			atomic_long_t	nrequests;
+			struct nfs_mds_commit_info commit_info;
+			struct mutex	commit_mutex;
+		};
+	};
 
 	/* Open contexts for shared mmap writes */
 	struct list_head	open_files;
 
-	/* Readers: in-flight sillydelete RPC calls */
-	/* Writers: rmdir */
-	struct rw_semaphore	rmdir_sem;
-	struct mutex		commit_mutex;
-
 #if IS_ENABLED(CONFIG_NFS_V4)
 	struct nfs4_cached_acl	*nfs4_acl;
         /* NFSv4 state */
-- 
cgit v1.2.3


From 0c0593b45c9b4e5b212ffb3fb28bb8d3c0ec0dc8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Fri, 8 Oct 2021 11:13:31 +0200
Subject: x86/ftrace: Make function graph use ftrace directly

We don't need special hook for graph tracer entry point,
but instead we can use graph_ops::func function to install
the return_hooker.

This moves the graph tracing setup _before_ the direct
trampoline prepares the stack, so the return_hooker will
be called when the direct trampoline is finished.

This simplifies the code, because we don't need to take into
account the direct trampoline setup when preparing the graph
tracer hooker and we can allow function graph tracer on entries
registered with direct trampoline.

Link: https://lkml.kernel.org/r/20211008091336.33616-4-jolsa@kernel.org

[fixed compile error reported by kernel test robot <lkp@intel.com>]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h |  9 +++++++--
 arch/x86/kernel/ftrace.c      | 37 ++++++++++++++++++++++++++++++++++---
 arch/x86/kernel/ftrace_64.S   | 29 +----------------------------
 include/linux/ftrace.h        |  9 +++++++++
 kernel/trace/fgraph.c         |  6 ++++--
 5 files changed, 55 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9f3130f40807..024d9797646e 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -57,6 +57,13 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
 
 #define ftrace_instruction_pointer_set(fregs, _ip)	\
 	do { (fregs)->regs.ip = (_ip); } while (0)
+
+struct ftrace_ops;
+#define ftrace_graph_func ftrace_graph_func
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs);
+#else
+#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
 #endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -65,8 +72,6 @@ struct dyn_arch_ftrace {
 	/* No extra data needed for x86 */
 };
 
-#define FTRACE_GRAPH_TRAMP_ADDR FTRACE_GRAPH_ADDR
-
 #endif /*  CONFIG_DYNAMIC_FTRACE */
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 7f12eacdf1ae..c39f906cdc4e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -522,7 +522,7 @@ static void *addr_from_call(void *ptr)
 	return ptr + CALL_INSN_SIZE + call.disp;
 }
 
-void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
+void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
 			   unsigned long frame_pointer);
 
 /*
@@ -536,7 +536,8 @@ static void *static_tramp_func(struct ftrace_ops *ops, struct dyn_ftrace *rec)
 	void *ptr;
 
 	if (ops && ops->trampoline) {
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) && \
+	defined(CONFIG_FUNCTION_GRAPH_TRACER)
 		/*
 		 * We only know about function graph tracer setting as static
 		 * trampoline.
@@ -584,8 +585,9 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-extern void ftrace_graph_call(void);
 
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+extern void ftrace_graph_call(void);
 static const char *ftrace_jmp_replace(unsigned long ip, unsigned long addr)
 {
 	return text_gen_insn(JMP32_INSN_OPCODE, (void *)ip, (void *)addr);
@@ -613,7 +615,17 @@ int ftrace_disable_ftrace_graph_caller(void)
 
 	return ftrace_mod_jmp(ip, &ftrace_stub);
 }
+#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return 0;
+}
 
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return 0;
+}
+#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 #endif /* !CONFIG_DYNAMIC_FTRACE */
 
 /*
@@ -624,6 +636,7 @@ void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
 			   unsigned long frame_pointer)
 {
 	unsigned long return_hooker = (unsigned long)&return_to_handler;
+	int bit;
 
 	/*
 	 * When resuming from suspend-to-ram, this function can be indirectly
@@ -643,7 +656,25 @@ void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		return;
 
+	bit = ftrace_test_recursion_trylock(ip, *parent);
+	if (bit < 0)
+		return;
+
 	if (!function_graph_enter(*parent, ip, frame_pointer, parent))
 		*parent = return_hooker;
+
+	ftrace_test_recursion_unlock(bit);
+}
+
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	struct pt_regs *regs = &fregs->regs;
+	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
+
+	prepare_ftrace_return(ip, (unsigned long *)stack, 0);
 }
+#endif
+
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index a8eb084a7a9a..7a879901f103 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -174,11 +174,6 @@ SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
 SYM_FUNC_END(ftrace_caller);
 
 SYM_FUNC_START(ftrace_epilogue)
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
-	jmp ftrace_stub
-#endif
-
 /*
  * This is weak to keep gas from relaxing the jumps.
  * It is also used to copy the retq for trampolines.
@@ -288,15 +283,6 @@ SYM_FUNC_START(__fentry__)
 	cmpq $ftrace_stub, ftrace_trace_function
 	jnz trace
 
-fgraph_trace:
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-	cmpq $ftrace_stub, ftrace_graph_return
-	jnz ftrace_graph_caller
-
-	cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
-	jnz ftrace_graph_caller
-#endif
-
 SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
 	retq
 
@@ -314,25 +300,12 @@ trace:
 	CALL_NOSPEC r8
 	restore_mcount_regs
 
-	jmp fgraph_trace
+	jmp ftrace_stub
 SYM_FUNC_END(__fentry__)
 EXPORT_SYMBOL(__fentry__)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_FUNC_START(ftrace_graph_caller)
-	/* Saves rbp into %rdx and fills first parameter  */
-	save_mcount_regs
-
-	leaq MCOUNT_REG_SIZE+8(%rsp), %rsi
-	movq $0, %rdx	/* No framepointers needed */
-	call	prepare_ftrace_return
-
-	restore_mcount_regs
-
-	retq
-SYM_FUNC_END(ftrace_graph_caller)
-
 SYM_FUNC_START(return_to_handler)
 	subq  $24, %rsp
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 12fcfa2d23ea..16a7baaba702 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -803,6 +803,15 @@ static inline bool is_ftrace_trampoline(unsigned long addr)
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+#ifndef ftrace_graph_func
+#define ftrace_graph_func ftrace_stub
+#define FTRACE_OPS_GRAPH_STUB FTRACE_OPS_FL_STUB
+#else
+#define FTRACE_OPS_GRAPH_STUB 0
+#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 /* totally disable ftrace - can not re-enable after this */
 void ftrace_kill(void);
 
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index b8a0d1d564fb..22061d38fc00 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -115,6 +115,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
 {
 	struct ftrace_graph_ent trace;
 
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 	/*
 	 * Skip graph tracing if the return location is served by direct trampoline,
 	 * since call sequence and return addresses are unpredictable anyway.
@@ -124,6 +125,7 @@ int function_graph_enter(unsigned long ret, unsigned long func,
 	if (ftrace_direct_func_count &&
 	    ftrace_find_rec_direct(ret - MCOUNT_INSN_SIZE))
 		return -EBUSY;
+#endif
 	trace.func = func;
 	trace.depth = ++current->curr_ret_depth;
 
@@ -333,10 +335,10 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
 
 static struct ftrace_ops graph_ops = {
-	.func			= ftrace_stub,
+	.func			= ftrace_graph_func,
 	.flags			= FTRACE_OPS_FL_INITIALIZED |
 				   FTRACE_OPS_FL_PID |
-				   FTRACE_OPS_FL_STUB,
+				   FTRACE_OPS_GRAPH_STUB,
 #ifdef FTRACE_GRAPH_TRAMP_ADDR
 	.trampoline		= FTRACE_GRAPH_TRAMP_ADDR,
 	/* trampoline_size is only needed for dynamically allocated tramps */
-- 
cgit v1.2.3


From e967b60eb51116d2347093655e555aa036dd40d8 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:46 +0200
Subject: mfd: ti_am335x_tscadc: Clarify the maximum values for DT entries

Clearly define the maximum open delay and sample delay. Use these
definitions in place of a mask (which works because this is the first
field in the register) and an open-coded value. While at it reword a
little bit the error messages to make them look clearer and similar.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-29-miquel.raynal@bootlin.com
---
 drivers/iio/adc/ti_am335x_adc.c      | 18 +++++++++---------
 include/linux/mfd/ti_am335x_tscadc.h |  2 ++
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 3dec115e68ee..a241e6fa3564 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -126,7 +126,7 @@ static void tiadc_step_config(struct iio_dev *indio_dev)
 		chan = adc_dev->channel_line[i];
 
 		if (adc_dev->step_avg[i] > STEPCONFIG_AVG_16) {
-			dev_warn(dev, "chan %d step_avg truncating to %ld\n",
+			dev_warn(dev, "chan %d: wrong step avg, truncated to %ld\n",
 				 chan, STEPCONFIG_AVG_16);
 			adc_dev->step_avg[i] = STEPCONFIG_AVG_16;
 		}
@@ -147,16 +147,16 @@ static void tiadc_step_config(struct iio_dev *indio_dev)
 				STEPCONFIG_RFP_VREFP |
 				STEPCONFIG_RFM_VREFN);
 
-		if (adc_dev->open_delay[i] > STEPDELAY_OPEN_MASK) {
-			dev_warn(dev, "chan %d open delay truncating to 0x3FFFF\n",
-				 chan);
-			adc_dev->open_delay[i] = STEPDELAY_OPEN_MASK;
+		if (adc_dev->open_delay[i] > STEPCONFIG_MAX_OPENDLY) {
+			dev_warn(dev, "chan %d: wrong open delay, truncated to 0x%lX\n",
+				 chan, STEPCONFIG_MAX_OPENDLY);
+			adc_dev->open_delay[i] = STEPCONFIG_MAX_OPENDLY;
 		}
 
-		if (adc_dev->sample_delay[i] > 0xFF) {
-			dev_warn(dev, "chan %d sample delay truncating to 0xFF\n",
-				 chan);
-			adc_dev->sample_delay[i] = 0xFF;
+		if (adc_dev->sample_delay[i] > STEPCONFIG_MAX_SAMPLE) {
+			dev_warn(dev, "chan %d: wrong sample delay, truncated to 0x%lX\n",
+				 chan, STEPCONFIG_MAX_SAMPLE);
+			adc_dev->sample_delay[i] = STEPCONFIG_MAX_SAMPLE;
 		}
 
 		tiadc_writel(adc_dev, REG_STEPDELAY(steps),
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index ae694fa2d711..31cffb6e8b17 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -84,7 +84,9 @@
 #define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
 #define STEPDELAY_SAMPLE_MASK	GENMASK(31, 24)
 #define STEPDELAY_SAMPLE(val)	FIELD_PREP(STEPDELAY_SAMPLE_MASK, (val))
+#define STEPCONFIG_MAX_OPENDLY	GENMASK(17, 0)
 #define STEPCONFIG_SAMPLEDLY	STEPDELAY_SAMPLE(0)
+#define STEPCONFIG_MAX_SAMPLE	GENMASK(7, 0)
 
 /* Charge Config */
 #define STEPCHARGE_RFP_MASK	GENMASK(14, 12)
-- 
cgit v1.2.3


From 0fd12262613116a38fd76aefa396e9bc116fbfe2 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:47 +0200
Subject: mfd: ti_am335x_tscadc: Drop useless definitions from the header

Drop useless definitions from the header like the "masks" definitions
which are only used by the following definition.

It could be possible to got even further by removing these definitions
entirely and use FIELD_PREP() macros from the code directly, but while I
have no troubles making these changes in the header, changing the values
in the code directly could darkening a bit the logic and
hardening future git-blames for very little added value IMHO (but this
is of course a personal taste).

Certain macros are using GENMASK() to define the value of a particular
field, while this is purely "by chance" that the value and the mask have
the same value. In this case, drop the "mask" definition, use
FIELD_PREP() and GENMASK() in the macro defining the field, and use the
new macro to define the particular value by feeding directly the actual
number advertised in the datasheet into that macro, as in:
	-#define STEPCONFIG_RFM_VREFN   GENMASK(24, 23)
	-#define STEPCONFIG_RFM(val)    FIELD_PREP(STEPCONFIG_RFM_VREFN, (val))
	+#define STEPCONFIG_RFM(val)    FIELD_PREP(GENMASK(24, 23), (val))
	+#define STEPCONFIG_RFM_VREFN   STEPCONFIG_RFM(3)

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-30-miquel.raynal@bootlin.com
---
 include/linux/mfd/ti_am335x_tscadc.h | 44 +++++++++++++-----------------------
 1 file changed, 16 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 31cffb6e8b17..51d987080cd3 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -52,12 +52,10 @@
 #define IRQENB_PENUP		BIT(9)
 
 /* Step Configuration */
-#define STEPCONFIG_MODE_MASK	GENMASK(1, 0)
-#define STEPCONFIG_MODE(val)	FIELD_PREP(STEPCONFIG_MODE_MASK, (val))
+#define STEPCONFIG_MODE(val)	FIELD_PREP(GENMASK(1, 0), (val))
 #define STEPCONFIG_MODE_SWCNT	STEPCONFIG_MODE(1)
 #define STEPCONFIG_MODE_HWSYNC	STEPCONFIG_MODE(2)
-#define STEPCONFIG_AVG_MASK	GENMASK(4, 2)
-#define STEPCONFIG_AVG(val)	FIELD_PREP(STEPCONFIG_AVG_MASK, (val))
+#define STEPCONFIG_AVG(val)	FIELD_PREP(GENMASK(4, 2), (val))
 #define STEPCONFIG_AVG_16	STEPCONFIG_AVG(4)
 #define STEPCONFIG_XPP		BIT(5)
 #define STEPCONFIG_XNN		BIT(6)
@@ -65,45 +63,36 @@
 #define STEPCONFIG_YNN		BIT(8)
 #define STEPCONFIG_XNP		BIT(9)
 #define STEPCONFIG_YPN		BIT(10)
-#define STEPCONFIG_RFP_VREFP	GENMASK(13, 12)
-#define STEPCONFIG_RFP(val)	FIELD_PREP(STEPCONFIG_RFP_VREFP, (val))
-#define STEPCONFIG_INM_MASK	GENMASK(18, 15)
-#define STEPCONFIG_INM(val)	FIELD_PREP(STEPCONFIG_INM_MASK, (val))
+#define STEPCONFIG_RFP(val)	FIELD_PREP(GENMASK(13, 12), (val))
+#define STEPCONFIG_RFP_VREFP	STEPCONFIG_RFP(3)
+#define STEPCONFIG_INM(val)	FIELD_PREP(GENMASK(18, 15), (val))
 #define STEPCONFIG_INM_ADCREFM	STEPCONFIG_INM(8)
-#define STEPCONFIG_INP_MASK	GENMASK(22, 19)
-#define STEPCONFIG_INP(val)	FIELD_PREP(STEPCONFIG_INP_MASK, (val))
+#define STEPCONFIG_INP(val)	FIELD_PREP(GENMASK(22, 19), (val))
 #define STEPCONFIG_INP_AN4	STEPCONFIG_INP(4)
 #define STEPCONFIG_INP_ADCREFM	STEPCONFIG_INP(8)
 #define STEPCONFIG_FIFO1	BIT(26)
-#define STEPCONFIG_RFM_VREFN	GENMASK(24, 23)
-#define STEPCONFIG_RFM(val)	FIELD_PREP(STEPCONFIG_RFM_VREFN, (val))
+#define STEPCONFIG_RFM(val)	FIELD_PREP(GENMASK(24, 23), (val))
+#define STEPCONFIG_RFM_VREFN	STEPCONFIG_RFM(3)
 
 /* Delay register */
-#define STEPDELAY_OPEN_MASK	GENMASK(17, 0)
-#define STEPDELAY_OPEN(val)	FIELD_PREP(STEPDELAY_OPEN_MASK, (val))
+#define STEPDELAY_OPEN(val)	FIELD_PREP(GENMASK(17, 0), (val))
 #define STEPCONFIG_OPENDLY	STEPDELAY_OPEN(0x098)
-#define STEPDELAY_SAMPLE_MASK	GENMASK(31, 24)
-#define STEPDELAY_SAMPLE(val)	FIELD_PREP(STEPDELAY_SAMPLE_MASK, (val))
 #define STEPCONFIG_MAX_OPENDLY	GENMASK(17, 0)
+#define STEPDELAY_SAMPLE(val)	FIELD_PREP(GENMASK(31, 24), (val))
 #define STEPCONFIG_SAMPLEDLY	STEPDELAY_SAMPLE(0)
 #define STEPCONFIG_MAX_SAMPLE	GENMASK(7, 0)
 
 /* Charge Config */
-#define STEPCHARGE_RFP_MASK	GENMASK(14, 12)
-#define STEPCHARGE_RFP(val)	FIELD_PREP(STEPCHARGE_RFP_MASK, (val))
+#define STEPCHARGE_RFP(val)	FIELD_PREP(GENMASK(14, 12), (val))
 #define STEPCHARGE_RFP_XPUL	STEPCHARGE_RFP(1)
-#define STEPCHARGE_INM_MASK	GENMASK(18, 15)
-#define STEPCHARGE_INM(val)	FIELD_PREP(STEPCHARGE_INM_MASK, (val))
+#define STEPCHARGE_INM(val)	FIELD_PREP(GENMASK(18, 15), (val))
 #define STEPCHARGE_INM_AN1	STEPCHARGE_INM(1)
-#define STEPCHARGE_INP_MASK	GENMASK(22, 19)
-#define STEPCHARGE_INP(val)	FIELD_PREP(STEPCHARGE_INP_MASK, (val))
-#define STEPCHARGE_RFM_MASK	GENMASK(24, 23)
-#define STEPCHARGE_RFM(val)	FIELD_PREP(STEPCHARGE_RFM_MASK, (val))
+#define STEPCHARGE_INP(val)	FIELD_PREP(GENMASK(22, 19), (val))
+#define STEPCHARGE_RFM(val)	FIELD_PREP(GENMASK(24, 23), (val))
 #define STEPCHARGE_RFM_XNUR	STEPCHARGE_RFM(1)
 
 /* Charge delay */
-#define CHARGEDLY_OPEN_MASK	GENMASK(17, 0)
-#define CHARGEDLY_OPEN(val)	FIELD_PREP(CHARGEDLY_OPEN_MASK, (val))
+#define CHARGEDLY_OPEN(val)	FIELD_PREP(GENMASK(17, 0), (val))
 #define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(0x400)
 
 /* Control register */
@@ -111,8 +100,7 @@
 #define CNTRLREG_STEPID		BIT(1)
 #define CNTRLREG_STEPCONFIGWRT	BIT(2)
 #define CNTRLREG_POWERDOWN	BIT(4)
-#define CNTRLREG_AFE_CTRL_MASK	GENMASK(6, 5)
-#define CNTRLREG_AFE_CTRL(val)	FIELD_PREP(CNTRLREG_AFE_CTRL_MASK, (val))
+#define CNTRLREG_AFE_CTRL(val)	FIELD_PREP(GENMASK(6, 5), (val))
 #define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
 #define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
 #define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
-- 
cgit v1.2.3


From c3e36b5d069241f3cbc0e647cf297c5a329cd7a6 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:48 +0200
Subject: mfd: ti_am335x_tscadc: Rename the subsystem enable macro

This bit is common to all devices (ADC, Touchscreen, Magnetic reader) so
make it clear that it can be used from any location by operating a
mechanical rename:
s/CNTRLREG_TSCSSENB/CNTRLREG_SSENB/

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-31-miquel.raynal@bootlin.com
---
 drivers/iio/adc/ti_am335x_adc.c      | 6 +++---
 drivers/mfd/ti_am335x_tscadc.c       | 6 +++---
 include/linux/mfd/ti_am335x_tscadc.h | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index a241e6fa3564..1cc2efbb2875 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -184,7 +184,7 @@ static irqreturn_t tiadc_irq_h(int irq, void *private)
 	if (status & IRQENB_FIFO1OVRRUN) {
 		/* FIFO Overrun. Clear flag. Disable/Enable ADC to recover */
 		config = tiadc_readl(adc_dev, REG_CTRL);
-		config &= ~(CNTRLREG_TSCSSENB);
+		config &= ~(CNTRLREG_SSENB);
 		tiadc_writel(adc_dev, REG_CTRL, config);
 		tiadc_writel(adc_dev, REG_IRQSTATUS, IRQENB_FIFO1OVRRUN
 				| IRQENB_FIFO1UNDRFLW | IRQENB_FIFO1THRES);
@@ -197,7 +197,7 @@ static irqreturn_t tiadc_irq_h(int irq, void *private)
 			adc_fsm = tiadc_readl(adc_dev, REG_ADCFSM);
 		} while (adc_fsm != 0x10 && count++ < 100);
 
-		tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_TSCSSENB));
+		tiadc_writel(adc_dev, REG_CTRL, (config | CNTRLREG_SSENB));
 		return IRQ_HANDLED;
 	} else if (status & IRQENB_FIFO1THRES) {
 		/* Disable irq and wake worker thread */
@@ -671,7 +671,7 @@ static int __maybe_unused tiadc_suspend(struct device *dev)
 	unsigned int idle;
 
 	idle = tiadc_readl(adc_dev, REG_CTRL);
-	idle &= ~(CNTRLREG_TSCSSENB);
+	idle &= ~(CNTRLREG_SSENB);
 	tiadc_writel(adc_dev, REG_CTRL, (idle |
 			CNTRLREG_POWERDOWN));
 
diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index df4f905a7b52..a34ca378e82c 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -235,7 +235,7 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	tscadc_idle_config(tscadc);
 
 	/* Enable the TSC module enable bit */
-	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl | CNTRLREG_TSCSSENB);
+	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl | CNTRLREG_SSENB);
 
 	/* TSC Cell */
 	if (tsc_wires > 0) {
@@ -299,7 +299,7 @@ static int __maybe_unused tscadc_suspend(struct device *dev)
 
 		regmap_read(tscadc->regmap, REG_CTRL, &ctrl);
 		ctrl &= ~(CNTRLREG_POWERDOWN);
-		ctrl |= CNTRLREG_TSCSSENB;
+		ctrl |= CNTRLREG_SSENB;
 		regmap_write(tscadc->regmap, REG_CTRL, ctrl);
 	}
 	pm_runtime_put_sync(dev);
@@ -316,7 +316,7 @@ static int __maybe_unused tscadc_resume(struct device *dev)
 	regmap_write(tscadc->regmap, REG_CLKDIV, tscadc->clk_div);
 	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl);
 	tscadc_idle_config(tscadc);
-	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl | CNTRLREG_TSCSSENB);
+	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl | CNTRLREG_SSENB);
 
 	return 0;
 }
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 51d987080cd3..860289ae8516 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -96,7 +96,7 @@
 #define CHARGEDLY_OPENDLY	CHARGEDLY_OPEN(0x400)
 
 /* Control register */
-#define CNTRLREG_TSCSSENB	BIT(0)
+#define CNTRLREG_SSENB		BIT(0)
 #define CNTRLREG_STEPID		BIT(1)
 #define CNTRLREG_STEPCONFIGWRT	BIT(2)
 #define CNTRLREG_POWERDOWN	BIT(4)
-- 
cgit v1.2.3


From 2f89c2619ce93bf2b0e6e721614fc33e8cf48f03 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:49 +0200
Subject: mfd: ti_am335x_tscadc: Add TSC prefix in certain macros

While the register list (and names) between ADC0 and ADC1 are pretty
close, the bits inside changed a little bit. To avoid any future
confusion, let's add the TSC prefix when some bits are in a register
that is common to both revisions of the ADC, but are specific to the
am33xx hardware.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-32-miquel.raynal@bootlin.com
---
 drivers/mfd/ti_am335x_tscadc.c       | 10 +++++-----
 include/linux/mfd/ti_am335x_tscadc.h | 14 +++++++-------
 2 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index a34ca378e82c..ffd52e021c2c 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -222,13 +222,13 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	 * of the CTRL register but not the subsystem enable bit which must be
 	 * added manually when timely.
 	 */
-	tscadc->ctrl = CNTRLREG_STEPCONFIGWRT | CNTRLREG_STEPID;
+	tscadc->ctrl = CNTRLREG_TSC_STEPCONFIGWRT | CNTRLREG_STEPID;
 	if (tsc_wires > 0) {
-		tscadc->ctrl |= CNTRLREG_TSCENB;
+		tscadc->ctrl |= CNTRLREG_TSC_ENB;
 		if (tsc_wires == 5)
-			tscadc->ctrl |= CNTRLREG_5WIRE;
+			tscadc->ctrl |= CNTRLREG_TSC_5WIRE;
 		else
-			tscadc->ctrl |= CNTRLREG_4WIRE;
+			tscadc->ctrl |= CNTRLREG_TSC_4WIRE;
 	}
 	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl);
 
@@ -328,7 +328,7 @@ static const struct ti_tscadc_data tscdata = {
 	.adc_feature_compatible = "ti,am3359-adc",
 	.secondary_feature_name = "TI-am335x-tsc",
 	.secondary_feature_compatible = "ti,am3359-tsc",
-	.target_clk_rate = ADC_CLK,
+	.target_clk_rate = TSC_ADC_CLK,
 };
 
 static const struct of_device_id ti_tscadc_dt_ids[] = {
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 860289ae8516..cc6de9258455 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -98,13 +98,13 @@
 /* Control register */
 #define CNTRLREG_SSENB		BIT(0)
 #define CNTRLREG_STEPID		BIT(1)
-#define CNTRLREG_STEPCONFIGWRT	BIT(2)
+#define CNTRLREG_TSC_STEPCONFIGWRT BIT(2)
 #define CNTRLREG_POWERDOWN	BIT(4)
-#define CNTRLREG_AFE_CTRL(val)	FIELD_PREP(GENMASK(6, 5), (val))
-#define CNTRLREG_4WIRE		CNTRLREG_AFE_CTRL(1)
-#define CNTRLREG_5WIRE		CNTRLREG_AFE_CTRL(2)
-#define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
-#define CNTRLREG_TSCENB		BIT(7)
+#define CNTRLREG_TSC_AFE_CTRL(val) FIELD_PREP(GENMASK(6, 5), (val))
+#define CNTRLREG_TSC_4WIRE	CNTRLREG_TSC_AFE_CTRL(1)
+#define CNTRLREG_TSC_5WIRE	CNTRLREG_TSC_AFE_CTRL(2)
+#define CNTRLREG_TSC_8WIRE	CNTRLREG_TSC_AFE_CTRL(3)
+#define CNTRLREG_TSC_ENB	BIT(7)
 
 /* FIFO READ Register */
 #define FIFOREAD_DATA_MASK	GENMASK(11, 0)
@@ -118,7 +118,7 @@
 #define SEQ_STATUS		BIT(5)
 #define CHARGE_STEP		0x11
 
-#define ADC_CLK			(3 * HZ_PER_MHZ)
+#define TSC_ADC_CLK		(3 * HZ_PER_MHZ)
 #define TOTAL_STEPS		16
 #define TOTAL_CHANNELS		8
 #define FIFO1_THRESHOLD		19
-- 
cgit v1.2.3


From bf0f394c7b1e4d623ca2afdf59b3b4b95cc6f592 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:53 +0200
Subject: mfd: ti_am335x_tscadc: Introduce a helper to deal with the type of
 hardware

One way of knowing which hardware we are dealing with is to check the
compatible string. When this must be done at several places, it's best
and certainly more clear to use a helper for that.

Introduce ti_adc_with_touchscreen() to indicate if there is a touchscreen
controller available (meaning it's an am33xx-like ADC). This helper does
not indicate if it is actually used (that is the purpose of the use_tsc
boolean).

Introducing this helper helps making a difference in the code between
what is generic to both types of ADCs and what is specific to the am33xx
hardware before introducing support for the am437x hardware.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-36-miquel.raynal@bootlin.com
---
 drivers/mfd/ti_am335x_tscadc.c       | 36 +++++++++++++++++++++---------------
 include/linux/mfd/ti_am335x_tscadc.h |  6 ++++++
 2 files changed, 27 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index f7cfe2016bbc..4f76b5498077 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -105,8 +105,9 @@ static void tscadc_idle_config(struct ti_tscadc_dev *tscadc)
 {
 	unsigned int idleconfig;
 
-	idleconfig = STEPCONFIG_YNN | STEPCONFIG_INM_ADCREFM |
-			STEPCONFIG_INP_ADCREFM | STEPCONFIG_YPN;
+	idleconfig = STEPCONFIG_INM_ADCREFM | STEPCONFIG_INP_ADCREFM;
+	if (ti_adc_with_touchscreen(tscadc))
+		idleconfig |= STEPCONFIG_YNN | STEPCONFIG_YPN;
 
 	regmap_write(tscadc->regmap, REG_IDLECONFIG, idleconfig);
 }
@@ -140,12 +141,14 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 
 	tscadc->data = of_device_get_match_data(&pdev->dev);
 
-	node = of_get_child_by_name(pdev->dev.of_node, "tsc");
-	of_property_read_u32(node, "ti,wires", &tscmag_wires);
-	of_property_read_u32(node, "ti,coordiante-readouts", &readouts);
-	of_node_put(node);
-	if (tscmag_wires)
-		use_tsc = true;
+	if (ti_adc_with_touchscreen(tscadc)) {
+		node = of_get_child_by_name(pdev->dev.of_node, "tsc");
+		of_property_read_u32(node, "ti,wires", &tscmag_wires);
+		of_property_read_u32(node, "ti,coordiante-readouts", &readouts);
+		of_node_put(node);
+		if (tscmag_wires)
+			use_tsc = true;
+	}
 
 	node = of_get_child_by_name(pdev->dev.of_node, "adc");
 	of_property_for_each_u32(node, "ti,adc-channels", prop, cur, val) {
@@ -225,13 +228,16 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	 * of the CTRL register but not the subsystem enable bit which must be
 	 * added manually when timely.
 	 */
-	tscadc->ctrl = CNTRLREG_TSC_STEPCONFIGWRT | CNTRLREG_STEPID;
-	if (use_tsc) {
-		tscadc->ctrl |= CNTRLREG_TSC_ENB;
-		if (tscmag_wires == 5)
-			tscadc->ctrl |= CNTRLREG_TSC_5WIRE;
-		else
-			tscadc->ctrl |= CNTRLREG_TSC_4WIRE;
+	tscadc->ctrl = CNTRLREG_STEPID;
+	if (ti_adc_with_touchscreen(tscadc)) {
+		tscadc->ctrl |= CNTRLREG_TSC_STEPCONFIGWRT;
+		if (use_tsc) {
+			tscadc->ctrl |= CNTRLREG_TSC_ENB;
+			if (tscmag_wires == 5)
+				tscadc->ctrl |= CNTRLREG_TSC_5WIRE;
+			else
+				tscadc->ctrl |= CNTRLREG_TSC_4WIRE;
+		}
 	}
 	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl);
 
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index cc6de9258455..ee160b2036c1 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -177,6 +177,12 @@ static inline struct ti_tscadc_dev *ti_tscadc_dev_get(struct platform_device *p)
 	return *tscadc_dev;
 }
 
+static inline bool ti_adc_with_touchscreen(struct ti_tscadc_dev *tscadc)
+{
+	return of_device_is_compatible(tscadc->dev->of_node,
+				       "ti,am3359-tscadc");
+}
+
 void am335x_tsc_se_set_cache(struct ti_tscadc_dev *tsadc, u32 val);
 void am335x_tsc_se_set_once(struct ti_tscadc_dev *tsadc, u32 val);
 void am335x_tsc_se_clr(struct ti_tscadc_dev *tsadc, u32 val);
-- 
cgit v1.2.3


From 0a1233031c16d8575be6b864e6fd353b6fd758c4 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:14:54 +0200
Subject: mfd: ti_am335x_tscadc: Add ADC1/magnetic reader support

Introduce a new compatible that has another set of driver data,
targeting am437x SoCs with a magnetic reader instead of the
touchscreen and a more featureful set of registers.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-37-miquel.raynal@bootlin.com
---
 drivers/mfd/ti_am335x_tscadc.c       | 37 +++++++++++++++++++++++++++++-------
 include/linux/mfd/ti_am335x_tscadc.h |  6 ++++++
 2 files changed, 36 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c
index 4f76b5498077..bfbc8288a71e 100644
--- a/drivers/mfd/ti_am335x_tscadc.c
+++ b/drivers/mfd/ti_am335x_tscadc.c
@@ -121,11 +121,11 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	struct mfd_cell *cell;
 	struct property *prop;
 	const __be32 *cur;
-	bool use_tsc = false;
+	bool use_tsc = false, use_mag = false;
 	u32 val;
 	int err;
 	int tscmag_wires = 0, adc_channels = 0, cell_idx = 0, total_channels;
-	int readouts = 0;
+	int readouts = 0, mag_tracks = 0;
 
 	/* Allocate memory for device */
 	tscadc = devm_kzalloc(&pdev->dev, sizeof(*tscadc), GFP_KERNEL);
@@ -148,6 +148,16 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 		of_node_put(node);
 		if (tscmag_wires)
 			use_tsc = true;
+	} else {
+		/*
+		 * When adding support for the magnetic stripe reader, here is
+		 * the place to look for the number of tracks used from device
+		 * tree. Let's default to 0 for now.
+		 */
+		mag_tracks = 0;
+		tscmag_wires = mag_tracks * 2;
+		if (tscmag_wires)
+			use_mag = true;
 	}
 
 	node = of_get_child_by_name(pdev->dev.of_node, "adc");
@@ -209,8 +219,9 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	 * The TSC_ADC_Subsystem has 2 clock domains: OCP_CLK and ADC_CLK.
 	 * ADCs produce a 12-bit sample every 15 ADC_CLK cycles.
 	 * am33xx ADCs expect to capture 200ksps.
-	 * We need the ADC clocks to run at 3MHz.
-	 * This frequency is valid since TSC_ADC_SS controller design
+	 * am47xx ADCs expect to capture 867ksps.
+	 * We need ADC clocks respectively running at 3MHz and 13MHz.
+	 * These frequencies are valid since TSC_ADC_SS controller design
 	 * assumes the OCP clock is at least 6x faster than the ADC clock.
 	 */
 	clk = devm_clk_get(&pdev->dev, NULL);
@@ -238,6 +249,9 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 			else
 				tscadc->ctrl |= CNTRLREG_TSC_4WIRE;
 		}
+	} else {
+		tscadc->ctrl |= CNTRLREG_MAG_PREAMP_PWRDOWN |
+				CNTRLREG_MAG_PREAMP_BYPASS;
 	}
 	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl);
 
@@ -246,8 +260,8 @@ static	int ti_tscadc_probe(struct platform_device *pdev)
 	/* Enable the TSC module enable bit */
 	regmap_write(tscadc->regmap, REG_CTRL, tscadc->ctrl | CNTRLREG_SSENB);
 
-	/* TSC Cell */
-	if (tscmag_wires > 0) {
+	/* TSC or MAG Cell */
+	if (use_tsc || use_mag) {
 		cell = &tscadc->cells[cell_idx++];
 		cell->name = tscadc->data->secondary_feature_name;
 		cell->of_compatible = tscadc->data->secondary_feature_compatible;
@@ -340,8 +354,17 @@ static const struct ti_tscadc_data tscdata = {
 	.target_clk_rate = TSC_ADC_CLK,
 };
 
+static const struct ti_tscadc_data magdata = {
+	.adc_feature_name = "TI-am43xx-adc",
+	.adc_feature_compatible = "ti,am4372-adc",
+	.secondary_feature_name = "TI-am43xx-mag",
+	.secondary_feature_compatible = "ti,am4372-mag",
+	.target_clk_rate = MAG_ADC_CLK,
+};
+
 static const struct of_device_id ti_tscadc_dt_ids[] = {
 	{ .compatible = "ti,am3359-tscadc", .data = &tscdata },
+	{ .compatible = "ti,am4372-magadc", .data = &magdata },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, ti_tscadc_dt_ids);
@@ -359,6 +382,6 @@ static struct platform_driver ti_tscadc_driver = {
 
 module_platform_driver(ti_tscadc_driver);
 
-MODULE_DESCRIPTION("TI touchscreen / ADC MFD controller driver");
+MODULE_DESCRIPTION("TI touchscreen/magnetic stripe reader/ADC MFD controller driver");
 MODULE_AUTHOR("Rachna Patil <rachna@ti.com>");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index ee160b2036c1..5225e3fc194d 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -106,6 +106,11 @@
 #define CNTRLREG_TSC_8WIRE	CNTRLREG_TSC_AFE_CTRL(3)
 #define CNTRLREG_TSC_ENB	BIT(7)
 
+/*Control registers bitfields  for MAGADC IP */
+#define CNTRLREG_MAGADCENB      BIT(0)
+#define CNTRLREG_MAG_PREAMP_PWRDOWN BIT(5)
+#define CNTRLREG_MAG_PREAMP_BYPASS  BIT(6)
+
 /* FIFO READ Register */
 #define FIFOREAD_DATA_MASK	GENMASK(11, 0)
 #define FIFOREAD_CHNLID_MASK	GENMASK(19, 16)
@@ -119,6 +124,7 @@
 #define CHARGE_STEP		0x11
 
 #define TSC_ADC_CLK		(3 * HZ_PER_MHZ)
+#define MAG_ADC_CLK		(13 * HZ_PER_MHZ)
 #define TOTAL_STEPS		16
 #define TOTAL_CHANNELS		8
 #define FIFO1_THRESHOLD		19
-- 
cgit v1.2.3


From 789e5ebcc61b72a71717b02b62e6c6f10fdbb722 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 15 Oct 2021 10:15:01 +0200
Subject: iio: adc: ti_am335x_adc: Add a unit to the timeout delay

The lack of unit in the macro name kind of tricked me when I was
troubleshooting an issue. Physical constants should always get a unit.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211015081506.933180-44-miquel.raynal@bootlin.com
---
 drivers/iio/adc/ti_am335x_adc.c      | 4 ++--
 include/linux/mfd/ti_am335x_tscadc.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 6f47a1ace3d4..e7dba10b29b4 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -101,7 +101,7 @@ static int tiadc_wait_idle(struct tiadc_device *adc_dev)
 
 	return readl_poll_timeout(adc_dev->mfd_tscadc->tscadc_base + REG_ADCFSM,
 				  val, !(val & SEQ_STATUS), 10,
-				  IDLE_TIMEOUT * 1000 * adc_dev->channels);
+				  IDLE_TIMEOUT_MS * 1000 * adc_dev->channels);
 }
 
 static void tiadc_step_config(struct iio_dev *indio_dev)
@@ -461,7 +461,7 @@ static int tiadc_read_raw(struct iio_dev *indio_dev,
 	am335x_tsc_se_set_once(adc_dev->mfd_tscadc, step_en);
 
 	/* Wait for Fifo threshold interrupt */
-	timeout = jiffies + msecs_to_jiffies(IDLE_TIMEOUT * adc_dev->channels);
+	timeout = jiffies + msecs_to_jiffies(IDLE_TIMEOUT_MS * adc_dev->channels);
 	while (1) {
 		fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
 		if (fifo1count)
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 5225e3fc194d..ba13e043d910 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -141,7 +141,7 @@
  *
  * max processing time: 266431 * 308ns = 83ms(approx)
  */
-#define IDLE_TIMEOUT		83 /* milliseconds */
+#define IDLE_TIMEOUT_MS		83 /* milliseconds */
 
 #define TSCADC_CELLS		2
 
-- 
cgit v1.2.3


From 9a48e7564ac83fb0f1d5b0eac5fe8a7af62da398 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 20 Oct 2021 13:00:39 -0700
Subject: compiler-gcc.h: Define __SANITIZE_ADDRESS__ under hwaddress sanitizer

When Clang is using the hwaddress sanitizer, it sets __SANITIZE_ADDRESS__
explicitly:

 #if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer)
 /* Emulate GCC's __SANITIZE_ADDRESS__ flag */
 #define __SANITIZE_ADDRESS__
 #endif

Once hwaddress sanitizer was added to GCC, however, a separate define
was created, __SANITIZE_HWADDRESS__. The kernel is expecting to find
__SANITIZE_ADDRESS__ in either case, though, and the existing string
macros break on supported architectures:

 #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
          !defined(__SANITIZE_ADDRESS__)

where as other architectures (like arm32) have no idea about hwaddress
sanitizer and just check for __SANITIZE_ADDRESS__:

 #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)

This would lead to compiler foritfy self-test warnings when building
with CONFIG_KASAN_SW_TAGS=y:

warning: unsafe memmove() usage lacked '__read_overflow2' symbol in lib/test_fortify/read_overflow2-memmove.c
warning: unsafe memcpy() usage lacked '__write_overflow' symbol in lib/test_fortify/write_overflow-memcpy.c
...

Sort this out by also defining __SANITIZE_ADDRESS__ in GCC under the
hwaddress sanitizer.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Will Deacon <will@kernel.org>
Cc: Arvind Sankar <nivedita@alum.mit.edu>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: llvm@lists.linux.dev
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20211020200039.170424-1-keescook@chromium.org
---
 include/linux/compiler-gcc.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 9957085b8148..7bbd8df02532 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -121,6 +121,14 @@
 #define __no_sanitize_coverage
 #endif
 
+/*
+ * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel,
+ * matching the defines used by Clang.
+ */
+#ifdef __SANITIZE_HWADDRESS__
+#define __SANITIZE_ADDRESS__
+#endif
+
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
-- 
cgit v1.2.3


From 3598b30bd970bbc09dba0cf31aa0a04105f37207 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 20 Oct 2021 21:08:06 +0200
Subject: cpufreq: Fix typo in cpufreq.h

s/internale/internal/

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/cpufreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ff88bb3e44fc..3317887027b5 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -385,7 +385,7 @@ struct cpufreq_driver {
 /* flags */
 
 /*
- * Set by drivers that need to update internale upper and lower boundaries along
+ * Set by drivers that need to update internal upper and lower boundaries along
  * with the target frequency and so the core and governors should also invoke
  * the diver if the target frequency does not change, but the policy min or max
  * may have changed.
-- 
cgit v1.2.3


From e279317e9aeb11d8670e0a5acb10d50566eea9c9 Mon Sep 17 00:00:00 2001
From: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Date: Fri, 15 Oct 2021 11:47:00 +0200
Subject: rpmsg: core: add API to get MTU

Return the rpmsg buffer MTU for sending message, so rpmsg users
can split a long message in several sub rpmsg buffers.

Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
Link: https://lore.kernel.org/r/20211015094701.5732-2-arnaud.pouliquen@foss.st.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/rpmsg/rpmsg_core.c       | 21 +++++++++++++++++++++
 drivers/rpmsg/rpmsg_internal.h   |  2 ++
 drivers/rpmsg/virtio_rpmsg_bus.c | 10 ++++++++++
 include/linux/rpmsg.h            | 10 ++++++++++
 4 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c
index 9151836190ce..d3eb60059ef1 100644
--- a/drivers/rpmsg/rpmsg_core.c
+++ b/drivers/rpmsg/rpmsg_core.c
@@ -327,6 +327,27 @@ int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
 }
 EXPORT_SYMBOL(rpmsg_trysend_offchannel);
 
+/**
+ * rpmsg_get_mtu() - get maximum transmission buffer size for sending message.
+ * @ept: the rpmsg endpoint
+ *
+ * This function returns maximum buffer size available for a single outgoing message.
+ *
+ * Return: the maximum transmission size on success and an appropriate error
+ * value on failure.
+ */
+
+ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept)
+{
+	if (WARN_ON(!ept))
+		return -EINVAL;
+	if (!ept->ops->get_mtu)
+		return -ENOTSUPP;
+
+	return ept->ops->get_mtu(ept);
+}
+EXPORT_SYMBOL(rpmsg_get_mtu);
+
 /*
  * match a rpmsg channel with a channel info struct.
  * this is used to make sure we're not creating rpmsg devices for channels
diff --git a/drivers/rpmsg/rpmsg_internal.h b/drivers/rpmsg/rpmsg_internal.h
index a76c344253bf..b1245d3ed7c6 100644
--- a/drivers/rpmsg/rpmsg_internal.h
+++ b/drivers/rpmsg/rpmsg_internal.h
@@ -53,6 +53,7 @@ struct rpmsg_device_ops {
  * @trysendto:		see @rpmsg_trysendto(), optional
  * @trysend_offchannel:	see @rpmsg_trysend_offchannel(), optional
  * @poll:		see @rpmsg_poll(), optional
+ * @get_mtu:		see @rpmsg_get_mtu(), optional
  *
  * Indirection table for the operations that a rpmsg backend should implement.
  * In addition to @destroy_ept, the backend must at least implement @send and
@@ -72,6 +73,7 @@ struct rpmsg_endpoint_ops {
 			     void *data, int len);
 	__poll_t (*poll)(struct rpmsg_endpoint *ept, struct file *filp,
 			     poll_table *wait);
+	ssize_t (*get_mtu)(struct rpmsg_endpoint *ept);
 };
 
 struct device *rpmsg_find_device(struct device *parent,
diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c
index 8e49a3bacfc7..05fd06fc67e9 100644
--- a/drivers/rpmsg/virtio_rpmsg_bus.c
+++ b/drivers/rpmsg/virtio_rpmsg_bus.c
@@ -149,6 +149,7 @@ static int virtio_rpmsg_trysendto(struct rpmsg_endpoint *ept, void *data,
 				  int len, u32 dst);
 static int virtio_rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src,
 					   u32 dst, void *data, int len);
+static ssize_t virtio_rpmsg_get_mtu(struct rpmsg_endpoint *ept);
 static struct rpmsg_device *__rpmsg_create_channel(struct virtproc_info *vrp,
 						   struct rpmsg_channel_info *chinfo);
 
@@ -160,6 +161,7 @@ static const struct rpmsg_endpoint_ops virtio_endpoint_ops = {
 	.trysend = virtio_rpmsg_trysend,
 	.trysendto = virtio_rpmsg_trysendto,
 	.trysend_offchannel = virtio_rpmsg_trysend_offchannel,
+	.get_mtu = virtio_rpmsg_get_mtu,
 };
 
 /**
@@ -696,6 +698,14 @@ static int virtio_rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src,
 	return rpmsg_send_offchannel_raw(rpdev, src, dst, data, len, false);
 }
 
+static ssize_t virtio_rpmsg_get_mtu(struct rpmsg_endpoint *ept)
+{
+	struct rpmsg_device *rpdev = ept->rpdev;
+	struct virtio_rpmsg_channel *vch = to_virtio_rpmsg_channel(rpdev);
+
+	return vch->vrp->buf_size - sizeof(struct rpmsg_hdr);
+}
+
 static int rpmsg_recv_single(struct virtproc_info *vrp, struct device *dev,
 			     struct rpmsg_hdr *msg, unsigned int len)
 {
diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h
index d97dcd049f18..990b80fb49ad 100644
--- a/include/linux/rpmsg.h
+++ b/include/linux/rpmsg.h
@@ -186,6 +186,8 @@ int rpmsg_trysend_offchannel(struct rpmsg_endpoint *ept, u32 src, u32 dst,
 __poll_t rpmsg_poll(struct rpmsg_endpoint *ept, struct file *filp,
 			poll_table *wait);
 
+ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept);
+
 #else
 
 static inline int rpmsg_register_device(struct rpmsg_device *rpdev)
@@ -296,6 +298,14 @@ static inline __poll_t rpmsg_poll(struct rpmsg_endpoint *ept,
 	return 0;
 }
 
+static inline ssize_t rpmsg_get_mtu(struct rpmsg_endpoint *ept)
+{
+	/* This shouldn't be possible */
+	WARN_ON(1);
+
+	return -ENXIO;
+}
+
 #endif /* IS_ENABLED(CONFIG_RPMSG) */
 
 /* use a macro to avoid include chaining to get THIS_MODULE */
-- 
cgit v1.2.3


From 8ac33b8b6841e99a624ace543d92cbf598a91381 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <vilhelm.gray@gmail.com>
Date: Thu, 21 Oct 2021 19:35:40 +0900
Subject: counter: Fix use-after-free race condition for events_queue_size
 write

A race condition is possible when writing to events_queue_size where the
events kfifo is freed during the execution of a kfifo_in(), resulting in
a use-after-free. This patch prevents such a scenario by protecting the
events queue in operation with a spinlock and locking before performing
the events queue size adjustment.

The existing events_lock mutex is renamed to events_out_lock to reflect
that it only protects events queue out operations. Because the events
queue in operations can occur in an interrupt context, a new
events_in_lock spinlock is introduced and utilized.

Fixes: feff17a550c7 ("counter: Implement events_queue_size sysfs attribute")
Cc: David Lechner <david@lechnology.com>
Signed-off-by: William Breathitt Gray <vilhelm.gray@gmail.com>
Link: https://lore.kernel.org/r/20211021103540.955639-1-vilhelm.gray@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/counter/counter-chrdev.c | 10 ++++++----
 drivers/counter/counter-sysfs.c  |  7 +++++++
 include/linux/counter.h          |  6 ++++--
 3 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/counter/counter-chrdev.c b/drivers/counter/counter-chrdev.c
index 0c82613582f1..b7c62f957a6a 100644
--- a/drivers/counter/counter-chrdev.c
+++ b/drivers/counter/counter-chrdev.c
@@ -81,10 +81,10 @@ static ssize_t counter_chrdev_read(struct file *filp, char __user *buf,
 				return -ENODEV;
 		}
 
-		if (mutex_lock_interruptible(&counter->events_lock))
+		if (mutex_lock_interruptible(&counter->events_out_lock))
 			return -ERESTARTSYS;
 		err = kfifo_to_user(&counter->events, buf, len, &copied);
-		mutex_unlock(&counter->events_lock);
+		mutex_unlock(&counter->events_out_lock);
 		if (err < 0)
 			return err;
 	} while (!copied);
@@ -436,7 +436,8 @@ int counter_chrdev_add(struct counter_device *const counter)
 	spin_lock_init(&counter->events_list_lock);
 	mutex_init(&counter->n_events_list_lock);
 	init_waitqueue_head(&counter->events_wait);
-	mutex_init(&counter->events_lock);
+	spin_lock_init(&counter->events_in_lock);
+	mutex_init(&counter->events_out_lock);
 
 	/* Initialize character device */
 	cdev_init(&counter->chrdev, &counter_fops);
@@ -559,7 +560,8 @@ void counter_push_event(struct counter_device *const counter, const u8 event,
 		ev.watch.component = comp_node->component;
 		ev.status = -counter_get_data(counter, comp_node, &ev.value);
 
-		copied += kfifo_in(&counter->events, &ev, 1);
+		copied += kfifo_in_spinlocked_noirqsave(&counter->events, &ev,
+							1, &counter->events_in_lock);
 	}
 
 exit_early:
diff --git a/drivers/counter/counter-sysfs.c b/drivers/counter/counter-sysfs.c
index 67a988851657..7cc4d1d523ea 100644
--- a/drivers/counter/counter-sysfs.c
+++ b/drivers/counter/counter-sysfs.c
@@ -11,6 +11,8 @@
 #include <linux/kfifo.h>
 #include <linux/kstrtox.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
 #include <linux/string.h>
 #include <linux/sysfs.h>
 #include <linux/types.h>
@@ -796,6 +798,7 @@ static int counter_events_queue_size_write(struct counter_device *counter,
 {
 	DECLARE_KFIFO_PTR(events, struct counter_event);
 	int err;
+	unsigned long flags;
 
 	/* Allocate new events queue */
 	err = kfifo_alloc(&events, val, GFP_KERNEL);
@@ -803,8 +806,12 @@ static int counter_events_queue_size_write(struct counter_device *counter,
 		return err;
 
 	/* Swap in new events queue */
+	mutex_lock(&counter->events_out_lock);
+	spin_lock_irqsave(&counter->events_in_lock, flags);
 	kfifo_free(&counter->events);
 	counter->events.kfifo = events.kfifo;
+	spin_unlock_irqrestore(&counter->events_in_lock, flags);
+	mutex_unlock(&counter->events_out_lock);
 
 	return 0;
 }
diff --git a/include/linux/counter.h b/include/linux/counter.h
index 0fd99e255a50..b7d0a00a61cf 100644
--- a/include/linux/counter.h
+++ b/include/linux/counter.h
@@ -296,7 +296,8 @@ struct counter_ops {
  * @n_events_list_lock:	lock to protect Counter next events list operations
  * @events:		queue of detected Counter events
  * @events_wait:	wait queue to allow blocking reads of Counter events
- * @events_lock:	lock to protect Counter events queue read operations
+ * @events_in_lock:	lock to protect Counter events queue in operations
+ * @events_out_lock:	lock to protect Counter events queue out operations
  * @ops_exist_lock:	lock to prevent use during removal
  */
 struct counter_device {
@@ -323,7 +324,8 @@ struct counter_device {
 	struct mutex n_events_list_lock;
 	DECLARE_KFIFO_PTR(events, struct counter_event);
 	wait_queue_head_t events_wait;
-	struct mutex events_lock;
+	spinlock_t events_in_lock;
+	struct mutex events_out_lock;
 	struct mutex ops_exist_lock;
 };
 
-- 
cgit v1.2.3


From 992e5cc7be8e693aba1f0ee7905d34c87fd7872a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 20 Oct 2021 20:49:55 +0300
Subject: net: dsa: tag_8021q: make dsa_8021q_{rx,tx}_vid take dp as argument

Pass a single argument to dsa_8021q_rx_vid and dsa_8021q_tx_vid that
contains the necessary information from the two arguments that are
currently provided: the switch and the port number.

Also rename those functions so that they have a dsa_port_* prefix, since
they operate on a struct dsa_port *.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_vl.c |  3 ++-
 include/linux/dsa/8021q.h            |  5 +++--
 net/dsa/tag_8021q.c                  | 32 ++++++++++++++++----------------
 net/dsa/tag_ocelot_8021q.c           |  2 +-
 net/dsa/tag_sja1105.c                |  4 ++--
 5 files changed, 24 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index d55572994e1f..f5dca6a9b0f9 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -394,7 +394,8 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 				vl_lookup[k].vlanid = rule->key.vl.vid;
 				vl_lookup[k].vlanprior = rule->key.vl.pcp;
 			} else {
-				u16 vid = dsa_8021q_rx_vid(priv->ds, port);
+				struct dsa_port *dp = dsa_to_port(priv->ds, port);
+				u16 vid = dsa_tag_8021q_rx_vid(dp);
 
 				vl_lookup[k].vlanid = vid;
 				vl_lookup[k].vlanprior = 0;
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index c7fa4a3498fe..254b165f2b44 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 
 struct dsa_switch;
+struct dsa_port;
 struct sk_buff;
 struct net_device;
 
@@ -45,9 +46,9 @@ void dsa_tag_8021q_bridge_tx_fwd_unoffload(struct dsa_switch *ds, int port,
 
 u16 dsa_8021q_bridge_tx_fwd_offload_vid(int bridge_num);
 
-u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port);
+u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp);
 
-u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port);
+u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp);
 
 int dsa_8021q_rx_switch_id(u16 vid);
 
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 8f4e0af2f74f..72cac2c0af7b 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -77,22 +77,22 @@ EXPORT_SYMBOL_GPL(dsa_8021q_bridge_tx_fwd_offload_vid);
 /* Returns the VID to be inserted into the frame from xmit for switch steering
  * instructions on egress. Encodes switch ID and port ID.
  */
-u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port)
+u16 dsa_tag_8021q_tx_vid(const struct dsa_port *dp)
 {
-	return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(ds->index) |
-	       DSA_8021Q_PORT(port);
+	return DSA_8021Q_DIR_TX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
+	       DSA_8021Q_PORT(dp->index);
 }
-EXPORT_SYMBOL_GPL(dsa_8021q_tx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_tx_vid);
 
 /* Returns the VID that will be installed as pvid for this switch port, sent as
  * tagged egress towards the CPU port and decoded by the rcv function.
  */
-u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
+u16 dsa_tag_8021q_rx_vid(const struct dsa_port *dp)
 {
-	return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
-	       DSA_8021Q_PORT(port);
+	return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(dp->ds->index) |
+	       DSA_8021Q_PORT(dp->index);
 }
-EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
+EXPORT_SYMBOL_GPL(dsa_tag_8021q_rx_vid);
 
 /* Returns the decoded switch ID from the RX VID. */
 int dsa_8021q_rx_switch_id(u16 vid)
@@ -354,10 +354,10 @@ int dsa_tag_8021q_bridge_join(struct dsa_switch *ds,
 
 	targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
 	targeted_dp = dsa_to_port(targeted_ds, info->port);
-	targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+	targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
 
 	dsa_switch_for_each_port(dp, ds) {
-		u16 rx_vid = dsa_8021q_rx_vid(ds, dp->index);
+		u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
 
 		if (!dsa_port_tag_8021q_bridge_match(dp, info))
 			continue;
@@ -389,10 +389,10 @@ int dsa_tag_8021q_bridge_leave(struct dsa_switch *ds,
 
 	targeted_ds = dsa_switch_find(info->tree_index, info->sw_index);
 	targeted_dp = dsa_to_port(targeted_ds, info->port);
-	targeted_rx_vid = dsa_8021q_rx_vid(targeted_ds, info->port);
+	targeted_rx_vid = dsa_tag_8021q_rx_vid(targeted_dp);
 
 	dsa_switch_for_each_port(dp, ds) {
-		u16 rx_vid = dsa_8021q_rx_vid(ds, dp->index);
+		u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
 
 		if (!dsa_port_tag_8021q_bridge_match(dp, info))
 			continue;
@@ -433,8 +433,8 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port)
 {
 	struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
 	struct dsa_port *dp = dsa_to_port(ds, port);
-	u16 rx_vid = dsa_8021q_rx_vid(ds, port);
-	u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+	u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
+	u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
 	struct net_device *master;
 	int err;
 
@@ -478,8 +478,8 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port)
 {
 	struct dsa_8021q_context *ctx = ds->tag_8021q_ctx;
 	struct dsa_port *dp = dsa_to_port(ds, port);
-	u16 rx_vid = dsa_8021q_rx_vid(ds, port);
-	u16 tx_vid = dsa_8021q_tx_vid(ds, port);
+	u16 rx_vid = dsa_tag_8021q_rx_vid(dp);
+	u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
 	struct net_device *master;
 
 	/* The CPU port is implicitly configured by
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 3412051981d7..a1919ea5e828 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -39,9 +39,9 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
 				   struct net_device *netdev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(netdev);
-	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+	u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
 	struct ethhdr *hdr = eth_hdr(skb);
 
 	if (ocelot_ptp_rew_op(skb) || is_link_local_ether_addr(hdr->h_dest))
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 8b2d458f72b3..262c8833a910 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -235,9 +235,9 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
 				    struct net_device *netdev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(netdev);
-	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+	u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
 
 	if (skb->offload_fwd_mark)
 		return sja1105_imprecise_xmit(skb, netdev);
@@ -263,9 +263,9 @@ static struct sk_buff *sja1110_xmit(struct sk_buff *skb,
 {
 	struct sk_buff *clone = SJA1105_SKB_CB(skb)->clone;
 	struct dsa_port *dp = dsa_slave_to_port(netdev);
-	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 	u16 queue_mapping = skb_get_queue_mapping(skb);
 	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
+	u16 tx_vid = dsa_tag_8021q_tx_vid(dp);
 	__be32 *tx_trailer;
 	__be16 *tx_header;
 	int trailer_pos;
-- 
cgit v1.2.3


From 061514dbfb79910ef60eb40dd9fc528be3f45d62 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Mon, 18 Oct 2021 17:43:35 -0700
Subject: regulator: lp872x: Remove lp872x_dvs_state

After this driver was converted to gpiod, clang started warning:

vers/regulator/lp872x.c:689:57: error: implicit conversion from
enumeration type 'enum lp872x_dvs_state' to different enumeration type
'enum gpiod_flags' [-Werror,-Wenum-conversion]
        dvs->gpio = devm_gpiod_get_optional(lp->dev, "ti,dvs", pinstate);
                    ~~~~~~~~~~~~~~~~~~~~~~~                    ^~~~~~~~
1 error generated.

lp872x_dvs_state was updated to have values from gpiod_flags but this is
not enough to avoid an implicit conversion warning from either GCC or
clang (although GCC enables this warning under -Wextra instead of -Wall
like clang so it is not seen under normal builds).

Eliminate lp872x_dvs_state in favor of using gpiod_flags everywhere so
that there is no more warning about an implicit conversion.

Fixes: 72bf80cf09c4 ("regulator: lp872x: replacing legacy gpio interface for gpiod")
Link: https://github.com/ClangBuiltLinux/linux/issues/1481
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20211019004335.193492-1-nathan@kernel.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/lp872x.c       | 14 +++++++-------
 include/linux/regulator/lp872x.h |  7 +------
 2 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/lp872x.c b/drivers/regulator/lp872x.c
index 1dba5dbcd461..35d826fe9def 100644
--- a/drivers/regulator/lp872x.c
+++ b/drivers/regulator/lp872x.c
@@ -103,7 +103,7 @@ struct lp872x {
 	enum lp872x_id chipid;
 	struct lp872x_platform_data *pdata;
 	int num_regulators;
-	enum lp872x_dvs_state dvs_pin;
+	enum gpiod_flags dvs_pin;
 };
 
 /* LP8720/LP8725 shared voltage table for LDOs */
@@ -251,9 +251,9 @@ static int lp872x_regulator_enable_time(struct regulator_dev *rdev)
 static void lp872x_set_dvs(struct lp872x *lp, enum lp872x_dvs_sel dvs_sel,
 			struct gpio_desc *gpio)
 {
-	enum lp872x_dvs_state state;
+	enum gpiod_flags state;
 
-	state = dvs_sel == SEL_V1 ? DVS_HIGH : DVS_LOW;
+	state = dvs_sel == SEL_V1 ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
 	gpiod_set_value(gpio, state);
 	lp->dvs_pin = state;
 }
@@ -269,7 +269,7 @@ static u8 lp872x_select_buck_vout_addr(struct lp872x *lp,
 	switch (buck) {
 	case LP8720_ID_BUCK:
 		if (val & LP8720_EXT_DVS_M) {
-			addr = (lp->dvs_pin == DVS_HIGH) ?
+			addr = (lp->dvs_pin == GPIOD_OUT_HIGH) ?
 				LP8720_BUCK_VOUT1 : LP8720_BUCK_VOUT2;
 		} else {
 			if (lp872x_read_byte(lp, LP8720_ENABLE, &val))
@@ -283,7 +283,7 @@ static u8 lp872x_select_buck_vout_addr(struct lp872x *lp,
 		if (val & LP8725_DVS1_M)
 			addr = LP8725_BUCK1_VOUT1;
 		else
-			addr = (lp->dvs_pin == DVS_HIGH) ?
+			addr = (lp->dvs_pin == GPIOD_OUT_HIGH) ?
 				LP8725_BUCK1_VOUT1 : LP8725_BUCK1_VOUT2;
 		break;
 	case LP8725_ID_BUCK2:
@@ -675,7 +675,7 @@ static const struct regulator_desc lp8725_regulator_desc[] = {
 static int lp872x_init_dvs(struct lp872x *lp)
 {
 	struct lp872x_dvs *dvs = lp->pdata ? lp->pdata->dvs : NULL;
-	enum lp872x_dvs_state pinstate;
+	enum gpiod_flags pinstate;
 	u8 mask[] = { LP8720_EXT_DVS_M, LP8725_DVS1_M | LP8725_DVS2_M };
 	u8 default_dvs_mode[] = { LP8720_DEFAULT_DVS, LP8725_DEFAULT_DVS };
 
@@ -841,7 +841,7 @@ static struct lp872x_platform_data
 
 	of_property_read_u8(np, "ti,dvs-vsel", (u8 *)&pdata->dvs->vsel);
 	of_property_read_u8(np, "ti,dvs-state", &dvs_state);
-	pdata->dvs->init_state = dvs_state ? DVS_HIGH : DVS_LOW;
+	pdata->dvs->init_state = dvs_state ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
 
 	if (of_get_child_count(np) == 0)
 		goto out;
diff --git a/include/linux/regulator/lp872x.h b/include/linux/regulator/lp872x.h
index 8e7e0343c6e1..b62e45aa1dd3 100644
--- a/include/linux/regulator/lp872x.h
+++ b/include/linux/regulator/lp872x.h
@@ -40,11 +40,6 @@ enum lp872x_regulator_id {
 	LP872X_ID_MAX,
 };
 
-enum lp872x_dvs_state {
-	DVS_LOW  = GPIOD_OUT_LOW,
-	DVS_HIGH = GPIOD_OUT_HIGH,
-};
-
 enum lp872x_dvs_sel {
 	SEL_V1,
 	SEL_V2,
@@ -59,7 +54,7 @@ enum lp872x_dvs_sel {
 struct lp872x_dvs {
 	struct gpio_desc *gpio;
 	enum lp872x_dvs_sel vsel;
-	enum lp872x_dvs_state init_state;
+	enum gpiod_flags init_state;
 };
 
 /**
-- 
cgit v1.2.3


From 6a8b5bb0f1350fc4cf398435a1119db12b0bd50e Mon Sep 17 00:00:00 2001
From: Maíra Canal <maira.canal@usp.br>
Date: Sun, 17 Oct 2021 15:06:39 -0300
Subject: regulator: tps62360: replacing legacy gpio interface for gpiod
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Removing all linux/gpio.h and linux/of_gpio.h dependencies and replacing
them with the gpiod interface.

Signed-off-by: Maíra Canal <maira.canal@usp.br>
Link: https://lore.kernel.org/r/YWxmL2baF5AdzyHv@fedora
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/tps62360-regulator.c | 59 +++++++++++++++-------------------
 include/linux/regulator/tps62360.h     |  6 ----
 2 files changed, 26 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/tps62360-regulator.c b/drivers/regulator/tps62360-regulator.c
index 315cd5daf480..574958690ace 100644
--- a/drivers/regulator/tps62360-regulator.c
+++ b/drivers/regulator/tps62360-regulator.c
@@ -28,13 +28,12 @@
 #include <linux/err.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/tps62360.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/slab.h>
 #include <linux/regmap.h>
@@ -65,8 +64,8 @@ struct tps62360_chip {
 	struct regulator_desc desc;
 	struct regulator_dev *rdev;
 	struct regmap *regmap;
-	int vsel0_gpio;
-	int vsel1_gpio;
+	struct gpio_desc *vsel0_gpio;
+	struct gpio_desc *vsel1_gpio;
 	u8 voltage_reg_mask;
 	bool en_internal_pulldn;
 	bool en_discharge;
@@ -165,8 +164,8 @@ static int tps62360_dcdc_set_voltage_sel(struct regulator_dev *dev,
 
 	/* Select proper VSET register vio gpios */
 	if (tps->valid_gpios) {
-		gpio_set_value_cansleep(tps->vsel0_gpio, new_vset_id & 0x1);
-		gpio_set_value_cansleep(tps->vsel1_gpio,
+		gpiod_set_value_cansleep(tps->vsel0_gpio, new_vset_id & 0x1);
+		gpiod_set_value_cansleep(tps->vsel1_gpio,
 					(new_vset_id >> 1) & 0x1);
 	}
 	return 0;
@@ -310,9 +309,6 @@ static struct tps62360_regulator_platform_data *
 		return NULL;
 	}
 
-	pdata->vsel0_gpio = of_get_named_gpio(np, "vsel0-gpio", 0);
-	pdata->vsel1_gpio = of_get_named_gpio(np, "vsel1-gpio", 0);
-
 	if (of_find_property(np, "ti,vsel0-state-high", NULL))
 		pdata->vsel0_def_state = 1;
 
@@ -349,6 +345,7 @@ static int tps62360_probe(struct i2c_client *client,
 	int ret;
 	int i;
 	int chip_id;
+	int gpio_flags;
 
 	pdata = dev_get_platdata(&client->dev);
 
@@ -390,8 +387,6 @@ static int tps62360_probe(struct i2c_client *client,
 
 	tps->en_discharge = pdata->en_discharge;
 	tps->en_internal_pulldn = pdata->en_internal_pulldn;
-	tps->vsel0_gpio = pdata->vsel0_gpio;
-	tps->vsel1_gpio = pdata->vsel1_gpio;
 	tps->dev = &client->dev;
 
 	switch (chip_id) {
@@ -426,29 +421,27 @@ static int tps62360_probe(struct i2c_client *client,
 	tps->lru_index[0] = tps->curr_vset_id;
 	tps->valid_gpios = false;
 
-	if (gpio_is_valid(tps->vsel0_gpio) && gpio_is_valid(tps->vsel1_gpio)) {
-		int gpio_flags;
-		gpio_flags = (pdata->vsel0_def_state) ?
-				GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
-		ret = devm_gpio_request_one(&client->dev, tps->vsel0_gpio,
-				gpio_flags, "tps62360-vsel0");
-		if (ret) {
-			dev_err(&client->dev,
-				"%s(): Could not obtain vsel0 GPIO %d: %d\n",
-				__func__, tps->vsel0_gpio, ret);
-			return ret;
-		}
+	gpio_flags = (pdata->vsel0_def_state) ?
+			GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+	tps->vsel0_gpio = devm_gpiod_get_optional(&client->dev, "vsel0", gpio_flags);
+	if (IS_ERR(tps->vsel0_gpio)) {
+		dev_err(&client->dev,
+			"%s(): Could not obtain vsel0 GPIO: %ld\n",
+			__func__, PTR_ERR(tps->vsel0_gpio));
+		return PTR_ERR(tps->vsel0_gpio);
+	}
 
-		gpio_flags = (pdata->vsel1_def_state) ?
-				GPIOF_OUT_INIT_HIGH : GPIOF_OUT_INIT_LOW;
-		ret = devm_gpio_request_one(&client->dev, tps->vsel1_gpio,
-				gpio_flags, "tps62360-vsel1");
-		if (ret) {
-			dev_err(&client->dev,
-				"%s(): Could not obtain vsel1 GPIO %d: %d\n",
-				__func__, tps->vsel1_gpio, ret);
-			return ret;
-		}
+	gpio_flags = (pdata->vsel1_def_state) ?
+			GPIOD_OUT_HIGH : GPIOD_OUT_LOW;
+	tps->vsel1_gpio = devm_gpiod_get_optional(&client->dev, "vsel1", gpio_flags);
+	if (IS_ERR(tps->vsel1_gpio)) {
+		dev_err(&client->dev,
+			"%s(): Could not obtain vsel1 GPIO: %ld\n",
+			__func__, PTR_ERR(tps->vsel1_gpio));
+		return PTR_ERR(tps->vsel1_gpio);
+	}
+
+	if (tps->vsel0_gpio != NULL && tps->vsel1_gpio != NULL) {
 		tps->valid_gpios = true;
 
 		/*
diff --git a/include/linux/regulator/tps62360.h b/include/linux/regulator/tps62360.h
index 94a90c06f1e5..398e74a1d941 100644
--- a/include/linux/regulator/tps62360.h
+++ b/include/linux/regulator/tps62360.h
@@ -19,10 +19,6 @@
  * @en_discharge: Enable discharge the output capacitor via internal
  *                register.
  * @en_internal_pulldn: internal pull down enable or not.
- * @vsel0_gpio: Gpio number for vsel0. It should be -1 if this is tied with
- *              fixed logic.
- * @vsel1_gpio: Gpio number for vsel1. It should be -1 if this is tied with
- *              fixed logic.
  * @vsel0_def_state: Default state of vsel0. 1 if it is high else 0.
  * @vsel1_def_state: Default state of vsel1. 1 if it is high else 0.
  */
@@ -30,8 +26,6 @@ struct tps62360_regulator_platform_data {
 	struct regulator_init_data *reg_init_data;
 	bool en_discharge;
 	bool en_internal_pulldn;
-	int vsel0_gpio;
-	int vsel1_gpio;
 	int vsel0_def_state;
 	int vsel1_def_state;
 };
-- 
cgit v1.2.3


From 4570ddda43387e5a130dd85e71a1947b0c11da77 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 12 Mar 2021 14:04:07 +0100
Subject: powercap/drivers/dtpm: Encapsulate even more the code

In order to increase the self-encapsulation of the dtpm generic code,
the following changes are adding a power update ops to the dtpm
ops. That allows the generic code to call directly the dtpm backend
function to update the power values.

The power update function does compute the power characteristics when
the function is invoked. In the case of the CPUs, the power
consumption depends on the number of online CPUs. The online CPUs mask
is not up to date at CPUHP_AP_ONLINE_DYN state in the tear down
callback. That is the reason why the online / offline are at separate
state. As there is already an existing state for DTPM, this one is
only moved to the DEAD state, so there is no addition of new state
with these changes. The dtpm node is not removed when the cpu is
unplugged.

That simplifies the code for the next changes and results in a more
self-encapsulated code.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Link: https://lore.kernel.org/r/20210312130411.29833-1-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c     |  54 ++++++++--------
 drivers/powercap/dtpm_cpu.c | 148 ++++++++++++++++++++------------------------
 include/linux/cpuhotplug.h  |   2 +-
 include/linux/dtpm.h        |   3 +-
 4 files changed, 97 insertions(+), 110 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index c2185ec5f887..58433b8ef9a1 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm)
 		parent->power_limit -= dtpm->power_limit;
 		parent = parent->parent;
 	}
-
-	__dtpm_rebalance_weight(root);
 }
 
 static void __dtpm_add_power(struct dtpm *dtpm)
@@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm)
 		parent->power_limit += dtpm->power_limit;
 		parent = parent->parent;
 	}
+}
+
+static int __dtpm_update_power(struct dtpm *dtpm)
+{
+	int ret;
+
+	__dtpm_sub_power(dtpm);
 
-	__dtpm_rebalance_weight(root);
+	ret = dtpm->ops->update_power_uw(dtpm);
+	if (ret)
+		pr_err("Failed to update power for '%s': %d\n",
+		       dtpm->zone.name, ret);
+
+	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
+		dtpm->power_limit = dtpm->power_max;
+
+	__dtpm_add_power(dtpm);
+
+	if (root)
+		__dtpm_rebalance_weight(root);
+
+	return ret;
 }
 
 /**
  * dtpm_update_power - Update the power on the dtpm
  * @dtpm: a pointer to a dtpm structure to update
- * @power_min: a u64 representing the new power_min value
- * @power_max: a u64 representing the new power_max value
  *
  * Function to update the power values of the dtpm node specified in
  * parameter. These new values will be propagated to the tree.
  *
  * Return: zero on success, -EINVAL if the values are inconsistent
  */
-int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max)
+int dtpm_update_power(struct dtpm *dtpm)
 {
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&dtpm_lock);
-
-	if (power_min == dtpm->power_min && power_max == dtpm->power_max)
-		goto unlock;
-
-	if (power_max < power_min) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	__dtpm_sub_power(dtpm);
-
-	dtpm->power_min = power_min;
-	dtpm->power_max = power_max;
-	if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
-		dtpm->power_limit = power_max;
-
-	__dtpm_add_power(dtpm);
-
-unlock:
+	ret = __dtpm_update_power(dtpm);
 	mutex_unlock(&dtpm_lock);
 
 	return ret;
@@ -436,6 +434,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 
 	if (dtpm->ops && !(dtpm->ops->set_power_uw &&
 			   dtpm->ops->get_power_uw &&
+			   dtpm->ops->update_power_uw &&
 			   dtpm->ops->release))
 		return -EINVAL;
 
@@ -455,7 +454,8 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 		root = dtpm;
 	}
 
-	__dtpm_add_power(dtpm);
+	if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm))
+		__dtpm_add_power(dtpm);
 
 	pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
 		dtpm->zone.name, dtpm->power_min, dtpm->power_max);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 51c366938acd..f6076de39540 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -14,6 +14,8 @@
  * The CPU hotplug is supported and the power numbers will be updated
  * if a CPU is hot plugged / unplugged.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/cpumask.h>
 #include <linux/cpufreq.h>
 #include <linux/cpuhotplug.h>
@@ -23,8 +25,6 @@
 #include <linux/slab.h>
 #include <linux/units.h>
 
-static struct dtpm *__parent;
-
 static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
 
 struct dtpm_cpu {
@@ -32,57 +32,16 @@ struct dtpm_cpu {
 	int cpu;
 };
 
-/*
- * When a new CPU is inserted at hotplug or boot time, add the power
- * contribution and update the dtpm tree.
- */
-static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
-{
-	u64 power_min, power_max;
-
-	power_min = em->table[0].power;
-	power_min *= MICROWATT_PER_MILLIWATT;
-	power_min += dtpm->power_min;
-
-	power_max = em->table[em->nr_perf_states - 1].power;
-	power_max *= MICROWATT_PER_MILLIWATT;
-	power_max += dtpm->power_max;
-
-	return dtpm_update_power(dtpm, power_min, power_max);
-}
-
-/*
- * When a CPU is unplugged, remove its power contribution from the
- * dtpm tree.
- */
-static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
-{
-	u64 power_min, power_max;
-
-	power_min = em->table[0].power;
-	power_min *= MICROWATT_PER_MILLIWATT;
-	power_min = dtpm->power_min - power_min;
-
-	power_max = em->table[em->nr_perf_states - 1].power;
-	power_max *= MICROWATT_PER_MILLIWATT;
-	power_max = dtpm->power_max - power_max;
-
-	return dtpm_update_power(dtpm, power_min, power_max);
-}
-
 static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 {
 	struct dtpm_cpu *dtpm_cpu = dtpm->private;
-	struct em_perf_domain *pd;
+	struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
 	struct cpumask cpus;
 	unsigned long freq;
 	u64 power;
 	int i, nr_cpus;
 
-	pd = em_cpu_get(dtpm_cpu->cpu);
-
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
-
 	nr_cpus = cpumask_weight(&cpus);
 
 	for (i = 0; i < pd->nr_perf_states; i++) {
@@ -113,6 +72,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 
 	pd = em_cpu_get(dtpm_cpu->cpu);
 	freq = cpufreq_quick_get(dtpm_cpu->cpu);
+
 	cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
 	nr_cpus = cpumask_weight(&cpus);
 
@@ -128,6 +88,27 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 	return 0;
 }
 
+static int update_pd_power_uw(struct dtpm *dtpm)
+{
+	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
+	struct cpumask cpus;
+	int nr_cpus;
+
+	cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
+	nr_cpus = cpumask_weight(&cpus);
+
+	dtpm->power_min = em->table[0].power;
+	dtpm->power_min *= MICROWATT_PER_MILLIWATT;
+	dtpm->power_min *= nr_cpus;
+
+	dtpm->power_max = em->table[em->nr_perf_states - 1].power;
+	dtpm->power_max *= MICROWATT_PER_MILLIWATT;
+	dtpm->power_max *= nr_cpus;
+
+	return 0;
+}
+
 static void pd_release(struct dtpm *dtpm)
 {
 	struct dtpm_cpu *dtpm_cpu = dtpm->private;
@@ -139,39 +120,24 @@ static void pd_release(struct dtpm *dtpm)
 }
 
 static struct dtpm_ops dtpm_ops = {
-	.set_power_uw = set_pd_power_limit,
-	.get_power_uw = get_pd_power_uw,
-	.release = pd_release,
+	.set_power_uw	 = set_pd_power_limit,
+	.get_power_uw	 = get_pd_power_uw,
+	.update_power_uw = update_pd_power_uw,
+	.release	 = pd_release,
 };
 
 static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 {
-	struct cpufreq_policy *policy;
 	struct em_perf_domain *pd;
 	struct dtpm *dtpm;
 
-	policy = cpufreq_cpu_get(cpu);
-
-	if (!policy)
-		return 0;
-
 	pd = em_cpu_get(cpu);
 	if (!pd)
 		return -EINVAL;
 
 	dtpm = per_cpu(dtpm_per_cpu, cpu);
 
-	power_sub(dtpm, pd);
-
-	if (cpumask_weight(policy->cpus) != 1)
-		return 0;
-
-	for_each_cpu(cpu, policy->related_cpus)
-		per_cpu(dtpm_per_cpu, cpu) = NULL;
-
-	dtpm_unregister(dtpm);
-
-	return 0;
+	return dtpm_update_power(dtpm);
 }
 
 static int cpuhp_dtpm_cpu_online(unsigned int cpu)
@@ -184,7 +150,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	int ret = -ENOMEM;
 
 	policy = cpufreq_cpu_get(cpu);
-
 	if (!policy)
 		return 0;
 
@@ -194,7 +159,7 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 
 	dtpm = per_cpu(dtpm_per_cpu, cpu);
 	if (dtpm)
-		return power_add(dtpm, pd);
+		return dtpm_update_power(dtpm);
 
 	dtpm = dtpm_alloc(&dtpm_ops);
 	if (!dtpm)
@@ -210,27 +175,20 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	for_each_cpu(cpu, policy->related_cpus)
 		per_cpu(dtpm_per_cpu, cpu) = dtpm;
 
-	sprintf(name, "cpu%d", dtpm_cpu->cpu);
+	snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
 
-	ret = dtpm_register(name, dtpm, __parent);
+	ret = dtpm_register(name, dtpm, NULL);
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
-	ret = power_add(dtpm, pd);
-	if (ret)
-		goto out_dtpm_unregister;
-
 	ret = freq_qos_add_request(&policy->constraints,
 				   &dtpm_cpu->qos_req, FREQ_QOS_MAX,
 				   pd->table[pd->nr_perf_states - 1].frequency);
 	if (ret)
-		goto out_power_sub;
+		goto out_dtpm_unregister;
 
 	return 0;
 
-out_power_sub:
-	power_sub(dtpm, pd);
-
 out_dtpm_unregister:
 	dtpm_unregister(dtpm);
 	dtpm_cpu = NULL;
@@ -248,10 +206,38 @@ out_kfree_dtpm:
 
 int dtpm_register_cpu(struct dtpm *parent)
 {
-	__parent = parent;
+	int ret;
+
+	/*
+	 * The callbacks at CPU hotplug time are calling
+	 * dtpm_update_power() which in turns calls update_pd_power().
+	 *
+	 * The function update_pd_power() uses the online mask to
+	 * figure out the power consumption limits.
+	 *
+	 * At CPUHP_AP_ONLINE_DYN, the CPU is present in the CPU
+	 * online mask when the cpuhp_dtpm_cpu_online function is
+	 * called, but the CPU is still in the online mask for the
+	 * tear down callback. So the power can not be updated when
+	 * the CPU is unplugged.
+	 *
+	 * At CPUHP_AP_DTPM_CPU_DEAD, the situation is the opposite as
+	 * above. The CPU online mask is not up to date when the CPU
+	 * is plugged in.
+	 *
+	 * For this reason, we need to call the online and offline
+	 * callbacks at different moments when the CPU online mask is
+	 * consistent with the power numbers we want to update.
+	 */
+	ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
+				NULL, cpuhp_dtpm_cpu_offline);
+	if (ret < 0)
+		return ret;
+
+	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
+				cpuhp_dtpm_cpu_online, NULL);
+	if (ret < 0)
+		return ret;
 
-	return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE,
-				 "dtpm_cpu:online",
-				 cpuhp_dtpm_cpu_online,
-				 cpuhp_dtpm_cpu_offline);
+	return 0;
 }
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 832d8a74fa59..ad9a34a80440 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -97,6 +97,7 @@ enum cpuhp_state {
 	CPUHP_LUSTRE_CFS_DEAD,
 	CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
 	CPUHP_PADATA_DEAD,
+	CPUHP_AP_DTPM_CPU_DEAD,
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
@@ -242,7 +243,6 @@ enum cpuhp_state {
 	CPUHP_AP_ONLINE_DYN_END		= CPUHP_AP_ONLINE_DYN + 30,
 	CPUHP_AP_X86_HPET_ONLINE,
 	CPUHP_AP_X86_KVM_CLK_ONLINE,
-	CPUHP_AP_DTPM_CPU_ONLINE,
 	CPUHP_AP_ACTIVE,
 	CPUHP_ONLINE,
 };
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index e80a332e3d8a..acf8d3638988 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -29,6 +29,7 @@ struct dtpm {
 struct dtpm_ops {
 	u64 (*set_power_uw)(struct dtpm *, u64);
 	u64 (*get_power_uw)(struct dtpm *);
+	int (*update_power_uw)(struct dtpm *);
 	void (*release)(struct dtpm *);
 };
 
@@ -62,7 +63,7 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
 	return container_of(zone, struct dtpm, zone);
 }
 
-int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max);
+int dtpm_update_power(struct dtpm *dtpm);
 
 int dtpm_release_zone(struct powercap_zone *pcz);
 
-- 
cgit v1.2.3


From 7a89d7eacf8e84f2afb94db5ae9d9f9faa93f01c Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 12 Mar 2021 14:04:09 +0100
Subject: powercap/drivers/dtpm: Simplify the dtpm table

The dtpm table is an array of pointers, that forces the user of the
table to define initdata along with the declaration of the table
entry. It is more efficient to create an array of dtpm structure, so
the declaration of the table entry can be done by initializing the
different fields.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Link: https://lore.kernel.org/r/20210312130411.29833-3-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c     |  4 ++--
 drivers/powercap/dtpm_cpu.c |  4 +++-
 include/linux/dtpm.h        | 20 +++++++++-----------
 3 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 58433b8ef9a1..8c032398f6ce 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -467,7 +467,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 
 static int __init dtpm_init(void)
 {
-	struct dtpm_descr **dtpm_descr;
+	struct dtpm_descr *dtpm_descr;
 
 	pct = powercap_register_control_type(NULL, "dtpm", NULL);
 	if (IS_ERR(pct)) {
@@ -476,7 +476,7 @@ static int __init dtpm_init(void)
 	}
 
 	for_each_dtpm_table(dtpm_descr)
-		(*dtpm_descr)->init(*dtpm_descr);
+		dtpm_descr->init();
 
 	return 0;
 }
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index f6076de39540..98841524a782 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -204,7 +204,7 @@ out_kfree_dtpm:
 	return ret;
 }
 
-int dtpm_register_cpu(struct dtpm *parent)
+static int __init dtpm_cpu_init(void)
 {
 	int ret;
 
@@ -241,3 +241,5 @@ int dtpm_register_cpu(struct dtpm *parent)
 
 	return 0;
 }
+
+DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index acf8d3638988..1e53db6bd5f9 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -33,25 +33,23 @@ struct dtpm_ops {
 	void (*release)(struct dtpm *);
 };
 
-struct dtpm_descr;
-
-typedef int (*dtpm_init_t)(struct dtpm_descr *);
+typedef int (*dtpm_init_t)(void);
 
 struct dtpm_descr {
-	struct dtpm *parent;
-	const char *name;
 	dtpm_init_t init;
 };
 
 /* Init section thermal table */
-extern struct dtpm_descr *__dtpm_table[];
-extern struct dtpm_descr *__dtpm_table_end[];
+extern struct dtpm_descr __dtpm_table[];
+extern struct dtpm_descr __dtpm_table_end[];
 
-#define DTPM_TABLE_ENTRY(name)			\
-	static typeof(name) *__dtpm_table_entry_##name	\
-	__used __section("__dtpm_table") = &name
+#define DTPM_TABLE_ENTRY(name, __init)				\
+	static struct dtpm_descr __dtpm_table_entry_##name	\
+	__used __section("__dtpm_table") = {			\
+		.init = __init,					\
+	}
 
-#define DTPM_DECLARE(name)	DTPM_TABLE_ENTRY(name)
+#define DTPM_DECLARE(name, init)	DTPM_TABLE_ENTRY(name, init)
 
 #define for_each_dtpm_table(__dtpm)	\
 	for (__dtpm = __dtpm_table;	\
-- 
cgit v1.2.3


From d2cdc6adc30879d81160199fc7c6ab890fc4bd4c Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Fri, 12 Mar 2021 14:04:10 +0100
Subject: powercap/drivers/dtpm: Use container_of instead of a private data
 field

The dtpm framework provides an API to allocate a dtpm node. However
when a backend dtpm driver needs to allocate a dtpm node it must
define its own structure and store the pointer of this structure in
the private field of the dtpm structure.

It is more elegant to use the container_of macro and add the dtpm
structure inside the dtpm backend specific structure. The code will be
able to deal properly with the dtpm structure as a generic entity,
making all this even more self-encapsulated.

The dtpm_alloc() function does no longer make sense as the dtpm
structure will be allocated when allocating the device specific dtpm
structure. The dtpm_init() is provided instead.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Link: https://lore.kernel.org/r/20210312130411.29833-4-daniel.lezcano@linaro.org
---
 drivers/powercap/dtpm.c     | 18 ++++++-----------
 drivers/powercap/dtpm_cpu.c | 48 ++++++++++++++++++++++-----------------------
 include/linux/dtpm.h        |  3 +--
 3 files changed, 30 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 8c032398f6ce..c7c5529b61eb 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -357,24 +357,18 @@ static struct powercap_zone_ops zone_ops = {
 };
 
 /**
- * dtpm_alloc - Allocate and initialize a dtpm struct
- * @name: a string specifying the name of the node
- *
- * Return: a struct dtpm pointer, NULL in case of error
+ * dtpm_init - Allocate and initialize a dtpm struct
+ * @dtpm: The dtpm struct pointer to be initialized
+ * @ops: The dtpm device specific ops, NULL for a virtual node
  */
-struct dtpm *dtpm_alloc(struct dtpm_ops *ops)
+void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops)
 {
-	struct dtpm *dtpm;
-
-	dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
 	if (dtpm) {
 		INIT_LIST_HEAD(&dtpm->children);
 		INIT_LIST_HEAD(&dtpm->sibling);
 		dtpm->weight = 1024;
 		dtpm->ops = ops;
 	}
-
-	return dtpm;
 }
 
 /**
@@ -465,7 +459,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
 	return 0;
 }
 
-static int __init dtpm_init(void)
+static int __init init_dtpm(void)
 {
 	struct dtpm_descr *dtpm_descr;
 
@@ -480,4 +474,4 @@ static int __init dtpm_init(void)
 
 	return 0;
 }
-late_initcall(dtpm_init);
+late_initcall(init_dtpm);
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
index 98841524a782..2e21e4e2b01f 100644
--- a/drivers/powercap/dtpm_cpu.c
+++ b/drivers/powercap/dtpm_cpu.c
@@ -25,16 +25,22 @@
 #include <linux/slab.h>
 #include <linux/units.h>
 
-static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
-
 struct dtpm_cpu {
+	struct dtpm dtpm;
 	struct freq_qos_request qos_req;
 	int cpu;
 };
 
+static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu);
+
+static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm)
+{
+	return container_of(dtpm, struct dtpm_cpu, dtpm);
+}
+
 static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
 	struct cpumask cpus;
 	unsigned long freq;
@@ -64,7 +70,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
 
 static u64 get_pd_power_uw(struct dtpm *dtpm)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *pd;
 	struct cpumask cpus;
 	unsigned long freq;
@@ -90,7 +96,7 @@ static u64 get_pd_power_uw(struct dtpm *dtpm)
 
 static int update_pd_power_uw(struct dtpm *dtpm)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 	struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
 	struct cpumask cpus;
 	int nr_cpus;
@@ -111,7 +117,7 @@ static int update_pd_power_uw(struct dtpm *dtpm)
 
 static void pd_release(struct dtpm *dtpm)
 {
-	struct dtpm_cpu *dtpm_cpu = dtpm->private;
+	struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
 
 	if (freq_qos_request_active(&dtpm_cpu->qos_req))
 		freq_qos_remove_request(&dtpm_cpu->qos_req);
@@ -129,20 +135,19 @@ static struct dtpm_ops dtpm_ops = {
 static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
 {
 	struct em_perf_domain *pd;
-	struct dtpm *dtpm;
+	struct dtpm_cpu *dtpm_cpu;
 
 	pd = em_cpu_get(cpu);
 	if (!pd)
 		return -EINVAL;
 
-	dtpm = per_cpu(dtpm_per_cpu, cpu);
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
 
-	return dtpm_update_power(dtpm);
+	return dtpm_update_power(&dtpm_cpu->dtpm);
 }
 
 static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 {
-	struct dtpm *dtpm;
 	struct dtpm_cpu *dtpm_cpu;
 	struct cpufreq_policy *policy;
 	struct em_perf_domain *pd;
@@ -157,27 +162,23 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	if (!pd)
 		return -EINVAL;
 
-	dtpm = per_cpu(dtpm_per_cpu, cpu);
-	if (dtpm)
-		return dtpm_update_power(dtpm);
-
-	dtpm = dtpm_alloc(&dtpm_ops);
-	if (!dtpm)
-		return -EINVAL;
+	dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
+	if (dtpm_cpu)
+		return dtpm_update_power(&dtpm_cpu->dtpm);
 
 	dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
 	if (!dtpm_cpu)
-		goto out_kfree_dtpm;
+		return -ENOMEM;
 
-	dtpm->private = dtpm_cpu;
+	dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops);
 	dtpm_cpu->cpu = cpu;
 
 	for_each_cpu(cpu, policy->related_cpus)
-		per_cpu(dtpm_per_cpu, cpu) = dtpm;
+		per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu;
 
 	snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
 
-	ret = dtpm_register(name, dtpm, NULL);
+	ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL);
 	if (ret)
 		goto out_kfree_dtpm_cpu;
 
@@ -190,17 +191,14 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
 	return 0;
 
 out_dtpm_unregister:
-	dtpm_unregister(dtpm);
+	dtpm_unregister(&dtpm_cpu->dtpm);
 	dtpm_cpu = NULL;
-	dtpm = NULL;
 
 out_kfree_dtpm_cpu:
 	for_each_cpu(cpu, policy->related_cpus)
 		per_cpu(dtpm_per_cpu, cpu) = NULL;
 	kfree(dtpm_cpu);
 
-out_kfree_dtpm:
-	kfree(dtpm);
 	return ret;
 }
 
diff --git a/include/linux/dtpm.h b/include/linux/dtpm.h
index 1e53db6bd5f9..2890f6370eb9 100644
--- a/include/linux/dtpm.h
+++ b/include/linux/dtpm.h
@@ -23,7 +23,6 @@ struct dtpm {
 	u64 power_max;
 	u64 power_min;
 	int weight;
-	void *private;
 };
 
 struct dtpm_ops {
@@ -65,7 +64,7 @@ int dtpm_update_power(struct dtpm *dtpm);
 
 int dtpm_release_zone(struct powercap_zone *pcz);
 
-struct dtpm *dtpm_alloc(struct dtpm_ops *ops);
+void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops);
 
 void dtpm_unregister(struct dtpm *dtpm);
 
-- 
cgit v1.2.3


From 3b13c168186c115501ee7d194460ba2f8c825155 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 21 Oct 2021 14:30:51 +0100
Subject: percpu_ref: percpu_ref_tryget_live() version holding RCU

Add percpu_ref_tryget_live_rcu(), which is a version of
percpu_ref_tryget_live() but the user is responsible for enclosing it in
a RCU read lock section.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Link: https://lore.kernel.org/r/3066500d7a6eb3e03f10adf98b87fdb3b1c49db8.1634822969.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/percpu-refcount.h | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index ae16a9856305..b31d3f3312ce 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -266,6 +266,28 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
 	return percpu_ref_tryget_many(ref, 1);
 }
 
+/**
+ * percpu_ref_tryget_live_rcu - same as percpu_ref_tryget_live() but the
+ * caller is responsible for taking RCU.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+static inline bool percpu_ref_tryget_live_rcu(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count;
+	bool ret = false;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (likely(__ref_is_percpu(ref, &percpu_count))) {
+		this_cpu_inc(*percpu_count);
+		ret = true;
+	} else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
+		ret = atomic_long_inc_not_zero(&ref->data->count);
+	}
+	return ret;
+}
+
 /**
  * percpu_ref_tryget_live - try to increment a live percpu refcount
  * @ref: percpu_ref to try-get
@@ -283,20 +305,11 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref)
  */
 static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 {
-	unsigned long __percpu *percpu_count;
 	bool ret = false;
 
 	rcu_read_lock();
-
-	if (__ref_is_percpu(ref, &percpu_count)) {
-		this_cpu_inc(*percpu_count);
-		ret = true;
-	} else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
-		ret = atomic_long_inc_not_zero(&ref->data->count);
-	}
-
+	ret = percpu_ref_tryget_live_rcu(ref);
 	rcu_read_unlock();
-
 	return ret;
 }
 
-- 
cgit v1.2.3


From f059a1d2e23a165bf86e33673c6a7535a08c6341 Mon Sep 17 00:00:00 2001
From: Xie Yongji <xieyongji@bytedance.com>
Date: Wed, 22 Sep 2021 20:37:08 +0800
Subject: block: Add invalidate_disk() helper to invalidate the gendisk

To hide internal implementation and simplify some driver code,
this adds a helper to invalidate the gendisk. It will clean the
gendisk's associated buffer/page caches and reset its internal
states.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210922123711.187-2-xieyongji@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 20 ++++++++++++++++++++
 include/linux/genhd.h |  2 ++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 80943c123c3e..64f83c4aee99 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -624,6 +624,26 @@ void del_gendisk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(del_gendisk);
 
+/**
+ * invalidate_disk - invalidate the disk
+ * @disk: the struct gendisk to invalidate
+ *
+ * A helper to invalidates the disk. It will clean the disk's associated
+ * buffer/page caches and reset its internal states so that the disk
+ * can be reused by the drivers.
+ *
+ * Context: can sleep
+ */
+void invalidate_disk(struct gendisk *disk)
+{
+	struct block_device *bdev = disk->part0;
+
+	invalidate_bdev(bdev);
+	bdev->bd_inode->i_mapping->wb_err = 0;
+	set_capacity(disk, 0);
+}
+EXPORT_SYMBOL(invalidate_disk);
+
 /* sysfs access to bad-blocks list. */
 static ssize_t disk_badblocks_show(struct device *dev,
 					struct device_attribute *attr,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index c70bc5fce4db..13f313ab99e7 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -213,6 +213,8 @@ static inline int add_disk(struct gendisk *disk)
 }
 extern void del_gendisk(struct gendisk *gp);
 
+void invalidate_disk(struct gendisk *disk);
+
 void set_disk_ro(struct gendisk *disk, bool read_only);
 
 static inline int get_disk_ro(struct gendisk *disk)
-- 
cgit v1.2.3


From 1e8d44bddf57f6d878e083f281a34d5c88feb7db Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Oct 2021 11:04:51 -0700
Subject: blk-crypto: rename keyslot-manager files to blk-crypto-profile

In preparation for renaming struct blk_keyslot_manager to struct
blk_crypto_profile, rename the keyslot-manager.h and keyslot-manager.c
source files.  Renaming these files separately before making a lot of
changes to their contents makes it easier for git to understand that
they were renamed.

Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # For MMC
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20211018180453.40441-3-ebiggers@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Makefile                     |   2 +-
 block/blk-crypto-fallback.c        |   2 +-
 block/blk-crypto-profile.c         | 579 +++++++++++++++++++++++++++++++++++++
 block/blk-crypto.c                 |   2 +-
 block/keyslot-manager.c            | 579 -------------------------------------
 drivers/md/dm-core.h               |   2 +-
 drivers/md/dm.c                    |   2 +-
 drivers/mmc/host/cqhci-crypto.c    |   2 +-
 drivers/scsi/ufs/ufshcd.h          |   2 +-
 include/linux/blk-crypto-profile.h | 120 ++++++++
 include/linux/keyslot-manager.h    | 120 --------
 include/linux/mmc/host.h           |   2 +-
 12 files changed, 707 insertions(+), 707 deletions(-)
 create mode 100644 block/blk-crypto-profile.c
 delete mode 100644 block/keyslot-manager.c
 create mode 100644 include/linux/blk-crypto-profile.h
 delete mode 100644 include/linux/keyslot-manager.h

(limited to 'include/linux')

diff --git a/block/Makefile b/block/Makefile
index 74df168729ec..602f7f47b7b6 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -36,6 +36,6 @@ obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
 obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
 obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
 obj-$(CONFIG_BLK_PM)		+= blk-pm.o
-obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= keyslot-manager.o blk-crypto.o
+obj-$(CONFIG_BLK_INLINE_ENCRYPTION)	+= blk-crypto.o blk-crypto-profile.o
 obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK)	+= blk-crypto-fallback.o
 obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED)	+= holder.o
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index 1bcc1a151424..08bfea292c75 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -12,9 +12,9 @@
 #include <crypto/skcipher.h>
 #include <linux/blk-cgroup.h>
 #include <linux/blk-crypto.h>
+#include <linux/blk-crypto-profile.h>
 #include <linux/blkdev.h>
 #include <linux/crypto.h>
-#include <linux/keyslot-manager.h>
 #include <linux/mempool.h>
 #include <linux/module.h>
 #include <linux/random.h>
diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c
new file mode 100644
index 000000000000..1a235fa3c3e8
--- /dev/null
+++ b/block/blk-crypto-profile.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 Google LLC
+ */
+
+/**
+ * DOC: The Keyslot Manager
+ *
+ * Many devices with inline encryption support have a limited number of "slots"
+ * into which encryption contexts may be programmed, and requests can be tagged
+ * with a slot number to specify the key to use for en/decryption.
+ *
+ * As the number of slots is limited, and programming keys is expensive on
+ * many inline encryption hardware, we don't want to program the same key into
+ * multiple slots - if multiple requests are using the same key, we want to
+ * program just one slot with that key and use that slot for all requests.
+ *
+ * The keyslot manager manages these keyslots appropriately, and also acts as
+ * an abstraction between the inline encryption hardware and the upper layers.
+ *
+ * Lower layer devices will set up a keyslot manager in their request queue
+ * and tell it how to perform device specific operations like programming/
+ * evicting keys from keyslots.
+ *
+ * Upper layers will call blk_ksm_get_slot_for_key() to program a
+ * key into some slot in the inline encryption hardware.
+ */
+
+#define pr_fmt(fmt) "blk-crypto: " fmt
+
+#include <linux/blk-crypto-profile.h>
+#include <linux/device.h>
+#include <linux/atomic.h>
+#include <linux/mutex.h>
+#include <linux/pm_runtime.h>
+#include <linux/wait.h>
+#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
+
+struct blk_ksm_keyslot {
+	atomic_t slot_refs;
+	struct list_head idle_slot_node;
+	struct hlist_node hash_node;
+	const struct blk_crypto_key *key;
+	struct blk_keyslot_manager *ksm;
+};
+
+static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm)
+{
+	/*
+	 * Calling into the driver requires ksm->lock held and the device
+	 * resumed.  But we must resume the device first, since that can acquire
+	 * and release ksm->lock via blk_ksm_reprogram_all_keys().
+	 */
+	if (ksm->dev)
+		pm_runtime_get_sync(ksm->dev);
+	down_write(&ksm->lock);
+}
+
+static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm)
+{
+	up_write(&ksm->lock);
+	if (ksm->dev)
+		pm_runtime_put_sync(ksm->dev);
+}
+
+static inline bool blk_ksm_is_passthrough(struct blk_keyslot_manager *ksm)
+{
+	return ksm->num_slots == 0;
+}
+
+/**
+ * blk_ksm_init() - Initialize a keyslot manager
+ * @ksm: The keyslot_manager to initialize.
+ * @num_slots: The number of key slots to manage.
+ *
+ * Allocate memory for keyslots and initialize a keyslot manager. Called by
+ * e.g. storage drivers to set up a keyslot manager in their request_queue.
+ *
+ * Return: 0 on success, or else a negative error code.
+ */
+int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots)
+{
+	unsigned int slot;
+	unsigned int i;
+	unsigned int slot_hashtable_size;
+
+	memset(ksm, 0, sizeof(*ksm));
+
+	if (num_slots == 0)
+		return -EINVAL;
+
+	ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL);
+	if (!ksm->slots)
+		return -ENOMEM;
+
+	ksm->num_slots = num_slots;
+
+	init_rwsem(&ksm->lock);
+
+	init_waitqueue_head(&ksm->idle_slots_wait_queue);
+	INIT_LIST_HEAD(&ksm->idle_slots);
+
+	for (slot = 0; slot < num_slots; slot++) {
+		ksm->slots[slot].ksm = ksm;
+		list_add_tail(&ksm->slots[slot].idle_slot_node,
+			      &ksm->idle_slots);
+	}
+
+	spin_lock_init(&ksm->idle_slots_lock);
+
+	slot_hashtable_size = roundup_pow_of_two(num_slots);
+	/*
+	 * hash_ptr() assumes bits != 0, so ensure the hash table has at least 2
+	 * buckets.  This only makes a difference when there is only 1 keyslot.
+	 */
+	if (slot_hashtable_size < 2)
+		slot_hashtable_size = 2;
+
+	ksm->log_slot_ht_size = ilog2(slot_hashtable_size);
+	ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size,
+					     sizeof(ksm->slot_hashtable[0]),
+					     GFP_KERNEL);
+	if (!ksm->slot_hashtable)
+		goto err_destroy_ksm;
+	for (i = 0; i < slot_hashtable_size; i++)
+		INIT_HLIST_HEAD(&ksm->slot_hashtable[i]);
+
+	return 0;
+
+err_destroy_ksm:
+	blk_ksm_destroy(ksm);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_init);
+
+static void blk_ksm_destroy_callback(void *ksm)
+{
+	blk_ksm_destroy(ksm);
+}
+
+/**
+ * devm_blk_ksm_init() - Resource-managed blk_ksm_init()
+ * @dev: The device which owns the blk_keyslot_manager.
+ * @ksm: The blk_keyslot_manager to initialize.
+ * @num_slots: The number of key slots to manage.
+ *
+ * Like blk_ksm_init(), but causes blk_ksm_destroy() to be called automatically
+ * on driver detach.
+ *
+ * Return: 0 on success, or else a negative error code.
+ */
+int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
+		      unsigned int num_slots)
+{
+	int err = blk_ksm_init(ksm, num_slots);
+
+	if (err)
+		return err;
+
+	return devm_add_action_or_reset(dev, blk_ksm_destroy_callback, ksm);
+}
+EXPORT_SYMBOL_GPL(devm_blk_ksm_init);
+
+static inline struct hlist_head *
+blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm,
+			    const struct blk_crypto_key *key)
+{
+	return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)];
+}
+
+static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot)
+{
+	struct blk_keyslot_manager *ksm = slot->ksm;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ksm->idle_slots_lock, flags);
+	list_del(&slot->idle_slot_node);
+	spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
+}
+
+static struct blk_ksm_keyslot *blk_ksm_find_keyslot(
+					struct blk_keyslot_manager *ksm,
+					const struct blk_crypto_key *key)
+{
+	const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key);
+	struct blk_ksm_keyslot *slotp;
+
+	hlist_for_each_entry(slotp, head, hash_node) {
+		if (slotp->key == key)
+			return slotp;
+	}
+	return NULL;
+}
+
+static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot(
+					struct blk_keyslot_manager *ksm,
+					const struct blk_crypto_key *key)
+{
+	struct blk_ksm_keyslot *slot;
+
+	slot = blk_ksm_find_keyslot(ksm, key);
+	if (!slot)
+		return NULL;
+	if (atomic_inc_return(&slot->slot_refs) == 1) {
+		/* Took first reference to this slot; remove it from LRU list */
+		blk_ksm_remove_slot_from_lru_list(slot);
+	}
+	return slot;
+}
+
+unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot)
+{
+	return slot - slot->ksm->slots;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx);
+
+/**
+ * blk_ksm_get_slot_for_key() - Program a key into a keyslot.
+ * @ksm: The keyslot manager to program the key into.
+ * @key: Pointer to the key object to program, including the raw key, crypto
+ *	 mode, and data unit size.
+ * @slot_ptr: A pointer to return the pointer of the allocated keyslot.
+ *
+ * Get a keyslot that's been programmed with the specified key.  If one already
+ * exists, return it with incremented refcount.  Otherwise, wait for a keyslot
+ * to become idle and program it.
+ *
+ * Context: Process context. Takes and releases ksm->lock.
+ * Return: BLK_STS_OK on success (and keyslot is set to the pointer of the
+ *	   allocated keyslot), or some other blk_status_t otherwise (and
+ *	   keyslot is set to NULL).
+ */
+blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
+				      const struct blk_crypto_key *key,
+				      struct blk_ksm_keyslot **slot_ptr)
+{
+	struct blk_ksm_keyslot *slot;
+	int slot_idx;
+	int err;
+
+	*slot_ptr = NULL;
+
+	if (blk_ksm_is_passthrough(ksm))
+		return BLK_STS_OK;
+
+	down_read(&ksm->lock);
+	slot = blk_ksm_find_and_grab_keyslot(ksm, key);
+	up_read(&ksm->lock);
+	if (slot)
+		goto success;
+
+	for (;;) {
+		blk_ksm_hw_enter(ksm);
+		slot = blk_ksm_find_and_grab_keyslot(ksm, key);
+		if (slot) {
+			blk_ksm_hw_exit(ksm);
+			goto success;
+		}
+
+		/*
+		 * If we're here, that means there wasn't a slot that was
+		 * already programmed with the key. So try to program it.
+		 */
+		if (!list_empty(&ksm->idle_slots))
+			break;
+
+		blk_ksm_hw_exit(ksm);
+		wait_event(ksm->idle_slots_wait_queue,
+			   !list_empty(&ksm->idle_slots));
+	}
+
+	slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot,
+				idle_slot_node);
+	slot_idx = blk_ksm_get_slot_idx(slot);
+
+	err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx);
+	if (err) {
+		wake_up(&ksm->idle_slots_wait_queue);
+		blk_ksm_hw_exit(ksm);
+		return errno_to_blk_status(err);
+	}
+
+	/* Move this slot to the hash list for the new key. */
+	if (slot->key)
+		hlist_del(&slot->hash_node);
+	slot->key = key;
+	hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key));
+
+	atomic_set(&slot->slot_refs, 1);
+
+	blk_ksm_remove_slot_from_lru_list(slot);
+
+	blk_ksm_hw_exit(ksm);
+success:
+	*slot_ptr = slot;
+	return BLK_STS_OK;
+}
+
+/**
+ * blk_ksm_put_slot() - Release a reference to a slot
+ * @slot: The keyslot to release the reference of.
+ *
+ * Context: Any context.
+ */
+void blk_ksm_put_slot(struct blk_ksm_keyslot *slot)
+{
+	struct blk_keyslot_manager *ksm;
+	unsigned long flags;
+
+	if (!slot)
+		return;
+
+	ksm = slot->ksm;
+
+	if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
+					&ksm->idle_slots_lock, flags)) {
+		list_add_tail(&slot->idle_slot_node, &ksm->idle_slots);
+		spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
+		wake_up(&ksm->idle_slots_wait_queue);
+	}
+}
+
+/**
+ * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is
+ *				    supported by a ksm.
+ * @ksm: The keyslot manager to check
+ * @cfg: The crypto configuration to check for.
+ *
+ * Checks for crypto_mode/data unit size/dun bytes support.
+ *
+ * Return: Whether or not this ksm supports the specified crypto config.
+ */
+bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
+				  const struct blk_crypto_config *cfg)
+{
+	if (!ksm)
+		return false;
+	if (!(ksm->crypto_modes_supported[cfg->crypto_mode] &
+	      cfg->data_unit_size))
+		return false;
+	if (ksm->max_dun_bytes_supported < cfg->dun_bytes)
+		return false;
+	return true;
+}
+
+/**
+ * blk_ksm_evict_key() - Evict a key from the lower layer device.
+ * @ksm: The keyslot manager to evict from
+ * @key: The key to evict
+ *
+ * Find the keyslot that the specified key was programmed into, and evict that
+ * slot from the lower layer device. The slot must not be in use by any
+ * in-flight IO when this function is called.
+ *
+ * Context: Process context. Takes and releases ksm->lock.
+ * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
+ *	   if the keyslot is still in use, or another -errno value on other
+ *	   error.
+ */
+int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
+		      const struct blk_crypto_key *key)
+{
+	struct blk_ksm_keyslot *slot;
+	int err = 0;
+
+	if (blk_ksm_is_passthrough(ksm)) {
+		if (ksm->ksm_ll_ops.keyslot_evict) {
+			blk_ksm_hw_enter(ksm);
+			err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1);
+			blk_ksm_hw_exit(ksm);
+			return err;
+		}
+		return 0;
+	}
+
+	blk_ksm_hw_enter(ksm);
+	slot = blk_ksm_find_keyslot(ksm, key);
+	if (!slot)
+		goto out_unlock;
+
+	if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
+		err = -EBUSY;
+		goto out_unlock;
+	}
+	err = ksm->ksm_ll_ops.keyslot_evict(ksm, key,
+					    blk_ksm_get_slot_idx(slot));
+	if (err)
+		goto out_unlock;
+
+	hlist_del(&slot->hash_node);
+	slot->key = NULL;
+	err = 0;
+out_unlock:
+	blk_ksm_hw_exit(ksm);
+	return err;
+}
+
+/**
+ * blk_ksm_reprogram_all_keys() - Re-program all keyslots.
+ * @ksm: The keyslot manager
+ *
+ * Re-program all keyslots that are supposed to have a key programmed.  This is
+ * intended only for use by drivers for hardware that loses its keys on reset.
+ *
+ * Context: Process context. Takes and releases ksm->lock.
+ */
+void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm)
+{
+	unsigned int slot;
+
+	if (blk_ksm_is_passthrough(ksm))
+		return;
+
+	/* This is for device initialization, so don't resume the device */
+	down_write(&ksm->lock);
+	for (slot = 0; slot < ksm->num_slots; slot++) {
+		const struct blk_crypto_key *key = ksm->slots[slot].key;
+		int err;
+
+		if (!key)
+			continue;
+
+		err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot);
+		WARN_ON(err);
+	}
+	up_write(&ksm->lock);
+}
+EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys);
+
+void blk_ksm_destroy(struct blk_keyslot_manager *ksm)
+{
+	if (!ksm)
+		return;
+	kvfree(ksm->slot_hashtable);
+	kvfree_sensitive(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots);
+	memzero_explicit(ksm, sizeof(*ksm));
+}
+EXPORT_SYMBOL_GPL(blk_ksm_destroy);
+
+bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q)
+{
+	if (blk_integrity_queue_supports_integrity(q)) {
+		pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
+		return false;
+	}
+	q->ksm = ksm;
+	return true;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_register);
+
+void blk_ksm_unregister(struct request_queue *q)
+{
+	q->ksm = NULL;
+}
+
+/**
+ * blk_ksm_intersect_modes() - restrict supported modes by child device
+ * @parent: The keyslot manager for parent device
+ * @child: The keyslot manager for child device, or NULL
+ *
+ * Clear any crypto mode support bits in @parent that aren't set in @child.
+ * If @child is NULL, then all parent bits are cleared.
+ *
+ * Only use this when setting up the keyslot manager for a layered device,
+ * before it's been exposed yet.
+ */
+void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
+			     const struct blk_keyslot_manager *child)
+{
+	if (child) {
+		unsigned int i;
+
+		parent->max_dun_bytes_supported =
+			min(parent->max_dun_bytes_supported,
+			    child->max_dun_bytes_supported);
+		for (i = 0; i < ARRAY_SIZE(child->crypto_modes_supported);
+		     i++) {
+			parent->crypto_modes_supported[i] &=
+				child->crypto_modes_supported[i];
+		}
+	} else {
+		parent->max_dun_bytes_supported = 0;
+		memset(parent->crypto_modes_supported, 0,
+		       sizeof(parent->crypto_modes_supported));
+	}
+}
+EXPORT_SYMBOL_GPL(blk_ksm_intersect_modes);
+
+/**
+ * blk_ksm_is_superset() - Check if a KSM supports a superset of crypto modes
+ *			   and DUN bytes that another KSM supports. Here,
+ *			   "superset" refers to the mathematical meaning of the
+ *			   word - i.e. if two KSMs have the *same* capabilities,
+ *			   they *are* considered supersets of each other.
+ * @ksm_superset: The KSM that we want to verify is a superset
+ * @ksm_subset: The KSM that we want to verify is a subset
+ *
+ * Return: True if @ksm_superset supports a superset of the crypto modes and DUN
+ *	   bytes that @ksm_subset supports.
+ */
+bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
+			 struct blk_keyslot_manager *ksm_subset)
+{
+	int i;
+
+	if (!ksm_subset)
+		return true;
+
+	if (!ksm_superset)
+		return false;
+
+	for (i = 0; i < ARRAY_SIZE(ksm_superset->crypto_modes_supported); i++) {
+		if (ksm_subset->crypto_modes_supported[i] &
+		    (~ksm_superset->crypto_modes_supported[i])) {
+			return false;
+		}
+	}
+
+	if (ksm_subset->max_dun_bytes_supported >
+	    ksm_superset->max_dun_bytes_supported) {
+		return false;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_is_superset);
+
+/**
+ * blk_ksm_update_capabilities() - Update the restrictions of a KSM to those of
+ *				   another KSM
+ * @target_ksm: The KSM whose restrictions to update.
+ * @reference_ksm: The KSM to whose restrictions this function will update
+ *		   @target_ksm's restrictions to.
+ *
+ * Blk-crypto requires that crypto capabilities that were
+ * advertised when a bio was created continue to be supported by the
+ * device until that bio is ended. This is turn means that a device cannot
+ * shrink its advertised crypto capabilities without any explicit
+ * synchronization with upper layers. So if there's no such explicit
+ * synchronization, @reference_ksm must support all the crypto capabilities that
+ * @target_ksm does
+ * (i.e. we need blk_ksm_is_superset(@reference_ksm, @target_ksm) == true).
+ *
+ * Note also that as long as the crypto capabilities are being expanded, the
+ * order of updates becoming visible is not important because it's alright
+ * for blk-crypto to see stale values - they only cause blk-crypto to
+ * believe that a crypto capability isn't supported when it actually is (which
+ * might result in blk-crypto-fallback being used if available, or the bio being
+ * failed).
+ */
+void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
+				 struct blk_keyslot_manager *reference_ksm)
+{
+	memcpy(target_ksm->crypto_modes_supported,
+	       reference_ksm->crypto_modes_supported,
+	       sizeof(target_ksm->crypto_modes_supported));
+
+	target_ksm->max_dun_bytes_supported =
+				reference_ksm->max_dun_bytes_supported;
+}
+EXPORT_SYMBOL_GPL(blk_ksm_update_capabilities);
+
+/**
+ * blk_ksm_init_passthrough() - Init a passthrough keyslot manager
+ * @ksm: The keyslot manager to init
+ *
+ * Initialize a passthrough keyslot manager.
+ * Called by e.g. storage drivers to set up a keyslot manager in their
+ * request_queue, when the storage driver wants to manage its keys by itself.
+ * This is useful for inline encryption hardware that doesn't have the concept
+ * of keyslots, and for layered devices.
+ */
+void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm)
+{
+	memset(ksm, 0, sizeof(*ksm));
+	init_rwsem(&ksm->lock);
+}
+EXPORT_SYMBOL_GPL(blk_ksm_init_passthrough);
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index 8f53f4a1f9e2..76ce7a5d2676 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -11,7 +11,7 @@
 
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 
diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c
deleted file mode 100644
index 1792159d12d1..000000000000
--- a/block/keyslot-manager.c
+++ /dev/null
@@ -1,579 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright 2019 Google LLC
- */
-
-/**
- * DOC: The Keyslot Manager
- *
- * Many devices with inline encryption support have a limited number of "slots"
- * into which encryption contexts may be programmed, and requests can be tagged
- * with a slot number to specify the key to use for en/decryption.
- *
- * As the number of slots is limited, and programming keys is expensive on
- * many inline encryption hardware, we don't want to program the same key into
- * multiple slots - if multiple requests are using the same key, we want to
- * program just one slot with that key and use that slot for all requests.
- *
- * The keyslot manager manages these keyslots appropriately, and also acts as
- * an abstraction between the inline encryption hardware and the upper layers.
- *
- * Lower layer devices will set up a keyslot manager in their request queue
- * and tell it how to perform device specific operations like programming/
- * evicting keys from keyslots.
- *
- * Upper layers will call blk_ksm_get_slot_for_key() to program a
- * key into some slot in the inline encryption hardware.
- */
-
-#define pr_fmt(fmt) "blk-crypto: " fmt
-
-#include <linux/keyslot-manager.h>
-#include <linux/device.h>
-#include <linux/atomic.h>
-#include <linux/mutex.h>
-#include <linux/pm_runtime.h>
-#include <linux/wait.h>
-#include <linux/blkdev.h>
-#include <linux/blk-integrity.h>
-
-struct blk_ksm_keyslot {
-	atomic_t slot_refs;
-	struct list_head idle_slot_node;
-	struct hlist_node hash_node;
-	const struct blk_crypto_key *key;
-	struct blk_keyslot_manager *ksm;
-};
-
-static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm)
-{
-	/*
-	 * Calling into the driver requires ksm->lock held and the device
-	 * resumed.  But we must resume the device first, since that can acquire
-	 * and release ksm->lock via blk_ksm_reprogram_all_keys().
-	 */
-	if (ksm->dev)
-		pm_runtime_get_sync(ksm->dev);
-	down_write(&ksm->lock);
-}
-
-static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm)
-{
-	up_write(&ksm->lock);
-	if (ksm->dev)
-		pm_runtime_put_sync(ksm->dev);
-}
-
-static inline bool blk_ksm_is_passthrough(struct blk_keyslot_manager *ksm)
-{
-	return ksm->num_slots == 0;
-}
-
-/**
- * blk_ksm_init() - Initialize a keyslot manager
- * @ksm: The keyslot_manager to initialize.
- * @num_slots: The number of key slots to manage.
- *
- * Allocate memory for keyslots and initialize a keyslot manager. Called by
- * e.g. storage drivers to set up a keyslot manager in their request_queue.
- *
- * Return: 0 on success, or else a negative error code.
- */
-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots)
-{
-	unsigned int slot;
-	unsigned int i;
-	unsigned int slot_hashtable_size;
-
-	memset(ksm, 0, sizeof(*ksm));
-
-	if (num_slots == 0)
-		return -EINVAL;
-
-	ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL);
-	if (!ksm->slots)
-		return -ENOMEM;
-
-	ksm->num_slots = num_slots;
-
-	init_rwsem(&ksm->lock);
-
-	init_waitqueue_head(&ksm->idle_slots_wait_queue);
-	INIT_LIST_HEAD(&ksm->idle_slots);
-
-	for (slot = 0; slot < num_slots; slot++) {
-		ksm->slots[slot].ksm = ksm;
-		list_add_tail(&ksm->slots[slot].idle_slot_node,
-			      &ksm->idle_slots);
-	}
-
-	spin_lock_init(&ksm->idle_slots_lock);
-
-	slot_hashtable_size = roundup_pow_of_two(num_slots);
-	/*
-	 * hash_ptr() assumes bits != 0, so ensure the hash table has at least 2
-	 * buckets.  This only makes a difference when there is only 1 keyslot.
-	 */
-	if (slot_hashtable_size < 2)
-		slot_hashtable_size = 2;
-
-	ksm->log_slot_ht_size = ilog2(slot_hashtable_size);
-	ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size,
-					     sizeof(ksm->slot_hashtable[0]),
-					     GFP_KERNEL);
-	if (!ksm->slot_hashtable)
-		goto err_destroy_ksm;
-	for (i = 0; i < slot_hashtable_size; i++)
-		INIT_HLIST_HEAD(&ksm->slot_hashtable[i]);
-
-	return 0;
-
-err_destroy_ksm:
-	blk_ksm_destroy(ksm);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_init);
-
-static void blk_ksm_destroy_callback(void *ksm)
-{
-	blk_ksm_destroy(ksm);
-}
-
-/**
- * devm_blk_ksm_init() - Resource-managed blk_ksm_init()
- * @dev: The device which owns the blk_keyslot_manager.
- * @ksm: The blk_keyslot_manager to initialize.
- * @num_slots: The number of key slots to manage.
- *
- * Like blk_ksm_init(), but causes blk_ksm_destroy() to be called automatically
- * on driver detach.
- *
- * Return: 0 on success, or else a negative error code.
- */
-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
-		      unsigned int num_slots)
-{
-	int err = blk_ksm_init(ksm, num_slots);
-
-	if (err)
-		return err;
-
-	return devm_add_action_or_reset(dev, blk_ksm_destroy_callback, ksm);
-}
-EXPORT_SYMBOL_GPL(devm_blk_ksm_init);
-
-static inline struct hlist_head *
-blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm,
-			    const struct blk_crypto_key *key)
-{
-	return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)];
-}
-
-static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot)
-{
-	struct blk_keyslot_manager *ksm = slot->ksm;
-	unsigned long flags;
-
-	spin_lock_irqsave(&ksm->idle_slots_lock, flags);
-	list_del(&slot->idle_slot_node);
-	spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
-}
-
-static struct blk_ksm_keyslot *blk_ksm_find_keyslot(
-					struct blk_keyslot_manager *ksm,
-					const struct blk_crypto_key *key)
-{
-	const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key);
-	struct blk_ksm_keyslot *slotp;
-
-	hlist_for_each_entry(slotp, head, hash_node) {
-		if (slotp->key == key)
-			return slotp;
-	}
-	return NULL;
-}
-
-static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot(
-					struct blk_keyslot_manager *ksm,
-					const struct blk_crypto_key *key)
-{
-	struct blk_ksm_keyslot *slot;
-
-	slot = blk_ksm_find_keyslot(ksm, key);
-	if (!slot)
-		return NULL;
-	if (atomic_inc_return(&slot->slot_refs) == 1) {
-		/* Took first reference to this slot; remove it from LRU list */
-		blk_ksm_remove_slot_from_lru_list(slot);
-	}
-	return slot;
-}
-
-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot)
-{
-	return slot - slot->ksm->slots;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx);
-
-/**
- * blk_ksm_get_slot_for_key() - Program a key into a keyslot.
- * @ksm: The keyslot manager to program the key into.
- * @key: Pointer to the key object to program, including the raw key, crypto
- *	 mode, and data unit size.
- * @slot_ptr: A pointer to return the pointer of the allocated keyslot.
- *
- * Get a keyslot that's been programmed with the specified key.  If one already
- * exists, return it with incremented refcount.  Otherwise, wait for a keyslot
- * to become idle and program it.
- *
- * Context: Process context. Takes and releases ksm->lock.
- * Return: BLK_STS_OK on success (and keyslot is set to the pointer of the
- *	   allocated keyslot), or some other blk_status_t otherwise (and
- *	   keyslot is set to NULL).
- */
-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
-				      const struct blk_crypto_key *key,
-				      struct blk_ksm_keyslot **slot_ptr)
-{
-	struct blk_ksm_keyslot *slot;
-	int slot_idx;
-	int err;
-
-	*slot_ptr = NULL;
-
-	if (blk_ksm_is_passthrough(ksm))
-		return BLK_STS_OK;
-
-	down_read(&ksm->lock);
-	slot = blk_ksm_find_and_grab_keyslot(ksm, key);
-	up_read(&ksm->lock);
-	if (slot)
-		goto success;
-
-	for (;;) {
-		blk_ksm_hw_enter(ksm);
-		slot = blk_ksm_find_and_grab_keyslot(ksm, key);
-		if (slot) {
-			blk_ksm_hw_exit(ksm);
-			goto success;
-		}
-
-		/*
-		 * If we're here, that means there wasn't a slot that was
-		 * already programmed with the key. So try to program it.
-		 */
-		if (!list_empty(&ksm->idle_slots))
-			break;
-
-		blk_ksm_hw_exit(ksm);
-		wait_event(ksm->idle_slots_wait_queue,
-			   !list_empty(&ksm->idle_slots));
-	}
-
-	slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot,
-				idle_slot_node);
-	slot_idx = blk_ksm_get_slot_idx(slot);
-
-	err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx);
-	if (err) {
-		wake_up(&ksm->idle_slots_wait_queue);
-		blk_ksm_hw_exit(ksm);
-		return errno_to_blk_status(err);
-	}
-
-	/* Move this slot to the hash list for the new key. */
-	if (slot->key)
-		hlist_del(&slot->hash_node);
-	slot->key = key;
-	hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key));
-
-	atomic_set(&slot->slot_refs, 1);
-
-	blk_ksm_remove_slot_from_lru_list(slot);
-
-	blk_ksm_hw_exit(ksm);
-success:
-	*slot_ptr = slot;
-	return BLK_STS_OK;
-}
-
-/**
- * blk_ksm_put_slot() - Release a reference to a slot
- * @slot: The keyslot to release the reference of.
- *
- * Context: Any context.
- */
-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot)
-{
-	struct blk_keyslot_manager *ksm;
-	unsigned long flags;
-
-	if (!slot)
-		return;
-
-	ksm = slot->ksm;
-
-	if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
-					&ksm->idle_slots_lock, flags)) {
-		list_add_tail(&slot->idle_slot_node, &ksm->idle_slots);
-		spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
-		wake_up(&ksm->idle_slots_wait_queue);
-	}
-}
-
-/**
- * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is
- *				    supported by a ksm.
- * @ksm: The keyslot manager to check
- * @cfg: The crypto configuration to check for.
- *
- * Checks for crypto_mode/data unit size/dun bytes support.
- *
- * Return: Whether or not this ksm supports the specified crypto config.
- */
-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
-				  const struct blk_crypto_config *cfg)
-{
-	if (!ksm)
-		return false;
-	if (!(ksm->crypto_modes_supported[cfg->crypto_mode] &
-	      cfg->data_unit_size))
-		return false;
-	if (ksm->max_dun_bytes_supported < cfg->dun_bytes)
-		return false;
-	return true;
-}
-
-/**
- * blk_ksm_evict_key() - Evict a key from the lower layer device.
- * @ksm: The keyslot manager to evict from
- * @key: The key to evict
- *
- * Find the keyslot that the specified key was programmed into, and evict that
- * slot from the lower layer device. The slot must not be in use by any
- * in-flight IO when this function is called.
- *
- * Context: Process context. Takes and releases ksm->lock.
- * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
- *	   if the keyslot is still in use, or another -errno value on other
- *	   error.
- */
-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
-		      const struct blk_crypto_key *key)
-{
-	struct blk_ksm_keyslot *slot;
-	int err = 0;
-
-	if (blk_ksm_is_passthrough(ksm)) {
-		if (ksm->ksm_ll_ops.keyslot_evict) {
-			blk_ksm_hw_enter(ksm);
-			err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1);
-			blk_ksm_hw_exit(ksm);
-			return err;
-		}
-		return 0;
-	}
-
-	blk_ksm_hw_enter(ksm);
-	slot = blk_ksm_find_keyslot(ksm, key);
-	if (!slot)
-		goto out_unlock;
-
-	if (WARN_ON_ONCE(atomic_read(&slot->slot_refs) != 0)) {
-		err = -EBUSY;
-		goto out_unlock;
-	}
-	err = ksm->ksm_ll_ops.keyslot_evict(ksm, key,
-					    blk_ksm_get_slot_idx(slot));
-	if (err)
-		goto out_unlock;
-
-	hlist_del(&slot->hash_node);
-	slot->key = NULL;
-	err = 0;
-out_unlock:
-	blk_ksm_hw_exit(ksm);
-	return err;
-}
-
-/**
- * blk_ksm_reprogram_all_keys() - Re-program all keyslots.
- * @ksm: The keyslot manager
- *
- * Re-program all keyslots that are supposed to have a key programmed.  This is
- * intended only for use by drivers for hardware that loses its keys on reset.
- *
- * Context: Process context. Takes and releases ksm->lock.
- */
-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm)
-{
-	unsigned int slot;
-
-	if (blk_ksm_is_passthrough(ksm))
-		return;
-
-	/* This is for device initialization, so don't resume the device */
-	down_write(&ksm->lock);
-	for (slot = 0; slot < ksm->num_slots; slot++) {
-		const struct blk_crypto_key *key = ksm->slots[slot].key;
-		int err;
-
-		if (!key)
-			continue;
-
-		err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot);
-		WARN_ON(err);
-	}
-	up_write(&ksm->lock);
-}
-EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys);
-
-void blk_ksm_destroy(struct blk_keyslot_manager *ksm)
-{
-	if (!ksm)
-		return;
-	kvfree(ksm->slot_hashtable);
-	kvfree_sensitive(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots);
-	memzero_explicit(ksm, sizeof(*ksm));
-}
-EXPORT_SYMBOL_GPL(blk_ksm_destroy);
-
-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q)
-{
-	if (blk_integrity_queue_supports_integrity(q)) {
-		pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
-		return false;
-	}
-	q->ksm = ksm;
-	return true;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_register);
-
-void blk_ksm_unregister(struct request_queue *q)
-{
-	q->ksm = NULL;
-}
-
-/**
- * blk_ksm_intersect_modes() - restrict supported modes by child device
- * @parent: The keyslot manager for parent device
- * @child: The keyslot manager for child device, or NULL
- *
- * Clear any crypto mode support bits in @parent that aren't set in @child.
- * If @child is NULL, then all parent bits are cleared.
- *
- * Only use this when setting up the keyslot manager for a layered device,
- * before it's been exposed yet.
- */
-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
-			     const struct blk_keyslot_manager *child)
-{
-	if (child) {
-		unsigned int i;
-
-		parent->max_dun_bytes_supported =
-			min(parent->max_dun_bytes_supported,
-			    child->max_dun_bytes_supported);
-		for (i = 0; i < ARRAY_SIZE(child->crypto_modes_supported);
-		     i++) {
-			parent->crypto_modes_supported[i] &=
-				child->crypto_modes_supported[i];
-		}
-	} else {
-		parent->max_dun_bytes_supported = 0;
-		memset(parent->crypto_modes_supported, 0,
-		       sizeof(parent->crypto_modes_supported));
-	}
-}
-EXPORT_SYMBOL_GPL(blk_ksm_intersect_modes);
-
-/**
- * blk_ksm_is_superset() - Check if a KSM supports a superset of crypto modes
- *			   and DUN bytes that another KSM supports. Here,
- *			   "superset" refers to the mathematical meaning of the
- *			   word - i.e. if two KSMs have the *same* capabilities,
- *			   they *are* considered supersets of each other.
- * @ksm_superset: The KSM that we want to verify is a superset
- * @ksm_subset: The KSM that we want to verify is a subset
- *
- * Return: True if @ksm_superset supports a superset of the crypto modes and DUN
- *	   bytes that @ksm_subset supports.
- */
-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
-			 struct blk_keyslot_manager *ksm_subset)
-{
-	int i;
-
-	if (!ksm_subset)
-		return true;
-
-	if (!ksm_superset)
-		return false;
-
-	for (i = 0; i < ARRAY_SIZE(ksm_superset->crypto_modes_supported); i++) {
-		if (ksm_subset->crypto_modes_supported[i] &
-		    (~ksm_superset->crypto_modes_supported[i])) {
-			return false;
-		}
-	}
-
-	if (ksm_subset->max_dun_bytes_supported >
-	    ksm_superset->max_dun_bytes_supported) {
-		return false;
-	}
-
-	return true;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_is_superset);
-
-/**
- * blk_ksm_update_capabilities() - Update the restrictions of a KSM to those of
- *				   another KSM
- * @target_ksm: The KSM whose restrictions to update.
- * @reference_ksm: The KSM to whose restrictions this function will update
- *		   @target_ksm's restrictions to.
- *
- * Blk-crypto requires that crypto capabilities that were
- * advertised when a bio was created continue to be supported by the
- * device until that bio is ended. This is turn means that a device cannot
- * shrink its advertised crypto capabilities without any explicit
- * synchronization with upper layers. So if there's no such explicit
- * synchronization, @reference_ksm must support all the crypto capabilities that
- * @target_ksm does
- * (i.e. we need blk_ksm_is_superset(@reference_ksm, @target_ksm) == true).
- *
- * Note also that as long as the crypto capabilities are being expanded, the
- * order of updates becoming visible is not important because it's alright
- * for blk-crypto to see stale values - they only cause blk-crypto to
- * believe that a crypto capability isn't supported when it actually is (which
- * might result in blk-crypto-fallback being used if available, or the bio being
- * failed).
- */
-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
-				 struct blk_keyslot_manager *reference_ksm)
-{
-	memcpy(target_ksm->crypto_modes_supported,
-	       reference_ksm->crypto_modes_supported,
-	       sizeof(target_ksm->crypto_modes_supported));
-
-	target_ksm->max_dun_bytes_supported =
-				reference_ksm->max_dun_bytes_supported;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_update_capabilities);
-
-/**
- * blk_ksm_init_passthrough() - Init a passthrough keyslot manager
- * @ksm: The keyslot manager to init
- *
- * Initialize a passthrough keyslot manager.
- * Called by e.g. storage drivers to set up a keyslot manager in their
- * request_queue, when the storage driver wants to manage its keys by itself.
- * This is useful for inline encryption hardware that doesn't have the concept
- * of keyslots, and for layered devices.
- */
-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm)
-{
-	memset(ksm, 0, sizeof(*ksm));
-	init_rwsem(&ksm->lock);
-}
-EXPORT_SYMBOL_GPL(blk_ksm_init_passthrough);
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 55dccdfbcb22..841ed87999e7 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -13,7 +13,7 @@
 #include <linux/ktime.h>
 #include <linux/genhd.h>
 #include <linux/blk-mq.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
 
 #include <trace/events/block.h>
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7870e6460633..4184fd8ccb08 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -29,7 +29,7 @@
 #include <linux/refcount.h>
 #include <linux/part_stat.h>
 #include <linux/blk-crypto.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
 
 #define DM_MSG_PREFIX "core"
 
diff --git a/drivers/mmc/host/cqhci-crypto.c b/drivers/mmc/host/cqhci-crypto.c
index 6419cfbb4ab7..628bbfaf8312 100644
--- a/drivers/mmc/host/cqhci-crypto.c
+++ b/drivers/mmc/host/cqhci-crypto.c
@@ -6,7 +6,7 @@
  */
 
 #include <linux/blk-crypto.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
 #include <linux/mmc/host.h>
 
 #include "cqhci-crypto.h"
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 41f6e06f9185..885fcf2e5922 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -32,7 +32,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/bitfield.h>
 #include <linux/devfreq.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
 #include "unipro.h"
 
 #include <asm/irq.h>
diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
new file mode 100644
index 000000000000..a27605e2f826
--- /dev/null
+++ b/include/linux/blk-crypto-profile.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2019 Google LLC
+ */
+
+#ifndef __LINUX_KEYSLOT_MANAGER_H
+#define __LINUX_KEYSLOT_MANAGER_H
+
+#include <linux/bio.h>
+#include <linux/blk-crypto.h>
+
+struct blk_keyslot_manager;
+
+/**
+ * struct blk_ksm_ll_ops - functions to manage keyslots in hardware
+ * @keyslot_program:	Program the specified key into the specified slot in the
+ *			inline encryption hardware.
+ * @keyslot_evict:	Evict key from the specified keyslot in the hardware.
+ *			The key is provided so that e.g. dm layers can evict
+ *			keys from the devices that they map over.
+ *			Returns 0 on success, -errno otherwise.
+ *
+ * This structure should be provided by storage device drivers when they set up
+ * a keyslot manager - this structure holds the function ptrs that the keyslot
+ * manager will use to manipulate keyslots in the hardware.
+ */
+struct blk_ksm_ll_ops {
+	int (*keyslot_program)(struct blk_keyslot_manager *ksm,
+			       const struct blk_crypto_key *key,
+			       unsigned int slot);
+	int (*keyslot_evict)(struct blk_keyslot_manager *ksm,
+			     const struct blk_crypto_key *key,
+			     unsigned int slot);
+};
+
+struct blk_keyslot_manager {
+	/*
+	 * The struct blk_ksm_ll_ops that this keyslot manager will use
+	 * to perform operations like programming and evicting keys on the
+	 * device
+	 */
+	struct blk_ksm_ll_ops ksm_ll_ops;
+
+	/*
+	 * The maximum number of bytes supported for specifying the data unit
+	 * number.
+	 */
+	unsigned int max_dun_bytes_supported;
+
+	/*
+	 * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents
+	 * whether a crypto mode and data unit size are supported. The i'th
+	 * bit of crypto_mode_supported[crypto_mode] is set iff a data unit
+	 * size of (1 << i) is supported. We only support data unit sizes
+	 * that are powers of 2.
+	 */
+	unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX];
+
+	/* Device for runtime power management (NULL if none) */
+	struct device *dev;
+
+	/* Here onwards are *private* fields for internal keyslot manager use */
+
+	unsigned int num_slots;
+
+	/* Protects programming and evicting keys from the device */
+	struct rw_semaphore lock;
+
+	/* List of idle slots, with least recently used slot at front */
+	wait_queue_head_t idle_slots_wait_queue;
+	struct list_head idle_slots;
+	spinlock_t idle_slots_lock;
+
+	/*
+	 * Hash table which maps struct *blk_crypto_key to keyslots, so that we
+	 * can find a key's keyslot in O(1) time rather than O(num_slots).
+	 * Protected by 'lock'.
+	 */
+	struct hlist_head *slot_hashtable;
+	unsigned int log_slot_ht_size;
+
+	/* Per-keyslot data */
+	struct blk_ksm_keyslot *slots;
+};
+
+int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots);
+
+int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
+		      unsigned int num_slots);
+
+blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
+				      const struct blk_crypto_key *key,
+				      struct blk_ksm_keyslot **slot_ptr);
+
+unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot);
+
+void blk_ksm_put_slot(struct blk_ksm_keyslot *slot);
+
+bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
+				  const struct blk_crypto_config *cfg);
+
+int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
+		      const struct blk_crypto_key *key);
+
+void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm);
+
+void blk_ksm_destroy(struct blk_keyslot_manager *ksm);
+
+void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
+			     const struct blk_keyslot_manager *child);
+
+void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm);
+
+bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
+			 struct blk_keyslot_manager *ksm_subset);
+
+void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
+				 struct blk_keyslot_manager *reference_ksm);
+
+#endif /* __LINUX_KEYSLOT_MANAGER_H */
diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h
deleted file mode 100644
index a27605e2f826..000000000000
--- a/include/linux/keyslot-manager.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright 2019 Google LLC
- */
-
-#ifndef __LINUX_KEYSLOT_MANAGER_H
-#define __LINUX_KEYSLOT_MANAGER_H
-
-#include <linux/bio.h>
-#include <linux/blk-crypto.h>
-
-struct blk_keyslot_manager;
-
-/**
- * struct blk_ksm_ll_ops - functions to manage keyslots in hardware
- * @keyslot_program:	Program the specified key into the specified slot in the
- *			inline encryption hardware.
- * @keyslot_evict:	Evict key from the specified keyslot in the hardware.
- *			The key is provided so that e.g. dm layers can evict
- *			keys from the devices that they map over.
- *			Returns 0 on success, -errno otherwise.
- *
- * This structure should be provided by storage device drivers when they set up
- * a keyslot manager - this structure holds the function ptrs that the keyslot
- * manager will use to manipulate keyslots in the hardware.
- */
-struct blk_ksm_ll_ops {
-	int (*keyslot_program)(struct blk_keyslot_manager *ksm,
-			       const struct blk_crypto_key *key,
-			       unsigned int slot);
-	int (*keyslot_evict)(struct blk_keyslot_manager *ksm,
-			     const struct blk_crypto_key *key,
-			     unsigned int slot);
-};
-
-struct blk_keyslot_manager {
-	/*
-	 * The struct blk_ksm_ll_ops that this keyslot manager will use
-	 * to perform operations like programming and evicting keys on the
-	 * device
-	 */
-	struct blk_ksm_ll_ops ksm_ll_ops;
-
-	/*
-	 * The maximum number of bytes supported for specifying the data unit
-	 * number.
-	 */
-	unsigned int max_dun_bytes_supported;
-
-	/*
-	 * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents
-	 * whether a crypto mode and data unit size are supported. The i'th
-	 * bit of crypto_mode_supported[crypto_mode] is set iff a data unit
-	 * size of (1 << i) is supported. We only support data unit sizes
-	 * that are powers of 2.
-	 */
-	unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX];
-
-	/* Device for runtime power management (NULL if none) */
-	struct device *dev;
-
-	/* Here onwards are *private* fields for internal keyslot manager use */
-
-	unsigned int num_slots;
-
-	/* Protects programming and evicting keys from the device */
-	struct rw_semaphore lock;
-
-	/* List of idle slots, with least recently used slot at front */
-	wait_queue_head_t idle_slots_wait_queue;
-	struct list_head idle_slots;
-	spinlock_t idle_slots_lock;
-
-	/*
-	 * Hash table which maps struct *blk_crypto_key to keyslots, so that we
-	 * can find a key's keyslot in O(1) time rather than O(num_slots).
-	 * Protected by 'lock'.
-	 */
-	struct hlist_head *slot_hashtable;
-	unsigned int log_slot_ht_size;
-
-	/* Per-keyslot data */
-	struct blk_ksm_keyslot *slots;
-};
-
-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots);
-
-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
-		      unsigned int num_slots);
-
-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
-				      const struct blk_crypto_key *key,
-				      struct blk_ksm_keyslot **slot_ptr);
-
-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot);
-
-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot);
-
-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
-				  const struct blk_crypto_config *cfg);
-
-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
-		      const struct blk_crypto_key *key);
-
-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm);
-
-void blk_ksm_destroy(struct blk_keyslot_manager *ksm);
-
-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
-			     const struct blk_keyslot_manager *child);
-
-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm);
-
-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
-			 struct blk_keyslot_manager *ksm_subset);
-
-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
-				 struct blk_keyslot_manager *reference_ksm);
-
-#endif /* __LINUX_KEYSLOT_MANAGER_H */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 0c0c9a0fdf57..725b1de41767 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -15,7 +15,7 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/pm.h>
 #include <linux/dma-direction.h>
-#include <linux/keyslot-manager.h>
+#include <linux/blk-crypto-profile.h>
 
 struct mmc_ios {
 	unsigned int	clock;			/* clock rate */
-- 
cgit v1.2.3


From cb77cb5abe1f4fae4a33b735606aae22f9eaa1c7 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Mon, 18 Oct 2021 11:04:52 -0700
Subject: blk-crypto: rename blk_keyslot_manager to blk_crypto_profile

blk_keyslot_manager is misnamed because it doesn't necessarily manage
keyslots.  It actually does several different things:

  - Contains the crypto capabilities of the device.

  - Provides functions to control the inline encryption hardware.
    Originally these were just for programming/evicting keyslots;
    however, new functionality (hardware-wrapped keys) will require new
    functions here which are unrelated to keyslots.  Moreover,
    device-mapper devices already (ab)use "keyslot_evict" to pass key
    eviction requests to their underlying devices even though
    device-mapper devices don't have any keyslots themselves (so it
    really should be "evict_key", not "keyslot_evict").

  - Sometimes (but not always!) it manages keyslots.  Originally it
    always did, but device-mapper devices don't have keyslots
    themselves, so they use a "passthrough keyslot manager" which
    doesn't actually manage keyslots.  This hack works, but the
    terminology is unnatural.  Also, some hardware doesn't have keyslots
    and thus also uses a "passthrough keyslot manager" (support for such
    hardware is yet to be upstreamed, but it will happen eventually).

Let's stop having keyslot managers which don't actually manage keyslots.
Instead, rename blk_keyslot_manager to blk_crypto_profile.

This is a fairly big change, since for consistency it also has to update
keyslot manager-related function names, variable names, and comments --
not just the actual struct name.  However it's still a fairly
straightforward change, as it doesn't change any actual functionality.

Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # For MMC
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Link: https://lore.kernel.org/r/20211018180453.40441-4-ebiggers@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-crypto-fallback.c        |  71 +++--
 block/blk-crypto-profile.c         | 520 ++++++++++++++++++-------------------
 block/blk-crypto.c                 |  27 +-
 block/blk-integrity.c              |   4 +-
 drivers/md/dm-core.h               |   2 +-
 drivers/md/dm-table.c              | 168 ++++++------
 drivers/md/dm.c                    |   8 +-
 drivers/mmc/core/crypto.c          |  11 +-
 drivers/mmc/host/cqhci-crypto.c    |  31 +--
 drivers/scsi/ufs/ufshcd-crypto.c   |  32 +--
 drivers/scsi/ufs/ufshcd-crypto.h   |   9 +-
 drivers/scsi/ufs/ufshcd.c          |   2 +-
 drivers/scsi/ufs/ufshcd.h          |   4 +-
 include/linux/blk-crypto-profile.h | 164 +++++++-----
 include/linux/blk-mq.h             |   2 +-
 include/linux/blkdev.h             |  16 +-
 include/linux/device-mapper.h      |   4 +-
 include/linux/mmc/host.h           |   2 +-
 18 files changed, 555 insertions(+), 522 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index 08bfea292c75..c87aba8584c6 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot {
 	struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
 } *blk_crypto_keyslots;
 
-static struct blk_keyslot_manager blk_crypto_ksm;
+static struct blk_crypto_profile blk_crypto_fallback_profile;
 static struct workqueue_struct *blk_crypto_wq;
 static mempool_t *blk_crypto_bounce_page_pool;
 static struct bio_set crypto_bio_split;
@@ -104,9 +104,10 @@ static void blk_crypto_fallback_evict_keyslot(unsigned int slot)
 	slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID;
 }
 
-static int blk_crypto_fallback_keyslot_program(struct blk_keyslot_manager *ksm,
-					       const struct blk_crypto_key *key,
-					       unsigned int slot)
+static int
+blk_crypto_fallback_keyslot_program(struct blk_crypto_profile *profile,
+				    const struct blk_crypto_key *key,
+				    unsigned int slot)
 {
 	struct blk_crypto_fallback_keyslot *slotp = &blk_crypto_keyslots[slot];
 	const enum blk_crypto_mode_num crypto_mode =
@@ -127,7 +128,7 @@ static int blk_crypto_fallback_keyslot_program(struct blk_keyslot_manager *ksm,
 	return 0;
 }
 
-static int blk_crypto_fallback_keyslot_evict(struct blk_keyslot_manager *ksm,
+static int blk_crypto_fallback_keyslot_evict(struct blk_crypto_profile *profile,
 					     const struct blk_crypto_key *key,
 					     unsigned int slot)
 {
@@ -135,14 +136,9 @@ static int blk_crypto_fallback_keyslot_evict(struct blk_keyslot_manager *ksm,
 	return 0;
 }
 
-/*
- * The crypto API fallback KSM ops - only used for a bio when it specifies a
- * blk_crypto_key that was not supported by the device's inline encryption
- * hardware.
- */
-static const struct blk_ksm_ll_ops blk_crypto_ksm_ll_ops = {
-	.keyslot_program	= blk_crypto_fallback_keyslot_program,
-	.keyslot_evict		= blk_crypto_fallback_keyslot_evict,
+static const struct blk_crypto_ll_ops blk_crypto_fallback_ll_ops = {
+	.keyslot_program        = blk_crypto_fallback_keyslot_program,
+	.keyslot_evict          = blk_crypto_fallback_keyslot_evict,
 };
 
 static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio)
@@ -188,13 +184,13 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
 }
 
 static bool
-blk_crypto_fallback_alloc_cipher_req(struct blk_ksm_keyslot *slot,
+blk_crypto_fallback_alloc_cipher_req(struct blk_crypto_keyslot *slot,
 				     struct skcipher_request **ciph_req_ret,
 				     struct crypto_wait *wait)
 {
 	struct skcipher_request *ciph_req;
 	const struct blk_crypto_fallback_keyslot *slotp;
-	int keyslot_idx = blk_ksm_get_slot_idx(slot);
+	int keyslot_idx = blk_crypto_keyslot_index(slot);
 
 	slotp = &blk_crypto_keyslots[keyslot_idx];
 	ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode],
@@ -266,7 +262,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
 {
 	struct bio *src_bio, *enc_bio;
 	struct bio_crypt_ctx *bc;
-	struct blk_ksm_keyslot *slot;
+	struct blk_crypto_keyslot *slot;
 	int data_unit_size;
 	struct skcipher_request *ciph_req = NULL;
 	DECLARE_CRYPTO_WAIT(wait);
@@ -293,10 +289,11 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
 	}
 
 	/*
-	 * Use the crypto API fallback keyslot manager to get a crypto_skcipher
-	 * for the algorithm and key specified for this bio.
+	 * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
+	 * this bio's algorithm and key.
 	 */
-	blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot);
+	blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+					bc->bc_key, &slot);
 	if (blk_st != BLK_STS_OK) {
 		src_bio->bi_status = blk_st;
 		goto out_put_enc_bio;
@@ -364,7 +361,7 @@ out_free_bounce_pages:
 out_free_ciph_req:
 	skcipher_request_free(ciph_req);
 out_release_keyslot:
-	blk_ksm_put_slot(slot);
+	blk_crypto_put_keyslot(slot);
 out_put_enc_bio:
 	if (enc_bio)
 		bio_put(enc_bio);
@@ -382,7 +379,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
 		container_of(work, struct bio_fallback_crypt_ctx, work);
 	struct bio *bio = f_ctx->bio;
 	struct bio_crypt_ctx *bc = &f_ctx->crypt_ctx;
-	struct blk_ksm_keyslot *slot;
+	struct blk_crypto_keyslot *slot;
 	struct skcipher_request *ciph_req = NULL;
 	DECLARE_CRYPTO_WAIT(wait);
 	u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE];
@@ -395,10 +392,11 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
 	blk_status_t blk_st;
 
 	/*
-	 * Use the crypto API fallback keyslot manager to get a crypto_skcipher
-	 * for the algorithm and key specified for this bio.
+	 * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
+	 * this bio's algorithm and key.
 	 */
-	blk_st = blk_ksm_get_slot_for_key(&blk_crypto_ksm, bc->bc_key, &slot);
+	blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+					bc->bc_key, &slot);
 	if (blk_st != BLK_STS_OK) {
 		bio->bi_status = blk_st;
 		goto out_no_keyslot;
@@ -436,7 +434,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
 
 out:
 	skcipher_request_free(ciph_req);
-	blk_ksm_put_slot(slot);
+	blk_crypto_put_keyslot(slot);
 out_no_keyslot:
 	mempool_free(f_ctx, bio_fallback_crypt_ctx_pool);
 	bio_endio(bio);
@@ -501,8 +499,8 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
 		return false;
 	}
 
-	if (!blk_ksm_crypto_cfg_supported(&blk_crypto_ksm,
-					  &bc->bc_key->crypto_cfg)) {
+	if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile,
+					&bc->bc_key->crypto_cfg)) {
 		bio->bi_status = BLK_STS_NOTSUPP;
 		return false;
 	}
@@ -528,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
 
 int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
 {
-	return blk_ksm_evict_key(&blk_crypto_ksm, key);
+	return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key);
 }
 
 static bool blk_crypto_fallback_inited;
@@ -536,6 +534,7 @@ static int blk_crypto_fallback_init(void)
 {
 	int i;
 	int err;
+	struct blk_crypto_profile *profile = &blk_crypto_fallback_profile;
 
 	if (blk_crypto_fallback_inited)
 		return 0;
@@ -546,24 +545,24 @@ static int blk_crypto_fallback_init(void)
 	if (err)
 		goto out;
 
-	err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+	err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots);
 	if (err)
 		goto fail_free_bioset;
 	err = -ENOMEM;
 
-	blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops;
-	blk_crypto_ksm.max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+	profile->ll_ops = blk_crypto_fallback_ll_ops;
+	profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
 
 	/* All blk-crypto modes have a crypto API fallback. */
 	for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
-		blk_crypto_ksm.crypto_modes_supported[i] = 0xFFFFFFFF;
-	blk_crypto_ksm.crypto_modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+		profile->modes_supported[i] = 0xFFFFFFFF;
+	profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
 
 	blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
 					WQ_UNBOUND | WQ_HIGHPRI |
 					WQ_MEM_RECLAIM, num_online_cpus());
 	if (!blk_crypto_wq)
-		goto fail_free_ksm;
+		goto fail_destroy_profile;
 
 	blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots,
 				      sizeof(blk_crypto_keyslots[0]),
@@ -597,8 +596,8 @@ fail_free_keyslots:
 	kfree(blk_crypto_keyslots);
 fail_free_wq:
 	destroy_workqueue(blk_crypto_wq);
-fail_free_ksm:
-	blk_ksm_destroy(&blk_crypto_ksm);
+fail_destroy_profile:
+	blk_crypto_profile_destroy(profile);
 fail_free_bioset:
 	bioset_exit(&crypto_bio_split);
 out:
diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c
index 1a235fa3c3e8..605ba0626a5c 100644
--- a/block/blk-crypto-profile.c
+++ b/block/blk-crypto-profile.c
@@ -4,26 +4,22 @@
  */
 
 /**
- * DOC: The Keyslot Manager
+ * DOC: blk-crypto profiles
  *
- * Many devices with inline encryption support have a limited number of "slots"
- * into which encryption contexts may be programmed, and requests can be tagged
- * with a slot number to specify the key to use for en/decryption.
+ * 'struct blk_crypto_profile' contains all generic inline encryption-related
+ * state for a particular inline encryption device.  blk_crypto_profile serves
+ * as the way that drivers for inline encryption hardware expose their crypto
+ * capabilities and certain functions (e.g., functions to program and evict
+ * keys) to upper layers.  Device drivers that want to support inline encryption
+ * construct a crypto profile, then associate it with the disk's request_queue.
  *
- * As the number of slots is limited, and programming keys is expensive on
- * many inline encryption hardware, we don't want to program the same key into
- * multiple slots - if multiple requests are using the same key, we want to
- * program just one slot with that key and use that slot for all requests.
+ * If the device has keyslots, then its blk_crypto_profile also handles managing
+ * these keyslots in a device-independent way, using the driver-provided
+ * functions to program and evict keys as needed.  This includes keeping track
+ * of which key and how many I/O requests are using each keyslot, getting
+ * keyslots for I/O requests, and handling key eviction requests.
  *
- * The keyslot manager manages these keyslots appropriately, and also acts as
- * an abstraction between the inline encryption hardware and the upper layers.
- *
- * Lower layer devices will set up a keyslot manager in their request queue
- * and tell it how to perform device specific operations like programming/
- * evicting keys from keyslots.
- *
- * Upper layers will call blk_ksm_get_slot_for_key() to program a
- * key into some slot in the inline encryption hardware.
+ * For more information, see Documentation/block/inline-encryption.rst.
  */
 
 #define pr_fmt(fmt) "blk-crypto: " fmt
@@ -37,77 +33,75 @@
 #include <linux/blkdev.h>
 #include <linux/blk-integrity.h>
 
-struct blk_ksm_keyslot {
+struct blk_crypto_keyslot {
 	atomic_t slot_refs;
 	struct list_head idle_slot_node;
 	struct hlist_node hash_node;
 	const struct blk_crypto_key *key;
-	struct blk_keyslot_manager *ksm;
+	struct blk_crypto_profile *profile;
 };
 
-static inline void blk_ksm_hw_enter(struct blk_keyslot_manager *ksm)
+static inline void blk_crypto_hw_enter(struct blk_crypto_profile *profile)
 {
 	/*
-	 * Calling into the driver requires ksm->lock held and the device
+	 * Calling into the driver requires profile->lock held and the device
 	 * resumed.  But we must resume the device first, since that can acquire
-	 * and release ksm->lock via blk_ksm_reprogram_all_keys().
+	 * and release profile->lock via blk_crypto_reprogram_all_keys().
 	 */
-	if (ksm->dev)
-		pm_runtime_get_sync(ksm->dev);
-	down_write(&ksm->lock);
+	if (profile->dev)
+		pm_runtime_get_sync(profile->dev);
+	down_write(&profile->lock);
 }
 
-static inline void blk_ksm_hw_exit(struct blk_keyslot_manager *ksm)
+static inline void blk_crypto_hw_exit(struct blk_crypto_profile *profile)
 {
-	up_write(&ksm->lock);
-	if (ksm->dev)
-		pm_runtime_put_sync(ksm->dev);
-}
-
-static inline bool blk_ksm_is_passthrough(struct blk_keyslot_manager *ksm)
-{
-	return ksm->num_slots == 0;
+	up_write(&profile->lock);
+	if (profile->dev)
+		pm_runtime_put_sync(profile->dev);
 }
 
 /**
- * blk_ksm_init() - Initialize a keyslot manager
- * @ksm: The keyslot_manager to initialize.
- * @num_slots: The number of key slots to manage.
+ * blk_crypto_profile_init() - Initialize a blk_crypto_profile
+ * @profile: the blk_crypto_profile to initialize
+ * @num_slots: the number of keyslots
  *
- * Allocate memory for keyslots and initialize a keyslot manager. Called by
- * e.g. storage drivers to set up a keyslot manager in their request_queue.
+ * Storage drivers must call this when starting to set up a blk_crypto_profile,
+ * before filling in additional fields.
  *
  * Return: 0 on success, or else a negative error code.
  */
-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots)
+int blk_crypto_profile_init(struct blk_crypto_profile *profile,
+			    unsigned int num_slots)
 {
 	unsigned int slot;
 	unsigned int i;
 	unsigned int slot_hashtable_size;
 
-	memset(ksm, 0, sizeof(*ksm));
+	memset(profile, 0, sizeof(*profile));
+	init_rwsem(&profile->lock);
 
 	if (num_slots == 0)
-		return -EINVAL;
+		return 0;
 
-	ksm->slots = kvcalloc(num_slots, sizeof(ksm->slots[0]), GFP_KERNEL);
-	if (!ksm->slots)
-		return -ENOMEM;
+	/* Initialize keyslot management data. */
 
-	ksm->num_slots = num_slots;
+	profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]),
+				  GFP_KERNEL);
+	if (!profile->slots)
+		return -ENOMEM;
 
-	init_rwsem(&ksm->lock);
+	profile->num_slots = num_slots;
 
-	init_waitqueue_head(&ksm->idle_slots_wait_queue);
-	INIT_LIST_HEAD(&ksm->idle_slots);
+	init_waitqueue_head(&profile->idle_slots_wait_queue);
+	INIT_LIST_HEAD(&profile->idle_slots);
 
 	for (slot = 0; slot < num_slots; slot++) {
-		ksm->slots[slot].ksm = ksm;
-		list_add_tail(&ksm->slots[slot].idle_slot_node,
-			      &ksm->idle_slots);
+		profile->slots[slot].profile = profile;
+		list_add_tail(&profile->slots[slot].idle_slot_node,
+			      &profile->idle_slots);
 	}
 
-	spin_lock_init(&ksm->idle_slots_lock);
+	spin_lock_init(&profile->idle_slots_lock);
 
 	slot_hashtable_size = roundup_pow_of_two(num_slots);
 	/*
@@ -117,74 +111,80 @@ int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots)
 	if (slot_hashtable_size < 2)
 		slot_hashtable_size = 2;
 
-	ksm->log_slot_ht_size = ilog2(slot_hashtable_size);
-	ksm->slot_hashtable = kvmalloc_array(slot_hashtable_size,
-					     sizeof(ksm->slot_hashtable[0]),
-					     GFP_KERNEL);
-	if (!ksm->slot_hashtable)
-		goto err_destroy_ksm;
+	profile->log_slot_ht_size = ilog2(slot_hashtable_size);
+	profile->slot_hashtable =
+		kvmalloc_array(slot_hashtable_size,
+			       sizeof(profile->slot_hashtable[0]), GFP_KERNEL);
+	if (!profile->slot_hashtable)
+		goto err_destroy;
 	for (i = 0; i < slot_hashtable_size; i++)
-		INIT_HLIST_HEAD(&ksm->slot_hashtable[i]);
+		INIT_HLIST_HEAD(&profile->slot_hashtable[i]);
 
 	return 0;
 
-err_destroy_ksm:
-	blk_ksm_destroy(ksm);
+err_destroy:
+	blk_crypto_profile_destroy(profile);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL_GPL(blk_ksm_init);
+EXPORT_SYMBOL_GPL(blk_crypto_profile_init);
 
-static void blk_ksm_destroy_callback(void *ksm)
+static void blk_crypto_profile_destroy_callback(void *profile)
 {
-	blk_ksm_destroy(ksm);
+	blk_crypto_profile_destroy(profile);
 }
 
 /**
- * devm_blk_ksm_init() - Resource-managed blk_ksm_init()
- * @dev: The device which owns the blk_keyslot_manager.
- * @ksm: The blk_keyslot_manager to initialize.
- * @num_slots: The number of key slots to manage.
+ * devm_blk_crypto_profile_init() - Resource-managed blk_crypto_profile_init()
+ * @dev: the device which owns the blk_crypto_profile
+ * @profile: the blk_crypto_profile to initialize
+ * @num_slots: the number of keyslots
  *
- * Like blk_ksm_init(), but causes blk_ksm_destroy() to be called automatically
- * on driver detach.
+ * Like blk_crypto_profile_init(), but causes blk_crypto_profile_destroy() to be
+ * called automatically on driver detach.
  *
  * Return: 0 on success, or else a negative error code.
  */
-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
-		      unsigned int num_slots)
+int devm_blk_crypto_profile_init(struct device *dev,
+				 struct blk_crypto_profile *profile,
+				 unsigned int num_slots)
 {
-	int err = blk_ksm_init(ksm, num_slots);
+	int err = blk_crypto_profile_init(profile, num_slots);
 
 	if (err)
 		return err;
 
-	return devm_add_action_or_reset(dev, blk_ksm_destroy_callback, ksm);
+	return devm_add_action_or_reset(dev,
+					blk_crypto_profile_destroy_callback,
+					profile);
 }
-EXPORT_SYMBOL_GPL(devm_blk_ksm_init);
+EXPORT_SYMBOL_GPL(devm_blk_crypto_profile_init);
 
 static inline struct hlist_head *
-blk_ksm_hash_bucket_for_key(struct blk_keyslot_manager *ksm,
-			    const struct blk_crypto_key *key)
+blk_crypto_hash_bucket_for_key(struct blk_crypto_profile *profile,
+			       const struct blk_crypto_key *key)
 {
-	return &ksm->slot_hashtable[hash_ptr(key, ksm->log_slot_ht_size)];
+	return &profile->slot_hashtable[
+			hash_ptr(key, profile->log_slot_ht_size)];
 }
 
-static void blk_ksm_remove_slot_from_lru_list(struct blk_ksm_keyslot *slot)
+static void
+blk_crypto_remove_slot_from_lru_list(struct blk_crypto_keyslot *slot)
 {
-	struct blk_keyslot_manager *ksm = slot->ksm;
+	struct blk_crypto_profile *profile = slot->profile;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ksm->idle_slots_lock, flags);
+	spin_lock_irqsave(&profile->idle_slots_lock, flags);
 	list_del(&slot->idle_slot_node);
-	spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
+	spin_unlock_irqrestore(&profile->idle_slots_lock, flags);
 }
 
-static struct blk_ksm_keyslot *blk_ksm_find_keyslot(
-					struct blk_keyslot_manager *ksm,
-					const struct blk_crypto_key *key)
+static struct blk_crypto_keyslot *
+blk_crypto_find_keyslot(struct blk_crypto_profile *profile,
+			const struct blk_crypto_key *key)
 {
-	const struct hlist_head *head = blk_ksm_hash_bucket_for_key(ksm, key);
-	struct blk_ksm_keyslot *slotp;
+	const struct hlist_head *head =
+		blk_crypto_hash_bucket_for_key(profile, key);
+	struct blk_crypto_keyslot *slotp;
 
 	hlist_for_each_entry(slotp, head, hash_node) {
 		if (slotp->key == key)
@@ -193,68 +193,79 @@ static struct blk_ksm_keyslot *blk_ksm_find_keyslot(
 	return NULL;
 }
 
-static struct blk_ksm_keyslot *blk_ksm_find_and_grab_keyslot(
-					struct blk_keyslot_manager *ksm,
-					const struct blk_crypto_key *key)
+static struct blk_crypto_keyslot *
+blk_crypto_find_and_grab_keyslot(struct blk_crypto_profile *profile,
+				 const struct blk_crypto_key *key)
 {
-	struct blk_ksm_keyslot *slot;
+	struct blk_crypto_keyslot *slot;
 
-	slot = blk_ksm_find_keyslot(ksm, key);
+	slot = blk_crypto_find_keyslot(profile, key);
 	if (!slot)
 		return NULL;
 	if (atomic_inc_return(&slot->slot_refs) == 1) {
 		/* Took first reference to this slot; remove it from LRU list */
-		blk_ksm_remove_slot_from_lru_list(slot);
+		blk_crypto_remove_slot_from_lru_list(slot);
 	}
 	return slot;
 }
 
-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot)
+/**
+ * blk_crypto_keyslot_index() - Get the index of a keyslot
+ * @slot: a keyslot that blk_crypto_get_keyslot() returned
+ *
+ * Return: the 0-based index of the keyslot within the device's keyslots.
+ */
+unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot)
 {
-	return slot - slot->ksm->slots;
+	return slot - slot->profile->slots;
 }
-EXPORT_SYMBOL_GPL(blk_ksm_get_slot_idx);
+EXPORT_SYMBOL_GPL(blk_crypto_keyslot_index);
 
 /**
- * blk_ksm_get_slot_for_key() - Program a key into a keyslot.
- * @ksm: The keyslot manager to program the key into.
- * @key: Pointer to the key object to program, including the raw key, crypto
- *	 mode, and data unit size.
- * @slot_ptr: A pointer to return the pointer of the allocated keyslot.
+ * blk_crypto_get_keyslot() - Get a keyslot for a key, if needed.
+ * @profile: the crypto profile of the device the key will be used on
+ * @key: the key that will be used
+ * @slot_ptr: If a keyslot is allocated, an opaque pointer to the keyslot struct
+ *	      will be stored here; otherwise NULL will be stored here.
+ *
+ * If the device has keyslots, this gets a keyslot that's been programmed with
+ * the specified key.  If the key is already in a slot, this reuses it;
+ * otherwise this waits for a slot to become idle and programs the key into it.
  *
- * Get a keyslot that's been programmed with the specified key.  If one already
- * exists, return it with incremented refcount.  Otherwise, wait for a keyslot
- * to become idle and program it.
+ * This must be paired with a call to blk_crypto_put_keyslot().
  *
- * Context: Process context. Takes and releases ksm->lock.
- * Return: BLK_STS_OK on success (and keyslot is set to the pointer of the
- *	   allocated keyslot), or some other blk_status_t otherwise (and
- *	   keyslot is set to NULL).
+ * Context: Process context. Takes and releases profile->lock.
+ * Return: BLK_STS_OK on success, meaning that either a keyslot was allocated or
+ *	   one wasn't needed; or a blk_status_t error on failure.
  */
-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
-				      const struct blk_crypto_key *key,
-				      struct blk_ksm_keyslot **slot_ptr)
+blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
+				    const struct blk_crypto_key *key,
+				    struct blk_crypto_keyslot **slot_ptr)
 {
-	struct blk_ksm_keyslot *slot;
+	struct blk_crypto_keyslot *slot;
 	int slot_idx;
 	int err;
 
 	*slot_ptr = NULL;
 
-	if (blk_ksm_is_passthrough(ksm))
+	/*
+	 * If the device has no concept of "keyslots", then there is no need to
+	 * get one.
+	 */
+	if (profile->num_slots == 0)
 		return BLK_STS_OK;
 
-	down_read(&ksm->lock);
-	slot = blk_ksm_find_and_grab_keyslot(ksm, key);
-	up_read(&ksm->lock);
+	down_read(&profile->lock);
+	slot = blk_crypto_find_and_grab_keyslot(profile, key);
+	up_read(&profile->lock);
 	if (slot)
 		goto success;
 
 	for (;;) {
-		blk_ksm_hw_enter(ksm);
-		slot = blk_ksm_find_and_grab_keyslot(ksm, key);
+		blk_crypto_hw_enter(profile);
+		slot = blk_crypto_find_and_grab_keyslot(profile, key);
 		if (slot) {
-			blk_ksm_hw_exit(ksm);
+			blk_crypto_hw_exit(profile);
 			goto success;
 		}
 
@@ -262,22 +273,22 @@ blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
 		 * If we're here, that means there wasn't a slot that was
 		 * already programmed with the key. So try to program it.
 		 */
-		if (!list_empty(&ksm->idle_slots))
+		if (!list_empty(&profile->idle_slots))
 			break;
 
-		blk_ksm_hw_exit(ksm);
-		wait_event(ksm->idle_slots_wait_queue,
-			   !list_empty(&ksm->idle_slots));
+		blk_crypto_hw_exit(profile);
+		wait_event(profile->idle_slots_wait_queue,
+			   !list_empty(&profile->idle_slots));
 	}
 
-	slot = list_first_entry(&ksm->idle_slots, struct blk_ksm_keyslot,
+	slot = list_first_entry(&profile->idle_slots, struct blk_crypto_keyslot,
 				idle_slot_node);
-	slot_idx = blk_ksm_get_slot_idx(slot);
+	slot_idx = blk_crypto_keyslot_index(slot);
 
-	err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot_idx);
+	err = profile->ll_ops.keyslot_program(profile, key, slot_idx);
 	if (err) {
-		wake_up(&ksm->idle_slots_wait_queue);
-		blk_ksm_hw_exit(ksm);
+		wake_up(&profile->idle_slots_wait_queue);
+		blk_crypto_hw_exit(profile);
 		return errno_to_blk_status(err);
 	}
 
@@ -285,97 +296,98 @@ blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
 	if (slot->key)
 		hlist_del(&slot->hash_node);
 	slot->key = key;
-	hlist_add_head(&slot->hash_node, blk_ksm_hash_bucket_for_key(ksm, key));
+	hlist_add_head(&slot->hash_node,
+		       blk_crypto_hash_bucket_for_key(profile, key));
 
 	atomic_set(&slot->slot_refs, 1);
 
-	blk_ksm_remove_slot_from_lru_list(slot);
+	blk_crypto_remove_slot_from_lru_list(slot);
 
-	blk_ksm_hw_exit(ksm);
+	blk_crypto_hw_exit(profile);
 success:
 	*slot_ptr = slot;
 	return BLK_STS_OK;
 }
 
 /**
- * blk_ksm_put_slot() - Release a reference to a slot
- * @slot: The keyslot to release the reference of.
+ * blk_crypto_put_keyslot() - Release a reference to a keyslot
+ * @slot: The keyslot to release the reference of (may be NULL).
  *
  * Context: Any context.
  */
-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot)
+void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot)
 {
-	struct blk_keyslot_manager *ksm;
+	struct blk_crypto_profile *profile;
 	unsigned long flags;
 
 	if (!slot)
 		return;
 
-	ksm = slot->ksm;
+	profile = slot->profile;
 
 	if (atomic_dec_and_lock_irqsave(&slot->slot_refs,
-					&ksm->idle_slots_lock, flags)) {
-		list_add_tail(&slot->idle_slot_node, &ksm->idle_slots);
-		spin_unlock_irqrestore(&ksm->idle_slots_lock, flags);
-		wake_up(&ksm->idle_slots_wait_queue);
+					&profile->idle_slots_lock, flags)) {
+		list_add_tail(&slot->idle_slot_node, &profile->idle_slots);
+		spin_unlock_irqrestore(&profile->idle_slots_lock, flags);
+		wake_up(&profile->idle_slots_wait_queue);
 	}
 }
 
 /**
- * blk_ksm_crypto_cfg_supported() - Find out if a crypto configuration is
- *				    supported by a ksm.
- * @ksm: The keyslot manager to check
- * @cfg: The crypto configuration to check for.
- *
- * Checks for crypto_mode/data unit size/dun bytes support.
+ * __blk_crypto_cfg_supported() - Check whether the given crypto profile
+ *				  supports the given crypto configuration.
+ * @profile: the crypto profile to check
+ * @cfg: the crypto configuration to check for
  *
- * Return: Whether or not this ksm supports the specified crypto config.
+ * Return: %true if @profile supports the given @cfg.
  */
-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
-				  const struct blk_crypto_config *cfg)
+bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
+				const struct blk_crypto_config *cfg)
 {
-	if (!ksm)
+	if (!profile)
 		return false;
-	if (!(ksm->crypto_modes_supported[cfg->crypto_mode] &
-	      cfg->data_unit_size))
+	if (!(profile->modes_supported[cfg->crypto_mode] & cfg->data_unit_size))
 		return false;
-	if (ksm->max_dun_bytes_supported < cfg->dun_bytes)
+	if (profile->max_dun_bytes_supported < cfg->dun_bytes)
 		return false;
 	return true;
 }
 
 /**
- * blk_ksm_evict_key() - Evict a key from the lower layer device.
- * @ksm: The keyslot manager to evict from
- * @key: The key to evict
+ * __blk_crypto_evict_key() - Evict a key from a device.
+ * @profile: the crypto profile of the device
+ * @key: the key to evict.  It must not still be used in any I/O.
+ *
+ * If the device has keyslots, this finds the keyslot (if any) that contains the
+ * specified key and calls the driver's keyslot_evict function to evict it.
  *
- * Find the keyslot that the specified key was programmed into, and evict that
- * slot from the lower layer device. The slot must not be in use by any
- * in-flight IO when this function is called.
+ * Otherwise, this just calls the driver's keyslot_evict function if it is
+ * implemented, passing just the key (without any particular keyslot).  This
+ * allows layered devices to evict the key from their underlying devices.
  *
- * Context: Process context. Takes and releases ksm->lock.
+ * Context: Process context. Takes and releases profile->lock.
  * Return: 0 on success or if there's no keyslot with the specified key, -EBUSY
  *	   if the keyslot is still in use, or another -errno value on other
  *	   error.
  */
-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
-		      const struct blk_crypto_key *key)
+int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
+			   const struct blk_crypto_key *key)
 {
-	struct blk_ksm_keyslot *slot;
+	struct blk_crypto_keyslot *slot;
 	int err = 0;
 
-	if (blk_ksm_is_passthrough(ksm)) {
-		if (ksm->ksm_ll_ops.keyslot_evict) {
-			blk_ksm_hw_enter(ksm);
-			err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1);
-			blk_ksm_hw_exit(ksm);
+	if (profile->num_slots == 0) {
+		if (profile->ll_ops.keyslot_evict) {
+			blk_crypto_hw_enter(profile);
+			err = profile->ll_ops.keyslot_evict(profile, key, -1);
+			blk_crypto_hw_exit(profile);
 			return err;
 		}
 		return 0;
 	}
 
-	blk_ksm_hw_enter(ksm);
-	slot = blk_ksm_find_keyslot(ksm, key);
+	blk_crypto_hw_enter(profile);
+	slot = blk_crypto_find_keyslot(profile, key);
 	if (!slot)
 		goto out_unlock;
 
@@ -383,8 +395,8 @@ int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
 		err = -EBUSY;
 		goto out_unlock;
 	}
-	err = ksm->ksm_ll_ops.keyslot_evict(ksm, key,
-					    blk_ksm_get_slot_idx(slot));
+	err = profile->ll_ops.keyslot_evict(profile, key,
+					    blk_crypto_keyslot_index(slot));
 	if (err)
 		goto out_unlock;
 
@@ -392,81 +404,84 @@ int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
 	slot->key = NULL;
 	err = 0;
 out_unlock:
-	blk_ksm_hw_exit(ksm);
+	blk_crypto_hw_exit(profile);
 	return err;
 }
 
 /**
- * blk_ksm_reprogram_all_keys() - Re-program all keyslots.
- * @ksm: The keyslot manager
+ * blk_crypto_reprogram_all_keys() - Re-program all keyslots.
+ * @profile: The crypto profile
  *
  * Re-program all keyslots that are supposed to have a key programmed.  This is
  * intended only for use by drivers for hardware that loses its keys on reset.
  *
- * Context: Process context. Takes and releases ksm->lock.
+ * Context: Process context. Takes and releases profile->lock.
  */
-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm)
+void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile)
 {
 	unsigned int slot;
 
-	if (blk_ksm_is_passthrough(ksm))
+	if (profile->num_slots == 0)
 		return;
 
 	/* This is for device initialization, so don't resume the device */
-	down_write(&ksm->lock);
-	for (slot = 0; slot < ksm->num_slots; slot++) {
-		const struct blk_crypto_key *key = ksm->slots[slot].key;
+	down_write(&profile->lock);
+	for (slot = 0; slot < profile->num_slots; slot++) {
+		const struct blk_crypto_key *key = profile->slots[slot].key;
 		int err;
 
 		if (!key)
 			continue;
 
-		err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot);
+		err = profile->ll_ops.keyslot_program(profile, key, slot);
 		WARN_ON(err);
 	}
-	up_write(&ksm->lock);
+	up_write(&profile->lock);
 }
-EXPORT_SYMBOL_GPL(blk_ksm_reprogram_all_keys);
+EXPORT_SYMBOL_GPL(blk_crypto_reprogram_all_keys);
 
-void blk_ksm_destroy(struct blk_keyslot_manager *ksm)
+void blk_crypto_profile_destroy(struct blk_crypto_profile *profile)
 {
-	if (!ksm)
+	if (!profile)
 		return;
-	kvfree(ksm->slot_hashtable);
-	kvfree_sensitive(ksm->slots, sizeof(ksm->slots[0]) * ksm->num_slots);
-	memzero_explicit(ksm, sizeof(*ksm));
+	kvfree(profile->slot_hashtable);
+	kvfree_sensitive(profile->slots,
+			 sizeof(profile->slots[0]) * profile->num_slots);
+	memzero_explicit(profile, sizeof(*profile));
 }
-EXPORT_SYMBOL_GPL(blk_ksm_destroy);
+EXPORT_SYMBOL_GPL(blk_crypto_profile_destroy);
 
-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q)
+bool blk_crypto_register(struct blk_crypto_profile *profile,
+			 struct request_queue *q)
 {
 	if (blk_integrity_queue_supports_integrity(q)) {
 		pr_warn("Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
 		return false;
 	}
-	q->ksm = ksm;
+	q->crypto_profile = profile;
 	return true;
 }
-EXPORT_SYMBOL_GPL(blk_ksm_register);
+EXPORT_SYMBOL_GPL(blk_crypto_register);
 
-void blk_ksm_unregister(struct request_queue *q)
+void blk_crypto_unregister(struct request_queue *q)
 {
-	q->ksm = NULL;
+	q->crypto_profile = NULL;
 }
 
 /**
- * blk_ksm_intersect_modes() - restrict supported modes by child device
- * @parent: The keyslot manager for parent device
- * @child: The keyslot manager for child device, or NULL
+ * blk_crypto_intersect_capabilities() - restrict supported crypto capabilities
+ *					 by child device
+ * @parent: the crypto profile for the parent device
+ * @child: the crypto profile for the child device, or NULL
  *
- * Clear any crypto mode support bits in @parent that aren't set in @child.
- * If @child is NULL, then all parent bits are cleared.
+ * This clears all crypto capabilities in @parent that aren't set in @child.  If
+ * @child is NULL, then this clears all parent capabilities.
  *
- * Only use this when setting up the keyslot manager for a layered device,
- * before it's been exposed yet.
+ * Only use this when setting up the crypto profile for a layered device, before
+ * it's been exposed yet.
  */
-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
-			     const struct blk_keyslot_manager *child)
+void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent,
+				       const struct blk_crypto_profile *child)
 {
 	if (child) {
 		unsigned int i;
@@ -474,73 +489,63 @@ void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
 		parent->max_dun_bytes_supported =
 			min(parent->max_dun_bytes_supported,
 			    child->max_dun_bytes_supported);
-		for (i = 0; i < ARRAY_SIZE(child->crypto_modes_supported);
-		     i++) {
-			parent->crypto_modes_supported[i] &=
-				child->crypto_modes_supported[i];
-		}
+		for (i = 0; i < ARRAY_SIZE(child->modes_supported); i++)
+			parent->modes_supported[i] &= child->modes_supported[i];
 	} else {
 		parent->max_dun_bytes_supported = 0;
-		memset(parent->crypto_modes_supported, 0,
-		       sizeof(parent->crypto_modes_supported));
+		memset(parent->modes_supported, 0,
+		       sizeof(parent->modes_supported));
 	}
 }
-EXPORT_SYMBOL_GPL(blk_ksm_intersect_modes);
+EXPORT_SYMBOL_GPL(blk_crypto_intersect_capabilities);
 
 /**
- * blk_ksm_is_superset() - Check if a KSM supports a superset of crypto modes
- *			   and DUN bytes that another KSM supports. Here,
- *			   "superset" refers to the mathematical meaning of the
- *			   word - i.e. if two KSMs have the *same* capabilities,
- *			   they *are* considered supersets of each other.
- * @ksm_superset: The KSM that we want to verify is a superset
- * @ksm_subset: The KSM that we want to verify is a subset
+ * blk_crypto_has_capabilities() - Check whether @target supports at least all
+ *				   the crypto capabilities that @reference does.
+ * @target: the target profile
+ * @reference: the reference profile
  *
- * Return: True if @ksm_superset supports a superset of the crypto modes and DUN
- *	   bytes that @ksm_subset supports.
+ * Return: %true if @target supports all the crypto capabilities of @reference.
  */
-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
-			 struct blk_keyslot_manager *ksm_subset)
+bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target,
+				 const struct blk_crypto_profile *reference)
 {
 	int i;
 
-	if (!ksm_subset)
+	if (!reference)
 		return true;
 
-	if (!ksm_superset)
+	if (!target)
 		return false;
 
-	for (i = 0; i < ARRAY_SIZE(ksm_superset->crypto_modes_supported); i++) {
-		if (ksm_subset->crypto_modes_supported[i] &
-		    (~ksm_superset->crypto_modes_supported[i])) {
+	for (i = 0; i < ARRAY_SIZE(target->modes_supported); i++) {
+		if (reference->modes_supported[i] & ~target->modes_supported[i])
 			return false;
-		}
 	}
 
-	if (ksm_subset->max_dun_bytes_supported >
-	    ksm_superset->max_dun_bytes_supported) {
+	if (reference->max_dun_bytes_supported >
+	    target->max_dun_bytes_supported)
 		return false;
-	}
 
 	return true;
 }
-EXPORT_SYMBOL_GPL(blk_ksm_is_superset);
+EXPORT_SYMBOL_GPL(blk_crypto_has_capabilities);
 
 /**
- * blk_ksm_update_capabilities() - Update the restrictions of a KSM to those of
- *				   another KSM
- * @target_ksm: The KSM whose restrictions to update.
- * @reference_ksm: The KSM to whose restrictions this function will update
- *		   @target_ksm's restrictions to.
+ * blk_crypto_update_capabilities() - Update the capabilities of a crypto
+ *				      profile to match those of another crypto
+ *				      profile.
+ * @dst: The crypto profile whose capabilities to update.
+ * @src: The crypto profile whose capabilities this function will update @dst's
+ *	 capabilities to.
  *
  * Blk-crypto requires that crypto capabilities that were
  * advertised when a bio was created continue to be supported by the
  * device until that bio is ended. This is turn means that a device cannot
  * shrink its advertised crypto capabilities without any explicit
  * synchronization with upper layers. So if there's no such explicit
- * synchronization, @reference_ksm must support all the crypto capabilities that
- * @target_ksm does
- * (i.e. we need blk_ksm_is_superset(@reference_ksm, @target_ksm) == true).
+ * synchronization, @src must support all the crypto capabilities that
+ * @dst does (i.e. we need blk_crypto_has_capabilities(@src, @dst)).
  *
  * Note also that as long as the crypto capabilities are being expanded, the
  * order of updates becoming visible is not important because it's alright
@@ -549,31 +554,12 @@ EXPORT_SYMBOL_GPL(blk_ksm_is_superset);
  * might result in blk-crypto-fallback being used if available, or the bio being
  * failed).
  */
-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
-				 struct blk_keyslot_manager *reference_ksm)
+void blk_crypto_update_capabilities(struct blk_crypto_profile *dst,
+				    const struct blk_crypto_profile *src)
 {
-	memcpy(target_ksm->crypto_modes_supported,
-	       reference_ksm->crypto_modes_supported,
-	       sizeof(target_ksm->crypto_modes_supported));
+	memcpy(dst->modes_supported, src->modes_supported,
+	       sizeof(dst->modes_supported));
 
-	target_ksm->max_dun_bytes_supported =
-				reference_ksm->max_dun_bytes_supported;
-}
-EXPORT_SYMBOL_GPL(blk_ksm_update_capabilities);
-
-/**
- * blk_ksm_init_passthrough() - Init a passthrough keyslot manager
- * @ksm: The keyslot manager to init
- *
- * Initialize a passthrough keyslot manager.
- * Called by e.g. storage drivers to set up a keyslot manager in their
- * request_queue, when the storage driver wants to manage its keys by itself.
- * This is useful for inline encryption hardware that doesn't have the concept
- * of keyslots, and for layered devices.
- */
-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm)
-{
-	memset(ksm, 0, sizeof(*ksm));
-	init_rwsem(&ksm->lock);
+	dst->max_dun_bytes_supported = src->max_dun_bytes_supported;
 }
-EXPORT_SYMBOL_GPL(blk_ksm_init_passthrough);
+EXPORT_SYMBOL_GPL(blk_crypto_update_capabilities);
diff --git a/block/blk-crypto.c b/block/blk-crypto.c
index 76ce7a5d2676..ec9efeeeca91 100644
--- a/block/blk-crypto.c
+++ b/block/blk-crypto.c
@@ -218,8 +218,9 @@ static bool bio_crypt_check_alignment(struct bio *bio)
 
 blk_status_t __blk_crypto_init_request(struct request *rq)
 {
-	return blk_ksm_get_slot_for_key(rq->q->ksm, rq->crypt_ctx->bc_key,
-					&rq->crypt_keyslot);
+	return blk_crypto_get_keyslot(rq->q->crypto_profile,
+				      rq->crypt_ctx->bc_key,
+				      &rq->crypt_keyslot);
 }
 
 /**
@@ -233,7 +234,7 @@ blk_status_t __blk_crypto_init_request(struct request *rq)
  */
 void __blk_crypto_free_request(struct request *rq)
 {
-	blk_ksm_put_slot(rq->crypt_keyslot);
+	blk_crypto_put_keyslot(rq->crypt_keyslot);
 	mempool_free(rq->crypt_ctx, bio_crypt_ctx_pool);
 	blk_crypto_rq_set_defaults(rq);
 }
@@ -264,6 +265,7 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
 {
 	struct bio *bio = *bio_ptr;
 	const struct blk_crypto_key *bc_key = bio->bi_crypt_context->bc_key;
+	struct blk_crypto_profile *profile;
 
 	/* Error if bio has no data. */
 	if (WARN_ON_ONCE(!bio_has_data(bio))) {
@@ -280,8 +282,8 @@ bool __blk_crypto_bio_prep(struct bio **bio_ptr)
 	 * Success if device supports the encryption context, or if we succeeded
 	 * in falling back to the crypto API.
 	 */
-	if (blk_ksm_crypto_cfg_supported(bdev_get_queue(bio->bi_bdev)->ksm,
-					 &bc_key->crypto_cfg))
+	profile = bdev_get_queue(bio->bi_bdev)->crypto_profile;
+	if (__blk_crypto_cfg_supported(profile, &bc_key->crypto_cfg))
 		return true;
 
 	if (blk_crypto_fallback_bio_prep(bio_ptr))
@@ -357,7 +359,7 @@ bool blk_crypto_config_supported(struct request_queue *q,
 				 const struct blk_crypto_config *cfg)
 {
 	return IS_ENABLED(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) ||
-	       blk_ksm_crypto_cfg_supported(q->ksm, cfg);
+	       __blk_crypto_cfg_supported(q->crypto_profile, cfg);
 }
 
 /**
@@ -378,7 +380,7 @@ bool blk_crypto_config_supported(struct request_queue *q,
 int blk_crypto_start_using_key(const struct blk_crypto_key *key,
 			       struct request_queue *q)
 {
-	if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
+	if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg))
 		return 0;
 	return blk_crypto_fallback_start_using_mode(key->crypto_cfg.crypto_mode);
 }
@@ -394,18 +396,17 @@ int blk_crypto_start_using_key(const struct blk_crypto_key *key,
  * evicted from any hardware that it might have been programmed into.  The key
  * must not be in use by any in-flight IO when this function is called.
  *
- * Return: 0 on success or if key is not present in the q's ksm, -err on error.
+ * Return: 0 on success or if the key wasn't in any keyslot; -errno on error.
  */
 int blk_crypto_evict_key(struct request_queue *q,
 			 const struct blk_crypto_key *key)
 {
-	if (blk_ksm_crypto_cfg_supported(q->ksm, &key->crypto_cfg))
-		return blk_ksm_evict_key(q->ksm, key);
+	if (__blk_crypto_cfg_supported(q->crypto_profile, &key->crypto_cfg))
+		return __blk_crypto_evict_key(q->crypto_profile, key);
 
 	/*
-	 * If the request queue's associated inline encryption hardware didn't
-	 * have support for the key, then the key might have been programmed
-	 * into the fallback keyslot manager, so try to evict from there.
+	 * If the request_queue didn't support the key, then blk-crypto-fallback
+	 * may have been used, so try to evict the key from blk-crypto-fallback.
 	 */
 	return blk_crypto_fallback_evict_key(key);
 }
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index cef534a7cbc9..d670d54e5f7a 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -409,9 +409,9 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template
 	blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue);
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
-	if (disk->queue->ksm) {
+	if (disk->queue->crypto_profile) {
 		pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n");
-		blk_ksm_unregister(disk->queue);
+		blk_crypto_unregister(disk->queue);
 	}
 #endif
 }
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 841ed87999e7..b855fef4f38a 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -200,7 +200,7 @@ struct dm_table {
 	struct dm_md_mempools *mempools;
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
-	struct blk_keyslot_manager *ksm;
+	struct blk_crypto_profile *crypto_profile;
 #endif
 };
 
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 1fa4d5582dca..8b0f27a745d9 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -170,7 +170,7 @@ static void free_devices(struct list_head *devices, struct mapped_device *md)
 	}
 }
 
-static void dm_table_destroy_keyslot_manager(struct dm_table *t);
+static void dm_table_destroy_crypto_profile(struct dm_table *t);
 
 void dm_table_destroy(struct dm_table *t)
 {
@@ -200,7 +200,7 @@ void dm_table_destroy(struct dm_table *t)
 
 	dm_free_md_mempools(t->mempools);
 
-	dm_table_destroy_keyslot_manager(t);
+	dm_table_destroy_crypto_profile(t);
 
 	kfree(t);
 }
@@ -1187,8 +1187,8 @@ static int dm_table_register_integrity(struct dm_table *t)
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 
-struct dm_keyslot_manager {
-	struct blk_keyslot_manager ksm;
+struct dm_crypto_profile {
+	struct blk_crypto_profile profile;
 	struct mapped_device *md;
 };
 
@@ -1214,13 +1214,11 @@ static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev,
  * When an inline encryption key is evicted from a device-mapper device, evict
  * it from all the underlying devices.
  */
-static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
+static int dm_keyslot_evict(struct blk_crypto_profile *profile,
 			    const struct blk_crypto_key *key, unsigned int slot)
 {
-	struct dm_keyslot_manager *dksm = container_of(ksm,
-						       struct dm_keyslot_manager,
-						       ksm);
-	struct mapped_device *md = dksm->md;
+	struct mapped_device *md =
+		container_of(profile, struct dm_crypto_profile, profile)->md;
 	struct dm_keyslot_evict_args args = { key };
 	struct dm_table *t;
 	int srcu_idx;
@@ -1240,150 +1238,148 @@ static int dm_keyslot_evict(struct blk_keyslot_manager *ksm,
 	return args.err;
 }
 
-static const struct blk_ksm_ll_ops dm_ksm_ll_ops = {
-	.keyslot_evict = dm_keyslot_evict,
-};
-
-static int device_intersect_crypto_modes(struct dm_target *ti,
-					 struct dm_dev *dev, sector_t start,
-					 sector_t len, void *data)
+static int
+device_intersect_crypto_capabilities(struct dm_target *ti, struct dm_dev *dev,
+				     sector_t start, sector_t len, void *data)
 {
-	struct blk_keyslot_manager *parent = data;
-	struct blk_keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm;
+	struct blk_crypto_profile *parent = data;
+	struct blk_crypto_profile *child =
+		bdev_get_queue(dev->bdev)->crypto_profile;
 
-	blk_ksm_intersect_modes(parent, child);
+	blk_crypto_intersect_capabilities(parent, child);
 	return 0;
 }
 
-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
+void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
 {
-	struct dm_keyslot_manager *dksm = container_of(ksm,
-						       struct dm_keyslot_manager,
-						       ksm);
+	struct dm_crypto_profile *dmcp = container_of(profile,
+						      struct dm_crypto_profile,
+						      profile);
 
-	if (!ksm)
+	if (!profile)
 		return;
 
-	blk_ksm_destroy(ksm);
-	kfree(dksm);
+	blk_crypto_profile_destroy(profile);
+	kfree(dmcp);
 }
 
-static void dm_table_destroy_keyslot_manager(struct dm_table *t)
+static void dm_table_destroy_crypto_profile(struct dm_table *t)
 {
-	dm_destroy_keyslot_manager(t->ksm);
-	t->ksm = NULL;
+	dm_destroy_crypto_profile(t->crypto_profile);
+	t->crypto_profile = NULL;
 }
 
 /*
- * Constructs and initializes t->ksm with a keyslot manager that
- * represents the common set of crypto capabilities of the devices
- * described by the dm_table. However, if the constructed keyslot
- * manager does not support a superset of the crypto capabilities
- * supported by the current keyslot manager of the mapped_device,
- * it returns an error instead, since we don't support restricting
- * crypto capabilities on table changes. Finally, if the constructed
- * keyslot manager doesn't actually support any crypto modes at all,
- * it just returns NULL.
+ * Constructs and initializes t->crypto_profile with a crypto profile that
+ * represents the common set of crypto capabilities of the devices described by
+ * the dm_table.  However, if the constructed crypto profile doesn't support all
+ * crypto capabilities that are supported by the current mapped_device, it
+ * returns an error instead, since we don't support removing crypto capabilities
+ * on table changes.  Finally, if the constructed crypto profile is "empty" (has
+ * no crypto capabilities at all), it just sets t->crypto_profile to NULL.
  */
-static int dm_table_construct_keyslot_manager(struct dm_table *t)
+static int dm_table_construct_crypto_profile(struct dm_table *t)
 {
-	struct dm_keyslot_manager *dksm;
-	struct blk_keyslot_manager *ksm;
+	struct dm_crypto_profile *dmcp;
+	struct blk_crypto_profile *profile;
 	struct dm_target *ti;
 	unsigned int i;
-	bool ksm_is_empty = true;
+	bool empty_profile = true;
 
-	dksm = kmalloc(sizeof(*dksm), GFP_KERNEL);
-	if (!dksm)
+	dmcp = kmalloc(sizeof(*dmcp), GFP_KERNEL);
+	if (!dmcp)
 		return -ENOMEM;
-	dksm->md = t->md;
+	dmcp->md = t->md;
 
-	ksm = &dksm->ksm;
-	blk_ksm_init_passthrough(ksm);
-	ksm->ksm_ll_ops = dm_ksm_ll_ops;
-	ksm->max_dun_bytes_supported = UINT_MAX;
-	memset(ksm->crypto_modes_supported, 0xFF,
-	       sizeof(ksm->crypto_modes_supported));
+	profile = &dmcp->profile;
+	blk_crypto_profile_init(profile, 0);
+	profile->ll_ops.keyslot_evict = dm_keyslot_evict;
+	profile->max_dun_bytes_supported = UINT_MAX;
+	memset(profile->modes_supported, 0xFF,
+	       sizeof(profile->modes_supported));
 
 	for (i = 0; i < dm_table_get_num_targets(t); i++) {
 		ti = dm_table_get_target(t, i);
 
 		if (!dm_target_passes_crypto(ti->type)) {
-			blk_ksm_intersect_modes(ksm, NULL);
+			blk_crypto_intersect_capabilities(profile, NULL);
 			break;
 		}
 		if (!ti->type->iterate_devices)
 			continue;
-		ti->type->iterate_devices(ti, device_intersect_crypto_modes,
-					  ksm);
+		ti->type->iterate_devices(ti,
+					  device_intersect_crypto_capabilities,
+					  profile);
 	}
 
-	if (t->md->queue && !blk_ksm_is_superset(ksm, t->md->queue->ksm)) {
+	if (t->md->queue &&
+	    !blk_crypto_has_capabilities(profile,
+					 t->md->queue->crypto_profile)) {
 		DMWARN("Inline encryption capabilities of new DM table were more restrictive than the old table's. This is not supported!");
-		dm_destroy_keyslot_manager(ksm);
+		dm_destroy_crypto_profile(profile);
 		return -EINVAL;
 	}
 
 	/*
-	 * If the new KSM doesn't actually support any crypto modes, we may as
-	 * well represent it with a NULL ksm.
+	 * If the new profile doesn't actually support any crypto capabilities,
+	 * we may as well represent it with a NULL profile.
 	 */
-	ksm_is_empty = true;
-	for (i = 0; i < ARRAY_SIZE(ksm->crypto_modes_supported); i++) {
-		if (ksm->crypto_modes_supported[i]) {
-			ksm_is_empty = false;
+	for (i = 0; i < ARRAY_SIZE(profile->modes_supported); i++) {
+		if (profile->modes_supported[i]) {
+			empty_profile = false;
 			break;
 		}
 	}
 
-	if (ksm_is_empty) {
-		dm_destroy_keyslot_manager(ksm);
-		ksm = NULL;
+	if (empty_profile) {
+		dm_destroy_crypto_profile(profile);
+		profile = NULL;
 	}
 
 	/*
-	 * t->ksm is only set temporarily while the table is being set
-	 * up, and it gets set to NULL after the capabilities have
-	 * been transferred to the request_queue.
+	 * t->crypto_profile is only set temporarily while the table is being
+	 * set up, and it gets set to NULL after the profile has been
+	 * transferred to the request_queue.
 	 */
-	t->ksm = ksm;
+	t->crypto_profile = profile;
 
 	return 0;
 }
 
-static void dm_update_keyslot_manager(struct request_queue *q,
-				      struct dm_table *t)
+static void dm_update_crypto_profile(struct request_queue *q,
+				     struct dm_table *t)
 {
-	if (!t->ksm)
+	if (!t->crypto_profile)
 		return;
 
-	/* Make the ksm less restrictive */
-	if (!q->ksm) {
-		blk_ksm_register(t->ksm, q);
+	/* Make the crypto profile less restrictive. */
+	if (!q->crypto_profile) {
+		blk_crypto_register(t->crypto_profile, q);
 	} else {
-		blk_ksm_update_capabilities(q->ksm, t->ksm);
-		dm_destroy_keyslot_manager(t->ksm);
+		blk_crypto_update_capabilities(q->crypto_profile,
+					       t->crypto_profile);
+		dm_destroy_crypto_profile(t->crypto_profile);
 	}
-	t->ksm = NULL;
+	t->crypto_profile = NULL;
 }
 
 #else /* CONFIG_BLK_INLINE_ENCRYPTION */
 
-static int dm_table_construct_keyslot_manager(struct dm_table *t)
+static int dm_table_construct_crypto_profile(struct dm_table *t)
 {
 	return 0;
 }
 
-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm)
+void dm_destroy_crypto_profile(struct blk_crypto_profile *profile)
 {
 }
 
-static void dm_table_destroy_keyslot_manager(struct dm_table *t)
+static void dm_table_destroy_crypto_profile(struct dm_table *t)
 {
 }
 
-static void dm_update_keyslot_manager(struct request_queue *q,
-				      struct dm_table *t)
+static void dm_update_crypto_profile(struct request_queue *q,
+				     struct dm_table *t)
 {
 }
 
@@ -1415,9 +1411,9 @@ int dm_table_complete(struct dm_table *t)
 		return r;
 	}
 
-	r = dm_table_construct_keyslot_manager(t);
+	r = dm_table_construct_crypto_profile(t);
 	if (r) {
-		DMERR("could not construct keyslot manager.");
+		DMERR("could not construct crypto profile.");
 		return r;
 	}
 
@@ -2071,7 +2067,7 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 			return r;
 	}
 
-	dm_update_keyslot_manager(q, t);
+	dm_update_crypto_profile(q, t);
 	disk_update_readahead(t->md->disk);
 
 	return 0;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4184fd8ccb08..8b91f4f0e053 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1663,14 +1663,14 @@ static const struct dax_operations dm_dax_ops;
 static void dm_wq_work(struct work_struct *work);
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
-static void dm_queue_destroy_keyslot_manager(struct request_queue *q)
+static void dm_queue_destroy_crypto_profile(struct request_queue *q)
 {
-	dm_destroy_keyslot_manager(q->ksm);
+	dm_destroy_crypto_profile(q->crypto_profile);
 }
 
 #else /* CONFIG_BLK_INLINE_ENCRYPTION */
 
-static inline void dm_queue_destroy_keyslot_manager(struct request_queue *q)
+static inline void dm_queue_destroy_crypto_profile(struct request_queue *q)
 {
 }
 #endif /* !CONFIG_BLK_INLINE_ENCRYPTION */
@@ -1696,7 +1696,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 			dm_sysfs_exit(md);
 			del_gendisk(md->disk);
 		}
-		dm_queue_destroy_keyslot_manager(md->queue);
+		dm_queue_destroy_crypto_profile(md->queue);
 		blk_cleanup_disk(md->disk);
 	}
 
diff --git a/drivers/mmc/core/crypto.c b/drivers/mmc/core/crypto.c
index 67557808cada..fec4fbf16a5b 100644
--- a/drivers/mmc/core/crypto.c
+++ b/drivers/mmc/core/crypto.c
@@ -16,13 +16,13 @@ void mmc_crypto_set_initial_state(struct mmc_host *host)
 {
 	/* Reset might clear all keys, so reprogram all the keys. */
 	if (host->caps2 & MMC_CAP2_CRYPTO)
-		blk_ksm_reprogram_all_keys(&host->ksm);
+		blk_crypto_reprogram_all_keys(&host->crypto_profile);
 }
 
 void mmc_crypto_setup_queue(struct request_queue *q, struct mmc_host *host)
 {
 	if (host->caps2 & MMC_CAP2_CRYPTO)
-		blk_ksm_register(&host->ksm, q);
+		blk_crypto_register(&host->crypto_profile, q);
 }
 EXPORT_SYMBOL_GPL(mmc_crypto_setup_queue);
 
@@ -30,12 +30,15 @@ void mmc_crypto_prepare_req(struct mmc_queue_req *mqrq)
 {
 	struct request *req = mmc_queue_req_to_req(mqrq);
 	struct mmc_request *mrq = &mqrq->brq.mrq;
+	struct blk_crypto_keyslot *keyslot;
 
 	if (!req->crypt_ctx)
 		return;
 
 	mrq->crypto_ctx = req->crypt_ctx;
-	if (req->crypt_keyslot)
-		mrq->crypto_key_slot = blk_ksm_get_slot_idx(req->crypt_keyslot);
+
+	keyslot = req->crypt_keyslot;
+	if (keyslot)
+		mrq->crypto_key_slot = blk_crypto_keyslot_index(keyslot);
 }
 EXPORT_SYMBOL_GPL(mmc_crypto_prepare_req);
diff --git a/drivers/mmc/host/cqhci-crypto.c b/drivers/mmc/host/cqhci-crypto.c
index 628bbfaf8312..d5f4b6972f63 100644
--- a/drivers/mmc/host/cqhci-crypto.c
+++ b/drivers/mmc/host/cqhci-crypto.c
@@ -23,9 +23,10 @@ static const struct cqhci_crypto_alg_entry {
 };
 
 static inline struct cqhci_host *
-cqhci_host_from_ksm(struct blk_keyslot_manager *ksm)
+cqhci_host_from_crypto_profile(struct blk_crypto_profile *profile)
 {
-	struct mmc_host *mmc = container_of(ksm, struct mmc_host, ksm);
+	struct mmc_host *mmc =
+		container_of(profile, struct mmc_host, crypto_profile);
 
 	return mmc->cqe_private;
 }
@@ -57,12 +58,12 @@ static int cqhci_crypto_program_key(struct cqhci_host *cq_host,
 	return 0;
 }
 
-static int cqhci_crypto_keyslot_program(struct blk_keyslot_manager *ksm,
+static int cqhci_crypto_keyslot_program(struct blk_crypto_profile *profile,
 					const struct blk_crypto_key *key,
 					unsigned int slot)
 
 {
-	struct cqhci_host *cq_host = cqhci_host_from_ksm(ksm);
+	struct cqhci_host *cq_host = cqhci_host_from_crypto_profile(profile);
 	const union cqhci_crypto_cap_entry *ccap_array =
 		cq_host->crypto_cap_array;
 	const struct cqhci_crypto_alg_entry *alg =
@@ -115,11 +116,11 @@ static int cqhci_crypto_clear_keyslot(struct cqhci_host *cq_host, int slot)
 	return cqhci_crypto_program_key(cq_host, &cfg, slot);
 }
 
-static int cqhci_crypto_keyslot_evict(struct blk_keyslot_manager *ksm,
+static int cqhci_crypto_keyslot_evict(struct blk_crypto_profile *profile,
 				      const struct blk_crypto_key *key,
 				      unsigned int slot)
 {
-	struct cqhci_host *cq_host = cqhci_host_from_ksm(ksm);
+	struct cqhci_host *cq_host = cqhci_host_from_crypto_profile(profile);
 
 	return cqhci_crypto_clear_keyslot(cq_host, slot);
 }
@@ -132,7 +133,7 @@ static int cqhci_crypto_keyslot_evict(struct blk_keyslot_manager *ksm,
  * "enabled" when these are called, i.e. CQHCI_ENABLE might not be set in the
  * CQHCI_CFG register.  But the hardware allows that.
  */
-static const struct blk_ksm_ll_ops cqhci_ksm_ops = {
+static const struct blk_crypto_ll_ops cqhci_crypto_ops = {
 	.keyslot_program	= cqhci_crypto_keyslot_program,
 	.keyslot_evict		= cqhci_crypto_keyslot_evict,
 };
@@ -157,8 +158,8 @@ cqhci_find_blk_crypto_mode(union cqhci_crypto_cap_entry cap)
  *
  * If the driver previously set MMC_CAP2_CRYPTO and the CQE declares
  * CQHCI_CAP_CS, initialize the crypto support.  This involves reading the
- * crypto capability registers, initializing the keyslot manager, clearing all
- * keyslots, and enabling 128-bit task descriptors.
+ * crypto capability registers, initializing the blk_crypto_profile, clearing
+ * all keyslots, and enabling 128-bit task descriptors.
  *
  * Return: 0 if crypto was initialized or isn't supported; whether
  *	   MMC_CAP2_CRYPTO remains set indicates which one of those cases it is.
@@ -168,7 +169,7 @@ int cqhci_crypto_init(struct cqhci_host *cq_host)
 {
 	struct mmc_host *mmc = cq_host->mmc;
 	struct device *dev = mmc_dev(mmc);
-	struct blk_keyslot_manager *ksm = &mmc->ksm;
+	struct blk_crypto_profile *profile = &mmc->crypto_profile;
 	unsigned int num_keyslots;
 	unsigned int cap_idx;
 	enum blk_crypto_mode_num blk_mode_num;
@@ -199,15 +200,15 @@ int cqhci_crypto_init(struct cqhci_host *cq_host)
 	 */
 	num_keyslots = cq_host->crypto_capabilities.config_count + 1;
 
-	err = devm_blk_ksm_init(dev, ksm, num_keyslots);
+	err = devm_blk_crypto_profile_init(dev, profile, num_keyslots);
 	if (err)
 		goto out;
 
-	ksm->ksm_ll_ops = cqhci_ksm_ops;
-	ksm->dev = dev;
+	profile->ll_ops = cqhci_crypto_ops;
+	profile->dev = dev;
 
 	/* Unfortunately, CQHCI crypto only supports 32 DUN bits. */
-	ksm->max_dun_bytes_supported = 4;
+	profile->max_dun_bytes_supported = 4;
 
 	/*
 	 * Cache all the crypto capabilities and advertise the supported crypto
@@ -223,7 +224,7 @@ int cqhci_crypto_init(struct cqhci_host *cq_host)
 					cq_host->crypto_cap_array[cap_idx]);
 		if (blk_mode_num == BLK_ENCRYPTION_MODE_INVALID)
 			continue;
-		ksm->crypto_modes_supported[blk_mode_num] |=
+		profile->modes_supported[blk_mode_num] |=
 			cq_host->crypto_cap_array[cap_idx].sdus_mask * 512;
 	}
 
diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c
index d70cdcd35e43..67402baf6fae 100644
--- a/drivers/scsi/ufs/ufshcd-crypto.c
+++ b/drivers/scsi/ufs/ufshcd-crypto.c
@@ -48,11 +48,12 @@ out:
 	return err;
 }
 
-static int ufshcd_crypto_keyslot_program(struct blk_keyslot_manager *ksm,
+static int ufshcd_crypto_keyslot_program(struct blk_crypto_profile *profile,
 					 const struct blk_crypto_key *key,
 					 unsigned int slot)
 {
-	struct ufs_hba *hba = container_of(ksm, struct ufs_hba, ksm);
+	struct ufs_hba *hba =
+		container_of(profile, struct ufs_hba, crypto_profile);
 	const union ufs_crypto_cap_entry *ccap_array = hba->crypto_cap_array;
 	const struct ufs_crypto_alg_entry *alg =
 			&ufs_crypto_algs[key->crypto_cfg.crypto_mode];
@@ -105,11 +106,12 @@ static int ufshcd_clear_keyslot(struct ufs_hba *hba, int slot)
 	return ufshcd_program_key(hba, &cfg, slot);
 }
 
-static int ufshcd_crypto_keyslot_evict(struct blk_keyslot_manager *ksm,
+static int ufshcd_crypto_keyslot_evict(struct blk_crypto_profile *profile,
 				       const struct blk_crypto_key *key,
 				       unsigned int slot)
 {
-	struct ufs_hba *hba = container_of(ksm, struct ufs_hba, ksm);
+	struct ufs_hba *hba =
+		container_of(profile, struct ufs_hba, crypto_profile);
 
 	return ufshcd_clear_keyslot(hba, slot);
 }
@@ -120,11 +122,11 @@ bool ufshcd_crypto_enable(struct ufs_hba *hba)
 		return false;
 
 	/* Reset might clear all keys, so reprogram all the keys. */
-	blk_ksm_reprogram_all_keys(&hba->ksm);
+	blk_crypto_reprogram_all_keys(&hba->crypto_profile);
 	return true;
 }
 
-static const struct blk_ksm_ll_ops ufshcd_ksm_ops = {
+static const struct blk_crypto_ll_ops ufshcd_crypto_ops = {
 	.keyslot_program	= ufshcd_crypto_keyslot_program,
 	.keyslot_evict		= ufshcd_crypto_keyslot_evict,
 };
@@ -179,15 +181,16 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba)
 	}
 
 	/* The actual number of configurations supported is (CFGC+1) */
-	err = devm_blk_ksm_init(hba->dev, &hba->ksm,
-				hba->crypto_capabilities.config_count + 1);
+	err = devm_blk_crypto_profile_init(
+			hba->dev, &hba->crypto_profile,
+			hba->crypto_capabilities.config_count + 1);
 	if (err)
 		goto out;
 
-	hba->ksm.ksm_ll_ops = ufshcd_ksm_ops;
+	hba->crypto_profile.ll_ops = ufshcd_crypto_ops;
 	/* UFS only supports 8 bytes for any DUN */
-	hba->ksm.max_dun_bytes_supported = 8;
-	hba->ksm.dev = hba->dev;
+	hba->crypto_profile.max_dun_bytes_supported = 8;
+	hba->crypto_profile.dev = hba->dev;
 
 	/*
 	 * Cache all the UFS crypto capabilities and advertise the supported
@@ -202,7 +205,7 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba)
 		blk_mode_num = ufshcd_find_blk_crypto_mode(
 						hba->crypto_cap_array[cap_idx]);
 		if (blk_mode_num != BLK_ENCRYPTION_MODE_INVALID)
-			hba->ksm.crypto_modes_supported[blk_mode_num] |=
+			hba->crypto_profile.modes_supported[blk_mode_num] |=
 				hba->crypto_cap_array[cap_idx].sdus_mask * 512;
 	}
 
@@ -230,9 +233,8 @@ void ufshcd_init_crypto(struct ufs_hba *hba)
 		ufshcd_clear_keyslot(hba, slot);
 }
 
-void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba,
-					    struct request_queue *q)
+void ufshcd_crypto_register(struct ufs_hba *hba, struct request_queue *q)
 {
 	if (hba->caps & UFSHCD_CAP_CRYPTO)
-		blk_ksm_register(&hba->ksm, q);
+		blk_crypto_register(&hba->crypto_profile, q);
 }
diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h
index 78a58e788dff..e18c01276873 100644
--- a/drivers/scsi/ufs/ufshcd-crypto.h
+++ b/drivers/scsi/ufs/ufshcd-crypto.h
@@ -18,7 +18,7 @@ static inline void ufshcd_prepare_lrbp_crypto(struct request *rq,
 		return;
 	}
 
-	lrbp->crypto_key_slot = blk_ksm_get_slot_idx(rq->crypt_keyslot);
+	lrbp->crypto_key_slot = blk_crypto_keyslot_index(rq->crypt_keyslot);
 	lrbp->data_unit_num = rq->crypt_ctx->bc_dun[0];
 }
 
@@ -40,8 +40,7 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba);
 
 void ufshcd_init_crypto(struct ufs_hba *hba);
 
-void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba,
-					    struct request_queue *q);
+void ufshcd_crypto_register(struct ufs_hba *hba, struct request_queue *q);
 
 #else /* CONFIG_SCSI_UFS_CRYPTO */
 
@@ -64,8 +63,8 @@ static inline int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba)
 
 static inline void ufshcd_init_crypto(struct ufs_hba *hba) { }
 
-static inline void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba,
-						struct request_queue *q) { }
+static inline void ufshcd_crypto_register(struct ufs_hba *hba,
+					  struct request_queue *q) { }
 
 #endif /* CONFIG_SCSI_UFS_CRYPTO */
 
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 95be7ecdfe10..bf81da2ecf98 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -4986,7 +4986,7 @@ static int ufshcd_slave_configure(struct scsi_device *sdev)
 	else if (ufshcd_is_rpm_autosuspend_allowed(hba))
 		sdev->rpm_autosuspend = 1;
 
-	ufshcd_crypto_setup_rq_keyslot_manager(hba, q);
+	ufshcd_crypto_register(hba, q);
 
 	return 0;
 }
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 885fcf2e5922..62bdc412d38a 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -766,7 +766,7 @@ struct ufs_hba_monitor {
  * @crypto_capabilities: Content of crypto capabilities register (0x100)
  * @crypto_cap_array: Array of crypto capabilities
  * @crypto_cfg_register: Start of the crypto cfg array
- * @ksm: the keyslot manager tied to this hba
+ * @crypto_profile: the crypto profile of this hba (if applicable)
  */
 struct ufs_hba {
 	void __iomem *mmio_base;
@@ -911,7 +911,7 @@ struct ufs_hba {
 	union ufs_crypto_capabilities crypto_capabilities;
 	union ufs_crypto_cap_entry *crypto_cap_array;
 	u32 crypto_cfg_register;
-	struct blk_keyslot_manager ksm;
+	struct blk_crypto_profile crypto_profile;
 #endif
 #ifdef CONFIG_DEBUG_FS
 	struct dentry *debugfs_root;
diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h
index a27605e2f826..bbab65bd5428 100644
--- a/include/linux/blk-crypto-profile.h
+++ b/include/linux/blk-crypto-profile.h
@@ -3,67 +3,113 @@
  * Copyright 2019 Google LLC
  */
 
-#ifndef __LINUX_KEYSLOT_MANAGER_H
-#define __LINUX_KEYSLOT_MANAGER_H
+#ifndef __LINUX_BLK_CRYPTO_PROFILE_H
+#define __LINUX_BLK_CRYPTO_PROFILE_H
 
 #include <linux/bio.h>
 #include <linux/blk-crypto.h>
 
-struct blk_keyslot_manager;
+struct blk_crypto_profile;
 
 /**
- * struct blk_ksm_ll_ops - functions to manage keyslots in hardware
- * @keyslot_program:	Program the specified key into the specified slot in the
- *			inline encryption hardware.
- * @keyslot_evict:	Evict key from the specified keyslot in the hardware.
- *			The key is provided so that e.g. dm layers can evict
- *			keys from the devices that they map over.
- *			Returns 0 on success, -errno otherwise.
+ * struct blk_crypto_ll_ops - functions to control inline encryption hardware
  *
- * This structure should be provided by storage device drivers when they set up
- * a keyslot manager - this structure holds the function ptrs that the keyslot
- * manager will use to manipulate keyslots in the hardware.
+ * Low-level operations for controlling inline encryption hardware.  This
+ * interface must be implemented by storage drivers that support inline
+ * encryption.  All functions may sleep, are serialized by profile->lock, and
+ * are never called while profile->dev (if set) is runtime-suspended.
  */
-struct blk_ksm_ll_ops {
-	int (*keyslot_program)(struct blk_keyslot_manager *ksm,
+struct blk_crypto_ll_ops {
+
+	/**
+	 * @keyslot_program: Program a key into the inline encryption hardware.
+	 *
+	 * Program @key into the specified @slot in the inline encryption
+	 * hardware, overwriting any key that the keyslot may already contain.
+	 * The keyslot is guaranteed to not be in-use by any I/O.
+	 *
+	 * This is required if the device has keyslots.  Otherwise (i.e. if the
+	 * device is a layered device, or if the device is real hardware that
+	 * simply doesn't have the concept of keyslots) it is never called.
+	 *
+	 * Must return 0 on success, or -errno on failure.
+	 */
+	int (*keyslot_program)(struct blk_crypto_profile *profile,
 			       const struct blk_crypto_key *key,
 			       unsigned int slot);
-	int (*keyslot_evict)(struct blk_keyslot_manager *ksm,
+
+	/**
+	 * @keyslot_evict: Evict a key from the inline encryption hardware.
+	 *
+	 * If the device has keyslots, this function must evict the key from the
+	 * specified @slot.  The slot will contain @key, but there should be no
+	 * need for the @key argument to be used as @slot should be sufficient.
+	 * The keyslot is guaranteed to not be in-use by any I/O.
+	 *
+	 * If the device doesn't have keyslots itself, this function must evict
+	 * @key from any underlying devices.  @slot won't be valid in this case.
+	 *
+	 * If there are no keyslots and no underlying devices, this function
+	 * isn't required.
+	 *
+	 * Must return 0 on success, or -errno on failure.
+	 */
+	int (*keyslot_evict)(struct blk_crypto_profile *profile,
 			     const struct blk_crypto_key *key,
 			     unsigned int slot);
 };
 
-struct blk_keyslot_manager {
-	/*
-	 * The struct blk_ksm_ll_ops that this keyslot manager will use
-	 * to perform operations like programming and evicting keys on the
-	 * device
+/**
+ * struct blk_crypto_profile - inline encryption profile for a device
+ *
+ * This struct contains a storage device's inline encryption capabilities (e.g.
+ * the supported crypto algorithms), driver-provided functions to control the
+ * inline encryption hardware (e.g. programming and evicting keys), and optional
+ * device-independent keyslot management data.
+ */
+struct blk_crypto_profile {
+
+	/* public: Drivers must initialize the following fields. */
+
+	/**
+	 * @ll_ops: Driver-provided functions to control the inline encryption
+	 * hardware, e.g. program and evict keys.
 	 */
-	struct blk_ksm_ll_ops ksm_ll_ops;
+	struct blk_crypto_ll_ops ll_ops;
 
-	/*
-	 * The maximum number of bytes supported for specifying the data unit
-	 * number.
+	/**
+	 * @max_dun_bytes_supported: The maximum number of bytes supported for
+	 * specifying the data unit number (DUN).  Specifically, the range of
+	 * supported DUNs is 0 through (1 << (8 * max_dun_bytes_supported)) - 1.
 	 */
 	unsigned int max_dun_bytes_supported;
 
-	/*
-	 * Array of size BLK_ENCRYPTION_MODE_MAX of bitmasks that represents
-	 * whether a crypto mode and data unit size are supported. The i'th
-	 * bit of crypto_mode_supported[crypto_mode] is set iff a data unit
-	 * size of (1 << i) is supported. We only support data unit sizes
-	 * that are powers of 2.
+	/**
+	 * @modes_supported: Array of bitmasks that specifies whether each
+	 * combination of crypto mode and data unit size is supported.
+	 * Specifically, the i'th bit of modes_supported[crypto_mode] is set if
+	 * crypto_mode can be used with a data unit size of (1 << i).  Note that
+	 * only data unit sizes that are powers of 2 can be supported.
 	 */
-	unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX];
+	unsigned int modes_supported[BLK_ENCRYPTION_MODE_MAX];
 
-	/* Device for runtime power management (NULL if none) */
+	/**
+	 * @dev: An optional device for runtime power management.  If the driver
+	 * provides this device, it will be runtime-resumed before any function
+	 * in @ll_ops is called and will remain resumed during the call.
+	 */
 	struct device *dev;
 
-	/* Here onwards are *private* fields for internal keyslot manager use */
+	/* private: The following fields shouldn't be accessed by drivers. */
 
+	/* Number of keyslots, or 0 if not applicable */
 	unsigned int num_slots;
 
-	/* Protects programming and evicting keys from the device */
+	/*
+	 * Serializes all calls to functions in @ll_ops as well as all changes
+	 * to @slot_hashtable.  This can also be taken in read mode to look up
+	 * keyslots while ensuring that they can't be changed concurrently.
+	 */
 	struct rw_semaphore lock;
 
 	/* List of idle slots, with least recently used slot at front */
@@ -80,41 +126,41 @@ struct blk_keyslot_manager {
 	unsigned int log_slot_ht_size;
 
 	/* Per-keyslot data */
-	struct blk_ksm_keyslot *slots;
+	struct blk_crypto_keyslot *slots;
 };
 
-int blk_ksm_init(struct blk_keyslot_manager *ksm, unsigned int num_slots);
-
-int devm_blk_ksm_init(struct device *dev, struct blk_keyslot_manager *ksm,
-		      unsigned int num_slots);
+int blk_crypto_profile_init(struct blk_crypto_profile *profile,
+			    unsigned int num_slots);
 
-blk_status_t blk_ksm_get_slot_for_key(struct blk_keyslot_manager *ksm,
-				      const struct blk_crypto_key *key,
-				      struct blk_ksm_keyslot **slot_ptr);
+int devm_blk_crypto_profile_init(struct device *dev,
+				 struct blk_crypto_profile *profile,
+				 unsigned int num_slots);
 
-unsigned int blk_ksm_get_slot_idx(struct blk_ksm_keyslot *slot);
+unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot);
 
-void blk_ksm_put_slot(struct blk_ksm_keyslot *slot);
+blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile,
+				    const struct blk_crypto_key *key,
+				    struct blk_crypto_keyslot **slot_ptr);
 
-bool blk_ksm_crypto_cfg_supported(struct blk_keyslot_manager *ksm,
-				  const struct blk_crypto_config *cfg);
+void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot);
 
-int blk_ksm_evict_key(struct blk_keyslot_manager *ksm,
-		      const struct blk_crypto_key *key);
+bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile,
+				const struct blk_crypto_config *cfg);
 
-void blk_ksm_reprogram_all_keys(struct blk_keyslot_manager *ksm);
+int __blk_crypto_evict_key(struct blk_crypto_profile *profile,
+			   const struct blk_crypto_key *key);
 
-void blk_ksm_destroy(struct blk_keyslot_manager *ksm);
+void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile);
 
-void blk_ksm_intersect_modes(struct blk_keyslot_manager *parent,
-			     const struct blk_keyslot_manager *child);
+void blk_crypto_profile_destroy(struct blk_crypto_profile *profile);
 
-void blk_ksm_init_passthrough(struct blk_keyslot_manager *ksm);
+void blk_crypto_intersect_capabilities(struct blk_crypto_profile *parent,
+				       const struct blk_crypto_profile *child);
 
-bool blk_ksm_is_superset(struct blk_keyslot_manager *ksm_superset,
-			 struct blk_keyslot_manager *ksm_subset);
+bool blk_crypto_has_capabilities(const struct blk_crypto_profile *target,
+				 const struct blk_crypto_profile *reference);
 
-void blk_ksm_update_capabilities(struct blk_keyslot_manager *target_ksm,
-				 struct blk_keyslot_manager *reference_ksm);
+void blk_crypto_update_capabilities(struct blk_crypto_profile *dst,
+				    const struct blk_crypto_profile *src);
 
-#endif /* __LINUX_KEYSLOT_MANAGER_H */
+#endif /* __LINUX_BLK_CRYPTO_PROFILE_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e13780236550..b4039fdf1b04 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -133,7 +133,7 @@ struct request {
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 	struct bio_crypt_ctx *crypt_ctx;
-	struct blk_ksm_keyslot *crypt_keyslot;
+	struct blk_crypto_keyslot *crypt_keyslot;
 #endif
 
 	unsigned short write_hint;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c7b1e9355123..f72ccb2829db 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -30,7 +30,7 @@ struct pr_ops;
 struct rq_qos;
 struct blk_queue_stats;
 struct blk_stat_callback;
-struct blk_keyslot_manager;
+struct blk_crypto_profile;
 
 /* Must be consistent with blk_mq_poll_stats_bkt() */
 #define BLK_MQ_POLL_STATS_BKTS 16
@@ -224,8 +224,7 @@ struct request_queue {
 	unsigned int		dma_alignment;
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
-	/* Inline crypto capabilities */
-	struct blk_keyslot_manager *ksm;
+	struct blk_crypto_profile *crypto_profile;
 #endif
 
 	unsigned int		rq_timeout;
@@ -1142,19 +1141,20 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned lo
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 
-bool blk_ksm_register(struct blk_keyslot_manager *ksm, struct request_queue *q);
+bool blk_crypto_register(struct blk_crypto_profile *profile,
+			 struct request_queue *q);
 
-void blk_ksm_unregister(struct request_queue *q);
+void blk_crypto_unregister(struct request_queue *q);
 
 #else /* CONFIG_BLK_INLINE_ENCRYPTION */
 
-static inline bool blk_ksm_register(struct blk_keyslot_manager *ksm,
-				    struct request_queue *q)
+static inline bool blk_crypto_register(struct blk_crypto_profile *profile,
+				       struct request_queue *q)
 {
 	return true;
 }
 
-static inline void blk_ksm_unregister(struct request_queue *q) { }
+static inline void blk_crypto_unregister(struct request_queue *q) { }
 
 #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 114553b487ef..a7df155ea49b 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -576,9 +576,9 @@ struct dm_table *dm_swap_table(struct mapped_device *md,
 			       struct dm_table *t);
 
 /*
- * Table keyslot manager functions
+ * Table blk_crypto_profile functions
  */
-void dm_destroy_keyslot_manager(struct blk_keyslot_manager *ksm);
+void dm_destroy_crypto_profile(struct blk_crypto_profile *profile);
 
 /*-----------------------------------------------------------------
  * Macros.
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 725b1de41767..52eae8c45b8d 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -492,7 +492,7 @@ struct mmc_host {
 
 	/* Inline encryption support */
 #ifdef CONFIG_MMC_CRYPTO
-	struct blk_keyslot_manager ksm;
+	struct blk_crypto_profile crypto_profile;
 #endif
 
 	/* Host Software Queue support */
-- 
cgit v1.2.3


From f64dd4627ec6edc39bf1430fe6dbc923d2300a88 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 8 Oct 2021 11:13:34 +0200
Subject: ftrace: Add multi direct register/unregister interface

Adding interface to register multiple direct functions
within single call. Adding following functions:

  register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
  unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)

The register_ftrace_direct_multi registers direct function (addr)
with all functions in ops filter. The ops filter can be updated
before with ftrace_set_filter_ip calls.

All requested functions must not have direct function currently
registered, otherwise register_ftrace_direct_multi will fail.

The unregister_ftrace_direct_multi unregisters ops related direct
functions.

Link: https://lkml.kernel.org/r/20211008091336.33616-7-jolsa@kernel.org

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  11 ++++
 kernel/trace/ftrace.c  | 142 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 153 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 16a7baaba702..0158261cac9f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -324,7 +324,10 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
 				unsigned long old_addr,
 				unsigned long new_addr);
 unsigned long ftrace_find_rec_direct(unsigned long ip);
+int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr);
+int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr);
 #else
+struct ftrace_ops;
 # define ftrace_direct_func_count 0
 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr)
 {
@@ -354,6 +357,14 @@ static inline unsigned long ftrace_find_rec_direct(unsigned long ip)
 {
 	return 0;
 }
+static inline int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+{
+	return -ENODEV;
+}
+static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ccbd8377e580..a05b25fb77d8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5401,6 +5401,148 @@ int modify_ftrace_direct(unsigned long ip,
 	return ret;
 }
 EXPORT_SYMBOL_GPL(modify_ftrace_direct);
+
+#define MULTI_FLAGS (FTRACE_OPS_FL_IPMODIFY | FTRACE_OPS_FL_DIRECT | \
+		     FTRACE_OPS_FL_SAVE_REGS)
+
+static int check_direct_multi(struct ftrace_ops *ops)
+{
+	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED))
+		return -EINVAL;
+	if ((ops->flags & MULTI_FLAGS) != MULTI_FLAGS)
+		return -EINVAL;
+	return 0;
+}
+
+static void remove_direct_functions_hash(struct ftrace_hash *hash, unsigned long addr)
+{
+	struct ftrace_func_entry *entry, *del;
+	int size, i;
+
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			del = __ftrace_lookup_ip(direct_functions, entry->ip);
+			if (del && del->direct == addr) {
+				remove_hash_entry(direct_functions, del);
+				kfree(del);
+			}
+		}
+	}
+}
+
+/**
+ * register_ftrace_direct_multi - Call a custom trampoline directly
+ * for multiple functions registered in @ops
+ * @ops: The address of the struct ftrace_ops object
+ * @addr: The address of the trampoline to call at @ops functions
+ *
+ * This is used to connect a direct calls to @addr from the nop locations
+ * of the functions registered in @ops (with by ftrace_set_filter_ip
+ * function).
+ *
+ * The location that it calls (@addr) must be able to handle a direct call,
+ * and save the parameters of the function being traced, and restore them
+ * (or inject new ones if needed), before returning.
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL  - The @ops object was already registered with this call or
+ *             when there are no functions in @ops object.
+ *  -EBUSY   - Another direct function is already attached (there can be only one)
+ *  -ENODEV  - @ip does not point to a ftrace nop location (or not supported)
+ *  -ENOMEM  - There was an allocation failure.
+ */
+int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+{
+	struct ftrace_hash *hash, *free_hash = NULL;
+	struct ftrace_func_entry *entry, *new;
+	int err = -EBUSY, size, i;
+
+	if (ops->func || ops->trampoline)
+		return -EINVAL;
+	if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED))
+		return -EINVAL;
+	if (ops->flags & FTRACE_OPS_FL_ENABLED)
+		return -EINVAL;
+
+	hash = ops->func_hash->filter_hash;
+	if (ftrace_hash_empty(hash))
+		return -EINVAL;
+
+	mutex_lock(&direct_mutex);
+
+	/* Make sure requested entries are not already registered.. */
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			if (ftrace_find_rec_direct(entry->ip))
+				goto out_unlock;
+		}
+	}
+
+	/* ... and insert them to direct_functions hash. */
+	err = -ENOMEM;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			new = ftrace_add_rec_direct(entry->ip, addr, &free_hash);
+			if (!new)
+				goto out_remove;
+			entry->direct = addr;
+		}
+	}
+
+	ops->func = call_direct_funcs;
+	ops->flags = MULTI_FLAGS;
+	ops->trampoline = FTRACE_REGS_ADDR;
+
+	err = register_ftrace_function(ops);
+
+ out_remove:
+	if (err)
+		remove_direct_functions_hash(hash, addr);
+
+ out_unlock:
+	mutex_unlock(&direct_mutex);
+
+	if (free_hash) {
+		synchronize_rcu_tasks();
+		free_ftrace_hash(free_hash);
+	}
+	return err;
+}
+EXPORT_SYMBOL_GPL(register_ftrace_direct_multi);
+
+/**
+ * unregister_ftrace_direct_multi - Remove calls to custom trampoline
+ * previously registered by register_ftrace_direct_multi for @ops object.
+ * @ops: The address of the struct ftrace_ops object
+ *
+ * This is used to remove a direct calls to @addr from the nop locations
+ * of the functions registered in @ops (with by ftrace_set_filter_ip
+ * function).
+ *
+ * Returns:
+ *  0 on success
+ *  -EINVAL - The @ops object was not properly registered.
+ */
+int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+{
+	struct ftrace_hash *hash = ops->func_hash->filter_hash;
+	int err;
+
+	if (check_direct_multi(ops))
+		return -EINVAL;
+	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+		return -EINVAL;
+
+	mutex_lock(&direct_mutex);
+	err = unregister_ftrace_function(ops);
+	remove_direct_functions_hash(hash, addr);
+	mutex_unlock(&direct_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi);
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
 /**
-- 
cgit v1.2.3


From ccf5a89efd6f0a9483cea8acd4a0822b1a47e59a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 8 Oct 2021 11:13:35 +0200
Subject: ftrace: Add multi direct modify interface

Adding interface to modify registered direct function
for ftrace_ops. Adding following function:

   modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)

The function changes the currently registered direct
function for all attached functions.

Link: https://lkml.kernel.org/r/20211008091336.33616-8-jolsa@kernel.org

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/ftrace.h |  6 +++++
 kernel/trace/ftrace.c  | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 0158261cac9f..9999e29187de 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -326,6 +326,8 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
 unsigned long ftrace_find_rec_direct(unsigned long ip);
 int register_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr);
 int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr);
+int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr);
+
 #else
 struct ftrace_ops;
 # define ftrace_direct_func_count 0
@@ -365,6 +367,10 @@ static inline int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigne
 {
 	return -ENODEV;
 }
+static inline int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+{
+	return -ENODEV;
+}
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a05b25fb77d8..30120342176e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -5543,6 +5543,68 @@ int unregister_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
 	return err;
 }
 EXPORT_SYMBOL_GPL(unregister_ftrace_direct_multi);
+
+/**
+ * modify_ftrace_direct_multi - Modify an existing direct 'multi' call
+ * to call something else
+ * @ops: The address of the struct ftrace_ops object
+ * @addr: The address of the new trampoline to call at @ops functions
+ *
+ * This is used to unregister currently registered direct caller and
+ * register new one @addr on functions registered in @ops object.
+ *
+ * Note there's window between ftrace_shutdown and ftrace_startup calls
+ * where there will be no callbacks called.
+ *
+ * Returns: zero on success. Non zero on error, which includes:
+ *  -EINVAL - The @ops object was not properly registered.
+ */
+int modify_ftrace_direct_multi(struct ftrace_ops *ops, unsigned long addr)
+{
+	struct ftrace_hash *hash = ops->func_hash->filter_hash;
+	struct ftrace_func_entry *entry, *iter;
+	int i, size;
+	int err;
+
+	if (check_direct_multi(ops))
+		return -EINVAL;
+	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+		return -EINVAL;
+
+	mutex_lock(&direct_mutex);
+	mutex_lock(&ftrace_lock);
+
+	/*
+	 * Shutdown the ops, change 'direct' pointer for each
+	 * ops entry in direct_functions hash and startup the
+	 * ops back again.
+	 *
+	 * Note there is no callback called for @ops object after
+	 * this ftrace_shutdown call until ftrace_startup is called
+	 * later on.
+	 */
+	err = ftrace_shutdown(ops, 0);
+	if (err)
+		goto out_unlock;
+
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(iter, &hash->buckets[i], hlist) {
+			entry = __ftrace_lookup_ip(direct_functions, iter->ip);
+			if (!entry)
+				continue;
+			entry->direct = addr;
+		}
+	}
+
+	err = ftrace_startup(ops, 0);
+
+ out_unlock:
+	mutex_unlock(&ftrace_lock);
+	mutex_unlock(&direct_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(modify_ftrace_direct_multi);
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
 
 /**
-- 
cgit v1.2.3


From bce5c81cb31f7f124ce231ec79df9e85a8bac132 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 19 Oct 2021 09:25:20 -0400
Subject: tracing: Explain the trace recursion transition bit better

The current text of the explanation of the transition bit in the trace
recursion protection is not very clear. Improve the text, so that when all
the archs no longer have the issue of tracing between a start of a new
(interrupt) context and updating the preempt_count to reflect the new
context, that it may be removed.

Link: https://lore.kernel.org/all/20211018220203.064a42ed@gandalf.local.home/

Suggested-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_recursion.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 1d8cce02c3fb..24f284eb55a7 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -168,8 +168,12 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign
 	bit = trace_get_context_bit() + start;
 	if (unlikely(val & (1 << bit))) {
 		/*
-		 * It could be that preempt_count has not been updated during
-		 * a switch between contexts. Allow for a single recursion.
+		 * If an interrupt occurs during a trace, and another trace
+		 * happens in that interrupt but before the preempt_count is
+		 * updated to reflect the new interrupt context, then this
+		 * will think a recursion occurred, and the event will be dropped.
+		 * Let a single instance happen via the TRANSITION_BIT to
+		 * not drop those events.
 		 */
 		bit = TRACE_TRANSITION_BIT;
 		if (val & (1 << bit)) {
-- 
cgit v1.2.3


From 17b5b576ff5faff99a4c8140d521cd4d7fff5c16 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Thu, 7 Oct 2021 15:46:39 +0200
Subject: mux: add support for delay after muxing

Hardware may require some time for the muxed analog signals to settle
after the muxing is changed.  Allow users of the mux subsystem to
specify this delay with the new mux_control_select_delay() function (and
the _try equivalent).

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Reviewed-by: Lars-Peter Clausen <lars@metafoo.de>
Tested-by: Lars-Peter Clausen <lars@metafoo.de>
Acked-by: Peter Rosin <peda@axentia.se>
Link: https://lore.kernel.org/r/20211007134641.13417-2-vincent.whitchurch@axis.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/mux/core.c           | 38 ++++++++++++++++++++++++++++++++------
 include/linux/mux/consumer.h | 23 +++++++++++++++++++----
 include/linux/mux/driver.h   |  4 ++++
 3 files changed, 55 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mux/core.c b/drivers/mux/core.c
index 1fb22388e7e0..22f4709768d1 100644
--- a/drivers/mux/core.c
+++ b/drivers/mux/core.c
@@ -9,6 +9,7 @@
 
 #define pr_fmt(fmt) "mux-core: " fmt
 
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
 #include <linux/export.h>
@@ -116,6 +117,7 @@ struct mux_chip *mux_chip_alloc(struct device *dev,
 		sema_init(&mux->lock, 1);
 		mux->cached_state = MUX_CACHE_UNKNOWN;
 		mux->idle_state = MUX_IDLE_AS_IS;
+		mux->last_change = ktime_get();
 	}
 
 	device_initialize(&mux_chip->dev);
@@ -129,6 +131,8 @@ static int mux_control_set(struct mux_control *mux, int state)
 	int ret = mux->chip->ops->set(mux, state);
 
 	mux->cached_state = ret < 0 ? MUX_CACHE_UNKNOWN : state;
+	if (ret >= 0)
+		mux->last_change = ktime_get();
 
 	return ret;
 }
@@ -314,10 +318,25 @@ static int __mux_control_select(struct mux_control *mux, int state)
 	return ret;
 }
 
+static void mux_control_delay(struct mux_control *mux, unsigned int delay_us)
+{
+	ktime_t delayend;
+	s64 remaining;
+
+	if (!delay_us)
+		return;
+
+	delayend = ktime_add_us(mux->last_change, delay_us);
+	remaining = ktime_us_delta(delayend, ktime_get());
+	if (remaining > 0)
+		fsleep(remaining);
+}
+
 /**
- * mux_control_select() - Select the given multiplexer state.
+ * mux_control_select_delay() - Select the given multiplexer state.
  * @mux: The mux-control to request a change of state from.
  * @state: The new requested state.
+ * @delay_us: The time to delay (in microseconds) if the mux state is changed.
  *
  * On successfully selecting the mux-control state, it will be locked until
  * there is a call to mux_control_deselect(). If the mux-control is already
@@ -331,7 +350,8 @@ static int __mux_control_select(struct mux_control *mux, int state)
  * Return: 0 when the mux-control state has the requested state or a negative
  * errno on error.
  */
-int mux_control_select(struct mux_control *mux, unsigned int state)
+int mux_control_select_delay(struct mux_control *mux, unsigned int state,
+			     unsigned int delay_us)
 {
 	int ret;
 
@@ -340,18 +360,21 @@ int mux_control_select(struct mux_control *mux, unsigned int state)
 		return ret;
 
 	ret = __mux_control_select(mux, state);
+	if (ret >= 0)
+		mux_control_delay(mux, delay_us);
 
 	if (ret < 0)
 		up(&mux->lock);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(mux_control_select);
+EXPORT_SYMBOL_GPL(mux_control_select_delay);
 
 /**
- * mux_control_try_select() - Try to select the given multiplexer state.
+ * mux_control_try_select_delay() - Try to select the given multiplexer state.
  * @mux: The mux-control to request a change of state from.
  * @state: The new requested state.
+ * @delay_us: The time to delay (in microseconds) if the mux state is changed.
  *
  * On successfully selecting the mux-control state, it will be locked until
  * mux_control_deselect() called.
@@ -363,7 +386,8 @@ EXPORT_SYMBOL_GPL(mux_control_select);
  * Return: 0 when the mux-control state has the requested state or a negative
  * errno on error. Specifically -EBUSY if the mux-control is contended.
  */
-int mux_control_try_select(struct mux_control *mux, unsigned int state)
+int mux_control_try_select_delay(struct mux_control *mux, unsigned int state,
+				 unsigned int delay_us)
 {
 	int ret;
 
@@ -371,13 +395,15 @@ int mux_control_try_select(struct mux_control *mux, unsigned int state)
 		return -EBUSY;
 
 	ret = __mux_control_select(mux, state);
+	if (ret >= 0)
+		mux_control_delay(mux, delay_us);
 
 	if (ret < 0)
 		up(&mux->lock);
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(mux_control_try_select);
+EXPORT_SYMBOL_GPL(mux_control_try_select_delay);
 
 /**
  * mux_control_deselect() - Deselect the previously selected multiplexer state.
diff --git a/include/linux/mux/consumer.h b/include/linux/mux/consumer.h
index 5fc6bb2fefad..7a09b040ac39 100644
--- a/include/linux/mux/consumer.h
+++ b/include/linux/mux/consumer.h
@@ -16,10 +16,25 @@ struct device;
 struct mux_control;
 
 unsigned int mux_control_states(struct mux_control *mux);
-int __must_check mux_control_select(struct mux_control *mux,
-				    unsigned int state);
-int __must_check mux_control_try_select(struct mux_control *mux,
-					unsigned int state);
+int __must_check mux_control_select_delay(struct mux_control *mux,
+					  unsigned int state,
+					  unsigned int delay_us);
+int __must_check mux_control_try_select_delay(struct mux_control *mux,
+					      unsigned int state,
+					      unsigned int delay_us);
+
+static inline int __must_check mux_control_select(struct mux_control *mux,
+						  unsigned int state)
+{
+	return mux_control_select_delay(mux, state, 0);
+}
+
+static inline int __must_check mux_control_try_select(struct mux_control *mux,
+						      unsigned int state)
+{
+	return mux_control_try_select_delay(mux, state, 0);
+}
+
 int mux_control_deselect(struct mux_control *mux);
 
 struct mux_control *mux_control_get(struct device *dev, const char *mux_name);
diff --git a/include/linux/mux/driver.h b/include/linux/mux/driver.h
index 627a2c6bc02d..18824064f8c0 100644
--- a/include/linux/mux/driver.h
+++ b/include/linux/mux/driver.h
@@ -12,6 +12,7 @@
 
 #include <dt-bindings/mux/mux.h>
 #include <linux/device.h>
+#include <linux/ktime.h>
 #include <linux/semaphore.h>
 
 struct mux_chip;
@@ -33,6 +34,7 @@ struct mux_control_ops {
  * @states:		The number of mux controller states.
  * @idle_state:		The mux controller state to use when inactive, or one
  *			of MUX_IDLE_AS_IS and MUX_IDLE_DISCONNECT.
+ * @last_change:	Timestamp of last change
  *
  * Mux drivers may only change @states and @idle_state, and may only do so
  * between allocation and registration of the mux controller. Specifically,
@@ -47,6 +49,8 @@ struct mux_control {
 
 	unsigned int states;
 	int idle_state;
+
+	ktime_t last_change;
 };
 
 /**
-- 
cgit v1.2.3


From 9eeb3aa33ae005526f672b394c1791578463513f Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Thu, 21 Oct 2021 21:47:51 +0800
Subject: bpf: Add bpf_skc_to_unix_sock() helper

The helper is used in tracing programs to cast a socket
pointer to a unix_sock pointer.
The return value could be NULL if the casting is illegal.

Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20211021134752.1223426-2-hengqi.chen@gmail.com
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  7 +++++++
 kernel/trace/bpf_trace.c       |  2 ++
 net/core/filter.c              | 23 +++++++++++++++++++++++
 scripts/bpf_doc.py             |  2 ++
 tools/include/uapi/linux/bpf.h |  7 +++++++
 6 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d604c8251d88..be3102b4554b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2093,6 +2093,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
+extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_snprintf_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6fc59d61937a..22e7a3f38b9f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4909,6 +4909,12 @@ union bpf_attr {
  *	Return
  *		The number of bytes written to the buffer, or a negative error
  *		in case of failure.
+ *
+ * struct unix_sock *bpf_skc_to_unix_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *unix_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or **NULL** otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5089,6 +5095,7 @@ union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(skc_to_unix_sock),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 6b3153841a33..cbcd0d6fca7c 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1608,6 +1608,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skc_to_tcp_request_sock_proto;
 	case BPF_FUNC_skc_to_udp6_sock:
 		return &bpf_skc_to_udp6_sock_proto;
+	case BPF_FUNC_skc_to_unix_sock:
+		return &bpf_skc_to_unix_sock_proto;
 	case BPF_FUNC_sk_storage_get:
 		return &bpf_sk_storage_get_tracing_proto;
 	case BPF_FUNC_sk_storage_delete:
diff --git a/net/core/filter.c b/net/core/filter.c
index 4bace37a6a44..8e8d3b49c297 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10723,6 +10723,26 @@ const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
 	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
 };
 
+BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
+{
+	/* unix_sock type is not generated in dwarf and hence btf,
+	 * trigger an explicit type generation here.
+	 */
+	BTF_TYPE_EMIT(struct unix_sock);
+	if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
+	.func			= bpf_skc_to_unix_sock,
+	.gpl_only		= false,
+	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type		= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+	.ret_btf_id		= &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
+};
+
 BPF_CALL_1(bpf_sock_from_file, struct file *, file)
 {
 	return (unsigned long)sock_from_file(file);
@@ -10762,6 +10782,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_skc_to_udp6_sock:
 		func = &bpf_skc_to_udp6_sock_proto;
 		break;
+	case BPF_FUNC_skc_to_unix_sock:
+		func = &bpf_skc_to_unix_sock_proto;
+		break;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 00ac7b79cddb..a6403ddf5de7 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -537,6 +537,7 @@ class PrinterHelpers(Printer):
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
             'struct udp6_sock',
+            'struct unix_sock',
             'struct task_struct',
 
             'struct __sk_buff',
@@ -589,6 +590,7 @@ class PrinterHelpers(Printer):
             'struct tcp_timewait_sock',
             'struct tcp_request_sock',
             'struct udp6_sock',
+            'struct unix_sock',
             'struct task_struct',
             'struct path',
             'struct btf_ptr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6fc59d61937a..22e7a3f38b9f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4909,6 +4909,12 @@ union bpf_attr {
  *	Return
  *		The number of bytes written to the buffer, or a negative error
  *		in case of failure.
+ *
+ * struct unix_sock *bpf_skc_to_unix_sock(void *sk)
+ * 	Description
+ *		Dynamically cast a *sk* pointer to a *unix_sock* pointer.
+ *	Return
+ *		*sk* if casting is valid, or **NULL** otherwise.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5089,6 +5095,7 @@ union bpf_attr {
 	FN(task_pt_regs),		\
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
+	FN(skc_to_unix_sock),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From d08fd747d0ed682b6c6c280ba454cafcad33563d Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Thu, 14 Oct 2021 16:35:11 +0200
Subject: Compiler Attributes: remove GCC 5.1 mention

GCC 5.1 is now the minimum version.

Acked-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 include/linux/compiler_attributes.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index e6ec63403965..87d1e773400c 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -104,7 +104,6 @@
 #define __deprecated
 
 /*
- * Optional: only supported since gcc >= 5.1
  * Optional: not supported by clang
  * Optional: not supported by icc
  *
-- 
cgit v1.2.3


From aba64c7da98330141dcdadd5612f088043a83696 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Wed, 20 Oct 2021 00:48:17 -0700
Subject: bpf: Add verified_insns to bpf_prog_info and fdinfo

This stat is currently printed in the verifier log and not stored
anywhere. To ease consumption of this data, add a field to bpf_prog_aux
so it can be exposed via BPF_OBJ_GET_INFO_BY_FD and fdinfo.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20211020074818.1017682-2-davemarchevsky@fb.com
---
 include/linux/bpf.h            | 1 +
 include/uapi/linux/bpf.h       | 1 +
 kernel/bpf/syscall.c           | 8 ++++++--
 kernel/bpf/verifier.c          | 1 +
 tools/include/uapi/linux/bpf.h | 1 +
 5 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be3102b4554b..31421c74ba08 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -887,6 +887,7 @@ struct bpf_prog_aux {
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
+	u32 verified_insns;
 	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 22e7a3f38b9f..c10820037883 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5620,6 +5620,7 @@ struct bpf_prog_info {
 	__u64 run_time_ns;
 	__u64 run_cnt;
 	__u64 recursion_misses;
+	__u32 verified_insns;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4e50c0bfdb7d..5beb321b3b3b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1848,7 +1848,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "prog_id:\t%u\n"
 		   "run_time_ns:\t%llu\n"
 		   "run_cnt:\t%llu\n"
-		   "recursion_misses:\t%llu\n",
+		   "recursion_misses:\t%llu\n"
+		   "verified_insns:\t%u\n",
 		   prog->type,
 		   prog->jited,
 		   prog_tag,
@@ -1856,7 +1857,8 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 		   prog->aux->id,
 		   stats.nsecs,
 		   stats.cnt,
-		   stats.misses);
+		   stats.misses,
+		   prog->aux->verified_insns);
 }
 #endif
 
@@ -3625,6 +3627,8 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 	info.run_cnt = stats.cnt;
 	info.recursion_misses = stats.misses;
 
+	info.verified_insns = prog->aux->verified_insns;
+
 	if (!bpf_capable()) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 21cdff35a2f9..c6616e325803 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -14033,6 +14033,7 @@ skip_full_check:
 
 	env->verification_time = ktime_get_ns() - start_time;
 	print_verification_stats(env);
+	env->prog->aux->verified_insns = env->insn_processed;
 
 	if (log->level && bpf_verifier_log_full(log))
 		ret = -ENOSPC;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 22e7a3f38b9f..c10820037883 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5620,6 +5620,7 @@ struct bpf_prog_info {
 	__u64 run_time_ns;
 	__u64 run_cnt;
 	__u64 recursion_misses;
+	__u32 verified_insns;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
-- 
cgit v1.2.3


From 7c00621dcaeea206d7489b3e8b50b1864841ae69 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Thu, 14 Oct 2021 15:23:31 +0200
Subject: compiler_types: mark __compiletime_assert failure as __noreturn

`__compiletime_assert` declares a fake `extern` function
which appears (to the compiler) to be called when the test fails.

Therefore, compilers may emit possibly-uninitialized warnings
in some cases, even if it will be an error anyway (for compilers
supporting the `error` attribute, e.g. GCC and Clang >= 14)
or a link failure (for those that do not, e.g. Clang < 14).

Annotating the fake function as `__noreturn` gives them
the information they need to avoid the warning,
e.g. see https://godbolt.org/z/x1v69jjYY.

Link: https://lore.kernel.org/llvm/202110100514.3h9CI4s0-lkp@intel.com/
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 include/linux/compiler_types.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index b6ff83a714ca..ca1a66b8cd2f 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -298,7 +298,13 @@ struct ftrace_likely_data {
 #ifdef __OPTIMIZE__
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
-		extern void prefix ## suffix(void) __compiletime_error(msg); \
+		/*							\
+		 * __noreturn is needed to give the compiler enough	\
+		 * information to avoid certain possibly-uninitialized	\
+		 * warnings (regardless of the build failing).		\
+		 */							\
+		__noreturn extern void prefix ## suffix(void)		\
+			__compiletime_error(msg);			\
 		if (!(condition))					\
 			prefix ## suffix();				\
 	} while (0)
-- 
cgit v1.2.3


From 008d3825a805557464c5e75f9eb806a3aa2f5e6d Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Tue, 19 Oct 2021 15:53:04 -0500
Subject: fsi: occ: Use a large buffer for responses

Allocate a large buffer for each OCC to handle response data. This
removes memory allocation during an operation, and also allows for
the maximum amount of SBE FFDC.

Previously for the putsram and attn commands, only 32 words would have
been available, and for getsram, only up to the size of the transfer.
SBE FFDC might be up to 8Kb.

The SBE interface expects data to be specified in units of words (4
bytes), defined as OCC_MAX_RESP_WORDS.

This change allows the full FFDC capture to be implemented, where before
it was not available.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Link: https://lore.kernel.org/r/20211019205307.36946-2-eajames@linux.ibm.com
Signed-off-by: Joel Stanley <joel@jms.id.au>
---
 drivers/fsi/fsi-occ.c   | 110 +++++++++++++++++++-----------------------------
 include/linux/fsi-occ.h |   2 +
 2 files changed, 46 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/fsi/fsi-occ.c b/drivers/fsi/fsi-occ.c
index ecf738411fe2..b4302776026d 100644
--- a/drivers/fsi/fsi-occ.c
+++ b/drivers/fsi/fsi-occ.c
@@ -10,6 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/miscdevice.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/fsi-occ.h>
@@ -33,13 +34,6 @@
 
 #define OCC_P10_SRAM_MODE	0x58	/* Normal mode, OCB channel 2 */
 
-/*
- * Assume we don't have much FFDC, if we do we'll overflow and
- * fail the command. This needs to be big enough for simple
- * commands as well.
- */
-#define OCC_SBE_STATUS_WORDS	32
-
 #define OCC_TIMEOUT_MS		1000
 #define OCC_CMD_IN_PRG_WAIT_MS	50
 
@@ -51,6 +45,7 @@ struct occ {
 	char name[32];
 	int idx;
 	u8 sequence_number;
+	void *buffer;
 	enum versions version;
 	struct miscdevice mdev;
 	struct mutex occ_lock;
@@ -241,8 +236,10 @@ static int occ_verify_checksum(struct occ *occ, struct occ_response *resp,
 static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len)
 {
 	u32 data_len = ((len + 7) / 8) * 8;	/* must be multiples of 8 B */
-	size_t cmd_len, resp_len, resp_data_len;
-	__be32 *resp, cmd[6];
+	size_t cmd_len, resp_data_len;
+	size_t resp_len = OCC_MAX_RESP_WORDS;
+	__be32 *resp = occ->buffer;
+	__be32 cmd[6];
 	int idx = 0, rc;
 
 	/*
@@ -269,19 +266,19 @@ static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len)
 	cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_OCC_SRAM);
 	cmd[4 + idx] = cpu_to_be32(data_len);
 
-	resp_len = (data_len >> 2) + OCC_SBE_STATUS_WORDS;
-	resp = kzalloc(resp_len << 2, GFP_KERNEL);
-	if (!resp)
-		return -ENOMEM;
-
 	rc = sbefifo_submit(occ->sbefifo, cmd, cmd_len, resp, &resp_len);
 	if (rc)
-		goto free;
+		return rc;
 
 	rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_GET_OCC_SRAM,
 				  resp, resp_len, &resp_len);
-	if (rc)
-		goto free;
+	if (rc > 0) {
+		dev_err(occ->dev, "SRAM read returned failure status: %08x\n",
+			rc);
+		return -EBADMSG;
+	} else if (rc) {
+		return rc;
+	}
 
 	resp_data_len = be32_to_cpu(resp[resp_len - 1]);
 	if (resp_data_len != data_len) {
@@ -292,39 +289,21 @@ static int occ_getsram(struct occ *occ, u32 offset, void *data, ssize_t len)
 		memcpy(data, resp, len);
 	}
 
-free:
-	/* Convert positive SBEI status */
-	if (rc > 0) {
-		dev_err(occ->dev, "SRAM read returned failure status: %08x\n",
-			rc);
-		rc = -EBADMSG;
-	}
-
-	kfree(resp);
 	return rc;
 }
 
 static int occ_putsram(struct occ *occ, const void *data, ssize_t len,
 		       u8 seq_no, u16 checksum)
 {
-	size_t cmd_len, buf_len, resp_len, resp_data_len;
 	u32 data_len = ((len + 7) / 8) * 8;	/* must be multiples of 8 B */
-	__be32 *buf;
+	size_t cmd_len, resp_data_len;
+	size_t resp_len = OCC_MAX_RESP_WORDS;
+	__be32 *buf = occ->buffer;
 	u8 *byte_buf;
 	int idx = 0, rc;
 
 	cmd_len = (occ->version == occ_p10) ? 6 : 5;
-
-	/*
-	 * We use the same buffer for command and response, make
-	 * sure it's big enough
-	 */
-	resp_len = OCC_SBE_STATUS_WORDS;
 	cmd_len += data_len >> 2;
-	buf_len = max(cmd_len, resp_len);
-	buf = kzalloc(buf_len << 2, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
 
 	/*
 	 * Magic sequence to do SBE putsram command. SBE will transfer
@@ -361,12 +340,17 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len,
 
 	rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len);
 	if (rc)
-		goto free;
+		return rc;
 
 	rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM,
 				  buf, resp_len, &resp_len);
-	if (rc)
-		goto free;
+	if (rc > 0) {
+		dev_err(occ->dev, "SRAM write returned failure status: %08x\n",
+			rc);
+		return -EBADMSG;
+	} else if (rc) {
+		return rc;
+	}
 
 	if (resp_len != 1) {
 		dev_err(occ->dev, "SRAM write response length invalid: %zd\n",
@@ -382,27 +366,16 @@ static int occ_putsram(struct occ *occ, const void *data, ssize_t len,
 		}
 	}
 
-free:
-	/* Convert positive SBEI status */
-	if (rc > 0) {
-		dev_err(occ->dev, "SRAM write returned failure status: %08x\n",
-			rc);
-		rc = -EBADMSG;
-	}
-
-	kfree(buf);
 	return rc;
 }
 
 static int occ_trigger_attn(struct occ *occ)
 {
-	__be32 buf[OCC_SBE_STATUS_WORDS];
-	size_t cmd_len, resp_len, resp_data_len;
+	__be32 *buf = occ->buffer;
+	size_t cmd_len, resp_data_len;
+	size_t resp_len = OCC_MAX_RESP_WORDS;
 	int idx = 0, rc;
 
-	BUILD_BUG_ON(OCC_SBE_STATUS_WORDS < 8);
-	resp_len = OCC_SBE_STATUS_WORDS;
-
 	switch (occ->version) {
 	default:
 	case occ_p9:
@@ -427,12 +400,17 @@ static int occ_trigger_attn(struct occ *occ)
 
 	rc = sbefifo_submit(occ->sbefifo, buf, cmd_len, buf, &resp_len);
 	if (rc)
-		goto error;
+		return rc;
 
 	rc = sbefifo_parse_status(occ->sbefifo, SBEFIFO_CMD_PUT_OCC_SRAM,
 				  buf, resp_len, &resp_len);
-	if (rc)
-		goto error;
+	if (rc > 0) {
+		dev_err(occ->dev, "SRAM attn returned failure status: %08x\n",
+			rc);
+		return -EBADMSG;
+	} else if (rc) {
+		return rc;
+	}
 
 	if (resp_len != 1) {
 		dev_err(occ->dev, "SRAM attn response length invalid: %zd\n",
@@ -448,14 +426,6 @@ static int occ_trigger_attn(struct occ *occ)
 		}
 	}
 
- error:
-	/* Convert positive SBEI status */
-	if (rc > 0) {
-		dev_err(occ->dev, "SRAM attn returned failure status: %08x\n",
-			rc);
-		rc = -EBADMSG;
-	}
-
 	return rc;
 }
 
@@ -590,6 +560,11 @@ static int occ_probe(struct platform_device *pdev)
 	if (!occ)
 		return -ENOMEM;
 
+	/* SBE words are always four bytes */
+	occ->buffer = kvmalloc(OCC_MAX_RESP_WORDS * 4, GFP_KERNEL);
+	if (!occ->buffer)
+		return -ENOMEM;
+
 	occ->version = (uintptr_t)of_device_get_match_data(dev);
 	occ->dev = dev;
 	occ->sbefifo = dev->parent;
@@ -625,6 +600,7 @@ static int occ_probe(struct platform_device *pdev)
 	if (rc) {
 		dev_err(dev, "failed to register miscdevice: %d\n", rc);
 		ida_simple_remove(&occ_ida, occ->idx);
+		kvfree(occ->buffer);
 		return rc;
 	}
 
@@ -640,6 +616,8 @@ static int occ_remove(struct platform_device *pdev)
 {
 	struct occ *occ = platform_get_drvdata(pdev);
 
+	kvfree(occ->buffer);
+
 	misc_deregister(&occ->mdev);
 
 	device_for_each_child(&pdev->dev, NULL, occ_unregister_child);
diff --git a/include/linux/fsi-occ.h b/include/linux/fsi-occ.h
index d4cdc2aa6e33..7ee3dbd7f4b3 100644
--- a/include/linux/fsi-occ.h
+++ b/include/linux/fsi-occ.h
@@ -19,6 +19,8 @@ struct device;
 #define OCC_RESP_CRIT_OCB		0xE3
 #define OCC_RESP_CRIT_HW		0xE4
 
+#define OCC_MAX_RESP_WORDS		2048
+
 int fsi_occ_submit(struct device *dev, const void *request, size_t req_len,
 		   void *response, size_t *resp_len);
 
-- 
cgit v1.2.3


From dc0fd0acb6e0e8025a0a43ada54513b216254fac Mon Sep 17 00:00:00 2001
From: Maximilian Luz <luzmaximilian@gmail.com>
Date: Thu, 21 Oct 2021 15:09:03 +0200
Subject: HID: surface-hid: Use correct event registry for managing HID events

Until now, we have only ever seen the REG-category registry being used
on devices addressed with target ID 2. In fact, we have only ever seen
Surface Aggregator Module (SAM) HID devices with target ID 2. For those
devices, the registry also has to be addressed with target ID 2.

Some devices, like the new Surface Laptop Studio, however, address their
HID devices on target ID 1. As a result of this, any target ID 2
commands time out. This includes event management commands addressed to
the target ID 2 REG-category registry. For these devices, the registry
has to be addressed via target ID 1 instead.

We therefore assume that the target ID of the registry to be used
depends on the target ID of the respective device. Implement this
accordingly.

Note that we currently allow the surface HID driver to only load against
devices with target ID 2, so these timeouts are not happening (yet).
This is just a preparation step before we allow the driver to load
against all target IDs.

Cc: stable@vger.kernel.org # 5.14+
Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
Acked-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20211021130904.862610-3-luzmaximilian@gmail.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/hid/surface-hid/surface_hid.c         | 2 +-
 include/linux/surface_aggregator/controller.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/surface-hid/surface_hid.c b/drivers/hid/surface-hid/surface_hid.c
index a3a70e4f3f6c..daa452367c0b 100644
--- a/drivers/hid/surface-hid/surface_hid.c
+++ b/drivers/hid/surface-hid/surface_hid.c
@@ -209,7 +209,7 @@ static int surface_hid_probe(struct ssam_device *sdev)
 
 	shid->notif.base.priority = 1;
 	shid->notif.base.fn = ssam_hid_event_fn;
-	shid->notif.event.reg = SSAM_EVENT_REGISTRY_REG;
+	shid->notif.event.reg = SSAM_EVENT_REGISTRY_REG(sdev->uid.target);
 	shid->notif.event.id.target_category = sdev->uid.category;
 	shid->notif.event.id.instance = sdev->uid.instance;
 	shid->notif.event.mask = SSAM_EVENT_MASK_STRICT;
diff --git a/include/linux/surface_aggregator/controller.h b/include/linux/surface_aggregator/controller.h
index 068e1982ad37..74bfdffaf7b0 100644
--- a/include/linux/surface_aggregator/controller.h
+++ b/include/linux/surface_aggregator/controller.h
@@ -792,8 +792,8 @@ enum ssam_event_mask {
 #define SSAM_EVENT_REGISTRY_KIP	\
 	SSAM_EVENT_REGISTRY(SSAM_SSH_TC_KIP, 0x02, 0x27, 0x28)
 
-#define SSAM_EVENT_REGISTRY_REG \
-	SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, 0x02, 0x01, 0x02)
+#define SSAM_EVENT_REGISTRY_REG(tid)\
+	SSAM_EVENT_REGISTRY(SSAM_SSH_TC_REG, tid, 0x01, 0x02)
 
 /**
  * enum ssam_event_notifier_flags - Flags for event notifiers.
-- 
cgit v1.2.3


From dd66f56caea6bb1a3703fb3bfc3106444d05a930 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 21 Oct 2021 08:55:24 +0200
Subject: dma-buf: fix kerneldoc for renamed members
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those members where renamed, update the kerneldoc as well.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Acked-by: Sumit Semwal <sumit.semwal@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211021141945.84023-1-christian.koenig@amd.com
---
 include/linux/dma-buf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 02c2eb874da6..9807aef33685 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -433,8 +433,8 @@ struct dma_buf {
 	/** @poll: for userspace poll support */
 	wait_queue_head_t poll;
 
-	/** @cb_excl: for userspace poll support */
-	/** @cb_shared: for userspace poll support */
+	/** @cb_in: for userspace poll support */
+	/** @cb_out: for userspace poll support */
 	struct dma_buf_poll_cb_t {
 		struct dma_fence_cb cb;
 		wait_queue_head_t *poll;
-- 
cgit v1.2.3


From 48d09e97876bed4bcc503d528bdba8c907e43cb3 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Thu, 21 Oct 2021 08:58:34 -0700
Subject: firmware_loader: formalize built-in firmware API

Formalize the built-in firmware with a proper API. This can later
be used by other callers where all they need is built-in firmware.

We export the firmware_request_builtin() call for now only
under the TEST_FIRMWARE symbol namespace as there are no
direct modular users for it. If they pop up they are free
to export it generally. Built-in code always gets access to
the callers and we'll demonstrate a hidden user which has been
lurking in the kernel for a while and the reason why using a
proper API was better long term.

Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20211021155843.1969401-2-mcgrof@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_loader/builtin/Makefile |   6 +-
 drivers/base/firmware_loader/builtin/main.c   | 100 ++++++++++++++++++++++++++
 drivers/base/firmware_loader/firmware.h       |  17 +++++
 drivers/base/firmware_loader/main.c           |  78 +-------------------
 include/linux/firmware.h                      |  15 ++++
 5 files changed, 137 insertions(+), 79 deletions(-)
 create mode 100644 drivers/base/firmware_loader/builtin/main.c

(limited to 'include/linux')

diff --git a/drivers/base/firmware_loader/builtin/Makefile b/drivers/base/firmware_loader/builtin/Makefile
index 101754ad48d9..eb4be452062a 100644
--- a/drivers/base/firmware_loader/builtin/Makefile
+++ b/drivers/base/firmware_loader/builtin/Makefile
@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-y  += main.o
 
 # Create $(fwdir) from $(CONFIG_EXTRA_FIRMWARE_DIR) -- if it doesn't have a
 # leading /, it's relative to $(srctree).
 fwdir := $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE_DIR))
 fwdir := $(addprefix $(srctree)/,$(filter-out /%,$(fwdir)))$(filter /%,$(fwdir))
 
-obj-y  := $(addsuffix .gen.o, $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE)))
+firmware  := $(addsuffix .gen.o, $(subst $(quote),,$(CONFIG_EXTRA_FIRMWARE)))
+obj-y += $(firmware)
 
 FWNAME    = $(patsubst $(obj)/%.gen.S,%,$@)
 FWSTR     = $(subst $(comma),_,$(subst /,_,$(subst .,_,$(subst -,_,$(FWNAME)))))
@@ -34,7 +36,7 @@ $(obj)/%.gen.S: FORCE
 	$(call filechk,fwbin)
 
 # The .o files depend on the binaries directly; the .S files don't.
-$(addprefix $(obj)/, $(obj-y)): $(obj)/%.gen.o: $(fwdir)/%
+$(addprefix $(obj)/, $(firmware)): $(obj)/%.gen.o: $(fwdir)/%
 
 targets := $(patsubst $(obj)/%,%, \
                                 $(shell find $(obj) -name \*.gen.S 2>/dev/null))
diff --git a/drivers/base/firmware_loader/builtin/main.c b/drivers/base/firmware_loader/builtin/main.c
new file mode 100644
index 000000000000..d85626b2fdf5
--- /dev/null
+++ b/drivers/base/firmware_loader/builtin/main.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Builtin firmware support */
+
+#include <linux/firmware.h>
+#include "../firmware.h"
+
+/* Only if FW_LOADER=y */
+#ifdef CONFIG_FW_LOADER
+
+extern struct builtin_fw __start_builtin_fw[];
+extern struct builtin_fw __end_builtin_fw[];
+
+static bool fw_copy_to_prealloc_buf(struct firmware *fw,
+				    void *buf, size_t size)
+{
+	if (!buf)
+		return true;
+	if (size < fw->size)
+		return false;
+	memcpy(buf, fw->data, fw->size);
+	return true;
+}
+
+/**
+ * firmware_request_builtin() - load builtin firmware
+ * @fw: pointer to firmware struct
+ * @name: name of firmware file
+ *
+ * Some use cases in the kernel have a requirement so that no memory allocator
+ * is involved as these calls take place early in boot process. An example is
+ * the x86 CPU microcode loader. In these cases all the caller wants is to see
+ * if the firmware was built-in and if so use it right away. This can be used
+ * for such cases.
+ *
+ * This looks for the firmware in the built-in kernel. Only if the kernel was
+ * built-in with the firmware you are looking for will this return successfully.
+ *
+ * Callers of this API do not need to use release_firmware() as the pointer to
+ * the firmware is expected to be provided locally on the stack of the caller.
+ **/
+bool firmware_request_builtin(struct firmware *fw, const char *name)
+{
+	struct builtin_fw *b_fw;
+
+	if (!fw)
+		return false;
+
+	for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) {
+		if (strcmp(name, b_fw->name) == 0) {
+			fw->size = b_fw->size;
+			fw->data = b_fw->data;
+			return true;
+		}
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_NS_GPL(firmware_request_builtin, TEST_FIRMWARE);
+
+/**
+ * firmware_request_builtin_buf() - load builtin firmware into optional buffer
+ * @fw: pointer to firmware struct
+ * @name: name of firmware file
+ * @buf: If set this lets you use a pre-allocated buffer so that the built-in
+ *	firmware into is copied into. This field can be NULL. It is used by
+ *	callers such as request_firmware_into_buf() and
+ *	request_partial_firmware_into_buf()
+ * @size: if buf was provided, the max size of the allocated buffer available.
+ *	If the built-in firmware does not fit into the pre-allocated @buf this
+ *	call will fail.
+ *
+ * This looks for the firmware in the built-in kernel. Only if the kernel was
+ * built-in with the firmware you are looking for will this call possibly
+ * succeed. If you passed a @buf the firmware will be copied into it *iff* the
+ * built-in firmware fits into the pre-allocated buffer size specified in
+ * @size.
+ *
+ * This caller is to be used internally by the firmware_loader only.
+ **/
+bool firmware_request_builtin_buf(struct firmware *fw, const char *name,
+				  void *buf, size_t size)
+{
+	if (!firmware_request_builtin(fw, name))
+		return false;
+
+	return fw_copy_to_prealloc_buf(fw, buf, size);
+}
+
+bool firmware_is_builtin(const struct firmware *fw)
+{
+	struct builtin_fw *b_fw;
+
+	for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++)
+		if (fw->data == b_fw->data)
+			return true;
+
+	return false;
+}
+
+#endif
diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h
index a3014e9e2c85..2889f446ad41 100644
--- a/drivers/base/firmware_loader/firmware.h
+++ b/drivers/base/firmware_loader/firmware.h
@@ -151,6 +151,23 @@ static inline void fw_state_done(struct fw_priv *fw_priv)
 
 int assign_fw(struct firmware *fw, struct device *device);
 
+#ifdef CONFIG_FW_LOADER
+bool firmware_is_builtin(const struct firmware *fw);
+bool firmware_request_builtin_buf(struct firmware *fw, const char *name,
+				  void *buf, size_t size);
+#else /* module case */
+static inline bool firmware_is_builtin(const struct firmware *fw)
+{
+	return false;
+}
+static inline bool firmware_request_builtin_buf(struct firmware *fw,
+						const char *name,
+						void *buf, size_t size)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_FW_LOADER_PAGED_BUF
 void fw_free_paged_buf(struct fw_priv *fw_priv);
 int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed);
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index d95b5fe5f700..94d1789a233e 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -93,82 +93,6 @@ DEFINE_MUTEX(fw_lock);
 
 static struct firmware_cache fw_cache;
 
-/* Builtin firmware support */
-
-#ifdef CONFIG_FW_LOADER
-
-extern struct builtin_fw __start_builtin_fw[];
-extern struct builtin_fw __end_builtin_fw[];
-
-static bool fw_copy_to_prealloc_buf(struct firmware *fw,
-				    void *buf, size_t size)
-{
-	if (!buf)
-		return true;
-	if (size < fw->size)
-		return false;
-	memcpy(buf, fw->data, fw->size);
-	return true;
-}
-
-static bool firmware_request_builtin(struct firmware *fw, const char *name)
-{
-	struct builtin_fw *b_fw;
-
-	if (!fw)
-		return false;
-
-	for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++) {
-		if (strcmp(name, b_fw->name) == 0) {
-			fw->size = b_fw->size;
-			fw->data = b_fw->data;
-			return true;
-		}
-	}
-
-	return false;
-}
-
-static bool firmware_request_builtin_buf(struct firmware *fw, const char *name,
-					 void *buf, size_t size)
-{
-	if (!firmware_request_builtin(fw, name))
-		return false;
-	return fw_copy_to_prealloc_buf(fw, buf, size);
-}
-
-static bool fw_is_builtin_firmware(const struct firmware *fw)
-{
-	struct builtin_fw *b_fw;
-
-	for (b_fw = __start_builtin_fw; b_fw != __end_builtin_fw; b_fw++)
-		if (fw->data == b_fw->data)
-			return true;
-
-	return false;
-}
-
-#else /* Module case - no builtin firmware support */
-
-static inline bool firmware_request_builtin(struct firmware *fw,
-					    const char *name)
-{
-	return false;
-}
-
-static inline bool firmware_request_builtin_buf(struct firmware *fw,
-						const char *name, void *buf,
-						size_t size)
-{
-	return false;
-}
-
-static inline bool fw_is_builtin_firmware(const struct firmware *fw)
-{
-	return false;
-}
-#endif
-
 static void fw_state_init(struct fw_priv *fw_priv)
 {
 	struct fw_state *fw_st = &fw_priv->fw_st;
@@ -1068,7 +992,7 @@ EXPORT_SYMBOL(request_partial_firmware_into_buf);
 void release_firmware(const struct firmware *fw)
 {
 	if (fw) {
-		if (!fw_is_builtin_firmware(fw))
+		if (!firmware_is_builtin(fw))
 			firmware_free_data(fw);
 		kfree(fw);
 	}
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index 25109192cebe..d743a8d1c2fe 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -20,12 +20,19 @@ struct firmware {
 struct module;
 struct device;
 
+/*
+ * Built-in firmware functionality is only available if FW_LOADER=y, but not
+ * FW_LOADER=m
+ */
+#ifdef CONFIG_FW_LOADER
 struct builtin_fw {
 	char *name;
 	void *data;
 	unsigned long size;
 };
 
+bool firmware_request_builtin(struct firmware *fw, const char *name);
+
 /* We have to play tricks here much like stringify() to get the
    __COUNTER__ macro to be expanded as we want it */
 #define __fw_concat1(x, y) x##y
@@ -38,6 +45,14 @@ struct builtin_fw {
 	static const struct builtin_fw __fw_concat(__builtin_fw,__COUNTER__) \
 	__used __section(".builtin_fw") = { name, blob, size }
 
+#else
+static inline bool firmware_request_builtin(struct firmware *fw,
+					    const char *name)
+{
+	return false;
+}
+#endif
+
 #if defined(CONFIG_FW_LOADER) || (defined(CONFIG_FW_LOADER_MODULE) && defined(MODULE))
 int request_firmware(const struct firmware **fw, const char *name,
 		     struct device *device);
-- 
cgit v1.2.3


From e520ecf4546fdaa7169ba75a35d24e2c53403a6e Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Thu, 21 Oct 2021 08:58:35 -0700
Subject: firmware_loader: remove old DECLARE_BUILTIN_FIRMWARE()

This was never used upstream. Time to get rid of it. We
don't carry around unused baggage.

Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20211021155843.1969401-3-mcgrof@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/firmware.h | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index d743a8d1c2fe..34e8d5844fa0 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -32,19 +32,6 @@ struct builtin_fw {
 };
 
 bool firmware_request_builtin(struct firmware *fw, const char *name);
-
-/* We have to play tricks here much like stringify() to get the
-   __COUNTER__ macro to be expanded as we want it */
-#define __fw_concat1(x, y) x##y
-#define __fw_concat(x, y) __fw_concat1(x, y)
-
-#define DECLARE_BUILTIN_FIRMWARE(name, blob)				     \
-	DECLARE_BUILTIN_FIRMWARE_SIZE(name, &(blob), sizeof(blob))
-
-#define DECLARE_BUILTIN_FIRMWARE_SIZE(name, blob, size)			     \
-	static const struct builtin_fw __fw_concat(__builtin_fw,__COUNTER__) \
-	__used __section(".builtin_fw") = { name, blob, size }
-
 #else
 static inline bool firmware_request_builtin(struct firmware *fw,
 					    const char *name)
-- 
cgit v1.2.3


From e2e2c0f20f321b0ec36e8bde467259c0adf1fecb Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Thu, 21 Oct 2021 08:58:37 -0700
Subject: firmware_loader: move struct builtin_fw to the only place used

Now that x86 doesn't abuse picking at internals to the firmware
loader move out the built-in firmware struct to its only user.

Reviewed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20211021155843.1969401-5-mcgrof@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_loader/builtin/main.c | 6 ++++++
 include/linux/firmware.h                    | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/firmware_loader/builtin/main.c b/drivers/base/firmware_loader/builtin/main.c
index d85626b2fdf5..a065c3150897 100644
--- a/drivers/base/firmware_loader/builtin/main.c
+++ b/drivers/base/firmware_loader/builtin/main.c
@@ -7,6 +7,12 @@
 /* Only if FW_LOADER=y */
 #ifdef CONFIG_FW_LOADER
 
+struct builtin_fw {
+	char *name;
+	void *data;
+	unsigned long size;
+};
+
 extern struct builtin_fw __start_builtin_fw[];
 extern struct builtin_fw __end_builtin_fw[];
 
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index 34e8d5844fa0..3b057dfc8284 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -25,12 +25,6 @@ struct device;
  * FW_LOADER=m
  */
 #ifdef CONFIG_FW_LOADER
-struct builtin_fw {
-	char *name;
-	void *data;
-	unsigned long size;
-};
-
 bool firmware_request_builtin(struct firmware *fw, const char *name);
 #else
 static inline bool firmware_request_builtin(struct firmware *fw,
-- 
cgit v1.2.3


From 9208d414975895f69e9aca49153060ddd31b18d0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 21 Oct 2021 08:06:01 +0200
Subject: block: add a ->get_unique_id method

Add a method to query unique IDs from block devices.  It will be used to
remove code that deeply pokes into SCSI internals in the NFS server.
The implementation in the sd driver itself is also much nicer as it can
use the cached VPD page instead of always sending a command as the
current NFS code does.

For now the interface is kept very minimal but could be easily
extended when other users like a block-layer sysfs interface for
uniquue IDs shows up.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20211021060607.264371-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f72ccb2829db..0d5826066e16 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1158,6 +1158,14 @@ static inline void blk_crypto_unregister(struct request_queue *q) { }
 
 #endif /* CONFIG_BLK_INLINE_ENCRYPTION */
 
+enum blk_unique_id {
+	/* these match the Designator Types specified in SPC */
+	BLK_UID_T10	= 1,
+	BLK_UID_EUI64	= 2,
+	BLK_UID_NAA	= 3,
+};
+
+#define NFL4_UFLG_MASK			0x0000003F
 
 struct block_device_operations {
 	void (*submit_bio)(struct bio *bio);
@@ -1176,6 +1184,9 @@ struct block_device_operations {
 	int (*report_zones)(struct gendisk *, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
 	char *(*devnode)(struct gendisk *disk, umode_t *mode);
+	/* returns the length of the identifier or a negative errno: */
+	int (*get_unique_id)(struct gendisk *disk, u8 id[16],
+			enum blk_unique_id id_type);
 	struct module *owner;
 	const struct pr_ops *pr_ops;
 
-- 
cgit v1.2.3


From 4abafdc4360d993104c2b2f85943938a0c6ad025 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 21 Oct 2021 08:06:06 +0200
Subject: block: remove the initialize_rq_fn blk_mq_ops method

Entirely unused now.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20211021060607.264371-7-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c       | 9 +--------
 include/linux/blk-mq.h | 5 -----
 2 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index fd389a16013c..5ffe05b1d17c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -606,16 +606,9 @@ EXPORT_SYMBOL(blk_get_queue);
 struct request *blk_get_request(struct request_queue *q, unsigned int op,
 				blk_mq_req_flags_t flags)
 {
-	struct request *req;
-
 	WARN_ON_ONCE(op & REQ_NOWAIT);
 	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM));
-
-	req = blk_mq_alloc_request(q, op, flags);
-	if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
-		q->mq_ops->initialize_rq_fn(req);
-
-	return req;
+	return blk_mq_alloc_request(q, op, flags);
 }
 EXPORT_SYMBOL(blk_get_request);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b4039fdf1b04..ebc45cf0450b 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -566,11 +566,6 @@ struct blk_mq_ops {
 	void (*exit_request)(struct blk_mq_tag_set *set, struct request *,
 			     unsigned int);
 
-	/**
-	 * @initialize_rq_fn: Called from inside blk_get_request().
-	 */
-	void (*initialize_rq_fn)(struct request *rq);
-
 	/**
 	 * @cleanup_rq: Called before freeing one request which isn't completed
 	 * yet, and usually for freeing the driver private data.
-- 
cgit v1.2.3


From 4845012eb5b4e56cadb5f484cb55dd4fd9d1df80 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 21 Oct 2021 08:06:07 +0200
Subject: block: remove QUEUE_FLAG_SCSI_PASSTHROUGH

Export scsi_device_from_queue for use with pktcdvd and use that instead
of the otherwise unused QUEUE_FLAG_SCSI_PASSTHROUGH queue flag.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Link: https://lore.kernel.org/r/20211021060607.264371-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-debugfs.c   | 1 -
 drivers/block/pktcdvd.c  | 5 ++++-
 drivers/scsi/scsi_lib.c  | 8 ++++++++
 drivers/scsi/scsi_scan.c | 1 -
 include/linux/blkdev.h   | 3 ---
 5 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 68ca5d21cda7..a317f05de466 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -124,7 +124,6 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(STATS),
 	QUEUE_FLAG_NAME(POLL_STATS),
 	QUEUE_FLAG_NAME(REGISTERED),
-	QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
 	QUEUE_FLAG_NAME(QUIESCED),
 	QUEUE_FLAG_NAME(PCI_P2PDMA),
 	QUEUE_FLAG_NAME(ZONE_RESETALL),
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ea2262ec76d2..cacf64eedad8 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2536,6 +2536,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	int i;
 	char b[BDEVNAME_SIZE];
 	struct block_device *bdev;
+	struct scsi_device *sdev;
 
 	if (pd->pkt_dev == dev) {
 		pkt_err(pd, "recursive setup not allowed\n");
@@ -2559,10 +2560,12 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
 	bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL);
 	if (IS_ERR(bdev))
 		return PTR_ERR(bdev);
-	if (!blk_queue_scsi_passthrough(bdev_get_queue(bdev))) {
+	sdev = scsi_device_from_queue(bdev->bd_disk->queue);
+	if (!sdev) {
 		blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
 		return -EINVAL;
 	}
+	put_device(&sdev->sdev_gendev);
 
 	/* This is safe, since we have a reference from open(). */
 	__module_get(THIS_MODULE);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a0f801fc8943..9823b65d1536 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1967,6 +1967,14 @@ struct scsi_device *scsi_device_from_queue(struct request_queue *q)
 
 	return sdev;
 }
+/*
+ * pktcdvd should have been integrated into the SCSI layers, but for historical
+ * reasons like the old IDE driver it isn't.  This export allows it to safely
+ * probe if a given device is a SCSI one and only attach to that.
+ */
+#ifdef CONFIG_CDROM_PKTCDVD_MODULE
+EXPORT_SYMBOL_GPL(scsi_device_from_queue);
+#endif
 
 /**
  * scsi_block_requests - Utility function used by low-level drivers to prevent
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index fe22191522a3..2808c0cb5711 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -280,7 +280,6 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	sdev->request_queue = q;
 	q->queuedata = sdev;
 	__scsi_init_queue(sdev->host, q);
-	blk_queue_flag_set(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
 	WARN_ON_ONCE(!blk_get_queue(q));
 
 	depth = sdev->host->cmd_per_lun ?: 1;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0d5826066e16..1ad30f85d30e 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -357,7 +357,6 @@ struct request_queue {
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
 #define QUEUE_FLAG_POLL_STATS	21	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
-#define QUEUE_FLAG_SCSI_PASSTHROUGH 23	/* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
 #define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
 #define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
@@ -391,8 +390,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
 #define blk_queue_secure_erase(q) \
 	(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
-#define blk_queue_scsi_passthrough(q)	\
-	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 #define blk_queue_pci_p2pdma(q)	\
 	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME
-- 
cgit v1.2.3


From 70164eb6ccb76ab679b016b4b60123bf4ec6c162 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Oct 2021 08:25:25 +0200
Subject: block: remove __sync_blockdev

Instead offer a new sync_blockdev_nowait helper for the !wait case.
This new helper is exported as it will grow modular callers in a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211019062530.2174626-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bdev.c           | 11 ++++++-----
 fs/internal.h          |  5 -----
 fs/sync.c              |  7 ++++---
 include/linux/blkdev.h |  5 +++++
 4 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/block/bdev.c b/block/bdev.c
index 7e6156203a71..9a33c414f450 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -185,14 +185,13 @@ int sb_min_blocksize(struct super_block *sb, int size)
 
 EXPORT_SYMBOL(sb_min_blocksize);
 
-int __sync_blockdev(struct block_device *bdev, int wait)
+int sync_blockdev_nowait(struct block_device *bdev)
 {
 	if (!bdev)
 		return 0;
-	if (!wait)
-		return filemap_flush(bdev->bd_inode->i_mapping);
-	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+	return filemap_flush(bdev->bd_inode->i_mapping);
 }
+EXPORT_SYMBOL_GPL(sync_blockdev_nowait);
 
 /*
  * Write out and wait upon all the dirty data associated with a block
@@ -200,7 +199,9 @@ int __sync_blockdev(struct block_device *bdev, int wait)
  */
 int sync_blockdev(struct block_device *bdev)
 {
-	return __sync_blockdev(bdev, 1);
+	if (!bdev)
+		return 0;
+	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
 }
 EXPORT_SYMBOL(sync_blockdev);
 
diff --git a/fs/internal.h b/fs/internal.h
index 3cd065c8a66b..b5caa16f4645 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -23,7 +23,6 @@ struct pipe_inode_info;
 #ifdef CONFIG_BLOCK
 extern void __init bdev_cache_init(void);
 
-extern int __sync_blockdev(struct block_device *bdev, int wait);
 void iterate_bdevs(void (*)(struct block_device *, void *), void *);
 void emergency_thaw_bdev(struct super_block *sb);
 #else
@@ -31,10 +30,6 @@ static inline void bdev_cache_init(void)
 {
 }
 
-static inline int __sync_blockdev(struct block_device *bdev, int wait)
-{
-	return 0;
-}
 static inline void iterate_bdevs(void (*f)(struct block_device *, void *),
 		void *arg)
 {
diff --git a/fs/sync.c b/fs/sync.c
index 0d6cdc507cb9..a621089eb07e 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -3,6 +3,7 @@
  * High-level sync()-related operations
  */
 
+#include <linux/blkdev.h>
 #include <linux/kernel.h>
 #include <linux/file.h>
 #include <linux/fs.h>
@@ -45,7 +46,7 @@ int sync_filesystem(struct super_block *sb)
 	/*
 	 * Do the filesystem syncing work.  For simple filesystems
 	 * writeback_inodes_sb(sb) just dirties buffers with inodes so we have
-	 * to submit I/O for these buffers via __sync_blockdev().  This also
+	 * to submit I/O for these buffers via sync_blockdev().  This also
 	 * speeds up the wait == 1 case since in that case write_inode()
 	 * methods call sync_dirty_buffer() and thus effectively write one block
 	 * at a time.
@@ -53,14 +54,14 @@ int sync_filesystem(struct super_block *sb)
 	writeback_inodes_sb(sb, WB_REASON_SYNC);
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, 0);
-	ret = __sync_blockdev(sb->s_bdev, 0);
+	ret = sync_blockdev_nowait(sb->s_bdev);
 	if (ret < 0)
 		return ret;
 
 	sync_inodes_sb(sb);
 	if (sb->s_op->sync_fs)
 		sb->s_op->sync_fs(sb, 1);
-	return __sync_blockdev(sb->s_bdev, 1);
+	return sync_blockdev(sb->s_bdev);
 }
 EXPORT_SYMBOL(sync_filesystem);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f72ccb2829db..4b5a6bbdacd0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1266,6 +1266,7 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
 #ifdef CONFIG_BLOCK
 void invalidate_bdev(struct block_device *bdev);
 int sync_blockdev(struct block_device *bdev);
+int sync_blockdev_nowait(struct block_device *bdev);
 #else
 static inline void invalidate_bdev(struct block_device *bdev)
 {
@@ -1274,6 +1275,10 @@ static inline int sync_blockdev(struct block_device *bdev)
 {
 	return 0;
 }
+static inline int sync_blockdev_nowait(struct block_device *bdev)
+{
+	return 0;
+}
 #endif
 int fsync_bdev(struct block_device *bdev);
 
-- 
cgit v1.2.3


From 1e03a36bdff4709c1bbf0f57f60ae3f776d51adf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 Oct 2021 08:25:30 +0200
Subject: block: simplify the block device syncing code

Get rid of the indirections and just provide a sync_bdevs
helper for the generic sync code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211019062530.2174626-8-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bdev.c           | 17 ++++++++++++++---
 fs/internal.h          |  6 ------
 fs/sync.c              | 23 ++++-------------------
 include/linux/blkdev.h |  4 ++++
 4 files changed, 22 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/block/bdev.c b/block/bdev.c
index 9a33c414f450..b4dab2fb6a74 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1021,7 +1021,7 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
 }
 EXPORT_SYMBOL(__invalidate_device);
 
-void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+void sync_bdevs(bool wait)
 {
 	struct inode *inode, *old_inode = NULL;
 
@@ -1052,8 +1052,19 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 		bdev = I_BDEV(inode);
 
 		mutex_lock(&bdev->bd_disk->open_mutex);
-		if (bdev->bd_openers)
-			func(bdev, arg);
+		if (!bdev->bd_openers) {
+			; /* skip */
+		} else if (wait) {
+			/*
+			 * We keep the error status of individual mapping so
+			 * that applications can catch the writeback error using
+			 * fsync(2). See filemap_fdatawait_keep_errors() for
+			 * details.
+			 */
+			filemap_fdatawait_keep_errors(inode->i_mapping);
+		} else {
+			filemap_fdatawrite(inode->i_mapping);
+		}
 		mutex_unlock(&bdev->bd_disk->open_mutex);
 
 		spin_lock(&blockdev_superblock->s_inode_list_lock);
diff --git a/fs/internal.h b/fs/internal.h
index b5caa16f4645..cdd83d4899bb 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -23,17 +23,11 @@ struct pipe_inode_info;
 #ifdef CONFIG_BLOCK
 extern void __init bdev_cache_init(void);
 
-void iterate_bdevs(void (*)(struct block_device *, void *), void *);
 void emergency_thaw_bdev(struct super_block *sb);
 #else
 static inline void bdev_cache_init(void)
 {
 }
-
-static inline void iterate_bdevs(void (*f)(struct block_device *, void *),
-		void *arg)
-{
-}
 static inline int emergency_thaw_bdev(struct super_block *sb)
 {
 	return 0;
diff --git a/fs/sync.c b/fs/sync.c
index a621089eb07e..3ce8e2137f31 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -78,21 +78,6 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg)
 		sb->s_op->sync_fs(sb, *(int *)arg);
 }
 
-static void fdatawrite_one_bdev(struct block_device *bdev, void *arg)
-{
-	filemap_fdatawrite(bdev->bd_inode->i_mapping);
-}
-
-static void fdatawait_one_bdev(struct block_device *bdev, void *arg)
-{
-	/*
-	 * We keep the error status of individual mapping so that
-	 * applications can catch the writeback error using fsync(2).
-	 * See filemap_fdatawait_keep_errors() for details.
-	 */
-	filemap_fdatawait_keep_errors(bdev->bd_inode->i_mapping);
-}
-
 /*
  * Sync everything. We start by waking flusher threads so that most of
  * writeback runs on all devices in parallel. Then we sync all inodes reliably
@@ -111,8 +96,8 @@ void ksys_sync(void)
 	iterate_supers(sync_inodes_one_sb, NULL);
 	iterate_supers(sync_fs_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &wait);
-	iterate_bdevs(fdatawrite_one_bdev, NULL);
-	iterate_bdevs(fdatawait_one_bdev, NULL);
+	sync_bdevs(false);
+	sync_bdevs(true);
 	if (unlikely(laptop_mode))
 		laptop_sync_completion();
 }
@@ -133,10 +118,10 @@ static void do_sync_work(struct work_struct *work)
 	 */
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(fdatawrite_one_bdev, NULL);
+	sync_bdevs(false);
 	iterate_supers(sync_inodes_one_sb, &nowait);
 	iterate_supers(sync_fs_one_sb, &nowait);
-	iterate_bdevs(fdatawrite_one_bdev, NULL);
+	sync_bdevs(false);
 	printk("Emergency Sync complete\n");
 	kfree(work);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4b5a6bbdacd0..09fdf7018b7f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1267,6 +1267,7 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart,
 void invalidate_bdev(struct block_device *bdev);
 int sync_blockdev(struct block_device *bdev);
 int sync_blockdev_nowait(struct block_device *bdev);
+void sync_bdevs(bool wait);
 #else
 static inline void invalidate_bdev(struct block_device *bdev)
 {
@@ -1279,6 +1280,9 @@ static inline int sync_blockdev_nowait(struct block_device *bdev)
 {
 	return 0;
 }
+static inline void sync_bdevs(bool wait)
+{
+}
 #endif
 int fsync_bdev(struct block_device *bdev);
 
-- 
cgit v1.2.3


From fadb7ff1a6c2c565af56b4aacdd086b067eed440 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Thu, 14 Oct 2021 15:25:53 +0100
Subject: bpf: Prevent increasing bpf_jit_limit above max

Restrict bpf_jit_limit to the maximum supported by the arch's JIT.

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211014142554.53120-4-lmb@cloudflare.com
---
 include/linux/filter.h     | 1 +
 kernel/bpf/core.c          | 4 +++-
 net/core/sysctl_net_core.c | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 4a93c12543ee..ef03ff34234d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1051,6 +1051,7 @@ extern int bpf_jit_enable;
 extern int bpf_jit_harden;
 extern int bpf_jit_kallsyms;
 extern long bpf_jit_limit;
+extern long bpf_jit_limit_max;
 
 typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index d6b7dfdd8066..c1e7eb3f1876 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -524,6 +524,7 @@ int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 int bpf_jit_harden   __read_mostly;
 long bpf_jit_limit   __read_mostly;
+long bpf_jit_limit_max __read_mostly;
 
 static void
 bpf_prog_ksym_set_addr(struct bpf_prog *prog)
@@ -817,7 +818,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void)
 static int __init bpf_jit_charge_init(void)
 {
 	/* Only used as heuristic here to derive limit. */
-	bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2,
+	bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
+	bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2,
 					    PAGE_SIZE), LONG_MAX);
 	return 0;
 }
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c8496c1142c9..5f88526ad61c 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0600,
 		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
 		.extra1		= &long_one,
-		.extra2		= &long_max,
+		.extra2		= &bpf_jit_limit_max,
 	},
 #endif
 	{
-- 
cgit v1.2.3


From 599593a82fc57f5e9453c8ef7420df3206934a0c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 22 Oct 2021 19:35:45 -0600
Subject: sched: make task_struct->plug always defined
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_BLOCK isn't set, then it's an empty struct anyway. Just make
it generally available, so we don't break the compile:

kernel/sched/core.c: In function ‘sched_submit_work’:
kernel/sched/core.c:6346:35: error: ‘struct task_struct’ has no member named ‘plug’
 6346 |                 blk_flush_plug(tsk->plug, true);
      |                                   ^~
kernel/sched/core.c: In function ‘io_schedule_prepare’:
kernel/sched/core.c:8357:20: error: ‘struct task_struct’ has no member named ‘plug’
 8357 |         if (current->plug)
      |                    ^~
kernel/sched/core.c:8358:39: error: ‘struct task_struct’ has no member named ‘plug’
 8358 |                 blk_flush_plug(current->plug, true);
      |                                       ^~

Reported-by: Nathan Chancellor <nathan@kernel.org>
Fixes: 008f75a20e70 ("block: cleanup the flush plug helpers")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/sched.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1a927ddec64..e0454e60fe8f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1160,10 +1160,8 @@ struct task_struct {
 	/* Stacked block device info: */
 	struct bio_list			*bio_list;
 
-#ifdef CONFIG_BLOCK
 	/* Stack plugging: */
 	struct blk_plug			*plug;
-#endif
 
 	/* VM state: */
 	struct reclaim_state		*reclaim_state;
-- 
cgit v1.2.3


From 0ebecb2644c834d8a69f07c5d44a130835950171 Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@seco.com>
Date: Fri, 22 Oct 2021 11:59:12 -0400
Subject: net: mdio: Add helper functions for accessing MDIO devices

This adds some helpers for accessing non-phy MDIO devices. They are
analogous to phy_(read|write|modify), except that they take an mdio_device
and not a phy_device.

Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mdio.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index f622888a4ba8..9f3587a61e14 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -352,6 +352,30 @@ int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask,
 int mdiobus_modify_changed(struct mii_bus *bus, int addr, u32 regnum,
 			   u16 mask, u16 set);
 
+static inline int mdiodev_read(struct mdio_device *mdiodev, u32 regnum)
+{
+	return mdiobus_read(mdiodev->bus, mdiodev->addr, regnum);
+}
+
+static inline int mdiodev_write(struct mdio_device *mdiodev, u32 regnum,
+				u16 val)
+{
+	return mdiobus_write(mdiodev->bus, mdiodev->addr, regnum, val);
+}
+
+static inline int mdiodev_modify(struct mdio_device *mdiodev, u32 regnum,
+				 u16 mask, u16 set)
+{
+	return mdiobus_modify(mdiodev->bus, mdiodev->addr, regnum, mask, set);
+}
+
+static inline int mdiodev_modify_changed(struct mdio_device *mdiodev,
+					 u32 regnum, u16 mask, u16 set)
+{
+	return mdiobus_modify_changed(mdiodev->bus, mdiodev->addr, regnum,
+				      mask, set);
+}
+
 static inline u32 mdiobus_c45_addr(int devad, u16 regnum)
 {
 	return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum;
-- 
cgit v1.2.3


From 218f23e8a96fea7185dac68ceb3722d0831a8bcb Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 22 Oct 2021 09:17:01 -0700
Subject: net: phy: bcm7xxx: Add EPHY entry for 7712

7712 is a 16nm process SoC with a 10/100 integrated Ethernet PHY,
utilize the recently defined 16nm EPHY macro to configure that PHY.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm7xxx.c | 2 ++
 include/linux/brcmphy.h   | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c
index 6ceadd2a0082..75593e7d1118 100644
--- a/drivers/net/phy/bcm7xxx.c
+++ b/drivers/net/phy/bcm7xxx.c
@@ -936,6 +936,7 @@ static struct phy_driver bcm7xxx_driver[] = {
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7429, "Broadcom BCM7429"),
 	BCM7XXX_40NM_EPHY(PHY_ID_BCM7435, "Broadcom BCM7435"),
+	BCM7XXX_16NM_EPHY(PHY_ID_BCM7712, "Broadcom BCM7712"),
 };
 
 static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
@@ -958,6 +959,7 @@ static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = {
 	{ PHY_ID_BCM7439, 0xfffffff0, },
 	{ PHY_ID_BCM7435, 0xfffffff0, },
 	{ PHY_ID_BCM7445, 0xfffffff0, },
+	{ PHY_ID_BCM7712, 0xfffffff0, },
 	{ }
 };
 
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 27d9b6683f0e..747fad264033 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -50,6 +50,7 @@
 #define PHY_ID_BCM7439			0x600d8480
 #define PHY_ID_BCM7439_2		0xae025080
 #define PHY_ID_BCM7445			0x600d8510
+#define PHY_ID_BCM7712			0x35905330
 
 #define PHY_ID_BCM_CYGNUS		0xae025200
 #define PHY_ID_BCM_OMEGA		0xae025100
-- 
cgit v1.2.3


From 97308f8b0d867e9ef59528cd97f0db55ffdf5651 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 23 Jul 2021 01:59:41 +0200
Subject: iomap: Support partial direct I/O on user copy failures

In iomap_dio_rw, when iomap_apply returns an -EFAULT error and the
IOMAP_DIO_PARTIAL flag is set, complete the request synchronously and
return a partial result.  This allows the caller to deal with the page
fault and retry the remainder of the request.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/direct-io.c  | 6 ++++++
 include/linux/iomap.h | 7 +++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index a2a368e824c0..a434fb7887b2 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -581,6 +581,12 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (iov_iter_rw(iter) == READ && iomi.pos >= dio->i_size)
 		iov_iter_revert(iter, iomi.pos - dio->i_size);
 
+	if (ret == -EFAULT && dio->size && (dio_flags & IOMAP_DIO_PARTIAL)) {
+		if (!(iocb->ki_flags & IOCB_NOWAIT))
+			wait_for_completion = true;
+		ret = 0;
+	}
+
 	/* magic error code to fall back to buffered I/O */
 	if (ret == -ENOTBLK) {
 		wait_for_completion = true;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 24f8489583ca..2a213b0d1e1f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -330,6 +330,13 @@ struct iomap_dio_ops {
   */
 #define IOMAP_DIO_OVERWRITE_ONLY	(1 << 1)
 
+/*
+ * When a page fault occurs, return a partial synchronous result and allow
+ * the caller to retry the rest of the operation after dealing with the page
+ * fault.
+ */
+#define IOMAP_DIO_PARTIAL		(1 << 2)
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		unsigned int dio_flags);
-- 
cgit v1.2.3


From 4fdccaa0d184c202f98d73b24e3ec8eeee88ab8d Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sat, 24 Jul 2021 12:26:41 +0200
Subject: iomap: Add done_before argument to iomap_dio_rw

Add a done_before argument to iomap_dio_rw that indicates how much of
the request has already been transferred.  When the request succeeds, we
report that done_before additional bytes were tranferred.  This is
useful for finishing a request asynchronously when part of the request
has already been completed synchronously.

We'll use that to allow iomap_dio_rw to be used with page faults
disabled: when a page fault occurs while submitting a request, we
synchronously complete the part of the request that has already been
submitted.  The caller can then take care of the page fault and call
iomap_dio_rw again for the rest of the request, passing in the number of
bytes already tranferred.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/btrfs/file.c       |  5 +++--
 fs/erofs/data.c       |  2 +-
 fs/ext4/file.c        |  5 +++--
 fs/gfs2/file.c        |  4 ++--
 fs/iomap/direct-io.c  | 19 ++++++++++++++++---
 fs/xfs/xfs_file.c     |  6 +++---
 fs/zonefs/super.c     |  4 ++--
 include/linux/iomap.h |  4 ++--
 8 files changed, 32 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f37211d3bb69..9d41b28c67ba 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1957,7 +1957,7 @@ relock:
 	}
 
 	dio = __iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
-			     0);
+			     0, 0);
 
 	btrfs_inode_unlock(inode, ilock_flags);
 
@@ -3658,7 +3658,8 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
 		return 0;
 
 	btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
-	ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops, 0);
+	ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dio_ops,
+			   0, 0);
 	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
 	return ret;
 }
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 9db829715652..16a41d0db55a 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -287,7 +287,7 @@ static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 
 		if (!err)
 			return iomap_dio_rw(iocb, to, &erofs_iomap_ops,
-					    NULL, 0);
+					    NULL, 0, 0);
 		if (err < 0)
 			return err;
 	}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac0e11bbb445..b25c1f8f7c4f 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -74,7 +74,7 @@ static ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		return generic_file_read_iter(iocb, to);
 	}
 
-	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0);
+	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 0, 0);
 	inode_unlock_shared(inode);
 
 	file_accessed(iocb->ki_filp);
@@ -566,7 +566,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (ilock_shared)
 		iomap_ops = &ext4_iomap_overwrite_ops;
 	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
-			   (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0);
+			   (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
+			   0);
 	if (ret == -ENOTBLK)
 		ret = 0;
 
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d9126e3e6dd6..f772ee0fcae3 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -822,7 +822,7 @@ static ssize_t gfs2_file_direct_read(struct kiocb *iocb, struct iov_iter *to,
 	if (ret)
 		goto out_uninit;
 
-	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0);
+	ret = iomap_dio_rw(iocb, to, &gfs2_iomap_ops, NULL, 0, 0);
 	gfs2_glock_dq(gh);
 out_uninit:
 	gfs2_holder_uninit(gh);
@@ -856,7 +856,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from,
 	if (offset + len > i_size_read(&ip->i_inode))
 		goto out;
 
-	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0);
+	ret = iomap_dio_rw(iocb, from, &gfs2_iomap_ops, NULL, 0, 0);
 	if (ret == -ENOTBLK)
 		ret = 0;
 out:
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index a434fb7887b2..468dcbba45bc 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -31,6 +31,7 @@ struct iomap_dio {
 	atomic_t		ref;
 	unsigned		flags;
 	int			error;
+	size_t			done_before;
 	bool			wait_for_completion;
 
 	union {
@@ -124,6 +125,9 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
 		ret = generic_write_sync(iocb, ret);
 
+	if (ret > 0)
+		ret += dio->done_before;
+
 	kfree(dio);
 
 	return ret;
@@ -450,13 +454,21 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter,
  * may be pure data writes. In that case, we still need to do a full data sync
  * completion.
  *
+ * When page faults are disabled and @dio_flags includes IOMAP_DIO_PARTIAL,
+ * __iomap_dio_rw can return a partial result if it encounters a non-resident
+ * page in @iter after preparing a transfer.  In that case, the non-resident
+ * pages can be faulted in and the request resumed with @done_before set to the
+ * number of bytes previously transferred.  The request will then complete with
+ * the correct total number of bytes transferred; this is essential for
+ * completing partial requests asynchronously.
+ *
  * Returns -ENOTBLK In case of a page invalidation invalidation failure for
  * writes.  The callers needs to fall back to buffered I/O in this case.
  */
 struct iomap_dio *
 __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags)
+		unsigned int dio_flags, size_t done_before)
 {
 	struct address_space *mapping = iocb->ki_filp->f_mapping;
 	struct inode *inode = file_inode(iocb->ki_filp);
@@ -486,6 +498,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->dops = dops;
 	dio->error = 0;
 	dio->flags = 0;
+	dio->done_before = done_before;
 
 	dio->submit.iter = iter;
 	dio->submit.waiter = current;
@@ -652,11 +665,11 @@ EXPORT_SYMBOL_GPL(__iomap_dio_rw);
 ssize_t
 iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags)
+		unsigned int dio_flags, size_t done_before)
 {
 	struct iomap_dio *dio;
 
-	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags);
+	dio = __iomap_dio_rw(iocb, iter, ops, dops, dio_flags, done_before);
 	if (IS_ERR_OR_NULL(dio))
 		return PTR_ERR_OR_ZERO(dio);
 	return iomap_dio_complete(dio);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 7aa943edfc02..240eb932c014 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -259,7 +259,7 @@ xfs_file_dio_read(
 	ret = xfs_ilock_iocb(iocb, XFS_IOLOCK_SHARED);
 	if (ret)
 		return ret;
-	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0);
+	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL, 0, 0);
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
 	return ret;
@@ -569,7 +569,7 @@ xfs_file_dio_write_aligned(
 	}
 	trace_xfs_file_direct_write(iocb, from);
 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-			   &xfs_dio_write_ops, 0);
+			   &xfs_dio_write_ops, 0, 0);
 out_unlock:
 	if (iolock)
 		xfs_iunlock(ip, iolock);
@@ -647,7 +647,7 @@ retry_exclusive:
 
 	trace_xfs_file_direct_write(iocb, from);
 	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
-			   &xfs_dio_write_ops, flags);
+			   &xfs_dio_write_ops, flags, 0);
 
 	/*
 	 * Retry unaligned I/O with exclusive blocking semantics if the DIO
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index ddc346a9df9b..6122c38ab44d 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -852,7 +852,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
 		ret = zonefs_file_dio_append(iocb, from);
 	else
 		ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
-				   &zonefs_write_dio_ops, 0);
+				   &zonefs_write_dio_ops, 0, 0);
 	if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
 	    (ret > 0 || ret == -EIOCBQUEUED)) {
 		if (ret > 0)
@@ -987,7 +987,7 @@ static ssize_t zonefs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		}
 		file_accessed(iocb->ki_filp);
 		ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops,
-				   &zonefs_read_dio_ops, 0);
+				   &zonefs_read_dio_ops, 0, 0);
 	} else {
 		ret = generic_file_read_iter(iocb, to);
 		if (ret == -EIO)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 2a213b0d1e1f..829f2325ecba 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -339,10 +339,10 @@ struct iomap_dio_ops {
 
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags);
+		unsigned int dio_flags, size_t done_before);
 struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
-		unsigned int dio_flags);
+		unsigned int dio_flags, size_t done_before);
 ssize_t iomap_dio_complete(struct iomap_dio *dio);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
-- 
cgit v1.2.3


From 55b8fe703bc51200d4698596c90813453b35ae63 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Tue, 17 Aug 2021 22:52:08 +0200
Subject: gup: Introduce FOLL_NOFAULT flag to disable page faults

Introduce a new FOLL_NOFAULT flag that causes get_user_pages to return
-EFAULT when it would otherwise trigger a page fault.  This is roughly
similar to FOLL_FAST_ONLY but available on all architectures, and less
fragile.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 include/linux/mm.h | 3 ++-
 mm/gup.c           | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..2f0e6b9f8f3b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2851,7 +2851,8 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 #define FOLL_NOWAIT	0x20	/* if a disk transfer is needed, start the IO
 				 * and return without waiting upon it */
-#define FOLL_POPULATE	0x40	/* fault in page */
+#define FOLL_POPULATE	0x40	/* fault in pages (with FOLL_MLOCK) */
+#define FOLL_NOFAULT	0x80	/* do not fault in pages */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 #define FOLL_NUMA	0x200	/* force NUMA hinting page fault */
 #define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
diff --git a/mm/gup.c b/mm/gup.c
index 795f15c410cc..e1c7e4bde11f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -918,6 +918,8 @@ static int faultin_page(struct vm_area_struct *vma,
 	/* mlock all present pages, but do not fault in new pages */
 	if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
 		return -ENOENT;
+	if (*flags & FOLL_NOFAULT)
+		return -EFAULT;
 	if (*flags & FOLL_WRITE)
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
@@ -2843,7 +2845,7 @@ static int internal_get_user_pages_fast(unsigned long start,
 
 	if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
 				       FOLL_FORCE | FOLL_PIN | FOLL_GET |
-				       FOLL_FAST_ONLY)))
+				       FOLL_FAST_ONLY | FOLL_NOFAULT)))
 		return -EINVAL;
 
 	if (gup_flags & FOLL_PIN)
-- 
cgit v1.2.3


From 3337ab08d08b1a375f88471d9c8b1cac968cb054 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 12 Jul 2021 12:06:14 +0200
Subject: iov_iter: Introduce nofault flag to disable page faults

Introduce a new nofault flag to indicate to iov_iter_get_pages not to
fault in user pages.

This is implemented by passing the FOLL_NOFAULT flag to get_user_pages,
which causes get_user_pages to fail when it would otherwise fault in a
page. We'll use the ->nofault flag to prevent iomap_dio_rw from faulting
in pages when page faults are not allowed.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 include/linux/uio.h |  1 +
 lib/iov_iter.c      | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 25d1c24fd829..6350354f97e9 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -35,6 +35,7 @@ struct iov_iter_state {
 
 struct iov_iter {
 	u8 iter_type;
+	bool nofault;
 	bool data_source;
 	size_t iov_offset;
 	size_t count;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ac9a87e727a3..66a740e6e153 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -513,6 +513,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
 	WARN_ON(direction & ~(READ | WRITE));
 	*i = (struct iov_iter) {
 		.iter_type = ITER_IOVEC,
+		.nofault = false,
 		.data_source = direction,
 		.iov = iov,
 		.nr_segs = nr_segs,
@@ -1527,13 +1528,17 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 		return 0;
 
 	if (likely(iter_is_iovec(i))) {
+		unsigned int gup_flags = 0;
 		unsigned long addr;
 
+		if (iov_iter_rw(i) != WRITE)
+			gup_flags |= FOLL_WRITE;
+		if (i->nofault)
+			gup_flags |= FOLL_NOFAULT;
+
 		addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
 		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		res = get_user_pages_fast(addr, n,
-				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
-				pages);
+		res = get_user_pages_fast(addr, n, gup_flags, pages);
 		if (unlikely(res <= 0))
 			return res;
 		return (res == n ? len : res * PAGE_SIZE) - *start;
@@ -1649,15 +1654,20 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		return 0;
 
 	if (likely(iter_is_iovec(i))) {
+		unsigned int gup_flags = 0;
 		unsigned long addr;
 
+		if (iov_iter_rw(i) != WRITE)
+			gup_flags |= FOLL_WRITE;
+		if (i->nofault)
+			gup_flags |= FOLL_NOFAULT;
+
 		addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
 		n = DIV_ROUND_UP(len, PAGE_SIZE);
 		p = get_pages_array(n);
 		if (!p)
 			return -ENOMEM;
-		res = get_user_pages_fast(addr, n,
-				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
+		res = get_user_pages_fast(addr, n, gup_flags, p);
 		if (unlikely(res <= 0)) {
 			kvfree(p);
 			*pages = NULL;
-- 
cgit v1.2.3


From 63dfe0709643528290c8a6825f278eda0e3f3c2e Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 18 Sep 2021 18:56:32 +0900
Subject: can: bittiming: allow TDC{V,O} to be zero and add
 can_tdc_const::tdc{v,o,f}_min

ISO 11898-1 specifies in section 11.3.3 "Transmitter delay
compensation" that "the configuration range for [the] SSP position
shall be at least 0 to 63 minimum time quanta."

Because SSP = TDCV + TDCO, it means that we should allow both TDCV and
TDCO to hold zero value in order to honor SSP's minimum possible
value.

However, current implementation assigned special meaning to TDCV and
TDCO's zero values:
  * TDCV = 0 -> TDCV is automatically measured by the transceiver.
  * TDCO = 0 -> TDC is off.

In order to allow for those values to really be zero and to maintain
current features, we introduce two new flags:
  * CAN_CTRLMODE_TDC_AUTO indicates that the controller support
    automatic measurement of TDCV.
  * CAN_CTRLMODE_TDC_MANUAL indicates that the controller support
    manual configuration of TDCV. N.B.: current implementation failed
    to provide an option for the driver to indicate that only manual
    mode was supported.

TDC is disabled if both CAN_CTRLMODE_TDC_AUTO and
CAN_CTRLMODE_TDC_MANUAL flags are off, c.f. the helper function
can_tdc_is_enabled() which is also introduced in this patch.

Also, this patch adds three fields: tdcv_min, tdco_min and tdcf_min to
struct can_tdc_const. While we are not convinced that those three
fields could be anything else than zero, we can imagine that some
controllers might specify a lower bound on these. Thus, those minimums
are really added "just in case".

Comments of struct can_tdc and can_tdc_const are updated accordingly.

Finally, the changes are applied to the etas_es58x driver.

Link: https://lore.kernel.org/all/20210918095637.20108-2-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/bittiming.c           | 10 +++--
 drivers/net/can/usb/etas_es58x/es58x_fd.c |  7 +++-
 include/linux/can/bittiming.h             | 64 +++++++++++++++++++++++--------
 include/linux/can/dev.h                   |  4 ++
 include/uapi/linux/can/netlink.h          |  2 +
 5 files changed, 65 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c
index b1b5a82f0829..9dda44c0ae9d 100644
--- a/drivers/net/can/dev/bittiming.c
+++ b/drivers/net/can/dev/bittiming.c
@@ -182,9 +182,12 @@ void can_calc_tdco(struct net_device *dev)
 	struct can_tdc *tdc = &priv->tdc;
 	const struct can_tdc_const *tdc_const = priv->tdc_const;
 
-	if (!tdc_const)
+	if (!tdc_const ||
+	    !(priv->ctrlmode_supported & CAN_CTRLMODE_TDC_AUTO))
 		return;
 
+	priv->ctrlmode &= ~CAN_CTRLMODE_TDC_MASK;
+
 	/* As specified in ISO 11898-1 section 11.3.3 "Transmitter
 	 * delay compensation" (TDC) is only applicable if data BRP is
 	 * one or two.
@@ -193,9 +196,10 @@ void can_calc_tdco(struct net_device *dev)
 		/* Reuse "normal" sample point and convert it to time quanta */
 		u32 sample_point_in_tq = can_bit_time(dbt) * dbt->sample_point / 1000;
 
+		if (sample_point_in_tq < tdc_const->tdco_min)
+			return;
 		tdc->tdco = min(sample_point_in_tq, tdc_const->tdco_max);
-	} else {
-		tdc->tdco = 0;
+		priv->ctrlmode |= CAN_CTRLMODE_TDC_AUTO;
 	}
 }
 #endif /* CONFIG_CAN_CALC_BITTIMING */
diff --git a/drivers/net/can/usb/etas_es58x/es58x_fd.c b/drivers/net/can/usb/etas_es58x/es58x_fd.c
index af042aa55f59..4f0cae29f4d8 100644
--- a/drivers/net/can/usb/etas_es58x/es58x_fd.c
+++ b/drivers/net/can/usb/etas_es58x/es58x_fd.c
@@ -428,7 +428,7 @@ static int es58x_fd_enable_channel(struct es58x_priv *priv)
 		es58x_fd_convert_bittiming(&tx_conf_msg.data_bittiming,
 					   &priv->can.data_bittiming);
 
-		if (priv->can.tdc.tdco) {
+		if (can_tdc_is_enabled(&priv->can)) {
 			tx_conf_msg.tdc_enabled = 1;
 			tx_conf_msg.tdco = cpu_to_le16(priv->can.tdc.tdco);
 			tx_conf_msg.tdcf = cpu_to_le16(priv->can.tdc.tdcf);
@@ -505,8 +505,11 @@ static const struct can_bittiming_const es58x_fd_data_bittiming_const = {
  * Register" from Microchip.
  */
 static const struct can_tdc_const es58x_tdc_const = {
+	.tdcv_min = 0,
 	.tdcv_max = 0, /* Manual mode not supported. */
+	.tdco_min = 0,
 	.tdco_max = 127,
+	.tdcf_min = 0,
 	.tdcf_max = 127
 };
 
@@ -523,7 +526,7 @@ const struct es58x_parameters es58x_fd_param = {
 	.clock = {.freq = 80 * CAN_MHZ},
 	.ctrlmode_supported = CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY |
 	    CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_FD | CAN_CTRLMODE_FD_NON_ISO |
-	    CAN_CTRLMODE_CC_LEN8_DLC,
+	    CAN_CTRLMODE_CC_LEN8_DLC | CAN_CTRLMODE_TDC_AUTO,
 	.tx_start_of_frame = 0xCEFA,	/* FACE in little endian */
 	.rx_start_of_frame = 0xFECA,	/* CAFE in little endian */
 	.tx_urb_cmd_max_len = ES58X_FD_TX_URB_CMD_MAX_LEN,
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index 9de6e9053e34..9e20260611cc 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -19,6 +19,9 @@
 /* Megahertz */
 #define CAN_MHZ 1000000UL
 
+#define CAN_CTRLMODE_TDC_MASK					\
+	(CAN_CTRLMODE_TDC_AUTO | CAN_CTRLMODE_TDC_MANUAL)
+
 /*
  * struct can_tdc - CAN FD Transmission Delay Compensation parameters
  *
@@ -33,29 +36,43 @@
  *
  * This structure contains the parameters to calculate that SSP.
  *
- * @tdcv: Transmitter Delay Compensation Value. Distance, in time
- *	quanta, from when the bit is sent on the TX pin to when it is
- *	received on the RX pin of the transmitter. Possible options:
+ * -+----------- one bit ----------+-- TX pin
+ *  |<--- Sample Point --->|
+ *
+ *                         --+----------- one bit ----------+-- RX pin
+ *  |<-------- TDCV -------->|
+ *                           |<------- TDCO ------->|
+ *  |<----------- Secondary Sample Point ---------->|
+ *
+ * @tdcv: Transmitter Delay Compensation Value. The time needed for
+ *	the signal to propagate, i.e. the distance, in time quanta,
+ *	from the start of the bit on the TX pin to when it is received
+ *	on the RX pin. @tdcv depends on the controller modes:
+ *
+ *	  CAN_CTRLMODE_TDC_AUTO is set: The transceiver dynamically
+ *	  measures @tdcv for each transmitted CAN FD frame and the
+ *	  value provided here should be ignored.
  *
- *	  0: automatic mode. The controller dynamically measures @tdcv
- *	  for each transmitted CAN FD frame.
+ *	  CAN_CTRLMODE_TDC_MANUAL is set: use the fixed provided @tdcv
+ *	  value.
  *
- *	  Other values: manual mode. Use the fixed provided value.
+ *	N.B. CAN_CTRLMODE_TDC_AUTO and CAN_CTRLMODE_TDC_MANUAL are
+ *	mutually exclusive. Only one can be set at a time. If both
+ *	CAN_TDC_CTRLMODE_AUTO and CAN_TDC_CTRLMODE_MANUAL are unset,
+ *	TDC is disabled and all the values of this structure should be
+ *	ignored.
  *
  * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
  *	quanta, defining the distance between the start of the bit
  *	reception on the RX pin of the transceiver and the SSP
  *	position such that SSP = @tdcv + @tdco.
  *
- *	If @tdco is zero, then TDC is disabled and both @tdcv and
- *	@tdcf should be ignored.
- *
  * @tdcf: Transmitter Delay Compensation Filter window. Defines the
- *	minimum value for the SSP position in time quanta. If SSP is
- *	less than @tdcf, then no delay compensations occur and the
- *	normal sampling point is used instead. The feature is enabled
- *	if and only if @tdcv is set to zero (automatic mode) and @tdcf
- *	is configured to a value greater than @tdco.
+ *	minimum value for the SSP position in time quanta. If the SSP
+ *	position is less than @tdcf, then no delay compensations occur
+ *	and the normal sampling point is used instead. The feature is
+ *	enabled if and only if @tdcv is set to zero (automatic mode)
+ *	and @tdcf is configured to a value greater than @tdco.
  */
 struct can_tdc {
 	u32 tdcv;
@@ -67,19 +84,32 @@ struct can_tdc {
  * struct can_tdc_const - CAN hardware-dependent constant for
  *	Transmission Delay Compensation
  *
- * @tdcv_max: Transmitter Delay Compensation Value maximum value.
- *	Should be set to zero if the controller does not support
- *	manual mode for tdcv.
+ * @tdcv_min: Transmitter Delay Compensation Value minimum value. If
+ *	the controller does not support manual mode for tdcv
+ *	(c.f. flag CAN_CTRLMODE_TDC_MANUAL) then this value is
+ *	ignored.
+ * @tdcv_max: Transmitter Delay Compensation Value maximum value. If
+ *	the controller does not support manual mode for tdcv
+ *	(c.f. flag CAN_CTRLMODE_TDC_MANUAL) then this value is
+ *	ignored.
+ *
+ * @tdco_min: Transmitter Delay Compensation Offset minimum value.
  * @tdco_max: Transmitter Delay Compensation Offset maximum value.
  *	Should not be zero. If the controller does not support TDC,
  *	then the pointer to this structure should be NULL.
+ *
+ * @tdcf_min: Transmitter Delay Compensation Filter window minimum
+ *	value. If @tdcf_max is zero, this value is ignored.
  * @tdcf_max: Transmitter Delay Compensation Filter window maximum
  *	value. Should be set to zero if the controller does not
  *	support this feature.
  */
 struct can_tdc_const {
+	u32 tdcv_min;
 	u32 tdcv_max;
+	u32 tdco_min;
 	u32 tdco_max;
+	u32 tdcf_min;
 	u32 tdcf_max;
 };
 
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 2413253e54c7..6dacbbb41e68 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -96,6 +96,10 @@ struct can_priv {
 #endif
 };
 
+static inline bool can_tdc_is_enabled(const struct can_priv *priv)
+{
+	return !!(priv->ctrlmode & CAN_CTRLMODE_TDC_MASK);
+}
 
 /* helper to define static CAN controller features at device creation time */
 static inline void can_set_static_ctrlmode(struct net_device *dev,
diff --git a/include/uapi/linux/can/netlink.h b/include/uapi/linux/can/netlink.h
index f730d443b918..004cd09a7d49 100644
--- a/include/uapi/linux/can/netlink.h
+++ b/include/uapi/linux/can/netlink.h
@@ -101,6 +101,8 @@ struct can_ctrlmode {
 #define CAN_CTRLMODE_PRESUME_ACK	0x40	/* Ignore missing CAN ACKs */
 #define CAN_CTRLMODE_FD_NON_ISO		0x80	/* CAN FD in non-ISO mode */
 #define CAN_CTRLMODE_CC_LEN8_DLC	0x100	/* Classic CAN DLC option */
+#define CAN_CTRLMODE_TDC_AUTO		0x200	/* CAN transiver automatically calculates TDCV */
+#define CAN_CTRLMODE_TDC_MANUAL		0x400	/* TDCV is manually set up by user */
 
 /*
  * CAN device statistics
-- 
cgit v1.2.3


From 39f66c9e229797a58a12ea78388cbbad1f81aec9 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 18 Sep 2021 18:56:33 +0900
Subject: can: bittiming: change unit of TDC parameters to clock periods
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In the current implementation, all Transmission Delay Compensation
(TDC) parameters are expressed in time quantum. However, ISO 11898-1
actually specifies that these should be expressed in *minimum* time
quantum.

Furthermore, the minimum time quantum is specified to be "one node
clock period long" (c.f. paragraph 11.3.1.1 "Bit time"). For sake of
simplicity, we prefer to use the "clock period" term instead of
"minimum time quantum" because we believe that it is more broadly
understood.

This patch fixes that discrepancy by updating the documentation and
the formula for TDCO calculation.

N.B. In can_calc_tdco(), the sample point (in time quantum) was
calculated using a division, thus introducing a risk of rounding and
truncation errors. On top of changing the unit to clock period, we
also modified the formula to use only additions.

Link: https://lore.kernel.org/all/20210918095637.20108-3-mailhol.vincent@wanadoo.fr
Suggested-by: Stefan Mätje <Stefan.Maetje@esd.eu>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/bittiming.c |  9 +++++----
 include/linux/can/bittiming.h   | 28 +++++++++++++++++-----------
 2 files changed, 22 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c
index 9dda44c0ae9d..0ccf982ca301 100644
--- a/drivers/net/can/dev/bittiming.c
+++ b/drivers/net/can/dev/bittiming.c
@@ -193,12 +193,13 @@ void can_calc_tdco(struct net_device *dev)
 	 * one or two.
 	 */
 	if (dbt->brp == 1 || dbt->brp == 2) {
-		/* Reuse "normal" sample point and convert it to time quanta */
-		u32 sample_point_in_tq = can_bit_time(dbt) * dbt->sample_point / 1000;
+		/* Sample point in clock periods */
+		u32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg +
+					  dbt->phase_seg1) * dbt->brp;
 
-		if (sample_point_in_tq < tdc_const->tdco_min)
+		if (sample_point_in_tc < tdc_const->tdco_min)
 			return;
-		tdc->tdco = min(sample_point_in_tq, tdc_const->tdco_max);
+		tdc->tdco = min(sample_point_in_tc, tdc_const->tdco_max);
 		priv->ctrlmode |= CAN_CTRLMODE_TDC_AUTO;
 	}
 }
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index 9e20260611cc..aebbe65dab7e 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -31,8 +31,8 @@
  *
  * To solve this issue, ISO 11898-1 introduces in section 11.3.3
  * "Transmitter delay compensation" a SSP (Secondary Sample Point)
- * equal to the distance, in time quanta, from the start of the bit
- * time on the TX pin to the actual measurement on the RX pin.
+ * equal to the distance from the start of the bit time on the TX pin
+ * to the actual measurement on the RX pin.
  *
  * This structure contains the parameters to calculate that SSP.
  *
@@ -44,8 +44,13 @@
  *                           |<------- TDCO ------->|
  *  |<----------- Secondary Sample Point ---------->|
  *
+ * To increase precision, contrary to the other bittiming parameters
+ * which are measured in time quanta, the TDC parameters are measured
+ * in clock periods (also referred as "minimum time quantum" in ISO
+ * 11898-1).
+ *
  * @tdcv: Transmitter Delay Compensation Value. The time needed for
- *	the signal to propagate, i.e. the distance, in time quanta,
+ *	the signal to propagate, i.e. the distance, in clock periods,
  *	from the start of the bit on the TX pin to when it is received
  *	on the RX pin. @tdcv depends on the controller modes:
  *
@@ -62,17 +67,18 @@
  *	TDC is disabled and all the values of this structure should be
  *	ignored.
  *
- * @tdco: Transmitter Delay Compensation Offset. Offset value, in time
- *	quanta, defining the distance between the start of the bit
- *	reception on the RX pin of the transceiver and the SSP
+ * @tdco: Transmitter Delay Compensation Offset. Offset value, in
+ *	clock periods, defining the distance between the start of the
+ *	bit reception on the RX pin of the transceiver and the SSP
  *	position such that SSP = @tdcv + @tdco.
  *
  * @tdcf: Transmitter Delay Compensation Filter window. Defines the
- *	minimum value for the SSP position in time quanta. If the SSP
- *	position is less than @tdcf, then no delay compensations occur
- *	and the normal sampling point is used instead. The feature is
- *	enabled if and only if @tdcv is set to zero (automatic mode)
- *	and @tdcf is configured to a value greater than @tdco.
+ *	minimum value for the SSP position in clock periods. If the
+ *	SSP position is less than @tdcf, then no delay compensations
+ *	occur and the normal sampling point is used instead. The
+ *	feature is enabled if and only if @tdcv is set to zero
+ *	(automatic mode) and @tdcf is configured to a value greater
+ *	than @tdco.
  */
 struct can_tdc {
 	u32 tdcv;
-- 
cgit v1.2.3


From da45a1e4d7b9d6b5a8231acb812df719fe3228b4 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 18 Sep 2021 18:56:34 +0900
Subject: can: bittiming: change can_calc_tdco()'s prototype to not directly
 modify priv

The function can_calc_tdco() directly retrieves can_priv from the
net_device and directly modifies it.

This is annoying for the upcoming patch. In
drivers/net/can/dev/netlink.c:can_changelink(), the data bittiming are
written to a temporary structure and memcpyed to can_priv only after
everything succeeded. In the next patch, where we will introduce the
netlink interface for TDC parameters, we will add a new TDC block
which can potentially fail. For this reason, the data bittiming
temporary structure has to be copied after that to-be-introduced TDC
block. However, TDC also needs to access data bittiming information.

We change the prototype so that the data bittiming structure is passed
to can_calc_tdco() as an argument instead of retrieving it from
priv. This way can_calc_tdco() can access the data bittiming before it
gets memcpyed to priv.

Link: https://lore.kernel.org/all/20210918095637.20108-4-mailhol.vincent@wanadoo.fr
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/bittiming.c | 17 +++++++----------
 drivers/net/can/dev/netlink.c   |  3 ++-
 include/linux/can/bittiming.h   |  9 +++++++--
 3 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev/bittiming.c b/drivers/net/can/dev/bittiming.c
index 0ccf982ca301..0509625c3082 100644
--- a/drivers/net/can/dev/bittiming.c
+++ b/drivers/net/can/dev/bittiming.c
@@ -175,18 +175,15 @@ int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 	return 0;
 }
 
-void can_calc_tdco(struct net_device *dev)
-{
-	struct can_priv *priv = netdev_priv(dev);
-	const struct can_bittiming *dbt = &priv->data_bittiming;
-	struct can_tdc *tdc = &priv->tdc;
-	const struct can_tdc_const *tdc_const = priv->tdc_const;
+void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const,
+		   const struct can_bittiming *dbt,
+		   u32 *ctrlmode, u32 ctrlmode_supported)
 
-	if (!tdc_const ||
-	    !(priv->ctrlmode_supported & CAN_CTRLMODE_TDC_AUTO))
+{
+	if (!tdc_const || !(ctrlmode_supported & CAN_CTRLMODE_TDC_AUTO))
 		return;
 
-	priv->ctrlmode &= ~CAN_CTRLMODE_TDC_MASK;
+	*ctrlmode &= ~CAN_CTRLMODE_TDC_MASK;
 
 	/* As specified in ISO 11898-1 section 11.3.3 "Transmitter
 	 * delay compensation" (TDC) is only applicable if data BRP is
@@ -200,7 +197,7 @@ void can_calc_tdco(struct net_device *dev)
 		if (sample_point_in_tc < tdc_const->tdco_min)
 			return;
 		tdc->tdco = min(sample_point_in_tc, tdc_const->tdco_max);
-		priv->ctrlmode |= CAN_CTRLMODE_TDC_AUTO;
+		*ctrlmode |= CAN_CTRLMODE_TDC_AUTO;
 	}
 }
 #endif /* CONFIG_CAN_CALC_BITTIMING */
diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index 80425636049d..e79c9a2ffbfc 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -189,7 +189,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[],
 
 		memcpy(&priv->data_bittiming, &dbt, sizeof(dbt));
 
-		can_calc_tdco(dev);
+		can_calc_tdco(&priv->tdc, priv->tdc_const, &priv->data_bittiming,
+			      &priv->ctrlmode, priv->ctrlmode_supported);
 
 		if (priv->do_set_data_bittiming) {
 			/* Finally, set the bit-timing registers */
diff --git a/include/linux/can/bittiming.h b/include/linux/can/bittiming.h
index aebbe65dab7e..20b50baf3a02 100644
--- a/include/linux/can/bittiming.h
+++ b/include/linux/can/bittiming.h
@@ -123,7 +123,9 @@ struct can_tdc_const {
 int can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 		       const struct can_bittiming_const *btc);
 
-void can_calc_tdco(struct net_device *dev);
+void can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const,
+		   const struct can_bittiming *dbt,
+		   u32 *ctrlmode, u32 ctrlmode_supported);
 #else /* !CONFIG_CAN_CALC_BITTIMING */
 static inline int
 can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
@@ -133,7 +135,10 @@ can_calc_bittiming(struct net_device *dev, struct can_bittiming *bt,
 	return -EINVAL;
 }
 
-static inline void can_calc_tdco(struct net_device *dev)
+static inline void
+can_calc_tdco(struct can_tdc *tdc, const struct can_tdc_const *tdc_const,
+	      const struct can_bittiming *dbt,
+	      u32 *ctrlmode, u32 ctrlmode_supported)
 {
 }
 #endif /* CONFIG_CAN_CALC_BITTIMING */
-- 
cgit v1.2.3


From e8060f08cd69d1d692cfb9f0a2808477a501f35a Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 18 Sep 2021 18:56:36 +0900
Subject: can: netlink: add can_priv::do_get_auto_tdcv() to retrieve tdcv from
 device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some CAN device can measure the TDCV (Transmission Delay Compensation
Value) automatically for each transmitted CAN frames.

A callback function do_get_auto_tdcv() is added to retrieve that
value. This function is used only if CAN_CTRLMODE_TDC_AUTO is enabled
(if CAN_CTRLMODE_TDC_MANUAL is selected, the TDCV value is provided by
the user).

If the device does not support reporting of TDCV, do_get_auto_tdcv()
should be set to NULL and TDCV will not be reported by the netlink
interface.

On success, do_get_auto_tdcv() shall return 0. If the value can not be
measured by the device, for example because network is down or because
no frames were transmitted yet, can_priv::do_get_auto_tdcv() shall
return a negative error code (e.g. -EINVAL) to signify that the value
is not yet available. In such cases, TDCV is not reported by the
netlink interface.

Link: https://lore.kernel.org/all/20210918095637.20108-6-mailhol.vincent@wanadoo.fr
CC: Stefan Mätje <stefan.maetje@esd.eu>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/dev/netlink.c | 15 ++++++++++++---
 include/linux/can/dev.h       |  1 +
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c
index c77cc6ae88b6..95cca4e5251f 100644
--- a/drivers/net/can/dev/netlink.c
+++ b/drivers/net/can/dev/netlink.c
@@ -372,7 +372,8 @@ static size_t can_tdc_get_size(const struct net_device *dev)
 	}
 
 	if (can_tdc_is_enabled(priv)) {
-		if (priv->ctrlmode & CAN_CTRLMODE_TDC_MANUAL)
+		if (priv->ctrlmode & CAN_CTRLMODE_TDC_MANUAL ||
+		    priv->do_get_auto_tdcv)
 			size += nla_total_size(sizeof(u32));	/* IFLA_CAN_TDCV */
 		size += nla_total_size(sizeof(u32));		/* IFLA_CAN_TDCO */
 		if (priv->tdc_const->tdcf_max)
@@ -445,8 +446,16 @@ static int can_tdc_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		goto err_cancel;
 
 	if (can_tdc_is_enabled(priv)) {
-		if (priv->ctrlmode & CAN_CTRLMODE_TDC_MANUAL &&
-		    nla_put_u32(skb, IFLA_CAN_TDC_TDCV, tdc->tdcv))
+		u32 tdcv;
+		int err = -EINVAL;
+
+		if (priv->ctrlmode & CAN_CTRLMODE_TDC_MANUAL) {
+			tdcv = tdc->tdcv;
+			err = 0;
+		} else if (priv->do_get_auto_tdcv) {
+			err = priv->do_get_auto_tdcv(dev, &tdcv);
+		}
+		if (!err && nla_put_u32(skb, IFLA_CAN_TDC_TDCV, tdcv))
 			goto err_cancel;
 		if (nla_put_u32(skb, IFLA_CAN_TDC_TDCO, tdc->tdco))
 			goto err_cancel;
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 6dacbbb41e68..b4aa0f048cab 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -82,6 +82,7 @@ struct can_priv {
 			    enum can_state *state);
 	int (*do_get_berr_counter)(const struct net_device *dev,
 				   struct can_berr_counter *bec);
+	int (*do_get_auto_tdcv)(const struct net_device *dev, u32 *tdcv);
 
 	unsigned int echo_skb_max;
 	struct sk_buff **echo_skb;
-- 
cgit v1.2.3


From fa759a9395ea81c17db613dde43c46f0607df7e7 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Sat, 18 Sep 2021 18:56:37 +0900
Subject: can: dev: add can_tdc_get_relative_tdco() helper function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

struct can_tdc::tdco represents the absolute offset from TDCV. Some
controllers use instead an offset relative to the Sample Point (SP)
such that:
| SSP = TDCV + absolute TDCO
|     = TDCV + SP + relative TDCO

Consequently:
| relative TDCO = absolute TDCO - SP

The function can_tdc_get_relative_tdco() allow to retrieve this
relative TDCO value.

Link: https://lore.kernel.org/all/20210918095637.20108-7-mailhol.vincent@wanadoo.fr
CC: Stefan Mätje <Stefan.Maetje@esd.eu>
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/linux/can/dev.h | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index b4aa0f048cab..45f19d9db5ca 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -102,6 +102,35 @@ static inline bool can_tdc_is_enabled(const struct can_priv *priv)
 	return !!(priv->ctrlmode & CAN_CTRLMODE_TDC_MASK);
 }
 
+/*
+ * can_get_relative_tdco() - TDCO relative to the sample point
+ *
+ * struct can_tdc::tdco represents the absolute offset from TDCV. Some
+ * controllers use instead an offset relative to the Sample Point (SP)
+ * such that:
+ *
+ * SSP = TDCV + absolute TDCO
+ *     = TDCV + SP + relative TDCO
+ *
+ * -+----------- one bit ----------+-- TX pin
+ *  |<--- Sample Point --->|
+ *
+ *                         --+----------- one bit ----------+-- RX pin
+ *  |<-------- TDCV -------->|
+ *                           |<------------------------>| absolute TDCO
+ *                           |<--- Sample Point --->|
+ *                           |                      |<->| relative TDCO
+ *  |<------------- Secondary Sample Point ------------>|
+ */
+static inline s32 can_get_relative_tdco(const struct can_priv *priv)
+{
+	const struct can_bittiming *dbt = &priv->data_bittiming;
+	s32 sample_point_in_tc = (CAN_SYNC_SEG + dbt->prop_seg +
+				  dbt->phase_seg1) * dbt->brp;
+
+	return (s32)priv->tdc.tdco - sample_point_in_tc;
+}
+
 /* helper to define static CAN controller features at device creation time */
 static inline void can_set_static_ctrlmode(struct net_device *dev,
 					   u32 static_mode)
-- 
cgit v1.2.3


From b3b180e735409ca0c76642014304b59482e0e653 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 20 Sep 2021 14:20:07 +0200
Subject: dmaengine: remove debugfs #ifdef

The ptdma driver has added debugfs support, but this fails to build
when debugfs is disabled:

drivers/dma/ptdma/ptdma-debugfs.c: In function 'ptdma_debugfs_setup':
drivers/dma/ptdma/ptdma-debugfs.c:93:54: error: 'struct dma_device' has no member named 'dbg_dev_root'
   93 |         debugfs_create_file("info", 0400, pt->dma_dev.dbg_dev_root, pt,
      |                                                      ^
drivers/dma/ptdma/ptdma-debugfs.c:96:55: error: 'struct dma_device' has no member named 'dbg_dev_root'
   96 |         debugfs_create_file("stats", 0400, pt->dma_dev.dbg_dev_root, pt,
      |                                                       ^
drivers/dma/ptdma/ptdma-debugfs.c:102:52: error: 'struct dma_device' has no member named 'dbg_dev_root'
  102 |                 debugfs_create_dir("q", pt->dma_dev.dbg_dev_root);
      |                                                    ^

Remove the #ifdef in the header, as this only saves a few bytes,
but would require ugly #ifdefs in each driver using it.
Simplify the other user while we're at it.

Fixes: e2fb2e2a33fa ("dmaengine: ptdma: Add debugfs entries for PTDMA")
Fixes: 26cf132de6f7 ("dmaengine: Create debug directories for DMA devices")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/20210920122017.205975-1-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dpdma.c | 15 +--------------
 include/linux/dmaengine.h         |  2 --
 2 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index b280a53e8570..ce5c66e6897d 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -271,9 +271,6 @@ struct xilinx_dpdma_device {
 /* -----------------------------------------------------------------------------
  * DebugFS
  */
-
-#ifdef CONFIG_DEBUG_FS
-
 #define XILINX_DPDMA_DEBUGFS_READ_MAX_SIZE	32
 #define XILINX_DPDMA_DEBUGFS_UINT16_MAX_STR	"65535"
 
@@ -299,7 +296,7 @@ struct xilinx_dpdma_debugfs_request {
 
 static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan)
 {
-	if (chan->id == dpdma_debugfs.chan_id)
+	if (IS_ENABLED(CONFIG_DEBUG_FS) && chan->id == dpdma_debugfs.chan_id)
 		dpdma_debugfs.xilinx_dpdma_irq_done_count++;
 }
 
@@ -462,16 +459,6 @@ static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev)
 		dev_err(xdev->dev, "Failed to create debugfs testcase file\n");
 }
 
-#else
-static void xilinx_dpdma_debugfs_init(struct xilinx_dpdma_device *xdev)
-{
-}
-
-static void xilinx_dpdma_debugfs_desc_done_irq(struct xilinx_dpdma_chan *chan)
-{
-}
-#endif /* CONFIG_DEBUG_FS */
-
 /* -----------------------------------------------------------------------------
  * I/O Accessors
  */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index e5c2c9e71bf1..9000f3ffce8b 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -944,10 +944,8 @@ struct dma_device {
 	void (*device_issue_pending)(struct dma_chan *chan);
 	void (*device_release)(struct dma_device *dev);
 	/* debugfs support */
-#ifdef CONFIG_DEBUG_FS
 	void (*dbg_summary_show)(struct seq_file *s, struct dma_device *dev);
 	struct dentry *dbg_dev_root;
-#endif
 };
 
 static inline int dmaengine_slave_config(struct dma_chan *chan,
-- 
cgit v1.2.3


From 1ba5478270a5c3a02b08052a3c003c282f2db94a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 22 Oct 2021 17:49:21 +0200
Subject: irqchip: Fix compile-testing without CONFIG_OF

Drivers using the new IRQCHIP_PLATFORM_DRIVER_BEGIN helper
fail to link when compile-testing without CONFIG_OF,
as that means CONFIG_IRQCHIP is disabled as well:

ld.lld: error: undefined symbol: platform_irqchip_probe
>>> referenced by irq-meson-gpio.c
>>>               irqchip/irq-meson-gpio.o:(meson_gpio_intc_driver) in archive drivers/built-in.a
>>> referenced by irq-mchp-eic.c
>>>               irqchip/irq-mchp-eic.o:(mchp_eic_driver) in archive drivers/built-in.a

As the drivers are not actually used in this case, just
making the reference to this symbol conditional helps
avoid the link failure.

Fixes: f8410e626569 ("irqchip: Add IRQCHIP_PLATFORM_DRIVER_BEGIN/END and IRQCHIP_MATCH helper macros")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20211022154927.920491-1-arnd@kernel.org
---
 include/linux/irqchip.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h
index 67351aac65ef..29dbe675fc7a 100644
--- a/include/linux/irqchip.h
+++ b/include/linux/irqchip.h
@@ -39,8 +39,9 @@ static const struct of_device_id drv_name##_irqchip_match_table[] = {
 	{},								\
 };									\
 MODULE_DEVICE_TABLE(of, drv_name##_irqchip_match_table);		\
-static struct platform_driver drv_name##_driver = {		\
-	.probe  = platform_irqchip_probe,				\
+static struct platform_driver drv_name##_driver = {			\
+	.probe  = IS_ENABLED(CONFIG_IRQCHIP) ? 				\
+			platform_irqchip_probe : NULL,			\
 	.driver = {							\
 		.name = #drv_name,					\
 		.owner = THIS_MODULE,					\
-- 
cgit v1.2.3


From f2739ca15c414ebad88f4333e3186fd4144c1753 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 22 Oct 2021 11:46:42 -0500
Subject: x86/of: Kill unused early_init_dt_scan_chosen_arch()

There are no callers for early_init_dt_scan_chosen_arch(), so remove it.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Frank Rowand <frank.rowand@sony.com>
Link: https://lkml.kernel.org/r/20211022164642.2815706-1-robh@kernel.org
---
 arch/x86/kernel/devicetree.c | 5 -----
 include/linux/of_fdt.h       | 1 -
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 6a4cb71c2498..78b2311b3b8b 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -31,11 +31,6 @@ char __initdata cmd_line[COMMAND_LINE_SIZE];
 
 int __initdata of_ioapic;
 
-void __init early_init_dt_scan_chosen_arch(unsigned long node)
-{
-	BUG();
-}
-
 void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 {
 	BUG();
diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h
index cf6a65b94d40..cf48983d3c86 100644
--- a/include/linux/of_fdt.h
+++ b/include/linux/of_fdt.h
@@ -65,7 +65,6 @@ extern int early_init_dt_scan_memory(unsigned long node, const char *uname,
 extern int early_init_dt_scan_chosen_stdout(void);
 extern void early_init_fdt_scan_reserved_mem(void);
 extern void early_init_fdt_reserve_self(void);
-extern void __init early_init_dt_scan_chosen_arch(unsigned long node);
 extern void early_init_dt_add_memory_arch(u64 base, u64 size);
 extern u64 dt_mem_next_cell(int s, const __be32 **cellp);
 
-- 
cgit v1.2.3


From a1b09501971435ef213251891753afb0d7f3d27a Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 20 Oct 2021 11:24:06 +0100
Subject: irq: add generic_handle_arch_irq()

Several architectures select GENERIC_IRQ_MULTI_HANDLER and branch to
handle_arch_irq() without performing any entry accounting.

Add a generic wrapper to handle the common irqentry work when invoking
handle_arch_irq(). Where an architecture needs to perform some entry
accounting itself, it will need to invoke handle_arch_irq() itself.

In subsequent patches it will become the responsibilty of the entry code
to set the irq regs when entering an IRQ (rather than deferring this to
an irqchip handler), so generic_handle_arch_irq() is made to set the irq
regs now. This can be redundant in some cases, but is never harmful as
saving/restoring the old regs nests safely.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Guo Ren <guoren@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/irq.h |  1 +
 kernel/irq/handle.c | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index c8293c817646..988c225eef2d 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1261,6 +1261,7 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *));
  * top-level IRQ handler.
  */
 extern void (*handle_arch_irq)(struct pt_regs *) __ro_after_init;
+asmlinkage void generic_handle_arch_irq(struct pt_regs *regs);
 #else
 #ifndef set_handle_irq
 #define set_handle_irq(handle_irq)		\
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 221d80c31e94..27182003b879 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -14,6 +14,8 @@
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 
+#include <asm/irq_regs.h>
+
 #include <trace/events/irq.h>
 
 #include "internals.h"
@@ -226,4 +228,20 @@ int __init set_handle_irq(void (*handle_irq)(struct pt_regs *))
 	handle_arch_irq = handle_irq;
 	return 0;
 }
+
+/**
+ * generic_handle_arch_irq - root irq handler for architectures which do no
+ *                           entry accounting themselves
+ * @regs:	Register file coming from the low-level handling code
+ */
+asmlinkage void noinstr generic_handle_arch_irq(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs;
+
+	irq_enter();
+	old_regs = set_irq_regs(regs);
+	handle_arch_irq(regs);
+	set_irq_regs(old_regs);
+	irq_exit();
+}
 #endif
-- 
cgit v1.2.3


From 63c67f526db86d3102a77437a510c949f6debb08 Mon Sep 17 00:00:00 2001
From: Luo Jie <luoj@codeaurora.org>
Date: Sun, 24 Oct 2021 16:27:34 +0800
Subject: net: phy: add genphy_c45_fast_retrain

Add generic fast retrain auto-negotiation function for C45 PHYs.

Signed-off-by: Luo Jie <luoj@codeaurora.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy-c45.c | 34 ++++++++++++++++++++++++++++++++++
 include/linux/phy.h       |  1 +
 2 files changed, 35 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index c617dbcad6ea..b01180e1f578 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -611,6 +611,40 @@ int genphy_c45_loopback(struct phy_device *phydev, bool enable)
 }
 EXPORT_SYMBOL_GPL(genphy_c45_loopback);
 
+/**
+ * genphy_c45_fast_retrain - configure fast retrain registers
+ * @phydev: target phy_device struct
+ *
+ * Description: If fast-retrain is enabled, we configure PHY as
+ *   advertising fast retrain capable and THP Bypass Request, then
+ *   enable fast retrain. If it is not enabled, we configure fast
+ *   retrain disabled.
+ */
+int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable)
+{
+	int ret;
+
+	if (!enable)
+		return phy_clear_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FSRT_CSR,
+				MDIO_PMA_10GBR_FSRT_ENABLE);
+
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT, phydev->supported)) {
+		ret = phy_set_bits_mmd(phydev, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
+				MDIO_AN_10GBT_CTRL_ADVFSRT2_5G);
+		if (ret)
+			return ret;
+
+		ret = phy_set_bits_mmd(phydev, MDIO_MMD_AN, MDIO_AN_CTRL2,
+				MDIO_AN_THP_BP2_5GT);
+		if (ret)
+			return ret;
+	}
+
+	return phy_set_bits_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_FSRT_CSR,
+			MDIO_PMA_10GBR_FSRT_ENABLE);
+}
+EXPORT_SYMBOL_GPL(genphy_c45_fast_retrain);
+
 struct phy_driver genphy_c45_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 736e1d1a47c4..04e90423fa88 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1584,6 +1584,7 @@ int genphy_c45_config_aneg(struct phy_device *phydev);
 int genphy_c45_loopback(struct phy_device *phydev, bool enable);
 int genphy_c45_pma_resume(struct phy_device *phydev);
 int genphy_c45_pma_suspend(struct phy_device *phydev);
+int genphy_c45_fast_retrain(struct phy_device *phydev, bool enable);
 
 /* Generic C45 PHY driver */
 extern struct phy_driver genphy_c45_driver;
-- 
cgit v1.2.3


From 6b19b766e8f077f29cdb47da5003469a85bbfb9c Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 21 Oct 2021 09:22:35 -0600
Subject: fs: get rid of the res2 iocb->ki_complete argument

The second argument was only used by the USB gadget code, yet everyone
pays the overhead of passing a zero to be passed into aio, where it
ends up being part of the aio res2 value.

Now that everybody is passing in zero, kill off the extra argument.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/fops.c                       |  2 +-
 crypto/af_alg.c                    |  2 +-
 drivers/block/loop.c               |  4 ++--
 drivers/nvme/target/io-cmd-file.c  |  4 ++--
 drivers/target/target_core_file.c  |  4 ++--
 drivers/usb/gadget/function/f_fs.c |  2 +-
 drivers/usb/gadget/legacy/inode.c  |  5 ++---
 fs/aio.c                           |  6 +++---
 fs/cachefiles/io.c                 | 12 ++++++------
 fs/ceph/file.c                     |  2 +-
 fs/cifs/file.c                     |  4 ++--
 fs/direct-io.c                     |  2 +-
 fs/fuse/file.c                     |  2 +-
 fs/io_uring.c                      |  6 +++---
 fs/iomap/direct-io.c               |  2 +-
 fs/nfs/direct.c                    |  2 +-
 fs/overlayfs/file.c                |  4 ++--
 include/linux/fs.h                 |  2 +-
 18 files changed, 33 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/block/fops.c b/block/fops.c
index 396537598e3e..d86ebda73e8c 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -164,7 +164,7 @@ static void blkdev_bio_end_io(struct bio *bio)
 				ret = blk_status_to_errno(dio->bio.bi_status);
 			}
 
-			dio->iocb->ki_complete(iocb, ret, 0);
+			dio->iocb->ki_complete(iocb, ret);
 			if (dio->flags & DIO_MULTI_BIO)
 				bio_put(&dio->bio);
 		} else {
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 8bd288d2b089..3dd5a773c320 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1076,7 +1076,7 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err)
 	af_alg_free_resources(areq);
 	sock_put(sk);
 
-	iocb->ki_complete(iocb, err ? err : (int)resultlen, 0);
+	iocb->ki_complete(iocb, err ? err : (int)resultlen);
 }
 EXPORT_SYMBOL_GPL(af_alg_async_cb);
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 7bf4686af774..469d87e24fd4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -554,7 +554,7 @@ static void lo_rw_aio_do_completion(struct loop_cmd *cmd)
 		blk_mq_complete_request(rq);
 }
 
-static void lo_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
+static void lo_rw_aio_complete(struct kiocb *iocb, long ret)
 {
 	struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
 
@@ -627,7 +627,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	lo_rw_aio_do_completion(cmd);
 
 	if (ret != -EIOCBQUEUED)
-		cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
+		lo_rw_aio_complete(&cmd->iocb, ret);
 	return 0;
 }
 
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 1dd1a0fe2e81..6aa30f30b572 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -125,7 +125,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
 	return call_iter(iocb, &iter);
 }
 
-static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
+static void nvmet_file_io_done(struct kiocb *iocb, long ret)
 {
 	struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);
 	u16 status = NVME_SC_SUCCESS;
@@ -222,7 +222,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
 	}
 
 complete:
-	nvmet_file_io_done(&req->f.iocb, ret, 0);
+	nvmet_file_io_done(&req->f.iocb, ret);
 	return true;
 }
 
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 02f64453b4c5..8190b840065f 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -245,7 +245,7 @@ struct target_core_file_cmd {
 	struct bio_vec	bvecs[];
 };
 
-static void cmd_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
+static void cmd_rw_aio_complete(struct kiocb *iocb, long ret)
 {
 	struct target_core_file_cmd *cmd;
 
@@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 		ret = call_read_iter(file, &aio_cmd->iocb, &iter);
 
 	if (ret != -EIOCBQUEUED)
-		cmd_rw_aio_complete(&aio_cmd->iocb, ret, 0);
+		cmd_rw_aio_complete(&aio_cmd->iocb, ret);
 
 	return 0;
 }
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 7bd22398d52f..e20c19a0f106 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -831,7 +831,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
 		kthread_unuse_mm(io_data->mm);
 	}
 
-	io_data->kiocb->ki_complete(io_data->kiocb, ret, 0);
+	io_data->kiocb->ki_complete(io_data->kiocb, ret);
 
 	if (io_data->ffs->ffs_eventfd && !kiocb_has_eventfd)
 		eventfd_signal(io_data->ffs->ffs_eventfd, 1);
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 28d3d4e71182..78be94750232 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -469,7 +469,7 @@ static void ep_user_copy_worker(struct work_struct *work)
 		ret = -EFAULT;
 
 	/* completing the iocb can drop the ctx and mm, don't touch mm after */
-	iocb->ki_complete(iocb, ret, 0);
+	iocb->ki_complete(iocb, ret);
 
 	kfree(priv->buf);
 	kfree(priv->to_free);
@@ -497,8 +497,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 		kfree(priv);
 		iocb->private = NULL;
 		iocb->ki_complete(iocb,
-				req->actual ? req->actual : (long)req->status,
-				0);
+				req->actual ? req->actual : (long)req->status);
 	} else {
 		/* ep_copy_to_user() won't report both; we hide some faults */
 		if (unlikely(0 != req->status))
diff --git a/fs/aio.c b/fs/aio.c
index 51b08ab01dff..836dc7e48db7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1417,7 +1417,7 @@ static void aio_remove_iocb(struct aio_kiocb *iocb)
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 }
 
-static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
+static void aio_complete_rw(struct kiocb *kiocb, long res)
 {
 	struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
 
@@ -1437,7 +1437,7 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
 	}
 
 	iocb->ki_res.res = res;
-	iocb->ki_res.res2 = res2;
+	iocb->ki_res.res2 = 0;
 	iocb_put(iocb);
 }
 
@@ -1508,7 +1508,7 @@ static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
 		ret = -EINTR;
 		fallthrough;
 	default:
-		req->ki_complete(req, ret, 0);
+		req->ki_complete(req, ret);
 	}
 }
 
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index fac2e8e7b533..effe37ef8629 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -37,11 +37,11 @@ static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
 /*
  * Handle completion of a read from the cache.
  */
-static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2)
+static void cachefiles_read_complete(struct kiocb *iocb, long ret)
 {
 	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
 
-	_enter("%ld,%ld", ret, ret2);
+	_enter("%ld", ret);
 
 	if (ki->term_func) {
 		if (ret >= 0)
@@ -139,7 +139,7 @@ static int cachefiles_read(struct netfs_cache_resources *cres,
 		fallthrough;
 	default:
 		ki->was_async = false;
-		cachefiles_read_complete(&ki->iocb, ret, 0);
+		cachefiles_read_complete(&ki->iocb, ret);
 		if (ret > 0)
 			ret = 0;
 		break;
@@ -159,12 +159,12 @@ presubmission_error:
 /*
  * Handle completion of a write to the cache.
  */
-static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2)
+static void cachefiles_write_complete(struct kiocb *iocb, long ret)
 {
 	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
 	struct inode *inode = file_inode(ki->iocb.ki_filp);
 
-	_enter("%ld,%ld", ret, ret2);
+	_enter("%ld", ret);
 
 	/* Tell lockdep we inherited freeze protection from submission thread */
 	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
@@ -244,7 +244,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
 		fallthrough;
 	default:
 		ki->was_async = false;
-		cachefiles_write_complete(&ki->iocb, ret, 0);
+		cachefiles_write_complete(&ki->iocb, ret);
 		if (ret > 0)
 			ret = 0;
 		break;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index d16fd2d5fd42..e10a80c1b581 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1023,7 +1023,7 @@ static void ceph_aio_complete(struct inode *inode,
 	ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
 						CEPH_CAP_FILE_RD));
 
-	aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
+	aio_req->iocb->ki_complete(aio_req->iocb, ret);
 
 	ceph_free_cap_flush(aio_req->prealloc_cf);
 	kfree(aio_req);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 13f3182cf796..1b855fcb179e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3184,7 +3184,7 @@ restart_loop:
 	mutex_unlock(&ctx->aio_mutex);
 
 	if (ctx->iocb && ctx->iocb->ki_complete)
-		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
+		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
 	else
 		complete(&ctx->done);
 }
@@ -3917,7 +3917,7 @@ again:
 	mutex_unlock(&ctx->aio_mutex);
 
 	if (ctx->iocb && ctx->iocb->ki_complete)
-		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
+		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
 	else
 		complete(&ctx->done);
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 453dcff0e7f5..654443558047 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -307,7 +307,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 
 		if (ret > 0 && dio->op == REQ_OP_WRITE)
 			ret = generic_write_sync(dio->iocb, ret);
-		dio->iocb->ki_complete(dio->iocb, ret, 0);
+		dio->iocb->ki_complete(dio->iocb, ret);
 	}
 
 	kmem_cache_free(dio_cache, dio);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 11404f8c21c7..e6039f22311b 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -687,7 +687,7 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 			spin_unlock(&fi->lock);
 		}
 
-		io->iocb->ki_complete(io->iocb, res, 0);
+		io->iocb->ki_complete(io->iocb, res);
 	}
 
 	kref_put(&io->refcnt, fuse_io_release);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d4631a55a692..edf29406ba8b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2689,7 +2689,7 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 	__io_req_complete(req, issue_flags, req->result, io_put_rw_kbuf(req));
 }
 
-static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+static void io_complete_rw(struct kiocb *kiocb, long res)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
@@ -2700,7 +2700,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
 	io_req_task_work_add(req);
 }
 
-static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
+static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
 {
 	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
@@ -2913,7 +2913,7 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
 		ret = -EINTR;
 		fallthrough;
 	default:
-		kiocb->ki_complete(kiocb, ret, 0);
+		kiocb->ki_complete(kiocb, ret);
 	}
 }
 
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 83ecfba53abe..811c898125a5 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -125,7 +125,7 @@ static void iomap_dio_complete_work(struct work_struct *work)
 	struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
 	struct kiocb *iocb = dio->iocb;
 
-	iocb->ki_complete(iocb, iomap_dio_complete(dio), 0);
+	iocb->ki_complete(iocb, iomap_dio_complete(dio));
 }
 
 /*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 2e894fec036b..7a5f287c5391 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -275,7 +275,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 			res = (long) dreq->count;
 			WARN_ON_ONCE(dreq->count < 0);
 		}
-		dreq->iocb->ki_complete(dreq->iocb, res, 0);
+		dreq->iocb->ki_complete(dreq->iocb, res);
 	}
 
 	complete(&dreq->completion);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index c88ac571593d..ac461a499882 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -272,14 +272,14 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
 	kmem_cache_free(ovl_aio_request_cachep, aio_req);
 }
 
-static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2)
+static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
 {
 	struct ovl_aio_req *aio_req = container_of(iocb,
 						   struct ovl_aio_req, iocb);
 	struct kiocb *orig_iocb = aio_req->orig_iocb;
 
 	ovl_aio_cleanup_handler(aio_req);
-	orig_iocb->ki_complete(orig_iocb, res, res2);
+	orig_iocb->ki_complete(orig_iocb, res);
 }
 
 static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 31029a91f440..0dcb9020a7b3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -330,7 +330,7 @@ struct kiocb {
 	randomized_struct_fields_start
 
 	loff_t			ki_pos;
-	void (*ki_complete)(struct kiocb *iocb, long ret, long ret2);
+	void (*ki_complete)(struct kiocb *iocb, long ret);
 	void			*private;
 	int			ki_flags;
 	u16			ki_hint;
-- 
cgit v1.2.3


From cb464ba53c0cb497dcb4a3daaf4fad4b75291863 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Mon, 11 Oct 2021 13:14:28 +0300
Subject: net/mlx5: Extend health buffer dump

Enhance health buffer to include:
 - assert_var5: expose the 6'th assert variable.
 - time: error's time-stamp in seconds (epoch time).
 - rfr: Recovery Flow Requiered. When set, indicates that the error
        cannot be recovered without flow involving reset.
 - severity: error's severity value, ranging from emergency to debug.
Expose them in the health buffer dump (dmesg and devlink fw reporter).

Health buffer in dmesg:
mlx5_core 0000:08:00.0: print_health_info:425:(pid 912): Health issue observed, firmware internal error, severity(3) ERROR:
mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[0] 0x08040700
mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[1] 0x00000000
mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[2] 0x00000000
mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[3] 0x00000000
mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[4] 0x00000000
mlx5_core 0000:08:00.0: print_health_info:429:(pid 912): assert_var[5] 0x00000000
mlx5_core 0000:08:00.0: print_health_info:432:(pid 912): assert_exit_ptr 0x00aaf800
mlx5_core 0000:08:00.0: print_health_info:434:(pid 912): assert_callra 0x00aaf70c
mlx5_core 0000:08:00.0: print_health_info:436:(pid 912): fw_ver 16.32.492
mlx5_core 0000:08:00.0: print_health_info:437:(pid 912): time 1634819758
mlx5_core 0000:08:00.0: print_health_info:438:(pid 912): hw_id 0x0000020d
mlx5_core 0000:08:00.0: print_health_info:439:(pid 912): rfr 0
mlx5_core 0000:08:00.0: print_health_info:440:(pid 912): severity 3 (ERROR)
mlx5_core 0000:08:00.0: print_health_info:441:(pid 912): irisc_index 9
mlx5_core 0000:08:00.0: print_health_info:442:(pid 912): synd 0x1: firmware internal error
mlx5_core 0000:08:00.0: print_health_info:444:(pid 912): ext_synd 0x802b
mlx5_core 0000:08:00.0: print_health_info:445:(pid 912): raw fw_ver 0x102001ec

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 73 +++++++++++++++++++++---
 include/linux/mlx5/device.h                      | 14 +++--
 include/linux/mlx5/mlx5_ifc.h                    | 10 +++-
 3 files changed, 82 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 6a4dd7f78958..538ef392f54c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -36,6 +36,7 @@
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/mlx5/driver.h>
+#include <linux/kern_levels.h>
 #include "mlx5_core.h"
 #include "lib/eq.h"
 #include "lib/mlx5.h"
@@ -74,6 +75,11 @@ enum  {
 	MLX5_SENSOR_FW_SYND_RFR		= 5,
 };
 
+enum {
+	MLX5_SEVERITY_MASK		= 0x7,
+	MLX5_SEVERITY_VALID_MASK	= 0x8,
+};
+
 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
 {
 	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
@@ -98,12 +104,19 @@ static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
 	return (ioread32be(&h->fw_ver) == 0xffffffff);
 }
 
+static int mlx5_health_get_rfr(u8 rfr_severity)
+{
+	return rfr_severity >> MLX5_RFR_BIT_OFFSET;
+}
+
 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 	struct health_buffer __iomem *h = health->health;
-	u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
 	u8 synd = ioread8(&h->synd);
+	u8 rfr;
+
+	rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));
 
 	if (rfr && synd)
 		mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
@@ -366,18 +379,52 @@ static const char *hsynd_str(u8 synd)
 	}
 }
 
+static const char *mlx5_loglevel_str(int level)
+{
+	switch (level) {
+	case LOGLEVEL_EMERG:
+		return "EMERGENCY";
+	case LOGLEVEL_ALERT:
+		return "ALERT";
+	case LOGLEVEL_CRIT:
+		return "CRITICAL";
+	case LOGLEVEL_ERR:
+		return "ERROR";
+	case LOGLEVEL_WARNING:
+		return "WARNING";
+	case LOGLEVEL_NOTICE:
+		return "NOTICE";
+	case LOGLEVEL_INFO:
+		return "INFO";
+	case LOGLEVEL_DEBUG:
+		return "DEBUG";
+	}
+	return "Unknown log level";
+}
+
+static int mlx5_health_get_severity(u8 rfr_severity)
+{
+	return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
+	       rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
+}
+
 static void print_health_info(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 	struct health_buffer __iomem *h = health->health;
-	char fw_str[18];
-	u32 fw;
+	u8 rfr_severity;
+	int severity;
 	int i;
 
 	/* If the syndrome is 0, the device is OK and no need to print buffer */
 	if (!ioread8(&h->synd))
 		return;
 
+	rfr_severity = ioread8(&h->rfr_severity);
+	severity  = mlx5_health_get_severity(rfr_severity);
+	mlx5_core_err(dev, "Health issue observed, %s, severity(%d) %s:\n",
+		      hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));
+
 	for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
 		mlx5_core_err(dev, "assert_var[%d] 0x%08x\n", i,
 			      ioread32be(h->assert_var + i));
@@ -386,15 +433,16 @@ static void print_health_info(struct mlx5_core_dev *dev)
 		      ioread32be(&h->assert_exit_ptr));
 	mlx5_core_err(dev, "assert_callra 0x%08x\n",
 		      ioread32be(&h->assert_callra));
-	sprintf(fw_str, "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
-	mlx5_core_err(dev, "fw_ver %s\n", fw_str);
+	mlx5_core_err(dev, "fw_ver %d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
+	mlx5_core_err(dev, "time %u\n", ioread32be(&h->time));
 	mlx5_core_err(dev, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
+	mlx5_core_err(dev, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
+	mlx5_core_err(dev, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
 	mlx5_core_err(dev, "irisc_index %d\n", ioread8(&h->irisc_index));
 	mlx5_core_err(dev, "synd 0x%x: %s\n", ioread8(&h->synd),
 		      hsynd_str(ioread8(&h->synd)));
 	mlx5_core_err(dev, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
-	fw = ioread32be(&h->fw_ver);
-	mlx5_core_err(dev, "raw fw_ver 0x%08x\n", fw);
+	mlx5_core_err(dev, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
 }
 
 static int
@@ -443,6 +491,7 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 	struct health_buffer __iomem *h = health->health;
+	u8 rfr_severity;
 	int err;
 	int i;
 
@@ -473,9 +522,19 @@ mlx5_fw_reporter_heath_buffer_data_put(struct mlx5_core_dev *dev,
 		return err;
 	err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
 					ioread32be(&h->assert_callra));
+	if (err)
+		return err;
+	err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
 	if (err)
 		return err;
 	err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
+	if (err)
+		return err;
+	rfr_severity = ioread8(&h->rfr_severity);
+	err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
+	if (err)
+		return err;
+	err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
 	if (err)
 		return err;
 	err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 347167c18802..f8a0bbb42c3b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -541,19 +541,21 @@ struct mlx5_cmd_layout {
 	u8		status_own;
 };
 
-enum mlx5_fatal_assert_bit_offsets {
-	MLX5_RFR_OFFSET = 31,
+enum mlx5_rfr_severity_bit_offsets {
+	MLX5_RFR_BIT_OFFSET = 0x7,
 };
 
 struct health_buffer {
-	__be32		assert_var[5];
-	__be32		rsvd0[3];
+	__be32		assert_var[6];
+	__be32		rsvd0[2];
 	__be32		assert_exit_ptr;
 	__be32		assert_callra;
-	__be32		rsvd1[2];
+	__be32		rsvd1[1];
+	__be32		time;
 	__be32		fw_ver;
 	__be32		hw_id;
-	__be32		rfr;
+	u8		rfr_severity;
+	u8		rsvd2[3];
 	u8		irisc_index;
 	u8		synd;
 	__be16		ext_synd;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 09e43019d877..6d292b5b8992 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4149,13 +4149,19 @@ struct mlx5_ifc_health_buffer_bits {
 
 	u8         assert_callra[0x20];
 
-	u8         reserved_at_140[0x40];
+	u8         reserved_at_140[0x20];
+
+	u8         time[0x20];
 
 	u8         fw_version[0x20];
 
 	u8         hw_id[0x20];
 
-	u8         reserved_at_1c0[0x20];
+	u8         rfr[0x1];
+	u8         reserved_at_1c1[0x3];
+	u8         valid[0x1];
+	u8         severity[0x3];
+	u8         reserved_at_1c8[0x18];
 
 	u8         irisc_index[0x8];
 	u8         synd[0x8];
-- 
cgit v1.2.3


From 5a1023deeed02a2078bcc11eec1c4be31e85892d Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Wed, 13 Oct 2021 09:45:22 +0300
Subject: net/mlx5: Add periodic update of host time to firmware

Firmware logs its asserts also to non-volatile memory. In order to
reduce drift between the NIC and the host, the driver sets the host
epoch-time to the firmware every hour.

Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 30 ++++++++++++++++++++++++
 include/linux/mlx5/driver.h                      |  2 ++
 include/linux/mlx5/mlx5_ifc.h                    | 12 ++++++++++
 3 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index c35a27255232..64f1abc4dc36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -752,6 +752,31 @@ void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
 	spin_unlock_irqrestore(&health->wq_lock, flags);
 }
 
+#define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
+static void mlx5_health_log_ts_update(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
+	u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
+	struct mlx5_core_health *health;
+	struct mlx5_core_dev *dev;
+	struct mlx5_priv *priv;
+	u64 now_us;
+
+	health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
+	priv = container_of(health, struct mlx5_priv, health);
+	dev = container_of(priv, struct mlx5_core_dev, priv);
+
+	now_us =  ktime_to_us(ktime_get_real());
+
+	MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
+	MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
+	mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);
+
+	queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
+			   msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
+}
+
 static void poll_health(struct timer_list *t)
 {
 	struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
@@ -834,6 +859,7 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 	spin_lock_irqsave(&health->wq_lock, flags);
 	set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
 	spin_unlock_irqrestore(&health->wq_lock, flags);
+	cancel_delayed_work_sync(&health->update_fw_log_ts_work);
 	cancel_work_sync(&health->report_work);
 	cancel_work_sync(&health->fatal_report_work);
 }
@@ -849,6 +875,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
 
+	cancel_delayed_work_sync(&health->update_fw_log_ts_work);
 	destroy_workqueue(health->wq);
 	mlx5_fw_reporters_destroy(dev);
 }
@@ -874,6 +901,9 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
 	spin_lock_init(&health->wq_lock);
 	INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
 	INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
+	INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
+	if (mlx5_core_is_pf(dev))
+		queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
 
 	return 0;
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3f4c0f2314a5..f617dfbcd9fd 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -134,6 +134,7 @@ enum {
 	MLX5_REG_MCIA		 = 0x9014,
 	MLX5_REG_MFRL		 = 0x9028,
 	MLX5_REG_MLCR		 = 0x902b,
+	MLX5_REG_MRTC		 = 0x902d,
 	MLX5_REG_MTRC_CAP	 = 0x9040,
 	MLX5_REG_MTRC_CONF	 = 0x9041,
 	MLX5_REG_MTRC_STDB	 = 0x9042,
@@ -440,6 +441,7 @@ struct mlx5_core_health {
 	struct work_struct		report_work;
 	struct devlink_health_reporter *fw_reporter;
 	struct devlink_health_reporter *fw_fatal_reporter;
+	struct delayed_work		update_fw_log_ts_work;
 };
 
 struct mlx5_qp_table {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6d292b5b8992..746381eccccf 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -10358,6 +10358,17 @@ struct mlx5_ifc_pddr_reg_bits {
 	union mlx5_ifc_pddr_reg_page_data_auto_bits page_data;
 };
 
+struct mlx5_ifc_mrtc_reg_bits {
+	u8         time_synced[0x1];
+	u8         reserved_at_1[0x1f];
+
+	u8         reserved_at_20[0x20];
+
+	u8         time_h[0x20];
+
+	u8         time_l[0x20];
+};
+
 union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_bufferx_reg_bits bufferx_reg;
 	struct mlx5_ifc_eth_2819_cntrs_grp_data_layout_bits eth_2819_cntrs_grp_data_layout;
@@ -10419,6 +10430,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_mirc_reg_bits mirc_reg;
 	struct mlx5_ifc_mfrl_reg_bits mfrl_reg;
 	struct mlx5_ifc_mtutc_reg_bits mtutc_reg;
+	struct mlx5_ifc_mrtc_reg_bits mrtc_reg;
 	u8         reserved_at_0[0x60e0];
 };
 
-- 
cgit v1.2.3


From 46ae40b94d8826591472798114a723cc7feac7a7 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Thu, 12 Aug 2021 11:53:34 +0300
Subject: net/mlx5: Let user configure io_eq_size param

Currently, each I/O EQ is taking 128KB of memory. This size
is not needed in all use cases, and is critical with large scale.
Hence, allow user to configure the size of I/O EQs.

For example, to reduce I/O EQ size to 64, execute:
$ devlink resource set pci/0000:00:0b.0 path /io_eq_size/ size 64
$ devlink dev reload pci/0000:00:0b.0

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 Documentation/networking/devlink/mlx5.rst          | 12 +++++
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/devlink.h  | 11 +++++
 .../net/ethernet/mellanox/mlx5/core/devlink_res.c  | 56 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  3 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  3 ++
 include/linux/mlx5/driver.h                        |  4 --
 7 files changed, 85 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c

(limited to 'include/linux')

diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 4e4b97f7971a..4e6020570292 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -46,6 +46,18 @@ parameters.
 
 The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
 
+Resources
+=========
+
+.. list-table:: Driver-specific resources implemented
+   :widths: 5 5 5 85
+
+   * - Name
+     - Description
+   * - ``comp_eq_size``
+     - Control the size of I/O completion EQs.
+       * The default value is 1024, and the range is between 64 and 4096.
+
 Info versions
 =============
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index bdb271b604d9..79c15ee62cde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
-		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
+		diag/fw_tracer.o diag/crdump.o devlink.o devlink_res.o diag/rsc_dump.o \
 		fw_reset.o qos.o lib/tout.o
 
 #
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 30bf4882779b..4192f23b1446 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -6,6 +6,13 @@
 
 #include <net/devlink.h>
 
+enum mlx5_devlink_resource_id {
+	MLX5_DL_RES_COMP_EQ = 1,
+
+	__MLX5_ID_RES_MAX,
+	MLX5_ID_RES_MAX = __MLX5_ID_RES_MAX - 1,
+};
+
 enum mlx5_devlink_param_id {
 	MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
 	MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
@@ -31,6 +38,10 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
 int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
 				  enum devlink_trap_action *action);
 
+void mlx5_devlink_res_register(struct mlx5_core_dev *dev);
+void mlx5_devlink_res_unregister(struct mlx5_core_dev *dev);
+size_t mlx5_devlink_res_size(struct mlx5_core_dev *dev, enum mlx5_devlink_resource_id id);
+
 struct devlink *mlx5_devlink_alloc(struct device *dev);
 void mlx5_devlink_free(struct devlink *devlink);
 int mlx5_devlink_register(struct devlink *devlink);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c
new file mode 100644
index 000000000000..3beedfb8534a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "devlink.h"
+#include "mlx5_core.h"
+
+enum {
+	MLX5_EQ_MIN_SIZE = 64,
+	MLX5_EQ_MAX_SIZE = 4096,
+	MLX5_COMP_EQ_SIZE = 1024,
+};
+
+static int comp_eq_res_register(struct mlx5_core_dev *dev)
+{
+	struct devlink_resource_size_params comp_eq_size;
+	struct devlink *devlink = priv_to_devlink(dev);
+
+	devlink_resource_size_params_init(&comp_eq_size, MLX5_EQ_MIN_SIZE,
+					  MLX5_EQ_MAX_SIZE, 1, DEVLINK_RESOURCE_UNIT_ENTRY);
+	return devlink_resource_register(devlink, "io_eq_size", MLX5_COMP_EQ_SIZE,
+					 MLX5_DL_RES_COMP_EQ,
+					 DEVLINK_RESOURCE_ID_PARENT_TOP,
+					 &comp_eq_size);
+}
+
+void mlx5_devlink_res_register(struct mlx5_core_dev *dev)
+{
+	int err;
+
+	err = comp_eq_res_register(dev);
+	if (err)
+		mlx5_core_err(dev, "Failed to register resources, err = %d\n", err);
+}
+
+void mlx5_devlink_res_unregister(struct mlx5_core_dev *dev)
+{
+	devlink_resources_unregister(priv_to_devlink(dev), NULL);
+}
+
+static const size_t default_vals[MLX5_ID_RES_MAX + 1] = {
+	[MLX5_DL_RES_COMP_EQ] = MLX5_COMP_EQ_SIZE,
+};
+
+size_t mlx5_devlink_res_size(struct mlx5_core_dev *dev, enum mlx5_devlink_resource_id id)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	u64 size;
+	int err;
+
+	err = devlink_resource_size_get(devlink, id, &size);
+	if (!err)
+		return size;
+	mlx5_core_err(dev, "Failed to get param. using default. err = %d, id = %u\n",
+		      err, id);
+	return default_vals[id];
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 792e0d6aa861..4dda6e2a4cbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -19,6 +19,7 @@
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
 #include "mlx5_irq.h"
+#include "devlink.h"
 
 enum {
 	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
@@ -807,7 +808,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 
 	INIT_LIST_HEAD(&table->comp_eqs_list);
 	ncomp_eqs = table->num_comp_eqs;
-	nent = MLX5_COMP_EQ_SIZE;
+	nent = mlx5_devlink_res_size(dev, MLX5_DL_RES_COMP_EQ);
 	for (i = 0; i < ncomp_eqs; i++) {
 		struct mlx5_eq_param param = {};
 		int vecidx = i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f8446395163a..96fdbc0c87bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -922,6 +922,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 	dev->hv_vhca = mlx5_hv_vhca_create(dev);
 	dev->rsc_dump = mlx5_rsc_dump_create(dev);
 
+	mlx5_devlink_res_register(dev);
+
 	return 0;
 
 err_sf_table_cleanup:
@@ -957,6 +959,7 @@ err_devcom:
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
+	mlx5_devlink_res_unregister(dev);
 	mlx5_rsc_dump_destroy(dev);
 	mlx5_hv_vhca_destroy(dev->hv_vhca);
 	mlx5_fw_tracer_destroy(dev->tracer);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f617dfbcd9fd..47c07f95bbe1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -797,10 +797,6 @@ struct mlx5_db {
 	int			index;
 };
 
-enum {
-	MLX5_COMP_EQ_SIZE = 1024,
-};
-
 enum {
 	MLX5_PTYS_IB = 1 << 0,
 	MLX5_PTYS_EN = 1 << 2,
-- 
cgit v1.2.3


From a6cb08daa3b459e3dab1d98c67cb8c931f4d81a5 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Wed, 13 Oct 2021 09:57:54 +0300
Subject: net/mlx5: Let user configure event_eq_size param

Event EQ is an EQ which received the notification of almost all the
events generated by the NIC.
Currently, each event EQ is taking 512KB of memory. This size is not
needed in most use cases, and is critical with large scale. Hence,
allow user to configure the size of the event EQ.

For example to reduce event EQ size to 64, execute::
$ devlink resource set pci/0000:00:0b.0 path /event_eq_size/ size 64
$ devlink dev reload pci/0000:00:0b.0

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 Documentation/networking/devlink/mlx5.rst          |  4 ++++
 drivers/net/ethernet/mellanox/mlx5/core/devlink.h  |  1 +
 .../net/ethernet/mellanox/mlx5/core/devlink_res.c  | 26 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  2 +-
 include/linux/mlx5/eq.h                            |  1 -
 5 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 4e6020570292..5b77863f9c88 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -57,6 +57,10 @@ Resources
    * - ``comp_eq_size``
      - Control the size of I/O completion EQs.
        * The default value is 1024, and the range is between 64 and 4096.
+   * - ``event_eq_size``
+     - Control the size of the asynchronous control events EQ.
+       * The default value is 4096, and the range is between 64 and 4096.
+
 
 Info versions
 =============
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 4192f23b1446..674415fd0b3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -8,6 +8,7 @@
 
 enum mlx5_devlink_resource_id {
 	MLX5_DL_RES_COMP_EQ = 1,
+	MLX5_DL_RES_ASYNC_EQ,
 
 	__MLX5_ID_RES_MAX,
 	MLX5_ID_RES_MAX = __MLX5_ID_RES_MAX - 1,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c
index 3beedfb8534a..549d23745942 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c
@@ -7,6 +7,7 @@
 enum {
 	MLX5_EQ_MIN_SIZE = 64,
 	MLX5_EQ_MAX_SIZE = 4096,
+	MLX5_NUM_ASYNC_EQE = 4096,
 	MLX5_COMP_EQ_SIZE = 1024,
 };
 
@@ -23,13 +24,35 @@ static int comp_eq_res_register(struct mlx5_core_dev *dev)
 					 &comp_eq_size);
 }
 
+static int async_eq_resource_register(struct mlx5_core_dev *dev)
+{
+	struct devlink_resource_size_params async_eq_size;
+	struct devlink *devlink = priv_to_devlink(dev);
+
+	devlink_resource_size_params_init(&async_eq_size, MLX5_EQ_MIN_SIZE,
+					  MLX5_EQ_MAX_SIZE, 1, DEVLINK_RESOURCE_UNIT_ENTRY);
+	return devlink_resource_register(devlink, "event_eq_size",
+					 MLX5_NUM_ASYNC_EQE, MLX5_DL_RES_ASYNC_EQ,
+					 DEVLINK_RESOURCE_ID_PARENT_TOP,
+					 &async_eq_size);
+}
+
 void mlx5_devlink_res_register(struct mlx5_core_dev *dev)
 {
 	int err;
 
 	err = comp_eq_res_register(dev);
 	if (err)
-		mlx5_core_err(dev, "Failed to register resources, err = %d\n", err);
+		goto err_msg;
+
+	err = async_eq_resource_register(dev);
+	if (err)
+		goto err;
+	return;
+err:
+	devlink_resources_unregister(priv_to_devlink(dev), NULL);
+err_msg:
+	mlx5_core_err(dev, "Failed to register resources, err = %d\n", err);
 }
 
 void mlx5_devlink_res_unregister(struct mlx5_core_dev *dev)
@@ -39,6 +62,7 @@ void mlx5_devlink_res_unregister(struct mlx5_core_dev *dev)
 
 static const size_t default_vals[MLX5_ID_RES_MAX + 1] = {
 	[MLX5_DL_RES_COMP_EQ] = MLX5_COMP_EQ_SIZE,
+	[MLX5_DL_RES_ASYNC_EQ] = MLX5_NUM_ASYNC_EQE,
 };
 
 size_t mlx5_devlink_res_size(struct mlx5_core_dev *dev, enum mlx5_devlink_resource_id id)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 4dda6e2a4cbc..31e69067284b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -647,7 +647,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 
 	param = (struct mlx5_eq_param) {
 		.irq_index = MLX5_IRQ_EQ_CTRL,
-		.nent = MLX5_NUM_ASYNC_EQE,
+		.nent = mlx5_devlink_res_size(dev, MLX5_DL_RES_ASYNC_EQ),
 	};
 
 	gather_async_events_mask(dev, param.mask);
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index ea3ff5a8ced3..11161e427608 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -5,7 +5,6 @@
 #define MLX5_CORE_EQ_H
 
 #define MLX5_NUM_CMD_EQE   (32)
-#define MLX5_NUM_ASYNC_EQE (0x1000)
 #define MLX5_NUM_SPARE_EQE (0x80)
 
 struct mlx5_eq;
-- 
cgit v1.2.3


From 554604061979d656bbbec50f101526349cd5aa5f Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Mon, 16 Aug 2021 08:41:08 +0300
Subject: net/mlx5: Let user configure max_macs param

Currently, max_macs is taking 70Kbytes of memory per function. This
size is not needed in all use cases, and is critical with large scale.
Hence, allow user to configure the number of max_macs.

For example, to reduce the number of max_macs to 1, execute::
$ devlink dev param set pci/0000:00:0b.0 name max_macs value 1 \
              cmode driverinit
$ devlink dev reload pci/0000:00:0b.0

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 Documentation/networking/devlink/mlx5.rst         |  4 ++
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 69 +++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c    | 18 ++++++
 include/linux/mlx5/mlx5_ifc.h                     |  2 +-
 4 files changed, 92 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 5b77863f9c88..d467e770906e 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -14,8 +14,12 @@ Parameters
 
    * - Name
      - Mode
+     - Validation
    * - ``enable_roce``
      - driverinit
+   * - ``max_macs``
+     - driverinit
+     - The range is between 1 and 2^31. Only power of 2 values are supported.
 
 The ``mlx5`` driver also implements the following driver-specific
 parameters.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 1c98652b244a..fc78c745ead1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -752,6 +752,68 @@ static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink)
 	mlx5_devlink_eth_param_unregister(devlink);
 }
 
+static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id,
+					     union devlink_param_value val,
+					     struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+	/* At least one unicast mac is needed */
+	if (val.vu32 == 0) {
+		NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0");
+		return -EINVAL;
+	}
+	/* Check if its power of 2 or not */
+	if (!is_power_of_2(val.vu32)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Only power of 2 values are supported for max_macs");
+		return -EOPNOTSUPP;
+	}
+
+	if (ilog2(val.vu32) >
+	    MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) {
+		NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static const struct devlink_param max_uc_list_param =
+	DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
+			      NULL, NULL, mlx5_devlink_max_uc_list_validate);
+
+static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	union devlink_param_value value;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, log_max_current_uc_list_wr_supported))
+		return 0;
+
+	err = devlink_param_register(devlink, &max_uc_list_param);
+	if (err)
+		return err;
+
+	value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list);
+	devlink_param_driverinit_value_set(devlink,
+					   DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+					   value);
+	return 0;
+}
+
+static void
+mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+	if (!MLX5_CAP_GEN(dev, log_max_current_uc_list_wr_supported))
+		return;
+
+	devlink_param_unregister(devlink, &max_uc_list_param);
+}
+
 #define MLX5_TRAP_DROP(_id, _group_id)					\
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				\
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \
@@ -815,11 +877,17 @@ int mlx5_devlink_register(struct devlink *devlink)
 	if (err)
 		goto traps_reg_err;
 
+	err = mlx5_devlink_max_uc_list_param_register(devlink);
+	if (err)
+		goto uc_list_reg_err;
+
 	if (!mlx5_core_is_mp_slave(dev))
 		devlink_set_features(devlink, DEVLINK_F_RELOAD);
 
 	return 0;
 
+uc_list_reg_err:
+	mlx5_devlink_traps_unregister(devlink);
 traps_reg_err:
 	mlx5_devlink_auxdev_params_unregister(devlink);
 auxdev_reg_err:
@@ -830,6 +898,7 @@ auxdev_reg_err:
 
 void mlx5_devlink_unregister(struct devlink *devlink)
 {
+	mlx5_devlink_max_uc_list_param_unregister(devlink);
 	mlx5_devlink_traps_unregister(devlink);
 	mlx5_devlink_auxdev_params_unregister(devlink);
 	devlink_params_unregister(devlink, mlx5_devlink_params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 96fdbc0c87bf..079ee9e8da10 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -484,10 +484,23 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
 }
 
+static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
+{
+	struct devlink *devlink = priv_to_devlink(dev);
+	union devlink_param_value val;
+	int err;
+
+	err = devlink_param_driverinit_value_get(devlink,
+						 DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
+						 &val);
+	return err ? 0 : val.vu32;
+}
+
 static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 {
 	struct mlx5_profile *prof = &dev->profile;
 	void *set_hca_cap;
+	u32 max_uc_list;
 	int err;
 
 	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
@@ -561,6 +574,11 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 	if (MLX5_CAP_GEN(dev, roce_rw_supported))
 		MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
 
+	max_uc_list = max_uc_list_get_devlink_param(dev);
+	if (max_uc_list)
+		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list,
+			 ilog2(max_uc_list));
+
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 }
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 746381eccccf..97465d00de9d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1603,7 +1603,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         ext_stride_num_range[0x1];
 	u8         roce_rw_supported[0x1];
-	u8         reserved_at_3a2[0x1];
+	u8         log_max_current_uc_list_wr_supported[0x1];
 	u8         log_max_stride_sz_rq[0x5];
 	u8         reserved_at_3a8[0x3];
 	u8         log_min_stride_sz_rq[0x5];
-- 
cgit v1.2.3


From 95cadae320be46583078690ac89ffe63c95cc9d2 Mon Sep 17 00:00:00 2001
From: Qian Cai <quic_qiancai@quicinc.com>
Date: Mon, 25 Oct 2021 17:05:28 -0400
Subject: fortify: strlen: Avoid shadowing previous locals

The __compiletime_strlen() macro expansion will shadow p_size and p_len
local variables. No callers currently use any of the shadowed names
for their "p" variable, so there are no code generation problems.

Add "__" prefixes to variable definitions __compiletime_strlen() to
avoid new W=2 warnings:

./include/linux/fortify-string.h: In function 'strnlen':
./include/linux/fortify-string.h:17:9: warning: declaration of 'p_size' shadows a previous local [-Wshadow]
   17 |  size_t p_size = __builtin_object_size(p, 1); \
      |         ^~~~~~
./include/linux/fortify-string.h:77:17: note: in expansion of macro '__compiletime_strlen'
   77 |  size_t p_len = __compiletime_strlen(p);
      |                 ^~~~~~~~~~~~~~~~~~~~
./include/linux/fortify-string.h:76:9: note: shadowed declaration is here
   76 |  size_t p_size = __builtin_object_size(p, 1);
      |         ^~~~~~

Signed-off-by: Qian Cai <quic_qiancai@quicinc.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20211025210528.261643-1-quic_qiancai@quicinc.com
---
 include/linux/fortify-string.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index fdb0a74c9ca2..a6cd6815f249 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -10,18 +10,18 @@ void __read_overflow(void) __compiletime_error("detected read beyond size of obj
 void __read_overflow2(void) __compiletime_error("detected read beyond size of object (2nd parameter)");
 void __write_overflow(void) __compiletime_error("detected write beyond size of object (1st parameter)");
 
-#define __compiletime_strlen(p)				\
-({							\
-	unsigned char *__p = (unsigned char *)(p);      \
-	size_t ret = (size_t)-1;			\
-	size_t p_size = __builtin_object_size(p, 1);	\
-	if (p_size != (size_t)-1) {			\
-		size_t p_len = p_size - 1;		\
-		if (__builtin_constant_p(__p[p_len]) &&	\
-		    __p[p_len] == '\0')			\
-			ret = __builtin_strlen(__p);	\
-	}						\
-	ret;						\
+#define __compiletime_strlen(p)					\
+({								\
+	unsigned char *__p = (unsigned char *)(p);		\
+	size_t __ret = (size_t)-1;				\
+	size_t __p_size = __builtin_object_size(p, 1);		\
+	if (__p_size != (size_t)-1) {				\
+		size_t __p_len = __p_size - 1;			\
+		if (__builtin_constant_p(__p[__p_len]) &&	\
+		    __p[__p_len] == '\0')			\
+			__ret = __builtin_strlen(__p);		\
+	}							\
+	__ret;							\
 })
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-- 
cgit v1.2.3


From ef57c1610dd8fba5031bf71e0db73356190de151 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 25 Oct 2021 09:48:17 -0700
Subject: ipv6: move inet6_sk(sk)->rx_dst_cookie to sk->sk_rx_dst_cookie

Increase cache locality by moving rx_dst_coookie next to sk->sk_rx_dst

This removes one or two cache line misses in IPv6 early demux (TCP/UDP)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ipv6.h | 1 -
 include/net/sock.h   | 2 ++
 net/ipv6/tcp_ipv6.c  | 6 +++---
 net/ipv6/udp.c       | 4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index ef4a69865737..c383630d3f06 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -282,7 +282,6 @@ struct ipv6_pinfo {
 	__be32			rcv_flowinfo;
 
 	__u32			dst_cookie;
-	__u32			rx_dst_cookie;
 
 	struct ipv6_mc_socklist	__rcu *ipv6_mc_list;
 	struct ipv6_ac_socklist	*ipv6_ac_list;
diff --git a/include/net/sock.h b/include/net/sock.h
index 0bfb3f138bda..99c4194cb61a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -260,6 +260,7 @@ struct bpf_local_storage;
   *	@sk_wq: sock wait queue and async head
   *	@sk_rx_dst: receive input route used by early demux
   *	@sk_rx_dst_ifindex: ifindex for @sk_rx_dst
+  *	@sk_rx_dst_cookie: cookie for @sk_rx_dst
   *	@sk_dst_cache: destination cache
   *	@sk_dst_pending_confirm: need to confirm neighbour
   *	@sk_policy: flow policy
@@ -432,6 +433,7 @@ struct sock {
 #endif
 	struct dst_entry	*sk_rx_dst;
 	int			sk_rx_dst_ifindex;
+	u32			sk_rx_dst_cookie;
 
 	struct dst_entry __rcu	*sk_dst_cache;
 	atomic_t		sk_omem_alloc;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3e8669b6d636..50d9578e945b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -109,7 +109,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
 
 		sk->sk_rx_dst = dst;
 		sk->sk_rx_dst_ifindex = skb->skb_iif;
-		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
 	}
 }
 
@@ -1511,7 +1511,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		if (dst) {
 			if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
 			    INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
-					    dst, np->rx_dst_cookie) == NULL) {
+					    dst, sk->sk_rx_dst_cookie) == NULL) {
 				dst_release(dst);
 				sk->sk_rx_dst = NULL;
 			}
@@ -1872,7 +1872,7 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
 			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
 			if (dst)
-				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
+				dst = dst_check(dst, sk->sk_rx_dst_cookie);
 			if (dst &&
 			    sk->sk_rx_dst_ifindex == skb->skb_iif)
 				skb_dst_set_noref(skb, dst);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8d785232b479..14a94cddcf0b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -884,7 +884,7 @@ static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
 	if (udp_sk_rx_dst_set(sk, dst)) {
 		const struct rt6_info *rt = (const struct rt6_info *)dst;
 
-		inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+		sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
 	}
 }
 
@@ -1073,7 +1073,7 @@ INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
 	dst = READ_ONCE(sk->sk_rx_dst);
 
 	if (dst)
-		dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
+		dst = dst_check(dst, sk->sk_rx_dst_cookie);
 	if (dst) {
 		/* set noref for now.
 		 * any place which wants to hold dst has to call
-- 
cgit v1.2.3


From 79ca6f74dae067681a779fd573c2eb59649989bc Mon Sep 17 00:00:00 2001
From: Hao Wu <hao.wu@rubrik.com>
Date: Wed, 8 Sep 2021 02:26:06 -0700
Subject: tpm: fix Atmel TPM crash caused by too frequent queries

The Atmel TPM 1.2 chips crash with error
`tpm_try_transmit: send(): error -62` since kernel 4.14.
It is observed from the kernel log after running `tpm_sealdata -z`.
The error thrown from the command is as follows
```
$ tpm_sealdata -z
Tspi_Key_LoadKey failed: 0x00001087 - layer=tddl,
code=0087 (135), I/O error
```

The issue was reproduced with the following Atmel TPM chip:
```
$ tpm_version
T0  TPM 1.2 Version Info:
  Chip Version:        1.2.66.1
  Spec Level:          2
  Errata Revision:     3
  TPM Vendor ID:       ATML
  TPM Version:         01010000
  Manufacturer Info:   41544d4c
```

The root cause of the issue is due to the TPM calls to msleep()
were replaced with usleep_range() [1], which reduces
the actual timeout. Via experiments, it is observed that
the original msleep(5) actually sleeps for 15ms.
Because of a known timeout issue in Atmel TPM 1.2 chip,
the shorter timeout than 15ms can cause the error described above.

A few further changes in kernel 4.16 [2] and 4.18 [3, 4] further
reduced the timeout to less than 1ms. With experiments,
the problematic timeout in the latest kernel is the one
for `wait_for_tpm_stat`.

To fix it, the patch reverts the timeout of `wait_for_tpm_stat`
to 15ms for all Atmel TPM 1.2 chips, but leave it untouched
for Ateml TPM 2.0 chip, and chips from other vendors.
As explained above, the chosen 15ms timeout is
the actual timeout before this issue introduced,
thus the old value is used here.
Particularly, TPM_ATML_TIMEOUT_WAIT_STAT_MIN is set to 14700us,
TPM_ATML_TIMEOUT_WAIT_STAT_MIN is set to 15000us according to
the existing TPM_TIMEOUT_RANGE_US (300us).
The fixed has been tested in the system with the affected Atmel chip
with no issues observed after boot up.

References:
[1] 9f3fc7bcddcb tpm: replace msleep() with usleep_range() in TPM
1.2/2.0 generic drivers
[2] cf151a9a44d5 tpm: reduce tpm polling delay in tpm_tis_core
[3] 59f5a6b07f64 tpm: reduce poll sleep time in tpm_transmit()
[4] 424eaf910c32 tpm: reduce polling time to usecs for even finer
granularity

Fixes: 9f3fc7bcddcb ("tpm: replace msleep() with usleep_range() in TPM 1.2/2.0 generic drivers")
Link: https://patchwork.kernel.org/project/linux-integrity/patch/20200926223150.109645-1-hao.wu@rubrik.com/
Signed-off-by: Hao Wu <hao.wu@rubrik.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
 drivers/char/tpm/tpm_tis_core.c | 26 ++++++++++++++++++--------
 drivers/char/tpm/tpm_tis_core.h |  4 ++++
 include/linux/tpm.h             |  1 +
 3 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 69579efb247b..b2659a4c4016 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -48,6 +48,7 @@ static int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask,
 		unsigned long timeout, wait_queue_head_t *queue,
 		bool check_cancel)
 {
+	struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
 	unsigned long stop;
 	long rc;
 	u8 status;
@@ -80,8 +81,8 @@ again:
 		}
 	} else {
 		do {
-			usleep_range(TPM_TIMEOUT_USECS_MIN,
-				     TPM_TIMEOUT_USECS_MAX);
+			usleep_range(priv->timeout_min,
+				     priv->timeout_max);
 			status = chip->ops->status(chip);
 			if ((status & mask) == mask)
 				return 0;
@@ -945,7 +946,22 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
 	chip->timeout_b = msecs_to_jiffies(TIS_TIMEOUT_B_MAX);
 	chip->timeout_c = msecs_to_jiffies(TIS_TIMEOUT_C_MAX);
 	chip->timeout_d = msecs_to_jiffies(TIS_TIMEOUT_D_MAX);
+	priv->timeout_min = TPM_TIMEOUT_USECS_MIN;
+	priv->timeout_max = TPM_TIMEOUT_USECS_MAX;
 	priv->phy_ops = phy_ops;
+
+	rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor);
+	if (rc < 0)
+		goto out_err;
+
+	priv->manufacturer_id = vendor;
+
+	if (priv->manufacturer_id == TPM_VID_ATML &&
+		!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+		priv->timeout_min = TIS_TIMEOUT_MIN_ATML;
+		priv->timeout_max = TIS_TIMEOUT_MAX_ATML;
+	}
+
 	dev_set_drvdata(&chip->dev, priv);
 
 	if (is_bsw()) {
@@ -988,12 +1004,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
 	if (rc)
 		goto out_err;
 
-	rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor);
-	if (rc < 0)
-		goto out_err;
-
-	priv->manufacturer_id = vendor;
-
 	rc = tpm_tis_read8(priv, TPM_RID(0), &rid);
 	if (rc < 0)
 		goto out_err;
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index b2a3c6c72882..3be24f221e32 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -54,6 +54,8 @@ enum tis_defaults {
 	TIS_MEM_LEN = 0x5000,
 	TIS_SHORT_TIMEOUT = 750,	/* ms */
 	TIS_LONG_TIMEOUT = 2000,	/* 2 sec */
+	TIS_TIMEOUT_MIN_ATML = 14700,	/* usecs */
+	TIS_TIMEOUT_MAX_ATML = 15000,	/* usecs */
 };
 
 /* Some timeout values are needed before it is known whether the chip is
@@ -98,6 +100,8 @@ struct tpm_tis_data {
 	wait_queue_head_t read_queue;
 	const struct tpm_tis_phy_ops *phy_ops;
 	unsigned short rng_quality;
+	unsigned int timeout_min; /* usecs */
+	unsigned int timeout_max; /* usecs */
 };
 
 struct tpm_tis_phy_ops {
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index aa11fe323c56..12d827734686 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -269,6 +269,7 @@ enum tpm2_cc_attrs {
 #define TPM_VID_INTEL    0x8086
 #define TPM_VID_WINBOND  0x1050
 #define TPM_VID_STM      0x104A
+#define TPM_VID_ATML     0x1114
 
 enum tpm_chip_flags {
 	TPM_CHIP_FLAG_TPM2		= BIT(1),
-- 
cgit v1.2.3


From 1bdda24c4af64cd2d65dec5192ab624c5fee7ca0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 21 Oct 2021 15:55:05 -0700
Subject: signal: Add an optional check for altstack size

New x86 FPU features will be very large, requiring ~10k of stack in
signal handlers.  These new features require a new approach called
"dynamic features".

The kernel currently tries to ensure that altstacks are reasonably
sized. Right now, on x86, sys_sigaltstack() requires a size of >=2k.
However, that 2k is a constant. Simply raising that 2k requirement
to >10k for the new features would break existing apps which have a
compiled-in size of 2k.

Instead of universally enforcing a larger stack, prohibit a process from
using dynamic features without properly-sized altstacks. This must be
enforced in two places:

 * A dynamic feature can not be enabled without an large-enough altstack
   for each process thread.
 * Once a dynamic feature is enabled, any request to install a too-small
   altstack will be rejected

The dynamic feature enabling code must examine each thread in a
process to ensure that the altstacks are large enough. Add a new lock
(sigaltstack_lock()) to ensure that threads can not race and change
their altstack after being examined.

Add the infrastructure in form of a config option and provide empty
stubs for architectures which do not need dynamic altstack size checks.

This implementation will be fleshed out for x86 in a future patch called

  x86/arch_prctl: Add controls for dynamic XSTATE components

  [dhansen: commit message. ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20211021225527.10184-2-chang.seok.bae@intel.com
---
 arch/Kconfig           |  3 +++
 include/linux/signal.h |  6 ++++++
 kernel/signal.c        | 35 +++++++++++++++++++++++++++++------
 3 files changed, 38 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 8df1c7102643..af5cf3009b4f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1288,6 +1288,9 @@ config ARCH_HAS_ELFCORE_COMPAT
 config ARCH_HAS_PARANOID_L1D_FLUSH
 	bool
 
+config DYNAMIC_SIGFRAME
+	bool
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3f96a6374e4f..7d34105e20c6 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -464,6 +464,12 @@ int __save_altstack(stack_t __user *, unsigned long);
 	unsafe_put_user(t->sas_ss_size, &__uss->ss_size, label); \
 } while (0);
 
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+bool sigaltstack_size_valid(size_t ss_size);
+#else
+static inline bool sigaltstack_size_valid(size_t size) { return true; }
+#endif /* !CONFIG_DYNAMIC_SIGFRAME */
+
 #ifdef CONFIG_PROC_FS
 struct seq_file;
 extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
diff --git a/kernel/signal.c b/kernel/signal.c
index 952741f6d0f9..9278f5291ed6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -4151,11 +4151,29 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 	return 0;
 }
 
+#ifdef CONFIG_DYNAMIC_SIGFRAME
+static inline void sigaltstack_lock(void)
+	__acquires(&current->sighand->siglock)
+{
+	spin_lock_irq(&current->sighand->siglock);
+}
+
+static inline void sigaltstack_unlock(void)
+	__releases(&current->sighand->siglock)
+{
+	spin_unlock_irq(&current->sighand->siglock);
+}
+#else
+static inline void sigaltstack_lock(void) { }
+static inline void sigaltstack_unlock(void) { }
+#endif
+
 static int
 do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp,
 		size_t min_ss_size)
 {
 	struct task_struct *t = current;
+	int ret = 0;
 
 	if (oss) {
 		memset(oss, 0, sizeof(stack_t));
@@ -4179,19 +4197,24 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp,
 				ss_mode != 0))
 			return -EINVAL;
 
+		sigaltstack_lock();
 		if (ss_mode == SS_DISABLE) {
 			ss_size = 0;
 			ss_sp = NULL;
 		} else {
 			if (unlikely(ss_size < min_ss_size))
-				return -ENOMEM;
+				ret = -ENOMEM;
+			if (!sigaltstack_size_valid(ss_size))
+				ret = -ENOMEM;
 		}
-
-		t->sas_ss_sp = (unsigned long) ss_sp;
-		t->sas_ss_size = ss_size;
-		t->sas_ss_flags = ss_flags;
+		if (!ret) {
+			t->sas_ss_sp = (unsigned long) ss_sp;
+			t->sas_ss_size = ss_size;
+			t->sas_ss_flags = ss_flags;
+		}
+		sigaltstack_unlock();
 	}
-	return 0;
+	return ret;
 }
 
 SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
-- 
cgit v1.2.3


From 0953fb263714e1c8c1c3d395036d9a14310081dd Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 20 Oct 2021 20:23:09 +0100
Subject: irq: remove handle_domain_{irq,nmi}()

Now that entry code handles IRQ entry (including setting the IRQ regs)
before calling irqchip code, irqchip code can safely call
generic_handle_domain_irq(), and there's no functional reason for it to
call handle_domain_irq().

Let's cement this split of responsibility and remove handle_domain_irq()
entirely, updating irqchip drivers to call generic_handle_domain_irq().

For consistency, handle_domain_nmi() is similarly removed and replaced
with a generic_handle_domain_nmi() function which also does not perform
any entry logic.

Previously handle_domain_{irq,nmi}() had a WARN_ON() which would fire
when they were called in an inappropriate context. So that we can
identify similar issues going forward, similar WARN_ON_ONCE() logic is
added to the generic_handle_*() functions, and comments are updated for
clarity and consistency.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
---
 Documentation/core-api/irq/irq-domain.rst |  3 --
 arch/arm/Kconfig                          |  1 -
 arch/arm/mach-imx/avic.c                  |  2 +-
 arch/arm/mach-imx/tzic.c                  |  2 +-
 arch/arm/mach-omap1/irq.c                 |  2 +-
 arch/arm/mach-s3c/irq-s3c24xx.c           |  2 +-
 arch/arm64/Kconfig                        |  1 -
 arch/csky/Kconfig                         |  1 -
 arch/openrisc/Kconfig                     |  1 -
 arch/riscv/Kconfig                        |  1 -
 drivers/irqchip/irq-apple-aic.c           | 20 ++++-----
 drivers/irqchip/irq-armada-370-xp.c       | 13 ++----
 drivers/irqchip/irq-aspeed-vic.c          |  2 +-
 drivers/irqchip/irq-atmel-aic.c           |  2 +-
 drivers/irqchip/irq-atmel-aic5.c          |  2 +-
 drivers/irqchip/irq-bcm2835.c             |  2 +-
 drivers/irqchip/irq-bcm2836.c             |  2 +-
 drivers/irqchip/irq-clps711x.c            |  8 ++--
 drivers/irqchip/irq-csky-apb-intc.c       |  2 +-
 drivers/irqchip/irq-csky-mpintc.c         |  4 +-
 drivers/irqchip/irq-davinci-aintc.c       |  2 +-
 drivers/irqchip/irq-davinci-cp-intc.c     |  2 +-
 drivers/irqchip/irq-digicolor.c           |  2 +-
 drivers/irqchip/irq-dw-apb-ictl.c         |  2 +-
 drivers/irqchip/irq-ftintc010.c           |  2 +-
 drivers/irqchip/irq-gic-v3.c              |  4 +-
 drivers/irqchip/irq-gic.c                 |  2 +-
 drivers/irqchip/irq-hip04.c               |  2 +-
 drivers/irqchip/irq-ixp4xx.c              |  4 +-
 drivers/irqchip/irq-lpc32xx.c             |  2 +-
 drivers/irqchip/irq-mmp.c                 |  4 +-
 drivers/irqchip/irq-mxs.c                 |  2 +-
 drivers/irqchip/irq-nvic.c                |  6 +--
 drivers/irqchip/irq-omap-intc.c           |  2 +-
 drivers/irqchip/irq-or1k-pic.c            |  2 +-
 drivers/irqchip/irq-orion.c               |  4 +-
 drivers/irqchip/irq-rda-intc.c            |  2 +-
 drivers/irqchip/irq-riscv-intc.c          |  2 +-
 drivers/irqchip/irq-sa11x0.c              |  4 +-
 drivers/irqchip/irq-sun4i.c               |  2 +-
 drivers/irqchip/irq-versatile-fpga.c      |  2 +-
 drivers/irqchip/irq-vic.c                 |  2 +-
 drivers/irqchip/irq-vt8500.c              |  2 +-
 drivers/irqchip/irq-wpcm450-aic.c         |  2 +-
 drivers/irqchip/irq-zevio.c               |  2 +-
 include/linux/irqdesc.h                   |  9 +---
 kernel/irq/Kconfig                        |  7 ----
 kernel/irq/irqdesc.c                      | 68 ++++++++-----------------------
 48 files changed, 80 insertions(+), 141 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/irq/irq-domain.rst b/Documentation/core-api/irq/irq-domain.rst
index 9c0e8758037a..d30b4d0a9769 100644
--- a/Documentation/core-api/irq/irq-domain.rst
+++ b/Documentation/core-api/irq/irq-domain.rst
@@ -67,9 +67,6 @@ variety of methods:
   deprecated
 - generic_handle_domain_irq() handles an interrupt described by a
   domain and a hwirq number
-- handle_domain_irq() does the same thing for root interrupt
-  controllers and deals with the set_irq_reg()/irq_enter() sequences
-  that most architecture requires
 
 Note that irq domain lookups must happen in contexts that are
 compatible with a RCU read-side critical section.
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fc196421b2ce..3361a6c29ee9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -64,7 +64,6 @@ config ARM
 	select GENERIC_PCI_IOMAP
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
-	select HANDLE_DOMAIN_IRQ
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
diff --git a/arch/arm/mach-imx/avic.c b/arch/arm/mach-imx/avic.c
index 21bce4049cec..cf6546ddc7a3 100644
--- a/arch/arm/mach-imx/avic.c
+++ b/arch/arm/mach-imx/avic.c
@@ -154,7 +154,7 @@ static void __exception_irq_entry avic_handle_irq(struct pt_regs *regs)
 		if (nivector == 0xffff)
 			break;
 
-		handle_domain_irq(domain, nivector, regs);
+		generic_handle_domain_irq(domain, nivector);
 	} while (1);
 }
 
diff --git a/arch/arm/mach-imx/tzic.c b/arch/arm/mach-imx/tzic.c
index 479a01bdac56..8b3d98d288d9 100644
--- a/arch/arm/mach-imx/tzic.c
+++ b/arch/arm/mach-imx/tzic.c
@@ -134,7 +134,7 @@ static void __exception_irq_entry tzic_handle_irq(struct pt_regs *regs)
 			while (stat) {
 				handled = 1;
 				irqofs = fls(stat) - 1;
-				handle_domain_irq(domain, irqofs + i * 32, regs);
+				generic_handle_domain_irq(domain, irqofs + i * 32);
 				stat &= ~(1 << irqofs);
 			}
 		}
diff --git a/arch/arm/mach-omap1/irq.c b/arch/arm/mach-omap1/irq.c
index b11edc8a46f0..ee6a93083154 100644
--- a/arch/arm/mach-omap1/irq.c
+++ b/arch/arm/mach-omap1/irq.c
@@ -165,7 +165,7 @@ asmlinkage void __exception_irq_entry omap1_handle_irq(struct pt_regs *regs)
 		}
 irq:
 		if (irqnr)
-			handle_domain_irq(domain, irqnr, regs);
+			generic_handle_domain_irq(domain, irqnr);
 		else
 			break;
 	} while (irqnr);
diff --git a/arch/arm/mach-s3c/irq-s3c24xx.c b/arch/arm/mach-s3c/irq-s3c24xx.c
index 3edc5f614eef..45dfd546e6fa 100644
--- a/arch/arm/mach-s3c/irq-s3c24xx.c
+++ b/arch/arm/mach-s3c/irq-s3c24xx.c
@@ -354,7 +354,7 @@ static inline int s3c24xx_handle_intc(struct s3c_irq_intc *intc,
 	if (!(pnd & (1 << offset)))
 		offset =  __ffs(pnd);
 
-	handle_domain_irq(intc->domain, intc_offset + offset, regs);
+	generic_handle_domain_irq(intc->domain, intc_offset + offset);
 	return true;
 }
 
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5c7ae4c3954b..e6593a03ea27 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -133,7 +133,6 @@ config ARM64
 	select GENERIC_TIME_VSYSCALL
 	select GENERIC_GETTIMEOFDAY
 	select GENERIC_VDSO_TIME_NS
-	select HANDLE_DOMAIN_IRQ
 	select HARDIRQS_SW_RESEND
 	select HAVE_MOVE_PMD
 	select HAVE_MOVE_PUD
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 9d4d898df76b..e0bd71b9e23f 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -17,7 +17,6 @@ config CSKY
 	select CSKY_APB_INTC
 	select DMA_DIRECT_REMAP
 	select IRQ_DOMAIN
-	select HANDLE_DOMAIN_IRQ
 	select DW_APB_TIMER_OF
 	select GENERIC_IOREMAP
 	select GENERIC_LIB_ASHLDI3
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e804026b4797..c2491b295d60 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -13,7 +13,6 @@ config OPENRISC
 	select OF
 	select OF_EARLY_FLATTREE
 	select IRQ_DOMAIN
-	select HANDLE_DOMAIN_IRQ
 	select GPIOLIB
 	select HAVE_ARCH_TRACEHOOK
 	select SPARSE_IRQ
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 301a54233c7e..353e28f5f849 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -62,7 +62,6 @@ config RISCV
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL if MMU && 64BIT
-	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c
index 6fc145aacaf0..3759dc36cc8f 100644
--- a/drivers/irqchip/irq-apple-aic.c
+++ b/drivers/irqchip/irq-apple-aic.c
@@ -245,7 +245,7 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs)
 		irq = FIELD_GET(AIC_EVENT_NUM, event);
 
 		if (type == AIC_EVENT_TYPE_HW)
-			handle_domain_irq(aic_irqc->hw_domain, irq, regs);
+			generic_handle_domain_irq(aic_irqc->hw_domain, irq);
 		else if (type == AIC_EVENT_TYPE_IPI && irq == 1)
 			aic_handle_ipi(regs);
 		else if (event != 0)
@@ -392,25 +392,25 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
 	}
 
 	if (TIMER_FIRING(read_sysreg(cntp_ctl_el0)))
-		handle_domain_irq(aic_irqc->hw_domain,
-				  aic_irqc->nr_hw + AIC_TMR_EL0_PHYS, regs);
+		generic_handle_domain_irq(aic_irqc->hw_domain,
+					  aic_irqc->nr_hw + AIC_TMR_EL0_PHYS);
 
 	if (TIMER_FIRING(read_sysreg(cntv_ctl_el0)))
-		handle_domain_irq(aic_irqc->hw_domain,
-				  aic_irqc->nr_hw + AIC_TMR_EL0_VIRT, regs);
+		generic_handle_domain_irq(aic_irqc->hw_domain,
+					  aic_irqc->nr_hw + AIC_TMR_EL0_VIRT);
 
 	if (is_kernel_in_hyp_mode()) {
 		uint64_t enabled = read_sysreg_s(SYS_IMP_APL_VM_TMR_FIQ_ENA_EL2);
 
 		if ((enabled & VM_TMR_FIQ_ENABLE_P) &&
 		    TIMER_FIRING(read_sysreg_s(SYS_CNTP_CTL_EL02)))
-			handle_domain_irq(aic_irqc->hw_domain,
-					  aic_irqc->nr_hw + AIC_TMR_EL02_PHYS, regs);
+			generic_handle_domain_irq(aic_irqc->hw_domain,
+						  aic_irqc->nr_hw + AIC_TMR_EL02_PHYS);
 
 		if ((enabled & VM_TMR_FIQ_ENABLE_V) &&
 		    TIMER_FIRING(read_sysreg_s(SYS_CNTV_CTL_EL02)))
-			handle_domain_irq(aic_irqc->hw_domain,
-					  aic_irqc->nr_hw + AIC_TMR_EL02_VIRT, regs);
+			generic_handle_domain_irq(aic_irqc->hw_domain,
+						  aic_irqc->nr_hw + AIC_TMR_EL02_VIRT);
 	}
 
 	if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
@@ -674,7 +674,7 @@ static void aic_handle_ipi(struct pt_regs *regs)
 	firing = atomic_fetch_andnot(enabled, this_cpu_ptr(&aic_vipi_flag)) & enabled;
 
 	for_each_set_bit(i, &firing, AIC_NR_SWIPI)
-		handle_domain_irq(aic_irqc->ipi_domain, i, regs);
+		generic_handle_domain_irq(aic_irqc->ipi_domain, i);
 
 	/*
 	 * No ordering needed here; at worst this just changes the timing of
diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c
index 53e0fb0562c1..80906bfec845 100644
--- a/drivers/irqchip/irq-armada-370-xp.c
+++ b/drivers/irqchip/irq-armada-370-xp.c
@@ -589,12 +589,7 @@ static void armada_370_xp_handle_msi_irq(struct pt_regs *regs, bool is_chained)
 
 		irq = msinr - PCI_MSI_DOORBELL_START;
 
-		if (is_chained)
-			generic_handle_domain_irq(armada_370_xp_msi_inner_domain,
-						  irq);
-		else
-			handle_domain_irq(armada_370_xp_msi_inner_domain,
-					  irq, regs);
+		generic_handle_domain_irq(armada_370_xp_msi_inner_domain, irq);
 	}
 }
 #else
@@ -646,8 +641,8 @@ armada_370_xp_handle_irq(struct pt_regs *regs)
 			break;
 
 		if (irqnr > 1) {
-			handle_domain_irq(armada_370_xp_mpic_domain,
-					  irqnr, regs);
+			generic_handle_domain_irq(armada_370_xp_mpic_domain,
+						  irqnr);
 			continue;
 		}
 
@@ -666,7 +661,7 @@ armada_370_xp_handle_irq(struct pt_regs *regs)
 				& IPI_DOORBELL_MASK;
 
 			for_each_set_bit(ipi, &ipimask, IPI_DOORBELL_END)
-				handle_domain_irq(ipi_domain, ipi, regs);
+				generic_handle_domain_irq(ipi_domain, ipi);
 		}
 #endif
 
diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c
index 58717cd44f99..62ccf2c0c414 100644
--- a/drivers/irqchip/irq-aspeed-vic.c
+++ b/drivers/irqchip/irq-aspeed-vic.c
@@ -100,7 +100,7 @@ static void __exception_irq_entry avic_handle_irq(struct pt_regs *regs)
 		if (stat == 0)
 			break;
 		irq += ffs(stat) - 1;
-		handle_domain_irq(vic->dom, irq, regs);
+		generic_handle_domain_irq(vic->dom, irq);
 	}
 }
 
diff --git a/drivers/irqchip/irq-atmel-aic.c b/drivers/irqchip/irq-atmel-aic.c
index 2c999dc310c1..4631f6847953 100644
--- a/drivers/irqchip/irq-atmel-aic.c
+++ b/drivers/irqchip/irq-atmel-aic.c
@@ -71,7 +71,7 @@ aic_handle(struct pt_regs *regs)
 	if (!irqstat)
 		irq_reg_writel(gc, 0, AT91_AIC_EOICR);
 	else
-		handle_domain_irq(aic_domain, irqnr, regs);
+		generic_handle_domain_irq(aic_domain, irqnr);
 }
 
 static int aic_retrigger(struct irq_data *d)
diff --git a/drivers/irqchip/irq-atmel-aic5.c b/drivers/irqchip/irq-atmel-aic5.c
index fb4ad2aaa727..145535bd7560 100644
--- a/drivers/irqchip/irq-atmel-aic5.c
+++ b/drivers/irqchip/irq-atmel-aic5.c
@@ -80,7 +80,7 @@ aic5_handle(struct pt_regs *regs)
 	if (!irqstat)
 		irq_reg_writel(bgc, 0, AT91_AIC5_EOICR);
 	else
-		handle_domain_irq(aic5_domain, irqnr, regs);
+		generic_handle_domain_irq(aic5_domain, irqnr);
 }
 
 static void aic5_mask(struct irq_data *d)
diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c
index adc1556ed332..e94e2882286c 100644
--- a/drivers/irqchip/irq-bcm2835.c
+++ b/drivers/irqchip/irq-bcm2835.c
@@ -246,7 +246,7 @@ static void __exception_irq_entry bcm2835_handle_irq(
 	u32 hwirq;
 
 	while ((hwirq = get_next_armctrl_hwirq()) != ~0)
-		handle_domain_irq(intc.domain, hwirq, regs);
+		generic_handle_domain_irq(intc.domain, hwirq);
 }
 
 static void bcm2836_chained_handle_irq(struct irq_desc *desc)
diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c
index 501facdb4570..51491c3c6fdd 100644
--- a/drivers/irqchip/irq-bcm2836.c
+++ b/drivers/irqchip/irq-bcm2836.c
@@ -143,7 +143,7 @@ __exception_irq_entry bcm2836_arm_irqchip_handle_irq(struct pt_regs *regs)
 	if (stat) {
 		u32 hwirq = ffs(stat) - 1;
 
-		handle_domain_irq(intc.domain, hwirq, regs);
+		generic_handle_domain_irq(intc.domain, hwirq);
 	}
 }
 
diff --git a/drivers/irqchip/irq-clps711x.c b/drivers/irqchip/irq-clps711x.c
index d0da29aeedc8..77ebe7e47e0e 100644
--- a/drivers/irqchip/irq-clps711x.c
+++ b/drivers/irqchip/irq-clps711x.c
@@ -77,14 +77,14 @@ static asmlinkage void __exception_irq_entry clps711x_irqh(struct pt_regs *regs)
 		irqstat = readw_relaxed(clps711x_intc->intmr[0]) &
 			  readw_relaxed(clps711x_intc->intsr[0]);
 		if (irqstat)
-			handle_domain_irq(clps711x_intc->domain,
-					  fls(irqstat) - 1, regs);
+			generic_handle_domain_irq(clps711x_intc->domain,
+						  fls(irqstat) - 1);
 
 		irqstat = readw_relaxed(clps711x_intc->intmr[1]) &
 			  readw_relaxed(clps711x_intc->intsr[1]);
 		if (irqstat)
-			handle_domain_irq(clps711x_intc->domain,
-					  fls(irqstat) - 1 + 16, regs);
+			generic_handle_domain_irq(clps711x_intc->domain,
+						  fls(irqstat) - 1 + 16);
 	} while (irqstat);
 }
 
diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c
index ab91afa86755..d36f536506ba 100644
--- a/drivers/irqchip/irq-csky-apb-intc.c
+++ b/drivers/irqchip/irq-csky-apb-intc.c
@@ -138,7 +138,7 @@ static inline bool handle_irq_perbit(struct pt_regs *regs, u32 hwirq,
 	if (hwirq == 0)
 		return 0;
 
-	handle_domain_irq(root_domain, irq_base + __fls(hwirq), regs);
+	generic_handle_domain_irq(root_domain, irq_base + __fls(hwirq));
 
 	return 1;
 }
diff --git a/drivers/irqchip/irq-csky-mpintc.c b/drivers/irqchip/irq-csky-mpintc.c
index a1534edef7fa..cb403c960ac0 100644
--- a/drivers/irqchip/irq-csky-mpintc.c
+++ b/drivers/irqchip/irq-csky-mpintc.c
@@ -74,8 +74,8 @@ static void csky_mpintc_handler(struct pt_regs *regs)
 {
 	void __iomem *reg_base = this_cpu_read(intcl_reg);
 
-	handle_domain_irq(root_domain,
-		readl_relaxed(reg_base + INTCL_RDYIR), regs);
+	generic_handle_domain_irq(root_domain,
+		readl_relaxed(reg_base + INTCL_RDYIR));
 }
 
 static void csky_mpintc_enable(struct irq_data *d)
diff --git a/drivers/irqchip/irq-davinci-aintc.c b/drivers/irqchip/irq-davinci-aintc.c
index 810ccc4fe476..123eb7bfc117 100644
--- a/drivers/irqchip/irq-davinci-aintc.c
+++ b/drivers/irqchip/irq-davinci-aintc.c
@@ -73,7 +73,7 @@ davinci_aintc_handle_irq(struct pt_regs *regs)
 	irqnr >>= 2;
 	irqnr -= 1;
 
-	handle_domain_irq(davinci_aintc_irq_domain, irqnr, regs);
+	generic_handle_domain_irq(davinci_aintc_irq_domain, irqnr);
 }
 
 /* ARM Interrupt Controller Initialization */
diff --git a/drivers/irqchip/irq-davinci-cp-intc.c b/drivers/irqchip/irq-davinci-cp-intc.c
index 276da2772e7f..7482c8ed34b2 100644
--- a/drivers/irqchip/irq-davinci-cp-intc.c
+++ b/drivers/irqchip/irq-davinci-cp-intc.c
@@ -135,7 +135,7 @@ davinci_cp_intc_handle_irq(struct pt_regs *regs)
 		return;
 	}
 
-	handle_domain_irq(davinci_cp_intc_irq_domain, irqnr, regs);
+	generic_handle_domain_irq(davinci_cp_intc_irq_domain, irqnr);
 }
 
 static int davinci_cp_intc_host_map(struct irq_domain *h, unsigned int virq,
diff --git a/drivers/irqchip/irq-digicolor.c b/drivers/irqchip/irq-digicolor.c
index fc38d2da11b9..3b0d78aac13b 100644
--- a/drivers/irqchip/irq-digicolor.c
+++ b/drivers/irqchip/irq-digicolor.c
@@ -50,7 +50,7 @@ static void __exception_irq_entry digicolor_handle_irq(struct pt_regs *regs)
 				return;
 		}
 
-		handle_domain_irq(digicolor_irq_domain, hwirq, regs);
+		generic_handle_domain_irq(digicolor_irq_domain, hwirq);
 	} while (1);
 }
 
diff --git a/drivers/irqchip/irq-dw-apb-ictl.c b/drivers/irqchip/irq-dw-apb-ictl.c
index a67266e44491..d5c1c750c8d2 100644
--- a/drivers/irqchip/irq-dw-apb-ictl.c
+++ b/drivers/irqchip/irq-dw-apb-ictl.c
@@ -42,7 +42,7 @@ static void __irq_entry dw_apb_ictl_handle_irq(struct pt_regs *regs)
 		while (stat) {
 			u32 hwirq = ffs(stat) - 1;
 
-			handle_domain_irq(d, hwirq, regs);
+			generic_handle_domain_irq(d, hwirq);
 			stat &= ~BIT(hwirq);
 		}
 	}
diff --git a/drivers/irqchip/irq-ftintc010.c b/drivers/irqchip/irq-ftintc010.c
index 0bf98425dca5..5cc268880f8e 100644
--- a/drivers/irqchip/irq-ftintc010.c
+++ b/drivers/irqchip/irq-ftintc010.c
@@ -134,7 +134,7 @@ asmlinkage void __exception_irq_entry ft010_irqchip_handle_irq(struct pt_regs *r
 
 	while ((status = readl(FT010_IRQ_STATUS(f->base)))) {
 		irq = ffs(status) - 1;
-		handle_domain_irq(f->domain, irq, regs);
+		generic_handle_domain_irq(f->domain, irq);
 	}
 }
 
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index fd4e9a37fea6..daec3309b014 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -660,7 +660,7 @@ static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs)
 	 * PSR.I will be restored when we ERET to the
 	 * interrupted context.
 	 */
-	err = handle_domain_nmi(gic_data.domain, irqnr, regs);
+	err = generic_handle_domain_nmi(gic_data.domain, irqnr);
 	if (err)
 		gic_deactivate_unhandled(irqnr);
 
@@ -728,7 +728,7 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
 	else
 		isb();
 
-	if (handle_domain_irq(gic_data.domain, irqnr, regs)) {
+	if (generic_handle_domain_irq(gic_data.domain, irqnr)) {
 		WARN_ONCE(true, "Unexpected interrupt received!\n");
 		gic_deactivate_unhandled(irqnr);
 	}
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index 5f22c9d65e57..b8bb46c65a97 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -369,7 +369,7 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
 			this_cpu_write(sgi_intid, irqstat);
 		}
 
-		handle_domain_irq(gic->domain, irqnr, regs);
+		generic_handle_domain_irq(gic->domain, irqnr);
 	} while (1);
 }
 
diff --git a/drivers/irqchip/irq-hip04.c b/drivers/irqchip/irq-hip04.c
index 058ebaebe2c4..46161f6ff289 100644
--- a/drivers/irqchip/irq-hip04.c
+++ b/drivers/irqchip/irq-hip04.c
@@ -206,7 +206,7 @@ static void __exception_irq_entry hip04_handle_irq(struct pt_regs *regs)
 		irqnr = irqstat & GICC_IAR_INT_ID_MASK;
 
 		if (irqnr <= HIP04_MAX_IRQS)
-			handle_domain_irq(hip04_data.domain, irqnr, regs);
+			generic_handle_domain_irq(hip04_data.domain, irqnr);
 	} while (irqnr > HIP04_MAX_IRQS);
 }
 
diff --git a/drivers/irqchip/irq-ixp4xx.c b/drivers/irqchip/irq-ixp4xx.c
index 37e0749215c7..fb68f8c59fbb 100644
--- a/drivers/irqchip/irq-ixp4xx.c
+++ b/drivers/irqchip/irq-ixp4xx.c
@@ -114,7 +114,7 @@ asmlinkage void __exception_irq_entry ixp4xx_handle_irq(struct pt_regs *regs)
 
 	status = __raw_readl(ixi->irqbase + IXP4XX_ICIP);
 	for_each_set_bit(i, &status, 32)
-		handle_domain_irq(ixi->domain, i, regs);
+		generic_handle_domain_irq(ixi->domain, i);
 
 	/*
 	 * IXP465/IXP435 has an upper IRQ status register
@@ -122,7 +122,7 @@ asmlinkage void __exception_irq_entry ixp4xx_handle_irq(struct pt_regs *regs)
 	if (ixi->is_356) {
 		status = __raw_readl(ixi->irqbase + IXP4XX_ICIP2);
 		for_each_set_bit(i, &status, 32)
-			handle_domain_irq(ixi->domain, i + 32, regs);
+			generic_handle_domain_irq(ixi->domain, i + 32);
 	}
 }
 
diff --git a/drivers/irqchip/irq-lpc32xx.c b/drivers/irqchip/irq-lpc32xx.c
index 5e6f6e25f2ae..a29357f39450 100644
--- a/drivers/irqchip/irq-lpc32xx.c
+++ b/drivers/irqchip/irq-lpc32xx.c
@@ -126,7 +126,7 @@ static void __exception_irq_entry lpc32xx_handle_irq(struct pt_regs *regs)
 	while (hwirq) {
 		irq = __ffs(hwirq);
 		hwirq &= ~BIT(irq);
-		handle_domain_irq(lpc32xx_mic_irqc->domain, irq, regs);
+		generic_handle_domain_irq(lpc32xx_mic_irqc->domain, irq);
 	}
 }
 
diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c
index 4a74ac7b7c42..83455ca72439 100644
--- a/drivers/irqchip/irq-mmp.c
+++ b/drivers/irqchip/irq-mmp.c
@@ -230,7 +230,7 @@ static void __exception_irq_entry mmp_handle_irq(struct pt_regs *regs)
 	if (!(hwirq & SEL_INT_PENDING))
 		return;
 	hwirq &= SEL_INT_NUM_MASK;
-	handle_domain_irq(icu_data[0].domain, hwirq, regs);
+	generic_handle_domain_irq(icu_data[0].domain, hwirq);
 }
 
 static void __exception_irq_entry mmp2_handle_irq(struct pt_regs *regs)
@@ -241,7 +241,7 @@ static void __exception_irq_entry mmp2_handle_irq(struct pt_regs *regs)
 	if (!(hwirq & SEL_INT_PENDING))
 		return;
 	hwirq &= SEL_INT_NUM_MASK;
-	handle_domain_irq(icu_data[0].domain, hwirq, regs);
+	generic_handle_domain_irq(icu_data[0].domain, hwirq);
 }
 
 /* MMP (ARMv5) */
diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
index d1f5740cd575..55cb6b5a686e 100644
--- a/drivers/irqchip/irq-mxs.c
+++ b/drivers/irqchip/irq-mxs.c
@@ -136,7 +136,7 @@ asmlinkage void __exception_irq_entry icoll_handle_irq(struct pt_regs *regs)
 
 	irqnr = __raw_readl(icoll_priv.stat);
 	__raw_writel(irqnr, icoll_priv.vector);
-	handle_domain_irq(icoll_domain, irqnr, regs);
+	generic_handle_domain_irq(icoll_domain, irqnr);
 }
 
 static int icoll_irq_domain_map(struct irq_domain *d, unsigned int virq,
diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c
index b2bd96253d40..63bac3f78863 100644
--- a/drivers/irqchip/irq-nvic.c
+++ b/drivers/irqchip/irq-nvic.c
@@ -37,9 +37,9 @@
 
 static struct irq_domain *nvic_irq_domain;
 
-static void __nvic_handle_irq(irq_hw_number_t hwirq, struct pt_regs *regs)
+static void __nvic_handle_irq(irq_hw_number_t hwirq)
 {
-	handle_domain_irq(nvic_irq_domain, hwirq, regs);
+	generic_handle_domain_irq(nvic_irq_domain, hwirq);
 }
 
 /*
@@ -53,7 +53,7 @@ nvic_handle_irq(irq_hw_number_t hwirq, struct pt_regs *regs)
 
 	irq_enter();
 	old_regs = set_irq_regs(regs);
-	__nvic_handle_irq(hwirq, regs);
+	__nvic_handle_irq(hwirq);
 	set_irq_regs(old_regs);
 	irq_exit();
 }
diff --git a/drivers/irqchip/irq-omap-intc.c b/drivers/irqchip/irq-omap-intc.c
index d360a6eddd6d..dc82162ba763 100644
--- a/drivers/irqchip/irq-omap-intc.c
+++ b/drivers/irqchip/irq-omap-intc.c
@@ -357,7 +357,7 @@ omap_intc_handle_irq(struct pt_regs *regs)
 	}
 
 	irqnr &= ACTIVEIRQ_MASK;
-	handle_domain_irq(domain, irqnr, regs);
+	generic_handle_domain_irq(domain, irqnr);
 }
 
 static int __init intc_of_init(struct device_node *node,
diff --git a/drivers/irqchip/irq-or1k-pic.c b/drivers/irqchip/irq-or1k-pic.c
index 03d2366118dd..49b47e787644 100644
--- a/drivers/irqchip/irq-or1k-pic.c
+++ b/drivers/irqchip/irq-or1k-pic.c
@@ -116,7 +116,7 @@ static void or1k_pic_handle_irq(struct pt_regs *regs)
 	int irq = -1;
 
 	while ((irq = pic_get_irq(irq + 1)) != NO_IRQ)
-		handle_domain_irq(root_domain, irq, regs);
+		generic_handle_domain_irq(root_domain, irq);
 }
 
 static int or1k_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
diff --git a/drivers/irqchip/irq-orion.c b/drivers/irqchip/irq-orion.c
index b6868f7b805a..17c2c7a07f10 100644
--- a/drivers/irqchip/irq-orion.c
+++ b/drivers/irqchip/irq-orion.c
@@ -42,8 +42,8 @@ __exception_irq_entry orion_handle_irq(struct pt_regs *regs)
 			gc->mask_cache;
 		while (stat) {
 			u32 hwirq = __fls(stat);
-			handle_domain_irq(orion_irq_domain,
-					  gc->irq_base + hwirq, regs);
+			generic_handle_domain_irq(orion_irq_domain,
+						  gc->irq_base + hwirq);
 			stat &= ~(1 << hwirq);
 		}
 	}
diff --git a/drivers/irqchip/irq-rda-intc.c b/drivers/irqchip/irq-rda-intc.c
index 960168303b73..9f0144a73777 100644
--- a/drivers/irqchip/irq-rda-intc.c
+++ b/drivers/irqchip/irq-rda-intc.c
@@ -53,7 +53,7 @@ static void __exception_irq_entry rda_handle_irq(struct pt_regs *regs)
 
 	while (stat) {
 		hwirq = __fls(stat);
-		handle_domain_irq(rda_irq_domain, hwirq, regs);
+		generic_handle_domain_irq(rda_irq_domain, hwirq);
 		stat &= ~BIT(hwirq);
 	}
 }
diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c
index 8017f6d32d52..b65bd8878d4f 100644
--- a/drivers/irqchip/irq-riscv-intc.c
+++ b/drivers/irqchip/irq-riscv-intc.c
@@ -37,7 +37,7 @@ static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
 		break;
 #endif
 	default:
-		handle_domain_irq(intc_domain, cause, regs);
+		generic_handle_domain_irq(intc_domain, cause);
 		break;
 	}
 }
diff --git a/drivers/irqchip/irq-sa11x0.c b/drivers/irqchip/irq-sa11x0.c
index dbccc7dafbf8..31c202a1ae62 100644
--- a/drivers/irqchip/irq-sa11x0.c
+++ b/drivers/irqchip/irq-sa11x0.c
@@ -140,8 +140,8 @@ sa1100_handle_irq(struct pt_regs *regs)
 		if (mask == 0)
 			break;
 
-		handle_domain_irq(sa1100_normal_irqdomain,
-				ffs(mask) - 1, regs);
+		generic_handle_domain_irq(sa1100_normal_irqdomain,
+					  ffs(mask) - 1);
 	} while (1);
 }
 
diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c
index 8a315d6a3399..dd506ebfdacb 100644
--- a/drivers/irqchip/irq-sun4i.c
+++ b/drivers/irqchip/irq-sun4i.c
@@ -195,7 +195,7 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs)
 		return;
 
 	do {
-		handle_domain_irq(irq_ic_data->irq_domain, hwirq, regs);
+		generic_handle_domain_irq(irq_ic_data->irq_domain, hwirq);
 		hwirq = readl(irq_ic_data->irq_base +
 				SUN4I_IRQ_VECTOR_REG) >> 2;
 	} while (hwirq != 0);
diff --git a/drivers/irqchip/irq-versatile-fpga.c b/drivers/irqchip/irq-versatile-fpga.c
index 75be350cf82f..f2757b6aecc8 100644
--- a/drivers/irqchip/irq-versatile-fpga.c
+++ b/drivers/irqchip/irq-versatile-fpga.c
@@ -105,7 +105,7 @@ static int handle_one_fpga(struct fpga_irq_data *f, struct pt_regs *regs)
 
 	while ((status  = readl(f->base + IRQ_STATUS))) {
 		irq = ffs(status) - 1;
-		handle_domain_irq(f->domain, irq, regs);
+		generic_handle_domain_irq(f->domain, irq);
 		handled = 1;
 	}
 
diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
index 1e1f2d115257..9e3d5561e04e 100644
--- a/drivers/irqchip/irq-vic.c
+++ b/drivers/irqchip/irq-vic.c
@@ -208,7 +208,7 @@ static int handle_one_vic(struct vic_device *vic, struct pt_regs *regs)
 
 	while ((stat = readl_relaxed(vic->base + VIC_IRQ_STATUS))) {
 		irq = ffs(stat) - 1;
-		handle_domain_irq(vic->domain, irq, regs);
+		generic_handle_domain_irq(vic->domain, irq);
 		handled = 1;
 	}
 
diff --git a/drivers/irqchip/irq-vt8500.c b/drivers/irqchip/irq-vt8500.c
index 5bce936af5d9..e17dd3a8c2d5 100644
--- a/drivers/irqchip/irq-vt8500.c
+++ b/drivers/irqchip/irq-vt8500.c
@@ -183,7 +183,7 @@ static void __exception_irq_entry vt8500_handle_irq(struct pt_regs *regs)
 				continue;
 		}
 
-		handle_domain_irq(intc[i].domain, irqnr, regs);
+		generic_handle_domain_irq(intc[i].domain, irqnr);
 	}
 }
 
diff --git a/drivers/irqchip/irq-wpcm450-aic.c b/drivers/irqchip/irq-wpcm450-aic.c
index f3ac392d5bc8..0dcbeb1a05a1 100644
--- a/drivers/irqchip/irq-wpcm450-aic.c
+++ b/drivers/irqchip/irq-wpcm450-aic.c
@@ -69,7 +69,7 @@ static void __exception_irq_entry wpcm450_aic_handle_irq(struct pt_regs *regs)
 	/* Read IPER to signal that nIRQ can be de-asserted */
 	hwirq = readl(aic->regs + AIC_IPER) / 4;
 
-	handle_domain_irq(aic->domain, hwirq, regs);
+	generic_handle_domain_irq(aic->domain, hwirq);
 }
 
 static void wpcm450_aic_eoi(struct irq_data *d)
diff --git a/drivers/irqchip/irq-zevio.c b/drivers/irqchip/irq-zevio.c
index 84163f1ebfcf..7a72620fc478 100644
--- a/drivers/irqchip/irq-zevio.c
+++ b/drivers/irqchip/irq-zevio.c
@@ -50,7 +50,7 @@ static void __exception_irq_entry zevio_handle_irq(struct pt_regs *regs)
 
 	while (readl(zevio_irq_io + IO_STATUS)) {
 		irqnr = readl(zevio_irq_io + IO_CURRENT);
-		handle_domain_irq(zevio_irq_domain, irqnr, regs);
+		generic_handle_domain_irq(zevio_irq_domain, irqnr);
 	}
 }
 
diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 59aea39785bf..93d270ca0c56 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -168,14 +168,7 @@ int generic_handle_irq(unsigned int irq);
  * conversion failed.
  */
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq);
-
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
-int handle_domain_irq(struct irq_domain *domain,
-		      unsigned int hwirq, struct pt_regs *regs);
-
-int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
-		      struct pt_regs *regs);
-#endif
+int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq);
 #endif
 
 /* Test to see if a driver has successfully requested an irq */
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 897dfc552bb0..1b41078222f3 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -97,13 +97,6 @@ config GENERIC_MSI_IRQ_DOMAIN
 config IRQ_MSI_IOMMU
 	bool
 
-config HANDLE_DOMAIN_IRQ
-	bool
-
-# Legacy behaviour; architectures should call irq_{enter,exit}() themselves
-config HANDLE_DOMAIN_IRQ_IRQENTRY
-	bool
-
 config IRQ_TIMINGS
 	bool
 
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 7041698a7bff..2267e6527db3 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -651,7 +651,11 @@ int handle_irq_desc(struct irq_desc *desc)
  * generic_handle_irq - Invoke the handler for a particular irq
  * @irq:	The irq number to handle
  *
- */
+ * Returns:	0 on success, or -EINVAL if conversion has failed
+ *
+ * 		This function must be called from an IRQ context with irq regs
+ * 		initialized.
+  */
 int generic_handle_irq(unsigned int irq)
 {
 	return handle_irq_desc(irq_to_desc(irq));
@@ -661,77 +665,39 @@ EXPORT_SYMBOL_GPL(generic_handle_irq);
 #ifdef CONFIG_IRQ_DOMAIN
 /**
  * generic_handle_domain_irq - Invoke the handler for a HW irq belonging
- *                             to a domain, usually for a non-root interrupt
- *                             controller
+ *                             to a domain.
  * @domain:	The domain where to perform the lookup
  * @hwirq:	The HW irq number to convert to a logical one
  *
  * Returns:	0 on success, or -EINVAL if conversion has failed
  *
+ * 		This function must be called from an IRQ context with irq regs
+ * 		initialized.
  */
 int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
 {
+	WARN_ON_ONCE(!in_irq());
 	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
 EXPORT_SYMBOL_GPL(generic_handle_domain_irq);
 
-#ifdef CONFIG_HANDLE_DOMAIN_IRQ
 /**
- * handle_domain_irq - Invoke the handler for a HW irq belonging to a domain,
- *                     usually for a root interrupt controller
+ * generic_handle_domain_nmi - Invoke the handler for a HW nmi belonging
+ *                             to a domain.
  * @domain:	The domain where to perform the lookup
  * @hwirq:	The HW irq number to convert to a logical one
- * @regs:	Register file coming from the low-level handling code
- *
- * 		This function must be called from an IRQ context.
  *
  * Returns:	0 on success, or -EINVAL if conversion has failed
- */
-int handle_domain_irq(struct irq_domain *domain,
-		      unsigned int hwirq, struct pt_regs *regs)
-{
-	struct pt_regs *old_regs = set_irq_regs(regs);
-	int ret;
-
-	/*
-	 * IRQ context needs to be setup earlier.
-	 */
-	WARN_ON(!in_irq());
-
-	ret = generic_handle_domain_irq(domain, hwirq);
-
-	set_irq_regs(old_regs);
-	return ret;
-}
-
-/**
- * handle_domain_nmi - Invoke the handler for a HW irq belonging to a domain
- * @domain:	The domain where to perform the lookup
- * @hwirq:	The HW irq number to convert to a logical one
- * @regs:	Register file coming from the low-level handling code
- *
- *		This function must be called from an NMI context.
  *
- * Returns:	0 on success, or -EINVAL if conversion has failed
- */
-int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
-		      struct pt_regs *regs)
+ * 		This function must be called from an NMI context with irq regs
+ * 		initialized.
+ **/
+int generic_handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
-	int ret;
-
-	/*
-	 * NMI context needs to be setup earlier in order to deal with tracing.
-	 */
-	WARN_ON(!in_nmi());
-
-	ret = generic_handle_domain_irq(domain, hwirq);
-
-	set_irq_regs(old_regs);
-	return ret;
+	WARN_ON_ONCE(!in_nmi());
+	return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
 }
 #endif
-#endif
 
 /* Dynamic interrupt handling */
 
-- 
cgit v1.2.3


From 8d15a7295d33538954df2bca6a4011e4311a9cc2 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 21 Oct 2021 18:04:14 +0100
Subject: genirq: Hide irq_cpu_{on,off}line() behind a deprecated option

irq_cpu_{on,off}line() are now only used by the Octeon platform.
Make their use conditional on this plaform being enabled, and
otherwise hidden away.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Serge Semin <fancer.lancer@gmail.com>
Link: https://lore.kernel.org/r/20211021170414.3341522-4-maz@kernel.org
---
 include/linux/irq.h | 5 ++++-
 kernel/irq/Kconfig  | 7 +++++++
 kernel/irq/chip.c   | 2 ++
 3 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index c8293c817646..0c746d325206 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -524,9 +524,10 @@ struct irq_chip {
 	void		(*irq_bus_lock)(struct irq_data *data);
 	void		(*irq_bus_sync_unlock)(struct irq_data *data);
 
+#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
 	void		(*irq_cpu_online)(struct irq_data *data);
 	void		(*irq_cpu_offline)(struct irq_data *data);
-
+#endif
 	void		(*irq_suspend)(struct irq_data *data);
 	void		(*irq_resume)(struct irq_data *data);
 	void		(*irq_pm_shutdown)(struct irq_data *data);
@@ -606,8 +607,10 @@ struct irqaction;
 extern int setup_percpu_irq(unsigned int irq, struct irqaction *new);
 extern void remove_percpu_irq(unsigned int irq, struct irqaction *act);
 
+#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
 extern void irq_cpu_online(void);
 extern void irq_cpu_offline(void);
+#endif
 extern int irq_set_affinity_locked(struct irq_data *data,
 				   const struct cpumask *cpumask, bool force);
 extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index fbc54c2a7f23..c0b9d4607147 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -144,3 +144,10 @@ config GENERIC_IRQ_MULTI_HANDLER
 	bool
 	help
 	  Allow to specify the low level IRQ handler at run time.
+
+# Cavium Octeon is the last system to use this deprecated option
+# Do not even think of enabling this on any new platform
+config DEPRECATED_IRQ_CPU_ONOFFLINE
+	bool
+	depends on CAVIUM_OCTEON_SOC
+	default CAVIUM_OCTEON_SOC
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index a98bcfc4be7b..f895265d7548 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1122,6 +1122,7 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
 }
 EXPORT_SYMBOL_GPL(irq_modify_status);
 
+#ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE
 /**
  *	irq_cpu_online - Invoke all irq_cpu_online functions.
  *
@@ -1181,6 +1182,7 @@ void irq_cpu_offline(void)
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 	}
 }
+#endif
 
 #ifdef	CONFIG_IRQ_DOMAIN_HIERARCHY
 
-- 
cgit v1.2.3


From 99ce45d5e7dbde399997a630f45ac9f654fa4bcc Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Tue, 26 Oct 2021 09:57:28 +0800
Subject: mctp: Implement extended addressing

This change allows an extended address struct - struct sockaddr_mctp_ext
- to be passed to sendmsg/recvmsg. This allows userspace to specify
output ifindex and physical address information (for sendmsg) or receive
the input ifindex/physaddr for incoming messages (for recvmsg). This is
typically used by userspace for MCTP address discovery and assignment
operations.

The extended addressing facility is conditional on a new sockopt:
MCTP_OPT_ADDR_EXT; userspace must explicitly enable addressing before
the kernel will consume/populate the extended address data.

Includes a fix for an uninitialised var:
Reported-by: kernel test robot <lkp@intel.com>

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h    |  1 +
 include/net/mctp.h        | 13 +++++--
 include/uapi/linux/mctp.h | 11 ++++++
 net/mctp/af_mctp.c        | 86 ++++++++++++++++++++++++++++++++++++-----
 net/mctp/route.c          | 98 +++++++++++++++++++++++++++++++++++------------
 5 files changed, 170 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 7612d760b6a9..8ef26d89ef49 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -365,6 +365,7 @@ struct ucred {
 #define SOL_TLS		282
 #define SOL_XDP		283
 #define SOL_MPTCP	284
+#define SOL_MCTP	285
 
 /* IPX options */
 #define IPX_TYPE	1
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 2a83443bdfac..23bec708f4c7 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -11,6 +11,7 @@
 
 #include <linux/bits.h>
 #include <linux/mctp.h>
+#include <linux/netdevice.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
 
@@ -58,6 +59,9 @@ struct mctp_sock {
 	mctp_eid_t	bind_addr;
 	__u8		bind_type;
 
+	/* sendmsg()/recvmsg() uses struct sockaddr_mctp_ext */
+	bool		addr_ext;
+
 	/* list of mctp_sk_key, for incoming tag lookup. updates protected
 	 * by sk->net->keys_lock
 	 */
@@ -153,7 +157,10 @@ struct mctp_sk_key {
 struct mctp_skb_cb {
 	unsigned int	magic;
 	unsigned int	net;
+	int		ifindex; /* extended/direct addressing if set */
 	mctp_eid_t	src;
+	unsigned char	halen;
+	unsigned char	haddr[MAX_ADDR_LEN];
 };
 
 /* skb control-block accessors with a little extra debugging for initial
@@ -177,6 +184,7 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
 {
 	struct mctp_skb_cb *cb = (void *)skb->cb;
 
+	BUILD_BUG_ON(sizeof(struct mctp_skb_cb) > sizeof(skb->cb));
 	WARN_ON(cb->magic != 0x4d435450);
 	return (void *)(skb->cb);
 }
@@ -189,8 +197,7 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
  *
  * Updates to the route table are performed under rtnl; all reads under RCU,
  * so routes cannot be referenced over a RCU grace period. Specifically: A
- * caller cannot block between mctp_route_lookup and passing the route to
- * mctp_do_route.
+ * caller cannot block between mctp_route_lookup and mctp_route_release()
  */
 struct mctp_route {
 	mctp_eid_t		min, max;
@@ -210,8 +217,6 @@ struct mctp_route {
 struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet,
 				     mctp_eid_t daddr);
 
-int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb);
-
 int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 		      struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
 
diff --git a/include/uapi/linux/mctp.h b/include/uapi/linux/mctp.h
index 6acd4ccafbf7..07b0318716fc 100644
--- a/include/uapi/linux/mctp.h
+++ b/include/uapi/linux/mctp.h
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>
+#include <linux/netdevice.h>
 
 typedef __u8			mctp_eid_t;
 
@@ -28,6 +29,14 @@ struct sockaddr_mctp {
 	__u8			__smctp_pad1;
 };
 
+struct sockaddr_mctp_ext {
+	struct sockaddr_mctp	smctp_base;
+	int			smctp_ifindex;
+	__u8			smctp_halen;
+	__u8			__smctp_pad0[3];
+	__u8			smctp_haddr[MAX_ADDR_LEN];
+};
+
 #define MCTP_NET_ANY		0x0
 
 #define MCTP_ADDR_NULL		0x00
@@ -36,4 +45,6 @@ struct sockaddr_mctp {
 #define MCTP_TAG_MASK		0x07
 #define MCTP_TAG_OWNER		0x08
 
+#define MCTP_OPT_ADDR_EXT	1
+
 #endif /* __UAPI_MCTP_H */
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index 66a411d60b6c..d344b02a1cde 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -77,6 +77,7 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr);
 	int rc, addrlen = msg->msg_namelen;
 	struct sock *sk = sock->sk;
+	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
 	struct mctp_skb_cb *cb;
 	struct mctp_route *rt;
 	struct sk_buff *skb;
@@ -100,11 +101,6 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	if (addr->smctp_network == MCTP_NET_ANY)
 		addr->smctp_network = mctp_default_net(sock_net(sk));
 
-	rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
-			       addr->smctp_addr.s_addr);
-	if (!rt)
-		return -EHOSTUNREACH;
-
 	skb = sock_alloc_send_skb(sk, hlen + 1 + len,
 				  msg->msg_flags & MSG_DONTWAIT, &rc);
 	if (!skb)
@@ -116,19 +112,45 @@ static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	*(u8 *)skb_put(skb, 1) = addr->smctp_type;
 
 	rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len);
-	if (rc < 0) {
-		kfree_skb(skb);
-		return rc;
-	}
+	if (rc < 0)
+		goto err_free;
 
 	/* set up cb */
 	cb = __mctp_cb(skb);
 	cb->net = addr->smctp_network;
 
+	/* direct addressing */
+	if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) {
+		DECLARE_SOCKADDR(struct sockaddr_mctp_ext *,
+				 extaddr, msg->msg_name);
+
+		if (extaddr->smctp_halen > sizeof(cb->haddr)) {
+			rc = -EINVAL;
+			goto err_free;
+		}
+
+		cb->ifindex = extaddr->smctp_ifindex;
+		cb->halen = extaddr->smctp_halen;
+		memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen);
+
+		rt = NULL;
+	} else {
+		rt = mctp_route_lookup(sock_net(sk), addr->smctp_network,
+				       addr->smctp_addr.s_addr);
+		if (!rt) {
+			rc = -EHOSTUNREACH;
+			goto err_free;
+		}
+	}
+
 	rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr,
 			       addr->smctp_tag);
 
 	return rc ? : len;
+
+err_free:
+	kfree_skb(skb);
+	return rc;
 }
 
 static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
@@ -136,6 +158,7 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 {
 	DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name);
 	struct sock *sk = sock->sk;
+	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
 	struct sk_buff *skb;
 	size_t msglen;
 	u8 type;
@@ -181,6 +204,16 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		addr->smctp_tag = hdr->flags_seq_tag &
 					(MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO);
 		msg->msg_namelen = sizeof(*addr);
+
+		if (msk->addr_ext) {
+			DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, ae,
+					 msg->msg_name);
+			msg->msg_namelen = sizeof(*ae);
+			ae->smctp_ifindex = cb->ifindex;
+			ae->smctp_halen = cb->halen;
+			memset(ae->smctp_haddr, 0x0, sizeof(ae->smctp_haddr));
+			memcpy(ae->smctp_haddr, cb->haddr, cb->halen);
+		}
 	}
 
 	rc = len;
@@ -196,12 +229,45 @@ out_free:
 static int mctp_setsockopt(struct socket *sock, int level, int optname,
 			   sockptr_t optval, unsigned int optlen)
 {
-	return -EINVAL;
+	struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+	int val;
+
+	if (level != SOL_MCTP)
+		return -EINVAL;
+
+	if (optname == MCTP_OPT_ADDR_EXT) {
+		if (optlen != sizeof(int))
+			return -EINVAL;
+		if (copy_from_sockptr(&val, optval, sizeof(int)))
+			return -EFAULT;
+		msk->addr_ext = val;
+		return 0;
+	}
+
+	return -ENOPROTOOPT;
 }
 
 static int mctp_getsockopt(struct socket *sock, int level, int optname,
 			   char __user *optval, int __user *optlen)
 {
+	struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk);
+	int len, val;
+
+	if (level != SOL_MCTP)
+		return -EINVAL;
+
+	if (get_user(len, optlen))
+		return -EFAULT;
+
+	if (optname == MCTP_OPT_ADDR_EXT) {
+		if (len != sizeof(int))
+			return -EINVAL;
+		val = !!msk->addr_ext;
+		if (copy_to_user(optval, &val, len))
+			return -EFAULT;
+		return 0;
+	}
+
 	return -EINVAL;
 }
 
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 82fb5ae524f6..c23ab3547ee5 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -434,6 +434,7 @@ static unsigned int mctp_route_mtu(struct mctp_route *rt)
 
 static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 {
+	struct mctp_skb_cb *cb = mctp_cb(skb);
 	struct mctp_hdr *hdr = mctp_hdr(skb);
 	char daddr_buf[MAX_ADDR_LEN];
 	char *daddr = NULL;
@@ -448,9 +449,14 @@ static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb)
 		return -EMSGSIZE;
 	}
 
-	/* If lookup fails let the device handle daddr==NULL */
-	if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
-		daddr = daddr_buf;
+	if (cb->ifindex) {
+		/* direct route; use the hwaddr we stashed in sendmsg */
+		daddr = cb->haddr;
+	} else {
+		/* If lookup fails let the device handle daddr==NULL */
+		if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0)
+			daddr = daddr_buf;
+	}
 
 	rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol),
 			     daddr, skb->dev->dev_addr, skb->len);
@@ -649,16 +655,6 @@ static struct mctp_route *mctp_route_lookup_null(struct net *net,
 	return NULL;
 }
 
-/* sends a skb to rt and releases the route. */
-int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb)
-{
-	int rc;
-
-	rc = rt->output(rt, skb);
-	mctp_route_release(rt);
-	return rc;
-}
-
 static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
 				  unsigned int mtu, u8 tag)
 {
@@ -725,7 +721,7 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
 		/* copy message payload */
 		skb_copy_bits(skb, pos, skb_transport_header(skb2), size);
 
-		/* do route, but don't drop the rt reference */
+		/* do route */
 		rc = rt->output(rt, skb2);
 		if (rc)
 			break;
@@ -734,7 +730,6 @@ static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb,
 		pos += size;
 	}
 
-	mctp_route_release(rt);
 	consume_skb(skb);
 	return rc;
 }
@@ -744,15 +739,51 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 {
 	struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
 	struct mctp_skb_cb *cb = mctp_cb(skb);
+	struct mctp_route tmp_rt;
+	struct net_device *dev;
 	struct mctp_hdr *hdr;
 	unsigned long flags;
 	unsigned int mtu;
 	mctp_eid_t saddr;
+	bool ext_rt;
 	int rc;
 	u8 tag;
 
-	if (WARN_ON(!rt->dev))
+	rc = -ENODEV;
+
+	if (rt) {
+		ext_rt = false;
+		dev = NULL;
+
+		if (WARN_ON(!rt->dev))
+			goto out_release;
+
+	} else if (cb->ifindex) {
+		ext_rt = true;
+		rt = &tmp_rt;
+
+		rcu_read_lock();
+		dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
+		if (!dev) {
+			rcu_read_unlock();
+			return rc;
+		}
+
+		rt->dev = __mctp_dev_get(dev);
+		rcu_read_unlock();
+
+		if (!rt->dev)
+			goto out_release;
+
+		/* establish temporary route - we set up enough to keep
+		 * mctp_route_output happy
+		 */
+		rt->output = mctp_route_output;
+		rt->mtu = 0;
+
+	} else {
 		return -EINVAL;
+	}
 
 	spin_lock_irqsave(&rt->dev->addrs_lock, flags);
 	if (rt->dev->num_addrs == 0) {
@@ -765,18 +796,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 	spin_unlock_irqrestore(&rt->dev->addrs_lock, flags);
 
 	if (rc)
-		return rc;
+		goto out_release;
 
 	if (req_tag & MCTP_HDR_FLAG_TO) {
 		rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag);
 		if (rc)
-			return rc;
+			goto out_release;
 		tag |= MCTP_HDR_FLAG_TO;
 	} else {
 		tag = req_tag;
 	}
 
-
 	skb->protocol = htons(ETH_P_MCTP);
 	skb->priority = 0;
 	skb_reset_transport_header(skb);
@@ -796,12 +826,22 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
 	mtu = mctp_route_mtu(rt);
 
 	if (skb->len + sizeof(struct mctp_hdr) <= mtu) {
-		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM |
-			tag;
-		return mctp_do_route(rt, skb);
+		hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM |
+			MCTP_HDR_FLAG_EOM | tag;
+		rc = rt->output(rt, skb);
 	} else {
-		return mctp_do_fragment_route(rt, skb, mtu, tag);
+		rc = mctp_do_fragment_route(rt, skb, mtu, tag);
 	}
+
+out_release:
+	if (!ext_rt)
+		mctp_route_release(rt);
+
+	if (dev)
+		dev_put(dev);
+
+	return rc;
+
 }
 
 /* route management */
@@ -942,8 +982,15 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
 	if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX)
 		goto err_drop;
 
-	cb = __mctp_cb(skb);
+	/* MCTP drivers must populate halen/haddr */
+	if (dev->type == ARPHRD_MCTP) {
+		cb = mctp_cb(skb);
+	} else {
+		cb = __mctp_cb(skb);
+		cb->halen = 0;
+	}
 	cb->net = READ_ONCE(mdev->net);
+	cb->ifindex = dev->ifindex;
 
 	rt = mctp_route_lookup(net, cb->net, mh->dest);
 
@@ -954,7 +1001,8 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev,
 	if (!rt)
 		goto err_drop;
 
-	mctp_do_route(rt, skb);
+	rt->output(rt, skb);
+	mctp_route_release(rt);
 
 	return NET_RX_SUCCESS;
 
-- 
cgit v1.2.3


From 8e20f591f204f8db7f1182918f8e2285d3f589e0 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 26 Oct 2021 11:06:01 +0100
Subject: net: phy: add phy_interface_t bitmap support

Add support for a bitmap for phy interface modes, which includes:
- a macro to declare the interface bitmap
- an inline helper to zero the interface bitmap
- an inline helper to detect an empty interface bitmap
- inline helpers to do a bitwise AND and OR operations on two interface
  bitmaps

Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 04e90423fa88..96e43fbb2dd8 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -155,6 +155,40 @@ typedef enum {
 	PHY_INTERFACE_MODE_MAX,
 } phy_interface_t;
 
+/* PHY interface mode bitmap handling */
+#define DECLARE_PHY_INTERFACE_MASK(name) \
+	DECLARE_BITMAP(name, PHY_INTERFACE_MODE_MAX)
+
+static inline void phy_interface_zero(unsigned long *intf)
+{
+	bitmap_zero(intf, PHY_INTERFACE_MODE_MAX);
+}
+
+static inline bool phy_interface_empty(const unsigned long *intf)
+{
+	return bitmap_empty(intf, PHY_INTERFACE_MODE_MAX);
+}
+
+static inline void phy_interface_and(unsigned long *dst, const unsigned long *a,
+				     const unsigned long *b)
+{
+	bitmap_and(dst, a, b, PHY_INTERFACE_MODE_MAX);
+}
+
+static inline void phy_interface_or(unsigned long *dst, const unsigned long *a,
+				    const unsigned long *b)
+{
+	bitmap_or(dst, a, b, PHY_INTERFACE_MODE_MAX);
+}
+
+static inline void phy_interface_set_rgmii(unsigned long *intf)
+{
+	__set_bit(PHY_INTERFACE_MODE_RGMII, intf);
+	__set_bit(PHY_INTERFACE_MODE_RGMII_ID, intf);
+	__set_bit(PHY_INTERFACE_MODE_RGMII_RXID, intf);
+	__set_bit(PHY_INTERFACE_MODE_RGMII_TXID, intf);
+}
+
 /*
  * phy_supported_speeds - return all speeds currently supported by a PHY device
  */
-- 
cgit v1.2.3


From 38c310eb46f5f80213a92093af11af270c209a76 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 26 Oct 2021 11:06:06 +0100
Subject: net: phylink: add MAC phy_interface_t bitmap

Add a phy_interface_t bitmap so the MAC driver can specifiy which PHY
interface modes it supports.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phylink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index f7b5ed06a815..bc4b866cd99b 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -76,6 +76,7 @@ struct phylink_config {
 	bool ovr_an_inband;
 	void (*get_fixed_state)(struct phylink_config *config,
 				struct phylink_link_state *state);
+	DECLARE_PHY_INTERFACE_MASK(supported_interfaces);
 };
 
 /**
-- 
cgit v1.2.3


From d25f3a74f30aace819163dfa54f2a4b8ca1dc932 Mon Sep 17 00:00:00 2001
From: "Russell King (Oracle)" <rmk+kernel@armlinux.org.uk>
Date: Tue, 26 Oct 2021 11:06:11 +0100
Subject: net: phylink: use supported_interfaces for phylink validation

If the network device supplies a supported interface bitmap, we can use
that during phylink's validation to simplify MAC drivers in two ways by
using the supported_interfaces bitmap to:

1. reject unsupported interfaces before calling into the MAC driver.
2. generate the set of all supported link modes across all supported
   interfaces (used mainly for SFP, but also some 10G PHYs.)

Suggested-by: Sean Anderson <sean.anderson@seco.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/phylink.h   | 12 ++++++++++--
 2 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 14c7d73790b4..6da245dacca4 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -166,9 +166,45 @@ static const char *phylink_an_mode_str(unsigned int mode)
 	return mode < ARRAY_SIZE(modestr) ? modestr[mode] : "unknown";
 }
 
+static int phylink_validate_any(struct phylink *pl, unsigned long *supported,
+				struct phylink_link_state *state)
+{
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(all_adv) = { 0, };
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(all_s) = { 0, };
+	__ETHTOOL_DECLARE_LINK_MODE_MASK(s);
+	struct phylink_link_state t;
+	int intf;
+
+	for (intf = 0; intf < PHY_INTERFACE_MODE_MAX; intf++) {
+		if (test_bit(intf, pl->config->supported_interfaces)) {
+			linkmode_copy(s, supported);
+
+			t = *state;
+			t.interface = intf;
+			pl->mac_ops->validate(pl->config, s, &t);
+			linkmode_or(all_s, all_s, s);
+			linkmode_or(all_adv, all_adv, t.advertising);
+		}
+	}
+
+	linkmode_copy(supported, all_s);
+	linkmode_copy(state->advertising, all_adv);
+
+	return phylink_is_empty_linkmode(supported) ? -EINVAL : 0;
+}
+
 static int phylink_validate(struct phylink *pl, unsigned long *supported,
 			    struct phylink_link_state *state)
 {
+	if (!phy_interface_empty(pl->config->supported_interfaces)) {
+		if (state->interface == PHY_INTERFACE_MODE_NA)
+			return phylink_validate_any(pl, supported, state);
+
+		if (!test_bit(state->interface,
+			      pl->config->supported_interfaces))
+			return -EINVAL;
+	}
+
 	pl->mac_ops->validate(pl->config, supported, state);
 
 	return phylink_is_empty_linkmode(supported) ? -EINVAL : 0;
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index bc4b866cd99b..f037470b6fb3 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -67,6 +67,8 @@ enum phylink_op_type {
  * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND
  * @get_fixed_state: callback to execute to determine the fixed link state,
  *		     if MAC link is at %MLO_AN_FIXED mode.
+ * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx
+ *                        are supported by the MAC/PCS.
  */
 struct phylink_config {
 	struct device *dev;
@@ -134,8 +136,14 @@ struct phylink_mac_ops {
  * based on @state->advertising and/or @state->speed and update
  * @state->interface accordingly. See phylink_helper_basex_speed().
  *
- * When @state->interface is %PHY_INTERFACE_MODE_NA, phylink expects the
- * MAC driver to return all supported link modes.
+ * When @config->supported_interfaces has been set, phylink will iterate
+ * over the supported interfaces to determine the full capability of the
+ * MAC. The validation function must not print errors if @state->interface
+ * is set to an unexpected value.
+ *
+ * When @config->supported_interfaces is empty, phylink will call this
+ * function with @state->interface set to %PHY_INTERFACE_MODE_NA, and
+ * expects the MAC driver to return all supported link modes.
  *
  * If the @state->interface mode is not supported, then the @supported
  * mask must be cleared.
-- 
cgit v1.2.3


From e60feb445fce9e51c1558a6aa7faf9dd5ded533b Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 14 Oct 2021 13:11:00 -0400
Subject: fs: export an inode_update_time helper

If you already have an inode and need to update the time on the inode
there is no way to do this properly.  Export this helper to allow file
systems to update time on the inode so the appropriate handler is
called, either ->update_time or generic_update_time.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/inode.c         | 7 ++++---
 include/linux/fs.h | 2 ++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index ed0cab8a32db..9abc88d7959c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1782,12 +1782,13 @@ EXPORT_SYMBOL(generic_update_time);
  * This does the actual work of updating an inodes time or version.  Must have
  * had called mnt_want_write() before calling this.
  */
-static int update_time(struct inode *inode, struct timespec64 *time, int flags)
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
 {
 	if (inode->i_op->update_time)
 		return inode->i_op->update_time(inode, time, flags);
 	return generic_update_time(inode, time, flags);
 }
+EXPORT_SYMBOL(inode_update_time);
 
 /**
  *	atime_needs_update	-	update the access time
@@ -1857,7 +1858,7 @@ void touch_atime(const struct path *path)
 	 * of the fs read only, e.g. subvolumes in Btrfs.
 	 */
 	now = current_time(inode);
-	update_time(inode, &now, S_ATIME);
+	inode_update_time(inode, &now, S_ATIME);
 	__mnt_drop_write(mnt);
 skip_update:
 	sb_end_write(inode->i_sb);
@@ -2002,7 +2003,7 @@ int file_update_time(struct file *file)
 	if (__mnt_want_write_file(file))
 		return 0;
 
-	ret = update_time(inode, &now, sync_it);
+	ret = inode_update_time(inode, &now, sync_it);
 	__mnt_drop_write_file(file);
 
 	return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e7a633353fd2..56eba723477e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2498,6 +2498,8 @@ enum file_time_flags {
 
 extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
+int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+
 static inline void file_accessed(struct file *file)
 {
 	if (!(file->f_flags & O_NOATIME))
-- 
cgit v1.2.3


From 6b3671746a8a3aa05316b829e1357060f35009c1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 26 Oct 2021 08:29:39 -0700
Subject: net/mlx5: remove the recent devlink params

revert commit 46ae40b94d88 ("net/mlx5: Let user configure io_eq_size param")
revert commit a6cb08daa3b4 ("net/mlx5: Let user configure event_eq_size param")
revert commit 554604061979 ("net/mlx5: Let user configure max_macs param")

The EQE parameters are applicable to more drivers, they should
be configured via standard API, probably ethtool. Example of
another driver needing something similar:

https://lore.kernel.org/all/1633454136-14679-3-git-send-email-sbhatta@marvell.com/

The last param for "max_macs" is probably fine but the documentation
is severely lacking. The meaning and implications for changing the
param need to be stated.

Link: https://lore.kernel.org/r/20211026152939.3125950-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/devlink/mlx5.rst          | 20 ------
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c  | 69 -------------------
 drivers/net/ethernet/mellanox/mlx5/core/devlink.h  | 12 ----
 .../net/ethernet/mellanox/mlx5/core/devlink_res.c  | 80 ----------------------
 drivers/net/ethernet/mellanox/mlx5/core/eq.c       |  5 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c     | 21 ------
 include/linux/mlx5/driver.h                        |  4 ++
 include/linux/mlx5/eq.h                            |  1 +
 include/linux/mlx5/mlx5_ifc.h                      |  2 +-
 10 files changed, 9 insertions(+), 207 deletions(-)
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c

(limited to 'include/linux')

diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index d467e770906e..4e4b97f7971a 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -14,12 +14,8 @@ Parameters
 
    * - Name
      - Mode
-     - Validation
    * - ``enable_roce``
      - driverinit
-   * - ``max_macs``
-     - driverinit
-     - The range is between 1 and 2^31. Only power of 2 values are supported.
 
 The ``mlx5`` driver also implements the following driver-specific
 parameters.
@@ -50,22 +46,6 @@ parameters.
 
 The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
 
-Resources
-=========
-
-.. list-table:: Driver-specific resources implemented
-   :widths: 5 5 5 85
-
-   * - Name
-     - Description
-   * - ``comp_eq_size``
-     - Control the size of I/O completion EQs.
-       * The default value is 1024, and the range is between 64 and 4096.
-   * - ``event_eq_size``
-     - Control the size of the asynchronous control events EQ.
-       * The default value is 4096, and the range is between 64 and 4096.
-
-
 Info versions
 =============
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 79c15ee62cde..bdb271b604d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
-		diag/fw_tracer.o diag/crdump.o devlink.o devlink_res.o diag/rsc_dump.o \
+		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
 		fw_reset.o qos.o lib/tout.o
 
 #
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index fc78c745ead1..1c98652b244a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -752,68 +752,6 @@ static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink)
 	mlx5_devlink_eth_param_unregister(devlink);
 }
 
-static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id,
-					     union devlink_param_value val,
-					     struct netlink_ext_ack *extack)
-{
-	struct mlx5_core_dev *dev = devlink_priv(devlink);
-
-	/* At least one unicast mac is needed */
-	if (val.vu32 == 0) {
-		NL_SET_ERR_MSG_MOD(extack, "max_macs value must be greater than 0");
-		return -EINVAL;
-	}
-	/* Check if its power of 2 or not */
-	if (!is_power_of_2(val.vu32)) {
-		NL_SET_ERR_MSG_MOD(extack,
-				   "Only power of 2 values are supported for max_macs");
-		return -EOPNOTSUPP;
-	}
-
-	if (ilog2(val.vu32) >
-	    MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) {
-		NL_SET_ERR_MSG_MOD(extack, "max_macs value is out of the supported range");
-		return -EOPNOTSUPP;
-	}
-
-	return 0;
-}
-
-static const struct devlink_param max_uc_list_param =
-	DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
-			      NULL, NULL, mlx5_devlink_max_uc_list_validate);
-
-static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink)
-{
-	struct mlx5_core_dev *dev = devlink_priv(devlink);
-	union devlink_param_value value;
-	int err;
-
-	if (!MLX5_CAP_GEN(dev, log_max_current_uc_list_wr_supported))
-		return 0;
-
-	err = devlink_param_register(devlink, &max_uc_list_param);
-	if (err)
-		return err;
-
-	value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list);
-	devlink_param_driverinit_value_set(devlink,
-					   DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
-					   value);
-	return 0;
-}
-
-static void
-mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink)
-{
-	struct mlx5_core_dev *dev = devlink_priv(devlink);
-
-	if (!MLX5_CAP_GEN(dev, log_max_current_uc_list_wr_supported))
-		return;
-
-	devlink_param_unregister(devlink, &max_uc_list_param);
-}
-
 #define MLX5_TRAP_DROP(_id, _group_id)					\
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				\
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id, \
@@ -877,17 +815,11 @@ int mlx5_devlink_register(struct devlink *devlink)
 	if (err)
 		goto traps_reg_err;
 
-	err = mlx5_devlink_max_uc_list_param_register(devlink);
-	if (err)
-		goto uc_list_reg_err;
-
 	if (!mlx5_core_is_mp_slave(dev))
 		devlink_set_features(devlink, DEVLINK_F_RELOAD);
 
 	return 0;
 
-uc_list_reg_err:
-	mlx5_devlink_traps_unregister(devlink);
 traps_reg_err:
 	mlx5_devlink_auxdev_params_unregister(devlink);
 auxdev_reg_err:
@@ -898,7 +830,6 @@ auxdev_reg_err:
 
 void mlx5_devlink_unregister(struct devlink *devlink)
 {
-	mlx5_devlink_max_uc_list_param_unregister(devlink);
 	mlx5_devlink_traps_unregister(devlink);
 	mlx5_devlink_auxdev_params_unregister(devlink);
 	devlink_params_unregister(devlink, mlx5_devlink_params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 674415fd0b3a..30bf4882779b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -6,14 +6,6 @@
 
 #include <net/devlink.h>
 
-enum mlx5_devlink_resource_id {
-	MLX5_DL_RES_COMP_EQ = 1,
-	MLX5_DL_RES_ASYNC_EQ,
-
-	__MLX5_ID_RES_MAX,
-	MLX5_ID_RES_MAX = __MLX5_ID_RES_MAX - 1,
-};
-
 enum mlx5_devlink_param_id {
 	MLX5_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
 	MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
@@ -39,10 +31,6 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev);
 int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id,
 				  enum devlink_trap_action *action);
 
-void mlx5_devlink_res_register(struct mlx5_core_dev *dev);
-void mlx5_devlink_res_unregister(struct mlx5_core_dev *dev);
-size_t mlx5_devlink_res_size(struct mlx5_core_dev *dev, enum mlx5_devlink_resource_id id);
-
 struct devlink *mlx5_devlink_alloc(struct device *dev);
 void mlx5_devlink_free(struct devlink *devlink);
 int mlx5_devlink_register(struct devlink *devlink);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c
deleted file mode 100644
index 549d23745942..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink_res.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. */
-
-#include "devlink.h"
-#include "mlx5_core.h"
-
-enum {
-	MLX5_EQ_MIN_SIZE = 64,
-	MLX5_EQ_MAX_SIZE = 4096,
-	MLX5_NUM_ASYNC_EQE = 4096,
-	MLX5_COMP_EQ_SIZE = 1024,
-};
-
-static int comp_eq_res_register(struct mlx5_core_dev *dev)
-{
-	struct devlink_resource_size_params comp_eq_size;
-	struct devlink *devlink = priv_to_devlink(dev);
-
-	devlink_resource_size_params_init(&comp_eq_size, MLX5_EQ_MIN_SIZE,
-					  MLX5_EQ_MAX_SIZE, 1, DEVLINK_RESOURCE_UNIT_ENTRY);
-	return devlink_resource_register(devlink, "io_eq_size", MLX5_COMP_EQ_SIZE,
-					 MLX5_DL_RES_COMP_EQ,
-					 DEVLINK_RESOURCE_ID_PARENT_TOP,
-					 &comp_eq_size);
-}
-
-static int async_eq_resource_register(struct mlx5_core_dev *dev)
-{
-	struct devlink_resource_size_params async_eq_size;
-	struct devlink *devlink = priv_to_devlink(dev);
-
-	devlink_resource_size_params_init(&async_eq_size, MLX5_EQ_MIN_SIZE,
-					  MLX5_EQ_MAX_SIZE, 1, DEVLINK_RESOURCE_UNIT_ENTRY);
-	return devlink_resource_register(devlink, "event_eq_size",
-					 MLX5_NUM_ASYNC_EQE, MLX5_DL_RES_ASYNC_EQ,
-					 DEVLINK_RESOURCE_ID_PARENT_TOP,
-					 &async_eq_size);
-}
-
-void mlx5_devlink_res_register(struct mlx5_core_dev *dev)
-{
-	int err;
-
-	err = comp_eq_res_register(dev);
-	if (err)
-		goto err_msg;
-
-	err = async_eq_resource_register(dev);
-	if (err)
-		goto err;
-	return;
-err:
-	devlink_resources_unregister(priv_to_devlink(dev), NULL);
-err_msg:
-	mlx5_core_err(dev, "Failed to register resources, err = %d\n", err);
-}
-
-void mlx5_devlink_res_unregister(struct mlx5_core_dev *dev)
-{
-	devlink_resources_unregister(priv_to_devlink(dev), NULL);
-}
-
-static const size_t default_vals[MLX5_ID_RES_MAX + 1] = {
-	[MLX5_DL_RES_COMP_EQ] = MLX5_COMP_EQ_SIZE,
-	[MLX5_DL_RES_ASYNC_EQ] = MLX5_NUM_ASYNC_EQE,
-};
-
-size_t mlx5_devlink_res_size(struct mlx5_core_dev *dev, enum mlx5_devlink_resource_id id)
-{
-	struct devlink *devlink = priv_to_devlink(dev);
-	u64 size;
-	int err;
-
-	err = devlink_resource_size_get(devlink, id, &size);
-	if (!err)
-		return size;
-	mlx5_core_err(dev, "Failed to get param. using default. err = %d, id = %u\n",
-		      err, id);
-	return default_vals[id];
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 31e69067284b..792e0d6aa861 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -19,7 +19,6 @@
 #include "lib/clock.h"
 #include "diag/fw_tracer.h"
 #include "mlx5_irq.h"
-#include "devlink.h"
 
 enum {
 	MLX5_EQE_OWNER_INIT_VAL	= 0x1,
@@ -647,7 +646,7 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
 
 	param = (struct mlx5_eq_param) {
 		.irq_index = MLX5_IRQ_EQ_CTRL,
-		.nent = mlx5_devlink_res_size(dev, MLX5_DL_RES_ASYNC_EQ),
+		.nent = MLX5_NUM_ASYNC_EQE,
 	};
 
 	gather_async_events_mask(dev, param.mask);
@@ -808,7 +807,7 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
 
 	INIT_LIST_HEAD(&table->comp_eqs_list);
 	ncomp_eqs = table->num_comp_eqs;
-	nent = mlx5_devlink_res_size(dev, MLX5_DL_RES_COMP_EQ);
+	nent = MLX5_COMP_EQ_SIZE;
 	for (i = 0; i < ncomp_eqs; i++) {
 		struct mlx5_eq_param param = {};
 		int vecidx = i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 079ee9e8da10..f8446395163a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -484,23 +484,10 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
 }
 
-static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
-{
-	struct devlink *devlink = priv_to_devlink(dev);
-	union devlink_param_value val;
-	int err;
-
-	err = devlink_param_driverinit_value_get(devlink,
-						 DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
-						 &val);
-	return err ? 0 : val.vu32;
-}
-
 static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 {
 	struct mlx5_profile *prof = &dev->profile;
 	void *set_hca_cap;
-	u32 max_uc_list;
 	int err;
 
 	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
@@ -574,11 +561,6 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 	if (MLX5_CAP_GEN(dev, roce_rw_supported))
 		MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
 
-	max_uc_list = max_uc_list_get_devlink_param(dev);
-	if (max_uc_list)
-		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list,
-			 ilog2(max_uc_list));
-
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 }
 
@@ -940,8 +922,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 	dev->hv_vhca = mlx5_hv_vhca_create(dev);
 	dev->rsc_dump = mlx5_rsc_dump_create(dev);
 
-	mlx5_devlink_res_register(dev);
-
 	return 0;
 
 err_sf_table_cleanup:
@@ -977,7 +957,6 @@ err_devcom:
 
 static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 {
-	mlx5_devlink_res_unregister(dev);
 	mlx5_rsc_dump_destroy(dev);
 	mlx5_hv_vhca_destroy(dev->hv_vhca);
 	mlx5_fw_tracer_destroy(dev->tracer);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 47c07f95bbe1..f617dfbcd9fd 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -797,6 +797,10 @@ struct mlx5_db {
 	int			index;
 };
 
+enum {
+	MLX5_COMP_EQ_SIZE = 1024,
+};
+
 enum {
 	MLX5_PTYS_IB = 1 << 0,
 	MLX5_PTYS_EN = 1 << 2,
diff --git a/include/linux/mlx5/eq.h b/include/linux/mlx5/eq.h
index 11161e427608..ea3ff5a8ced3 100644
--- a/include/linux/mlx5/eq.h
+++ b/include/linux/mlx5/eq.h
@@ -5,6 +5,7 @@
 #define MLX5_CORE_EQ_H
 
 #define MLX5_NUM_CMD_EQE   (32)
+#define MLX5_NUM_ASYNC_EQE (0x1000)
 #define MLX5_NUM_SPARE_EQE (0x80)
 
 struct mlx5_eq;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 97465d00de9d..746381eccccf 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1603,7 +1603,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         ext_stride_num_range[0x1];
 	u8         roce_rw_supported[0x1];
-	u8         log_max_current_uc_list_wr_supported[0x1];
+	u8         reserved_at_3a2[0x1];
 	u8         log_max_stride_sz_rq[0x5];
 	u8         reserved_at_3a8[0x3];
 	u8         log_min_stride_sz_rq[0x5];
-- 
cgit v1.2.3


From fb4e0a5e73d4bb5ab69b7905abd2ec3b580e9b59 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Fri, 8 Oct 2021 13:33:04 -0700
Subject: skmsg: Extract and reuse sk_msg_is_readable()

tcp_bpf_sock_is_readable() is pretty much generic,
we can extract it and reuse it for non-TCP sockets.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211008203306.37525-3-xiyou.wangcong@gmail.com
---
 include/linux/skmsg.h |  1 +
 net/core/skmsg.c      | 14 ++++++++++++++
 net/ipv4/tcp_bpf.c    | 15 +--------------
 3 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 14ab0c0bc924..1ce9a9eb223b 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -128,6 +128,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
 			     struct sk_msg *msg, u32 bytes);
 int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		   int len, int flags);
+bool sk_msg_is_readable(struct sock *sk);
 
 static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
 {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..a86ef7e844f8 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -474,6 +474,20 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 }
 EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
 
+bool sk_msg_is_readable(struct sock *sk)
+{
+	struct sk_psock *psock;
+	bool empty = true;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock))
+		empty = list_empty(&psock->ingress_msg);
+	rcu_read_unlock();
+	return !empty;
+}
+EXPORT_SYMBOL_GPL(sk_msg_is_readable);
+
 static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 						  struct sk_buff *skb)
 {
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 7e71e9e278cb..5f4d6f45d87f 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -150,19 +150,6 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
 EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
 
 #ifdef CONFIG_BPF_SYSCALL
-static bool tcp_bpf_sock_is_readable(struct sock *sk)
-{
-	struct sk_psock *psock;
-	bool empty = true;
-
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (likely(psock))
-		empty = list_empty(&psock->ingress_msg);
-	rcu_read_unlock();
-	return !empty;
-}
-
 static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
 			     long timeo)
 {
@@ -491,7 +478,7 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
 	prot[TCP_BPF_BASE].unhash		= sock_map_unhash;
 	prot[TCP_BPF_BASE].close		= sock_map_close;
 	prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
-	prot[TCP_BPF_BASE].sock_is_readable	= tcp_bpf_sock_is_readable;
+	prot[TCP_BPF_BASE].sock_is_readable	= sk_msg_is_readable;
 
 	prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];
 	prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
-- 
cgit v1.2.3


From 99d0a3831e3500d945162cdb2310e3a5fce90b60 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 21 Oct 2021 08:46:10 -1000
Subject: bpf: Move BPF_MAP_TYPE for INODE_STORAGE and TASK_STORAGE outside of
 CONFIG_NET

bpf_types.h has BPF_MAP_TYPE_INODE_STORAGE and BPF_MAP_TYPE_TASK_STORAGE
declared inside #ifdef CONFIG_NET although they are built regardless of
CONFIG_NET. So, when CONFIG_BPF_SYSCALL && !CONFIG_NET, they are built
without the declarations leading to spurious build failures and not
registered to bpf_map_types making them unavailable.

Fix it by moving the BPF_MAP_TYPE for the two map types outside of
CONFIG_NET.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: a10787e6d58c ("bpf: Enable task local storage for tracing programs")
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/YXG1cuuSJDqHQfRY@slm.duckdns.org
---
 include/linux/bpf_types.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..bbe1eefa4c8a 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -101,14 +101,14 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
-#ifdef CONFIG_NET
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
-BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 #ifdef CONFIG_BPF_LSM
 BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops)
+#ifdef CONFIG_NET
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
 #if defined(CONFIG_XDP_SOCKETS)
 BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
-- 
cgit v1.2.3


From 54713c85f536048e685258f880bf298a74c3620d Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Tue, 26 Oct 2021 13:00:19 +0200
Subject: bpf: Fix potential race in tail call compatibility check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lorenzo noticed that the code testing for program type compatibility of
tail call maps is potentially racy in that two threads could encounter a
map with an unset type simultaneously and both return true even though they
are inserting incompatible programs.

The race window is quite small, but artificially enlarging it by adding a
usleep_range() inside the check in bpf_prog_array_compatible() makes it
trivial to trigger from userspace with a program that does, essentially:

        map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, 4, 4, 2, 0);
        pid = fork();
        if (pid) {
                key = 0;
                value = xdp_fd;
        } else {
                key = 1;
                value = tc_fd;
        }
        err = bpf_map_update_elem(map_fd, &key, &value, 0);

While the race window is small, it has potentially serious ramifications in
that triggering it would allow a BPF program to tail call to a program of a
different type. So let's get rid of it by protecting the update with a
spinlock. The commit in the Fixes tag is the last commit that touches the
code in question.

v2:
- Use a spinlock instead of an atomic variable and cmpxchg() (Alexei)
v3:
- Put lock and the members it protects into an embedded 'owner' struct (Daniel)

Fixes: 3324b584b6f6 ("ebpf: misc core cleanup")
Reported-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211026110019.363464-1-toke@redhat.com
---
 include/linux/bpf.h   |  7 +++++--
 kernel/bpf/arraymap.c |  1 +
 kernel/bpf/core.c     | 20 +++++++++++++-------
 kernel/bpf/syscall.c  |  6 ++++--
 4 files changed, 23 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 020a7d5bf470..3db6f6c95489 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -929,8 +929,11 @@ struct bpf_array_aux {
 	 * stored in the map to make sure that all callers and callees have
 	 * the same prog type and JITed flag.
 	 */
-	enum bpf_prog_type type;
-	bool jited;
+	struct {
+		spinlock_t lock;
+		enum bpf_prog_type type;
+		bool jited;
+	} owner;
 	/* Programs with direct jumps into programs part of this array. */
 	struct list_head poke_progs;
 	struct bpf_map *map;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index cebd4fb06d19..447def540544 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1072,6 +1072,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
 	INIT_WORK(&aux->work, prog_array_map_clear_deferred);
 	INIT_LIST_HEAD(&aux->poke_progs);
 	mutex_init(&aux->poke_mutex);
+	spin_lock_init(&aux->owner.lock);
 
 	map = array_map_alloc(attr);
 	if (IS_ERR(map)) {
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1e7eb3f1876..6e3ae90ad107 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1823,20 +1823,26 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
+	bool ret;
+
 	if (fp->kprobe_override)
 		return false;
 
-	if (!array->aux->type) {
+	spin_lock(&array->aux->owner.lock);
+
+	if (!array->aux->owner.type) {
 		/* There's no owner yet where we could check for
 		 * compatibility.
 		 */
-		array->aux->type  = fp->type;
-		array->aux->jited = fp->jited;
-		return true;
+		array->aux->owner.type  = fp->type;
+		array->aux->owner.jited = fp->jited;
+		ret = true;
+	} else {
+		ret = array->aux->owner.type  == fp->type &&
+		      array->aux->owner.jited == fp->jited;
 	}
-
-	return array->aux->type  == fp->type &&
-	       array->aux->jited == fp->jited;
+	spin_unlock(&array->aux->owner.lock);
+	return ret;
 }
 
 static int bpf_check_tail_call(const struct bpf_prog *fp)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9dab49d3f394..1cad6979a0d0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -543,8 +543,10 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 
 	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
 		array = container_of(map, struct bpf_array, map);
-		type  = array->aux->type;
-		jited = array->aux->jited;
+		spin_lock(&array->aux->owner.lock);
+		type  = array->aux->owner.type;
+		jited = array->aux->owner.jited;
+		spin_unlock(&array->aux->owner.lock);
 	}
 
 	seq_printf(m,
-- 
cgit v1.2.3


From cfe6807d82e97e81c3209dca9448f091e1448a57 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Tue, 26 Oct 2021 18:58:11 +0100
Subject: gpio: Allow per-parent interrupt data

The core gpiolib code is able to deal with multiple interrupt parents
for a single gpio irqchip. It however only allows a single piece
of data to be conveyed to all flow handlers (either the gpio_chip
or some other, driver-specific data).

This means that drivers have to go through some interesting dance
to find the correct context, something that isn't great in interrupt
context (see aebdc8abc9db86e2bd33070fc2f961012fff74b4 for a prime
example).

Instead, offer an optional way for a pinctrl/gpio driver to provide
an array of pointers which gets used to provide the correct context
to the flow handler.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20211026175815.52703-2-joey.gouly@arm.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      |  9 +++++++--
 include/linux/gpio/driver.h | 19 +++++++++++++++++--
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index d1b9b721218f..abfbf546d159 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1534,9 +1534,14 @@ static int gpiochip_add_irqchip(struct gpio_chip *gc,
 	}
 
 	if (gc->irq.parent_handler) {
-		void *data = gc->irq.parent_handler_data ?: gc;
-
 		for (i = 0; i < gc->irq.num_parents; i++) {
+			void *data;
+
+			if (gc->irq.per_parent_data)
+				data = gc->irq.parent_handler_data_array[i];
+			else
+				data = gc->irq.parent_handler_data ?: gc;
+
 			/*
 			 * The parent IRQ chip is already using the chip_data
 			 * for this IRQ chip, so our callbacks simply use the
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index a0f9901dcae6..a673a359e20b 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -168,11 +168,18 @@ struct gpio_irq_chip {
 
 	/**
 	 * @parent_handler_data:
+	 * @parent_handler_data_array:
 	 *
 	 * Data associated, and passed to, the handler for the parent
-	 * interrupt.
+	 * interrupt. Can either be a single pointer if @per_parent_data
+	 * is false, or an array of @num_parents pointers otherwise.  If
+	 * @per_parent_data is true, @parent_handler_data_array cannot be
+	 * NULL.
 	 */
-	void *parent_handler_data;
+	union {
+		void *parent_handler_data;
+		void **parent_handler_data_array;
+	};
 
 	/**
 	 * @num_parents:
@@ -203,6 +210,14 @@ struct gpio_irq_chip {
 	 */
 	bool threaded;
 
+	/**
+	 * @per_parent_data:
+	 *
+	 * True if parent_handler_data_array describes a @num_parents
+	 * sized array to be used as parent data.
+	 */
+	bool per_parent_data;
+
 	/**
 	 * @init_hw: optional routine to initialize hardware before
 	 * an IRQ chip will be added. This is quite useful when
-- 
cgit v1.2.3


From 7529cc7fbd9c02eda6851f3260416cbe198a321d Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Wed, 30 Dec 2020 11:41:52 +0200
Subject: lib: bitmap: Introduce node-aware alloc API

Expose new node-aware API for bitmap allocation:
bitmap_alloc_node() / bitmap_zalloc_node().

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 include/linux/bitmap.h |  2 ++
 lib/bitmap.c           | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 37f36dad18bd..a241dcf50f39 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -123,6 +123,8 @@ struct device;
  */
 unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags);
 unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags);
+unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node);
+unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node);
 void bitmap_free(const unsigned long *bitmap);
 
 /* Managed variants of the above. */
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 663dd81967d4..926408883456 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -1398,6 +1398,19 @@ unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags)
 }
 EXPORT_SYMBOL(bitmap_zalloc);
 
+unsigned long *bitmap_alloc_node(unsigned int nbits, gfp_t flags, int node)
+{
+	return kmalloc_array_node(BITS_TO_LONGS(nbits), sizeof(unsigned long),
+				  flags, node);
+}
+EXPORT_SYMBOL(bitmap_alloc_node);
+
+unsigned long *bitmap_zalloc_node(unsigned int nbits, gfp_t flags, int node)
+{
+	return bitmap_alloc_node(nbits, flags | __GFP_ZERO, node);
+}
+EXPORT_SYMBOL(bitmap_zalloc_node);
+
 void bitmap_free(const unsigned long *bitmap)
 {
 	kfree(bitmap);
-- 
cgit v1.2.3


From 50f477fe9933193e960785f1192be801d7cd307a Mon Sep 17 00:00:00 2001
From: Ben Ben-Ishay <benishay@nvidia.com>
Date: Thu, 2 Jul 2020 17:22:45 +0300
Subject: net/mlx5e: Rename lro_timeout to packet_merge_timeout

TIR stands for transport interface receive, the TIR object is
responsible for performing all transport related operations on
the receive side like packet processing, demultiplexing the packets
to different RQ's, etc.
lro_timeout is a field in the TIR that is used to set the timeout for lro
session, this series introduces new packet merge type, therefore rename
lro_timeout to packet_merge_timeout for all packet merge types.

Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h        | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/tir.c    | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c   | 2 +-
 include/linux/mlx5/mlx5_ifc.h                       | 6 +++---
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a3a4fece0cac..26e3f413386a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -265,7 +265,7 @@ struct mlx5e_params {
 	bool scatter_fcs_en;
 	bool rx_dim_enabled;
 	bool tx_dim_enabled;
-	u32 lro_timeout;
+	u32 packet_merge_timeout;
 	u32 pflags;
 	struct bpf_prog *xdp_prog;
 	struct mlx5e_xsk *xsk;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 3cbb596821e8..2b2b3c5cdbd5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -173,7 +173,7 @@ struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
 
 	lro_param = (struct mlx5e_lro_param) {
 		.enabled = params->lro_en,
-		.timeout = params->lro_timeout,
+		.timeout = params->packet_merge_timeout,
 	};
 
 	return lro_param;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
index de936dc4bc48..857ea0979159 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -82,9 +82,9 @@ void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
 	if (!lro_param->enabled)
 		return;
 
-	MLX5_SET(tirc, tirc, lro_enable_mask,
-		 MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
-		 MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
+	MLX5_SET(tirc, tirc, packet_merge_mask,
+		 MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO |
+		 MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
 	MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
 		 (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
 	MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f3dec58026d9..0e7a8afeb9bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4404,7 +4404,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
 		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
 			params->lro_en = !slow_pci_heuristic(mdev);
 	}
-	params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+	params->packet_merge_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
 
 	/* CQ moderation params */
 	rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 746381eccccf..21c0fd478afa 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3361,8 +3361,8 @@ enum {
 };
 
 enum {
-	MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO  = 0x1,
-	MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO  = 0x2,
+	MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO  = BIT(0),
+	MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO  = BIT(1),
 };
 
 enum {
@@ -3387,7 +3387,7 @@ struct mlx5_ifc_tirc_bits {
 
 	u8         reserved_at_80[0x4];
 	u8         lro_timeout_period_usecs[0x10];
-	u8         lro_enable_mask[0x4];
+	u8         packet_merge_mask[0x4];
 	u8         lro_max_ip_payload_size[0x8];
 
 	u8         reserved_at_a0[0x40];
-- 
cgit v1.2.3


From 7025329d208cae45937d2a0910786a45b9981475 Mon Sep 17 00:00:00 2001
From: Ben Ben-Ishay <benishay@nvidia.com>
Date: Wed, 9 Sep 2020 17:36:39 +0300
Subject: net/mlx5: Add SHAMPO caps, HW bits and enumerations

This commit adds SHAMPO bit to hca_cap and SHAMPO capabilities structure,
SHAMPO related HW spec hardware fields and enumerations.
SHAMPO stands for: split headers and merge payload offload.
SHAMPO new fields:
WQ:
 - headers_mkey: mkey that represents the headers buffer, where the packets
   headers will be written by the HW.

 - shampo_enable: flag to verify if the WQ supports SHAMPO feature.

 - log_reservation_size: the log of the reservation size where the data of
   the packet will be written by the HW.

 - log_max_num_of_packets_per_reservation: log of the maximum number of
   packets that can be written to the same reservation.

 - log_headers_entry_size: log of the header entry size of the headers buffer.

 - log_headers_buffer_entry_num: log of the entries number of the headers buffer.

RQ:
 - shampo_no_match_alignment_granularity: the HW alignment granularity
   in case the received packet doesn't match the current session.

 - shampo_match_criteria_type: the type of match criteria.

 - reservation_timeout: the maximum time that the HW will hold the
   reservation.

mlx5_ifc_shampo_cap_bits, the capabilities of the SHAMPO feature:
 - shampo_log_max_reservation_size: the maximum allowed value of the field
   WQ.log_reservation_size.

 - log_reservation_size: the minimum allowed value of the field
   WQ.log_reservation_size.

 - shampo_min_mss_size: the minimum payload size of packet that can open
   a new session or be merged to a session.

 - shampo_max_log_headers_entry_size: the maximum allowed value of the field
   WQ.log_headers_entry_size

Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c   |  6 +++
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  1 +
 include/linux/mlx5/device.h                    |  4 ++
 include/linux/mlx5/mlx5_ifc.h                  | 56 ++++++++++++++++++++++++--
 4 files changed, 64 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 1037e3629e7e..2d8406fab844 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -269,6 +269,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 			return err;
 	}
 
+	if (MLX5_CAP_GEN(dev, shampo)) {
+		err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f8446395163a..a92a92a52346 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1417,6 +1417,7 @@ static const int types[] = {
 	MLX5_CAP_VDPA_EMULATION,
 	MLX5_CAP_IPSEC,
 	MLX5_CAP_PORT_SELECTION,
+	MLX5_CAP_DEV_SHAMPO,
 };
 
 static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f8a0bbb42c3b..0d30a6184e1d 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1186,6 +1186,7 @@ enum mlx5_cap_type {
 	MLX5_CAP_VDPA_EMULATION = 0x13,
 	MLX5_CAP_DEV_EVENT = 0x14,
 	MLX5_CAP_IPSEC,
+	MLX5_CAP_DEV_SHAMPO = 0x1d,
 	MLX5_CAP_GENERAL_2 = 0x20,
 	MLX5_CAP_PORT_SELECTION = 0x25,
 	/* NUM OF CAP Types */
@@ -1431,6 +1432,9 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_IPSEC(mdev, cap)\
 	MLX5_GET(ipsec_cap, (mdev)->caps.hca[MLX5_CAP_IPSEC]->cur, cap)
 
+#define MLX5_CAP_DEV_SHAMPO(mdev, cap)\
+	MLX5_GET(shampo_cap, mdev->caps.hca_cur[MLX5_CAP_DEV_SHAMPO], cap)
+
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
 	MLX5_CMD_STAT_INT_ERR			= 0x1,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 21c0fd478afa..f1c134af5fcf 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1350,7 +1350,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_b0[0x1];
 	u8         uplink_follow[0x1];
 	u8         ts_cqe_to_dest_cqn[0x1];
-	u8         reserved_at_b3[0xd];
+	u8         reserved_at_b3[0x7];
+	u8         shampo[0x1];
+	u8         reserved_at_bb[0x5];
 
 	u8         max_sgl_for_optimized_performance[0x8];
 	u8         log_max_cq_sz[0x8];
@@ -1893,7 +1895,21 @@ struct mlx5_ifc_wq_bits {
 	u8         reserved_at_139[0x4];
 	u8         log_wqe_stride_size[0x3];
 
-	u8         reserved_at_140[0x4c0];
+	u8         reserved_at_140[0x80];
+
+	u8         headers_mkey[0x20];
+
+	u8         shampo_enable[0x1];
+	u8         reserved_at_1e1[0x4];
+	u8         log_reservation_size[0x3];
+	u8         reserved_at_1e8[0x5];
+	u8         log_max_num_of_packets_per_reservation[0x3];
+	u8         reserved_at_1f0[0x6];
+	u8         log_headers_entry_size[0x2];
+	u8         reserved_at_1f8[0x4];
+	u8         log_headers_buffer_entry_num[0x4];
+
+	u8         reserved_at_200[0x400];
 
 	struct mlx5_ifc_cmd_pas_bits pas[];
 };
@@ -3169,6 +3185,20 @@ struct mlx5_ifc_roce_addr_layout_bits {
 	u8         reserved_at_e0[0x20];
 };
 
+struct mlx5_ifc_shampo_cap_bits {
+	u8    reserved_at_0[0x3];
+	u8    shampo_log_max_reservation_size[0x5];
+	u8    reserved_at_8[0x3];
+	u8    shampo_log_min_reservation_size[0x5];
+	u8    shampo_min_mss_size[0x10];
+
+	u8    reserved_at_20[0x3];
+	u8    shampo_max_log_headers_entry_size[0x5];
+	u8    reserved_at_28[0x18];
+
+	u8    reserved_at_40[0x7c0];
+};
+
 union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
 	struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2;
@@ -3187,6 +3217,7 @@ union mlx5_ifc_hca_cap_union_bits {
 	struct mlx5_ifc_tls_cap_bits tls_cap;
 	struct mlx5_ifc_device_mem_cap_bits device_mem_cap;
 	struct mlx5_ifc_virtio_emulation_cap_bits virtio_emulation_cap;
+	struct mlx5_ifc_shampo_cap_bits shampo_cap;
 	u8         reserved_at_0[0x8000];
 };
 
@@ -3363,6 +3394,7 @@ enum {
 enum {
 	MLX5_TIRC_PACKET_MERGE_MASK_IPV4_LRO  = BIT(0),
 	MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO  = BIT(1),
+	MLX5_TIRC_PACKET_MERGE_MASK_SHAMPO    = BIT(2),
 };
 
 enum {
@@ -3569,6 +3601,18 @@ enum {
 	MLX5_RQC_STATE_ERR  = 0x3,
 };
 
+enum {
+	MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_BYTE    = 0x0,
+	MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE  = 0x1,
+	MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_PAGE    = 0x2,
+};
+
+enum {
+	MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_NO_MATCH    = 0x0,
+	MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED    = 0x1,
+	MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_FIVE_TUPLE  = 0x2,
+};
+
 struct mlx5_ifc_rqc_bits {
 	u8         rlky[0x1];
 	u8	   delay_drop_en[0x1];
@@ -3601,7 +3645,13 @@ struct mlx5_ifc_rqc_bits {
 	u8         reserved_at_c0[0x10];
 	u8         hairpin_peer_vhca[0x10];
 
-	u8         reserved_at_e0[0xa0];
+	u8         reserved_at_e0[0x46];
+	u8         shampo_no_match_alignment_granularity[0x2];
+	u8         reserved_at_128[0x6];
+	u8         shampo_match_criteria_type[0x2];
+	u8         reservation_timeout[0x10];
+
+	u8         reserved_at_140[0x40];
 
 	struct mlx5_ifc_wq_bits wq;
 };
-- 
cgit v1.2.3


From eaee12f046924eeb1210c7e4f3b326603ff1bd85 Mon Sep 17 00:00:00 2001
From: Khalid Manaa <khalidm@nvidia.com>
Date: Wed, 9 Jun 2021 12:27:32 +0300
Subject: net/mlx5e: Rename TIR lro functions to TIR packet merge functions

This series introduces new packet merge type, therefore rename lro
functions to packet merge to support the new merge type:
- Generalize + rename mlx5e_build_tir_ctx_lro to
  mlx5e_build_tir_ctx_packet_merge.
- Rename mlx5e_modify_tirs_lro to mlx5e_modify_tirs_packet_merge.
- Rename lro bit in mlx5_ifc_modify_tir_bitmask_bits to packet_merge.
- Rename lro_en in mlx5e_params to packet_merge_type type and combine
  packet_merge params into one struct mlx5e_packet_merge_param.

Signed-off-by: Khalid Manaa <khalidm@nvidia.com>
Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       | 14 ++++++-
 .../net/ethernet/mellanox/mlx5/core/en/params.c    | 21 +++-------
 .../net/ethernet/mellanox/mlx5/core/en/params.h    |  6 ---
 drivers/net/ethernet/mellanox/mlx5/core/en/rss.c   | 23 +++++------
 drivers/net/ethernet/mellanox/mlx5/core/en/rss.h   |  7 ++--
 .../net/ethernet/mellanox/mlx5/core/en/rx_res.c    | 25 ++++++------
 .../net/ethernet/mellanox/mlx5/core/en/rx_res.h    |  5 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en/tir.c   | 10 ++---
 drivers/net/ethernet/mellanox/mlx5/core/en/tir.h   |  6 +--
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  4 +-
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |  6 +--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 46 ++++++++++++----------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  5 +--
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |  7 ++--
 include/linux/mlx5/mlx5_ifc.h                      |  2 +-
 15 files changed, 95 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 26e3f413386a..8c3e7464b30f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -242,6 +242,17 @@ enum mlx5e_priv_flag {
 
 #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
 
+enum packet_merge {
+	MLX5E_PACKET_MERGE_NONE,
+	MLX5E_PACKET_MERGE_LRO,
+	MLX5E_PACKET_MERGE_SHAMPO,
+};
+
+struct mlx5e_packet_merge_param {
+	enum packet_merge type;
+	u32 timeout;
+};
+
 struct mlx5e_params {
 	u8  log_sq_size;
 	u8  rq_wq_type;
@@ -259,13 +270,12 @@ struct mlx5e_params {
 	bool tunneled_offload_en;
 	struct dim_cq_moder rx_cq_moderation;
 	struct dim_cq_moder tx_cq_moderation;
-	bool lro_en;
+	struct mlx5e_packet_merge_param packet_merge;
 	u8  tx_min_inline_mode;
 	bool vlan_strip_disable;
 	bool scatter_fcs_en;
 	bool rx_dim_enabled;
 	bool tx_dim_enabled;
-	u32 packet_merge_timeout;
 	u32 pflags;
 	struct bpf_prog *xdp_prog;
 	struct mlx5e_xsk *xsk;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 2b2b3c5cdbd5..15f441a1b80c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -87,7 +87,8 @@ bool mlx5e_rx_is_linear_skb(struct mlx5e_params *params,
 	u32 linear_frag_sz = max(mlx5e_rx_get_linear_frag_sz(params, xsk),
 				 mlx5e_rx_get_linear_frag_sz(params, NULL));
 
-	return !params->lro_en && linear_frag_sz <= PAGE_SIZE;
+	return params->packet_merge.type == MLX5E_PACKET_MERGE_NONE &&
+		linear_frag_sz <= PAGE_SIZE;
 }
 
 bool mlx5e_verify_rx_mpwqe_strides(struct mlx5_core_dev *mdev,
@@ -164,19 +165,8 @@ u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
 		mlx5e_rx_is_linear_skb(params, xsk) :
 		mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
 
-	return is_linear_skb ? mlx5e_get_linear_rq_headroom(params, xsk) : 0;
-}
-
-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params)
-{
-	struct mlx5e_lro_param lro_param;
-
-	lro_param = (struct mlx5e_lro_param) {
-		.enabled = params->lro_en,
-		.timeout = params->packet_merge_timeout,
-	};
-
-	return lro_param;
+	return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
+		mlx5e_get_linear_rq_headroom(params, xsk) : 0;
 }
 
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
@@ -485,10 +475,11 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
 
 static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 {
+	bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
 	bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
 		MLX5_CAP_GEN(mdev, relaxed_ordering_write);
 
-	return ro && params->lro_en ?
+	return ro && lro_en ?
 		MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index 879ad46d754e..e9593f5f0661 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -11,11 +11,6 @@ struct mlx5e_xsk_param {
 	u16 chunk_size;
 };
 
-struct mlx5e_lro_param {
-	bool enabled;
-	u32 timeout;
-};
-
 struct mlx5e_cq_param {
 	u32                        cqc[MLX5_ST_SZ_DW(cqc)];
 	struct mlx5_wq_param       wq;
@@ -125,7 +120,6 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev,
 u16 mlx5e_get_rq_headroom(struct mlx5_core_dev *mdev,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk);
-struct mlx5e_lro_param mlx5e_get_lro_param(struct mlx5e_params *params);
 
 /* Build queue parameters */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
index b8b481b335cf..c1cdd8c2e37a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
@@ -127,7 +127,7 @@ mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt)
 
 static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
 				enum mlx5_traffic_types tt,
-				const struct mlx5e_lro_param *init_lro_param,
+				const struct mlx5e_packet_merge_param *init_pkt_merge_param,
 				bool inner)
 {
 	struct mlx5e_rss_params_traffic_type rss_tt;
@@ -161,7 +161,7 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss,
 	rqtn = mlx5e_rqt_get_rqtn(&rss->rqt);
 	mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn,
 				    rqtn, rss->inner_ft_support);
-	mlx5e_tir_builder_build_lro(builder, init_lro_param);
+	mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
 	rss_tt = mlx5e_rss_get_tt_config(rss, tt);
 	mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner);
 
@@ -198,14 +198,14 @@ static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types
 }
 
 static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss,
-				 const struct mlx5e_lro_param *init_lro_param,
+				 const struct mlx5e_packet_merge_param *init_pkt_merge_param,
 				 bool inner)
 {
 	enum mlx5_traffic_types tt, max_tt;
 	int err;
 
 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-		err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+		err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
 		if (err)
 			goto err_destroy_tirs;
 	}
@@ -297,7 +297,7 @@ int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
 
 int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
 		   bool inner_ft_support, u32 drop_rqn,
-		   const struct mlx5e_lro_param *init_lro_param)
+		   const struct mlx5e_packet_merge_param *init_pkt_merge_param)
 {
 	int err;
 
@@ -305,12 +305,12 @@ int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
 	if (err)
 		goto err_out;
 
-	err = mlx5e_rss_create_tirs(rss, init_lro_param, false);
+	err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false);
 	if (err)
 		goto err_destroy_rqt;
 
 	if (inner_ft_support) {
-		err = mlx5e_rss_create_tirs(rss, init_lro_param, true);
+		err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true);
 		if (err)
 			goto err_destroy_tirs;
 	}
@@ -372,7 +372,7 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
  */
 int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
 			  enum mlx5_traffic_types tt,
-			  const struct mlx5e_lro_param *init_lro_param,
+			  const struct mlx5e_packet_merge_param *init_pkt_merge_param,
 			  bool inner, u32 *tirn)
 {
 	struct mlx5e_tir *tir;
@@ -381,7 +381,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
 	if (!tir) { /* TIR doesn't exist, create one */
 		int err;
 
-		err = mlx5e_rss_create_tir(rss, tt, init_lro_param, inner);
+		err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner);
 		if (err)
 			return err;
 		tir = rss_get_tir(rss, tt, inner);
@@ -419,7 +419,8 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss)
 			       mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err);
 }
 
-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param)
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+				     struct mlx5e_packet_merge_param *pkt_merge_param)
 {
 	struct mlx5e_tir_builder *builder;
 	enum mlx5_traffic_types tt;
@@ -429,7 +430,7 @@ int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_p
 	if (!builder)
 		return -ENOMEM;
 
-	mlx5e_tir_builder_build_lro(builder, lro_param);
+	mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
 
 	final_err = 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
index d522a10dadf3..c6b216416344 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
@@ -17,7 +17,7 @@ struct mlx5e_rss *mlx5e_rss_alloc(void);
 void mlx5e_rss_free(struct mlx5e_rss *rss);
 int mlx5e_rss_init(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
 		   bool inner_ft_support, u32 drop_rqn,
-		   const struct mlx5e_lro_param *init_lro_param);
+		   const struct mlx5e_packet_merge_param *init_pkt_merge_param);
 int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss, struct mlx5_core_dev *mdev,
 			   bool inner_ft_support, u32 drop_rqn);
 int mlx5e_rss_cleanup(struct mlx5e_rss *rss);
@@ -30,13 +30,14 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt,
 		       bool inner);
 int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss,
 			  enum mlx5_traffic_types tt,
-			  const struct mlx5e_lro_param *init_lro_param,
+			  const struct mlx5e_packet_merge_param *init_pkt_merge_param,
 			  bool inner, u32 *tirn);
 
 void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, unsigned int num_rqns);
 void mlx5e_rss_disable(struct mlx5e_rss *rss);
 
-int mlx5e_rss_lro_set_param(struct mlx5e_rss *rss, struct mlx5e_lro_param *lro_param);
+int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss,
+				     struct mlx5e_packet_merge_param *pkt_merge_param);
 int mlx5e_rss_get_rxfh(struct mlx5e_rss *rss, u32 *indir, u8 *key, u8 *hfunc);
 int mlx5e_rss_set_rxfh(struct mlx5e_rss *rss, const u32 *indir,
 		       const u8 *key, const u8 *hfunc,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
index 13056cb9757d..142953847996 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
@@ -34,7 +34,7 @@ struct mlx5e_rx_res {
 /* API for rx_res_rss_* */
 
 static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
-				     const struct mlx5e_lro_param *init_lro_param,
+				     const struct mlx5e_packet_merge_param *init_pkt_merge_param,
 				     unsigned int init_nch)
 {
 	bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
@@ -49,7 +49,7 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res,
 		return -ENOMEM;
 
 	err = mlx5e_rss_init(rss, res->mdev, inner_ft_support, res->drop_rqn,
-			     init_lro_param);
+			     init_pkt_merge_param);
 	if (err)
 		goto err_rss_free;
 
@@ -275,7 +275,7 @@ struct mlx5e_rx_res *mlx5e_rx_res_alloc(void)
 }
 
 static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
-				      const struct mlx5e_lro_param *init_lro_param)
+				      const struct mlx5e_packet_merge_param *init_pkt_merge_param)
 {
 	bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT;
 	struct mlx5e_tir_builder *builder;
@@ -306,7 +306,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
 		mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
 					    mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt),
 					    inner_ft_support);
-		mlx5e_tir_builder_build_lro(builder, init_lro_param);
+		mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
 		mlx5e_tir_builder_build_direct(builder);
 
 		err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
@@ -336,7 +336,7 @@ static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res,
 		mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn,
 					    mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt),
 					    inner_ft_support);
-		mlx5e_tir_builder_build_lro(builder, init_lro_param);
+		mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param);
 		mlx5e_tir_builder_build_direct(builder);
 
 		err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true);
@@ -437,7 +437,7 @@ static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res)
 
 int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
 		      enum mlx5e_rx_res_features features, unsigned int max_nch,
-		      u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+		      u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
 		      unsigned int init_nch)
 {
 	int err;
@@ -447,11 +447,11 @@ int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
 	res->max_nch = max_nch;
 	res->drop_rqn = drop_rqn;
 
-	err = mlx5e_rx_res_rss_init_def(res, init_lro_param, init_nch);
+	err = mlx5e_rx_res_rss_init_def(res, init_pkt_merge_param, init_nch);
 	if (err)
 		goto err_out;
 
-	err = mlx5e_rx_res_channels_init(res, init_lro_param);
+	err = mlx5e_rx_res_channels_init(res, init_pkt_merge_param);
 	if (err)
 		goto err_rss_destroy;
 
@@ -645,7 +645,8 @@ int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix)
 	return err;
 }
 
-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param)
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+					struct mlx5e_packet_merge_param *pkt_merge_param)
 {
 	struct mlx5e_tir_builder *builder;
 	int err, final_err;
@@ -655,7 +656,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
 	if (!builder)
 		return -ENOMEM;
 
-	mlx5e_tir_builder_build_lro(builder, lro_param);
+	mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param);
 
 	final_err = 0;
 
@@ -665,7 +666,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
 		if (!rss)
 			continue;
 
-		err = mlx5e_rss_lro_set_param(rss, lro_param);
+		err = mlx5e_rss_packet_merge_set_param(rss, pkt_merge_param);
 		if (err)
 			final_err = final_err ? : err;
 	}
@@ -673,7 +674,7 @@ int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param
 	for (ix = 0; ix < res->max_nch; ix++) {
 		err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder);
 		if (err) {
-			mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n",
+			mlx5_core_warn(res->mdev, "Failed to update packet merge state of direct TIR %#x for channel %u: err = %d\n",
 				       mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err);
 			if (!final_err)
 				final_err = err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
index 4a15942d79f7..d09f7d174a51 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
@@ -25,7 +25,7 @@ enum mlx5e_rx_res_features {
 struct mlx5e_rx_res *mlx5e_rx_res_alloc(void);
 int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev,
 		      enum mlx5e_rx_res_features features, unsigned int max_nch,
-		      u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param,
+		      u32 drop_rqn, const struct mlx5e_packet_merge_param *init_pkt_merge_param,
 		      unsigned int init_nch);
 void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res);
 void mlx5e_rx_res_free(struct mlx5e_rx_res *res);
@@ -57,7 +57,8 @@ int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, u32 rss_idx,
 u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt);
 int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt,
 				     u8 rx_hash_fields);
-int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param);
+int mlx5e_rx_res_packet_merge_set_param(struct mlx5e_rx_res *res,
+					struct mlx5e_packet_merge_param *pkt_merge_param);
 
 int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 *rss_idx, unsigned int init_nch);
 int mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res, u32 rss_idx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
index 857ea0979159..a1afb8585e37 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
@@ -70,16 +70,16 @@ void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
 	MLX5_SET(tirc, tirc, tunneled_offload_en, inner_ft_support);
 }
 
-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
-				 const struct mlx5e_lro_param *lro_param)
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+					  const struct mlx5e_packet_merge_param *pkt_merge_param)
 {
 	void *tirc = mlx5e_tir_builder_get_tirc(builder);
 	const unsigned int rough_max_l2_l3_hdr_sz = 256;
 
 	if (builder->modify)
-		MLX5_SET(modify_tir_in, builder->in, bitmask.lro, 1);
+		MLX5_SET(modify_tir_in, builder->in, bitmask.packet_merge, 1);
 
-	if (!lro_param->enabled)
+	if (pkt_merge_param->type == MLX5E_PACKET_MERGE_NONE)
 		return;
 
 	MLX5_SET(tirc, tirc, packet_merge_mask,
@@ -87,7 +87,7 @@ void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
 		 MLX5_TIRC_PACKET_MERGE_MASK_IPV6_LRO);
 	MLX5_SET(tirc, tirc, lro_max_ip_payload_size,
 		 (MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - rough_max_l2_l3_hdr_sz) >> 8);
-	MLX5_SET(tirc, tirc, lro_timeout_period_usecs, lro_param->timeout);
+	MLX5_SET(tirc, tirc, lro_timeout_period_usecs, pkt_merge_param->timeout);
 }
 
 static int mlx5e_hfunc_to_hw(u8 hfunc)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
index e45149a78ed9..857a84bcd53a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
@@ -18,7 +18,7 @@ struct mlx5e_rss_params_traffic_type {
 };
 
 struct mlx5e_tir_builder;
-struct mlx5e_lro_param;
+struct mlx5e_packet_merge_param;
 
 struct mlx5e_tir_builder *mlx5e_tir_builder_alloc(bool modify);
 void mlx5e_tir_builder_free(struct mlx5e_tir_builder *builder);
@@ -27,8 +27,8 @@ void mlx5e_tir_builder_clear(struct mlx5e_tir_builder *builder);
 void mlx5e_tir_builder_build_inline(struct mlx5e_tir_builder *builder, u32 tdn, u32 rqn);
 void mlx5e_tir_builder_build_rqt(struct mlx5e_tir_builder *builder, u32 tdn,
 				 u32 rqtn, bool inner_ft_support);
-void mlx5e_tir_builder_build_lro(struct mlx5e_tir_builder *builder,
-				 const struct mlx5e_lro_param *lro_param);
+void mlx5e_tir_builder_build_packet_merge(struct mlx5e_tir_builder *builder,
+					  const struct mlx5e_packet_merge_param *pkt_merge_param);
 void mlx5e_tir_builder_build_rss(struct mlx5e_tir_builder *builder,
 				 const struct mlx5e_rss_params_hash *rss_hash,
 				 const struct mlx5e_rss_params_traffic_type *rss_tt,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 25926e581d18..5a46b6e1b9da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1952,8 +1952,8 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 			return -EOPNOTSUPP;
 		if (!mlx5e_striding_rq_possible(mdev, &priv->channels.params))
 			return -EINVAL;
-	} else if (priv->channels.params.lro_en) {
-		netdev_warn(netdev, "Can't set legacy RQ with LRO, disable LRO first\n");
+	} else if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+		netdev_warn(netdev, "Can't set legacy RQ with HW-GRO/LRO, disable them first\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 81ebf281cdb4..ad0d234632a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -411,7 +411,7 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
 			 u32 rss_context, u32 *tirn)
 {
 	if (fs->flow_type & FLOW_RSS) {
-		struct mlx5e_lro_param lro_param;
+		struct mlx5e_packet_merge_param pkt_merge_param;
 		struct mlx5e_rss *rss;
 		u32 flow_type;
 		int err;
@@ -426,8 +426,8 @@ static int flow_get_tirn(struct mlx5e_priv *priv,
 		if (tt < 0)
 			return -EINVAL;
 
-		lro_param = mlx5e_get_lro_param(&priv->channels.params);
-		err = mlx5e_rss_obtain_tirn(rss, tt, &lro_param, false, tirn);
+		pkt_merge_param = priv->channels.params.packet_merge;
+		err = mlx5e_rss_obtain_tirn(rss, tt, &pkt_merge_param, false, tirn);
 		if (err)
 			return err;
 		eth_rule->rss = rss;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0e7a8afeb9bd..0c039906a1fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2222,17 +2222,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
 	chs->num = 0;
 }
 
-static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
+static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rx_res *res = priv->rx_res;
-	struct mlx5e_lro_param lro_param;
 
-	lro_param = mlx5e_get_lro_param(&priv->channels.params);
-
-	return mlx5e_rx_res_lro_set_param(res, &lro_param);
+	return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
 }
 
-static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro);
+static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
 
 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
 			 struct mlx5e_params *params, u16 mtu)
@@ -3351,16 +3348,25 @@ static int set_feature_lro(struct net_device *netdev, bool enable)
 	}
 
 	new_params = *cur_params;
-	new_params.lro_en = enable;
 
-	if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
-		if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
-		    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
-			reset = false;
+	if (enable)
+		new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
+	else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
+		new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
+	else
+		goto out;
+
+	if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
+	      new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
+		if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
+			if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
+			    mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
+				reset = false;
+		}
 	}
 
 	err = mlx5e_safe_switch_params(priv, &new_params,
-				       mlx5e_modify_tirs_lro_ctx, NULL, reset);
+				       mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
 out:
 	mutex_unlock(&priv->state_lock);
 	return err;
@@ -3687,7 +3693,7 @@ int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
 		goto out;
 	}
 
-	if (params->lro_en)
+	if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
 		reset = false;
 
 	if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -4144,8 +4150,8 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
 	struct net_device *netdev = priv->netdev;
 	struct mlx5e_params new_params;
 
-	if (priv->channels.params.lro_en) {
-		netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n");
+	if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+		netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
 		return -EINVAL;
 	}
 
@@ -4402,9 +4408,10 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
 	    params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
 		/* No XSK params: checking the availability of striding RQ in general. */
 		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
-			params->lro_en = !slow_pci_heuristic(mdev);
+			params->packet_merge.type = slow_pci_heuristic(mdev) ?
+				MLX5E_PACKET_MERGE_NONE : MLX5E_PACKET_MERGE_LRO;
 	}
-	params->packet_merge_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
+	params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
 
 	/* CQ moderation params */
 	rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
@@ -4693,7 +4700,6 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	enum mlx5e_rx_res_features features;
-	struct mlx5e_lro_param lro_param;
 	int err;
 
 	priv->rx_res = mlx5e_rx_res_alloc();
@@ -4711,9 +4717,9 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 	features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP;
 	if (priv->channels.params.tunneled_offload_en)
 		features |= MLX5E_RX_RES_FEATURE_INNER_FT;
-	lro_param = mlx5e_get_lro_param(&priv->channels.params);
 	err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
-				priv->max_nch, priv->drop_rq.rqn, &lro_param,
+				priv->max_nch, priv->drop_rq.rqn,
+				&priv->channels.params.packet_merge,
 				priv->channels.params.num_channels);
 	if (err)
 		goto err_close_drop_rq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 0684ac6699b2..5230e0422cae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -793,7 +793,6 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_lro_param lro_param;
 	int err;
 
 	priv->rx_res = mlx5e_rx_res_alloc();
@@ -808,9 +807,9 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 		return err;
 	}
 
-	lro_param = mlx5e_get_lro_param(&priv->channels.params);
 	err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
-				priv->max_nch, priv->drop_rq.rqn, &lro_param,
+				priv->max_nch, priv->drop_rq.rqn,
+				&priv->channels.params.packet_merge,
 				priv->channels.params.num_channels);
 	if (err)
 		goto err_close_drop_rq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 3b8d8ada1a01..84297cc1b509 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -67,7 +67,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
 		MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE;
 
-	params->lro_en = false;
+	params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
 	params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
 	params->tunneled_offload_en = false;
 }
@@ -356,7 +356,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
 static int mlx5i_init_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_lro_param lro_param;
 	int err;
 
 	priv->rx_res = mlx5e_rx_res_alloc();
@@ -371,9 +370,9 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
 		goto err_destroy_q_counters;
 	}
 
-	lro_param = mlx5e_get_lro_param(&priv->channels.params);
 	err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
-				priv->max_nch, priv->drop_rq.rqn, &lro_param,
+				priv->max_nch, priv->drop_rq.rqn,
+				&priv->channels.params.packet_merge,
 				priv->channels.params.num_channels);
 	if (err)
 		goto err_close_drop_rq;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f1c134af5fcf..0bb78c04336c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -6707,7 +6707,7 @@ struct mlx5_ifc_modify_tir_bitmask_bits {
 	u8         reserved_at_3c[0x1];
 	u8         hash[0x1];
 	u8         reserved_at_3e[0x1];
-	u8         lro[0x1];
+	u8         packet_merge[0x1];
 };
 
 struct mlx5_ifc_modify_tir_out_bits {
-- 
cgit v1.2.3


From d7b896acbdcb3ef5dab1fd2f33ba5a8da6ba1dda Mon Sep 17 00:00:00 2001
From: Ben Ben-Ishay <benishay@nvidia.com>
Date: Tue, 14 Jul 2020 14:40:32 +0300
Subject: net/mlx5e: Add support to klm_umr_wqe

This commit adds the needed definitions for using the klm_umr_wqe.
UMR stands for user-mode memory registration, is a mechanism to alter
address translation properties of MKEY by posting WorkQueueElement
aka WQE on send queue.
MKEY stands for memory key, MKEY are used to describe a region in memory that
can be later used by HW.
KLM stands for {Key, Length, MemVa}, KLM_MKEY is indirect MKEY that enables
to map multiple memory spaces with different sizes in unified MKEY.
klm_umr_wqe is a UMR that use to update a KLM_MKEY.
SHAMPO feature uses KLM_MKEY for memory registration of his header buffer.

Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h | 24 +++++++++++++++++++++++-
 include/linux/mlx5/device.h                  |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8c3e7464b30f..98b56d8bddb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -152,6 +152,25 @@ struct page_pool;
 #define MLX5E_UMR_WQEBBS \
 	(DIV_ROUND_UP(MLX5E_UMR_WQE_INLINE_SZ, MLX5_SEND_WQE_BB))
 
+#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
+	(sizeof(struct mlx5e_umr_wqe) +\
+	(sizeof(struct mlx5_klm) * (sgl_len)))
+
+#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
+	(DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
+
+#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
+	(DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
+
+#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
+	(((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
+
+#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
+	ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT)
+
+#define MLX5E_MAX_KLM_PER_WQE(mdev) \
+	MLX5E_KLM_ENTRIES_PER_WQE(MLX5E_TX_MPW_MAX_NUM_DS << MLX5_MKEY_BSF_OCTO_SIZE)
+
 #define MLX5E_MSG_LEVEL			NETIF_MSG_LINK
 
 #define mlx5e_dbg(mlevel, priv, format, ...)                    \
@@ -217,7 +236,10 @@ struct mlx5e_umr_wqe {
 	struct mlx5_wqe_ctrl_seg       ctrl;
 	struct mlx5_wqe_umr_ctrl_seg   uctrl;
 	struct mlx5_mkey_seg           mkc;
-	struct mlx5_mtt                inline_mtts[0];
+	union {
+		struct mlx5_mtt inline_mtts[0];
+		struct mlx5_klm inline_klms[0];
+	};
 };
 
 enum mlx5e_priv_flag {
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 0d30a6184e1d..c920e5932368 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -290,6 +290,7 @@ enum {
 	MLX5_UMR_INLINE			= (1 << 7),
 };
 
+#define MLX5_UMR_KLM_ALIGNMENT 4
 #define MLX5_UMR_MTT_ALIGNMENT 0x40
 #define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
 #define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
-- 
cgit v1.2.3


From f97d5c2a453e26071e3b0ec12161de57c4a237c4 Mon Sep 17 00:00:00 2001
From: Khalid Manaa <khalidm@nvidia.com>
Date: Tue, 19 May 2020 15:45:38 +0300
Subject: net/mlx5e: Add handle SHAMPO cqe support

This patch adds the new CQE SHAMPO fields:
- flush: indicates that we must close the current session and pass the SKB
         to the network stack.

- match: indicates that the current packet matches the oppened session,
         the packet will be merge into the current SKB.

- header_size: the size of the packet headers that written into the headers
               buffer.

- header_entry_index: the entry index in the headers buffer.

- data_offset: packets data offset in the WQE.

Also new cqe handler is added to handle SHAMPO packets:
- The new handler uses CQE SHAMPO fields to build the SKB.
  CQE's Flush and match fields are not used in this patch, packets are not
  merged in this patch.

Signed-off-by: Khalid Manaa <khalidm@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h    |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 205 ++++++++++++++++++++----
 include/linux/mlx5/device.h                     |  23 ++-
 3 files changed, 194 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 8431cdd8006c..c95b6b65c4de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -943,6 +943,7 @@ struct mlx5e_priv {
 struct mlx5e_rx_handlers {
 	mlx5e_fp_handle_rx_cqe handle_rx_cqe;
 	mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
+	mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe_shampo;
 };
 
 extern const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 397a4e769076..68759c8fd31e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -62,10 +62,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 				   u16 cqe_bcnt, u32 head_offset, u32 page_idx);
 static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 
 const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
 	.handle_rx_cqe       = mlx5e_handle_rx_cqe,
 	.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
+	.handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
 };
 
 static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
@@ -185,8 +187,9 @@ static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
 			mlx5e_read_mini_arr_slot(wq, cqd, cqcc);
 
 		mlx5e_decompress_cqe_no_hash(rq, wq, cqcc);
-		INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
-				mlx5e_handle_rx_cqe, rq, &cqd->title);
+		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+				mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+				rq, &cqd->title);
 	}
 	mlx5e_cqes_update_owner(wq, cqcc - wq->cc);
 	wq->cc = cqcc;
@@ -206,8 +209,9 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
 	mlx5e_read_title_slot(rq, wq, cc);
 	mlx5e_read_mini_arr_slot(wq, cqd, cc + 1);
 	mlx5e_decompress_cqe(rq, wq, cc);
-	INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
-			mlx5e_handle_rx_cqe, rq, &cqd->title);
+	INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+			mlx5e_handle_rx_cqe_mpwrq_shampo, mlx5e_handle_rx_cqe,
+			rq, &cqd->title);
 	cqd->mini_arr_idx++;
 
 	return mlx5e_decompress_cqes_cont(rq, wq, 1, budget_rem) - 1;
@@ -448,13 +452,13 @@ mlx5e_add_skb_frag(struct mlx5e_rq *rq, struct sk_buff *skb,
 static inline void
 mlx5e_copy_skb_header(struct device *pdev, struct sk_buff *skb,
 		      struct mlx5e_dma_info *dma_info,
-		      int offset_from, u32 headlen)
+		      int offset_from, int dma_offset, u32 headlen)
 {
 	const void *from = page_address(dma_info->page) + offset_from;
 	/* Aligning len to sizeof(long) optimizes memcpy performance */
 	unsigned int len = ALIGN(headlen, sizeof(long));
 
-	dma_sync_single_for_cpu(pdev, dma_info->addr + offset_from, len,
+	dma_sync_single_for_cpu(pdev, dma_info->addr + dma_offset, len,
 				DMA_FROM_DEVICE);
 	skb_copy_to_linear_data(skb, from, len);
 }
@@ -820,8 +824,8 @@ static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp)
 
 	if (tcp_ack) {
 		tcp->ack                = 1;
-		tcp->ack_seq            = cqe->lro_ack_seq_num;
-		tcp->window             = cqe->lro_tcp_win;
+		tcp->ack_seq            = cqe->lro.ack_seq_num;
+		tcp->window             = cqe->lro.tcp_win;
 	}
 }
 
@@ -847,7 +851,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
 		tcp = ip_p + sizeof(struct iphdr);
 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
 
-		ipv4->ttl               = cqe->lro_min_ttl;
+		ipv4->ttl               = cqe->lro.min_ttl;
 		ipv4->tot_len           = cpu_to_be16(tot_len);
 		ipv4->check             = 0;
 		ipv4->check             = ip_fast_csum((unsigned char *)ipv4,
@@ -867,7 +871,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe,
 		tcp = ip_p + sizeof(struct ipv6hdr);
 		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
 
-		ipv6->hop_limit         = cqe->lro_min_ttl;
+		ipv6->hop_limit         = cqe->lro.min_ttl;
 		ipv6->payload_len       = cpu_to_be16(payload_len);
 
 		mlx5e_lro_update_tcp_hdr(cqe, tcp);
@@ -1237,7 +1241,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 	}
 
 	/* copy header */
-	mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, headlen);
+	mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset,
+			      headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
 	skb->len  += headlen;
@@ -1433,6 +1438,30 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_rep = {
 };
 #endif
 
+static void
+mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
+		    u32 data_bcnt, u32 data_offset)
+{
+	net_prefetchw(skb->data);
+
+	while (data_bcnt) {
+		u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - data_offset, data_bcnt);
+		unsigned int truesize;
+
+		if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
+			truesize = pg_consumed_bytes;
+		else
+			truesize = ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
+
+		mlx5e_add_skb_frag(rq, skb, di, data_offset,
+				   pg_consumed_bytes, truesize);
+
+		data_bcnt -= pg_consumed_bytes;
+		data_offset = 0;
+		di++;
+	}
+}
+
 static struct sk_buff *
 mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 				   u16 cqe_bcnt, u32 head_offset, u32 page_idx)
@@ -1458,20 +1487,9 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
 		frag_offset -= PAGE_SIZE;
 	}
 
-	while (byte_cnt) {
-		u32 pg_consumed_bytes =
-			min_t(u32, PAGE_SIZE - frag_offset, byte_cnt);
-		unsigned int truesize =
-			ALIGN(pg_consumed_bytes, BIT(rq->mpwqe.log_stride_sz));
-
-		mlx5e_add_skb_frag(rq, skb, di, frag_offset,
-				   pg_consumed_bytes, truesize);
-		byte_cnt -= pg_consumed_bytes;
-		frag_offset = 0;
-		di++;
-	}
+	mlx5e_fill_skb_data(skb, rq, di, byte_cnt, frag_offset);
 	/* copy header */
-	mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, headlen);
+	mlx5e_copy_skb_header(rq->pdev, skb, head_di, head_offset, head_offset, headlen);
 	/* skb linear part was allocated with headlen and aligned to long */
 	skb->tail += headlen;
 	skb->len  += headlen;
@@ -1525,6 +1543,123 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	return skb;
 }
 
+static struct sk_buff *
+mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
+			  struct mlx5_cqe64 *cqe, u16 header_index)
+{
+	struct mlx5e_dma_info *head = &rq->mpwqe.shampo->info[header_index];
+	u16 head_offset = head->addr & (PAGE_SIZE - 1);
+	u16 head_size = cqe->shampo.header_size;
+	u16 rx_headroom = rq->buff.headroom;
+	struct sk_buff *skb = NULL;
+	void *hdr, *data;
+	u32 frag_size;
+
+	hdr		= page_address(head->page) + head_offset;
+	data		= hdr + rx_headroom;
+	frag_size	= MLX5_SKB_FRAG_SZ(rx_headroom + head_size);
+
+	if (likely(frag_size <= BIT(MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE))) {
+		/* build SKB around header */
+		dma_sync_single_range_for_cpu(rq->pdev, head->addr, 0, frag_size, DMA_FROM_DEVICE);
+		prefetchw(hdr);
+		prefetch(data);
+		skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size);
+
+		if (unlikely(!skb))
+			return NULL;
+
+		/* queue up for recycling/reuse */
+		page_ref_inc(head->page);
+
+	} else {
+		/* allocate SKB and copy header for large header */
+		skb = napi_alloc_skb(rq->cq.napi,
+				     ALIGN(head_size, sizeof(long)));
+		if (unlikely(!skb)) {
+			rq->stats->buff_alloc_err++;
+			return NULL;
+		}
+
+		prefetchw(skb->data);
+		mlx5e_copy_skb_header(rq->pdev, skb, head,
+				      head_offset + rx_headroom,
+				      rx_headroom, head_size);
+		/* skb linear part was allocated with headlen and aligned to long */
+		skb->tail += head_size;
+		skb->len  += head_size;
+	}
+	return skb;
+}
+
+static void
+mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
+{
+	struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
+	u64 addr = shampo->info[header_index].addr;
+
+	if (((header_index + 1) & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) == 0) {
+		shampo->info[header_index].addr = ALIGN_DOWN(addr, PAGE_SIZE);
+		mlx5e_page_release(rq, &shampo->info[header_index], true);
+	}
+	bitmap_clear(shampo->bitmap, header_index, 1);
+}
+
+static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
+{
+	u16 data_bcnt		= mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size;
+	u16 header_index	= be16_to_cpu(cqe->shampo.header_entry_index);
+	u32 wqe_offset		= be32_to_cpu(cqe->shampo.data_offset);
+	u16 cstrides		= mpwrq_get_cqe_consumed_strides(cqe);
+	u32 data_offset		= wqe_offset & (PAGE_SIZE - 1);
+	u32 cqe_bcnt		= mpwrq_get_cqe_byte_cnt(cqe);
+	u16 wqe_id		= be16_to_cpu(cqe->wqe_id);
+	u32 page_idx		= wqe_offset >> PAGE_SHIFT;
+	struct mlx5e_rx_wqe_ll *wqe;
+	struct sk_buff *skb = NULL;
+	struct mlx5e_dma_info *di;
+	struct mlx5e_mpw_info *wi;
+	struct mlx5_wq_ll *wq;
+
+	wi = &rq->mpwqe.info[wqe_id];
+	wi->consumed_strides += cstrides;
+
+	if (unlikely(MLX5E_RX_ERR_CQE(cqe))) {
+		trigger_report(rq, cqe);
+		rq->stats->wqe_err++;
+		goto mpwrq_cqe_out;
+	}
+
+	if (unlikely(mpwrq_is_filler_cqe(cqe))) {
+		struct mlx5e_rq_stats *stats = rq->stats;
+
+		stats->mpwqe_filler_cqes++;
+		stats->mpwqe_filler_strides += cstrides;
+		goto mpwrq_cqe_out;
+	}
+
+	skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
+
+	if (unlikely(!skb))
+		goto free_hd_entry;
+
+	di = &wi->umr.dma_info[page_idx];
+	mlx5e_fill_skb_data(skb, rq, di, data_bcnt, data_offset);
+
+	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+	napi_gro_receive(rq->cq.napi, skb);
+free_hd_entry:
+	mlx5e_free_rx_shampo_hd_entry(rq, header_index);
+mpwrq_cqe_out:
+	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
+		return;
+
+	wq  = &rq->mpwqe.wq;
+	wqe = mlx5_wq_ll_get_wqe(wq, wqe_id);
+	mlx5e_free_rx_mpwqe(rq, wi, true);
+	mlx5_wq_ll_pop(wq, cqe->wqe_id, &wqe->next.next_wqe_index);
+}
+
 static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 {
 	u16 cstrides       = mpwrq_get_cqe_consumed_strides(cqe);
@@ -1617,8 +1752,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 
 		mlx5_cqwq_pop(cqwq);
 
-		INDIRECT_CALL_2(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
-				mlx5e_handle_rx_cqe, rq, cqe);
+		INDIRECT_CALL_3(rq->handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq,
+				mlx5e_handle_rx_cqe, mlx5e_handle_rx_cqe_mpwrq_shampo,
+				rq, cqe);
 	} while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(cqwq)));
 
 out:
@@ -1822,15 +1958,24 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
 		rq->post_wqes = mlx5e_post_rx_mpwqes;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
-		rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
 		if (mlx5_fpga_is_ipsec_device(mdev)) {
 			netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
 			return -EINVAL;
 		}
-		if (!rq->handle_rx_cqe) {
-			netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
-			return -EINVAL;
+		if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
+			rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe_shampo;
+			if (!rq->handle_rx_cqe) {
+				netdev_err(netdev, "RX handler of SHAMPO MPWQE RQ is not set\n");
+				return -EINVAL;
+			}
+		} else {
+			rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
+			if (!rq->handle_rx_cqe) {
+				netdev_err(netdev, "RX handler of MPWQE RQ is not set\n");
+				return -EINVAL;
+			}
 		}
+
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		rq->wqe.skb_from_cqe = xsk ?
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index c920e5932368..56bcf95d4ab7 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -800,10 +800,23 @@ struct mlx5_cqe64 {
 	u8		tls_outer_l3_tunneled;
 	u8		rsvd0;
 	__be16		wqe_id;
-	u8		lro_tcppsh_abort_dupack;
-	u8		lro_min_ttl;
-	__be16		lro_tcp_win;
-	__be32		lro_ack_seq_num;
+	union {
+		struct {
+			u8	tcppsh_abort_dupack;
+			u8	min_ttl;
+			__be16	tcp_win;
+			__be32	ack_seq_num;
+		} lro;
+		struct {
+			u8	reserved0:1;
+			u8	match:1;
+			u8	flush:1;
+			u8	reserved3:5;
+			u8	header_size;
+			__be16	header_entry_index;
+			__be32	data_offset;
+		} shampo;
+	};
 	__be32		rss_hash_result;
 	u8		rss_hash_type;
 	u8		ml_path;
@@ -873,7 +886,7 @@ static inline u8 get_cqe_opcode(struct mlx5_cqe64 *cqe)
 
 static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
 {
-	return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
+	return (cqe->lro.tcppsh_abort_dupack >> 6) & 1;
 }
 
 static inline u8 get_cqe_l4_hdr_type(struct mlx5_cqe64 *cqe)
-- 
cgit v1.2.3


From a2247f19ee1c5ad75ef095cdfb909a3244b88aa8 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 27 Oct 2021 11:22:19 +0900
Subject: block: Add independent access ranges support

The Concurrent Positioning Ranges VPD page (for SCSI) and data log page
(for ATA) contain parameters describing the set of contiguous LBAs that
can be served independently by a single LUN multi-actuator hard-disk.
Similarly, a logically defined block device composed of multiple disks
can in some cases execute requests directed at different sector ranges
in parallel. A dm-linear device aggregating 2 block devices together is
an example.

This patch implements support for exposing a block device independent
access ranges to the user through sysfs to allow optimizing device
accesses to increase performance.

To describe the set of independent sector ranges of a device (actuators
of a multi-actuator HDDs or table entries of a dm-linear device),
The type struct blk_independent_access_ranges is introduced. This
structure describes the sector ranges using an array of
struct blk_independent_access_range structures. This range structure
defines the start sector and number of sectors of the access range.
The ranges in the array cannot overlap and must contain all sectors
within the device capacity.

The function disk_set_independent_access_ranges() allows a device
driver to signal to the block layer that a device has multiple
independent access ranges.  In this case, a struct
blk_independent_access_ranges is attached to the device request queue
by the function disk_set_independent_access_ranges(). The function
disk_alloc_independent_access_ranges() is provided for drivers to
allocate this structure.

struct blk_independent_access_ranges contains kobjects (struct kobject)
to expose to the user through sysfs the set of independent access ranges
supported by a device. When the device is initialized, sysfs
registration of the ranges information is done from blk_register_queue()
using the block layer internal function
disk_register_independent_access_ranges(). If a driver calls
disk_set_independent_access_ranges() for a registered queue, e.g. when a
device is revalidated, disk_set_independent_access_ranges() will execute
disk_register_independent_access_ranges() to update the sysfs attribute
files.  The sysfs file structure created starts from the
independent_access_ranges sub-directory and contains the start sector
and number of sectors of each range, with the information for each range
grouped in numbered sub-directories.

E.g. for a dual actuator HDD, the user sees:

$ tree /sys/block/sdk/queue/independent_access_ranges/
/sys/block/sdk/queue/independent_access_ranges/
|-- 0
|   |-- nr_sectors
|   `-- sector
`-- 1
    |-- nr_sectors
    `-- sector

For a regular device with a single access range, the
independent_access_ranges sysfs directory does not exist.

Device revalidation may lead to changes to this structure and to the
attribute values. When manipulated, the queue sysfs_lock and
sysfs_dir_lock mutexes are held for atomicity, similarly to how the
blk-mq and elevator sysfs queue sub-directories are protected.

The code related to the management of independent access ranges is
added in the new file block/blk-ia-ranges.c.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20211027022223.183838-2-damien.lemoal@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Makefile         |   2 +-
 block/blk-ia-ranges.c  | 348 +++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-sysfs.c      |  26 ++--
 block/blk.h            |   4 +
 include/linux/blkdev.h |  39 ++++++
 5 files changed, 410 insertions(+), 9 deletions(-)
 create mode 100644 block/blk-ia-ranges.c

(limited to 'include/linux')

diff --git a/block/Makefile b/block/Makefile
index 602f7f47b7b6..44df57e562bf 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -9,7 +9,7 @@ obj-y		:= bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
 			blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
 			blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \
 			genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
-			disk-events.o
+			disk-events.o blk-ia-ranges.o
 
 obj-$(CONFIG_BOUNCE)		+= bounce.o
 obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c
new file mode 100644
index 000000000000..c246c425d0d7
--- /dev/null
+++ b/block/blk-ia-ranges.c
@@ -0,0 +1,348 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Block device concurrent positioning ranges.
+ *
+ *  Copyright (C) 2021 Western Digital Corporation or its Affiliates.
+ */
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+
+#include "blk.h"
+
+static ssize_t
+blk_ia_range_sector_show(struct blk_independent_access_range *iar,
+			 char *buf)
+{
+	return sprintf(buf, "%llu\n", iar->sector);
+}
+
+static ssize_t
+blk_ia_range_nr_sectors_show(struct blk_independent_access_range *iar,
+			     char *buf)
+{
+	return sprintf(buf, "%llu\n", iar->nr_sectors);
+}
+
+struct blk_ia_range_sysfs_entry {
+	struct attribute attr;
+	ssize_t (*show)(struct blk_independent_access_range *iar, char *buf);
+};
+
+static struct blk_ia_range_sysfs_entry blk_ia_range_sector_entry = {
+	.attr = { .name = "sector", .mode = 0444 },
+	.show = blk_ia_range_sector_show,
+};
+
+static struct blk_ia_range_sysfs_entry blk_ia_range_nr_sectors_entry = {
+	.attr = { .name = "nr_sectors", .mode = 0444 },
+	.show = blk_ia_range_nr_sectors_show,
+};
+
+static struct attribute *blk_ia_range_attrs[] = {
+	&blk_ia_range_sector_entry.attr,
+	&blk_ia_range_nr_sectors_entry.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(blk_ia_range);
+
+static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj,
+				      struct attribute *attr, char *buf)
+{
+	struct blk_ia_range_sysfs_entry *entry =
+		container_of(attr, struct blk_ia_range_sysfs_entry, attr);
+	struct blk_independent_access_range *iar =
+		container_of(kobj, struct blk_independent_access_range, kobj);
+	ssize_t ret;
+
+	mutex_lock(&iar->queue->sysfs_lock);
+	ret = entry->show(iar, buf);
+	mutex_unlock(&iar->queue->sysfs_lock);
+
+	return ret;
+}
+
+static const struct sysfs_ops blk_ia_range_sysfs_ops = {
+	.show	= blk_ia_range_sysfs_show,
+};
+
+/*
+ * Independent access range entries are not freed individually, but alltogether
+ * with struct blk_independent_access_ranges and its array of ranges. Since
+ * kobject_add() takes a reference on the parent kobject contained in
+ * struct blk_independent_access_ranges, the array of independent access range
+ * entries cannot be freed until kobject_del() is called for all entries.
+ * So we do not need to do anything here, but still need this no-op release
+ * operation to avoid complaints from the kobject code.
+ */
+static void blk_ia_range_sysfs_nop_release(struct kobject *kobj)
+{
+}
+
+static struct kobj_type blk_ia_range_ktype = {
+	.sysfs_ops	= &blk_ia_range_sysfs_ops,
+	.default_groups	= blk_ia_range_groups,
+	.release	= blk_ia_range_sysfs_nop_release,
+};
+
+/*
+ * This will be executed only after all independent access range entries are
+ * removed with kobject_del(), at which point, it is safe to free everything,
+ * including the array of ranges.
+ */
+static void blk_ia_ranges_sysfs_release(struct kobject *kobj)
+{
+	struct blk_independent_access_ranges *iars =
+		container_of(kobj, struct blk_independent_access_ranges, kobj);
+
+	kfree(iars);
+}
+
+static struct kobj_type blk_ia_ranges_ktype = {
+	.release	= blk_ia_ranges_sysfs_release,
+};
+
+/**
+ * disk_register_ia_ranges - register with sysfs a set of independent
+ *			    access ranges
+ * @disk:	Target disk
+ * @new_iars:	New set of independent access ranges
+ *
+ * Register with sysfs a set of independent access ranges for @disk.
+ * If @new_iars is not NULL, this set of ranges is registered and the old set
+ * specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is
+ * registered if it is not already.
+ */
+int disk_register_independent_access_ranges(struct gendisk *disk,
+				struct blk_independent_access_ranges *new_iars)
+{
+	struct request_queue *q = disk->queue;
+	struct blk_independent_access_ranges *iars;
+	int i, ret;
+
+	lockdep_assert_held(&q->sysfs_dir_lock);
+	lockdep_assert_held(&q->sysfs_lock);
+
+	/* If a new range set is specified, unregister the old one */
+	if (new_iars) {
+		if (q->ia_ranges)
+			disk_unregister_independent_access_ranges(disk);
+		q->ia_ranges = new_iars;
+	}
+
+	iars = q->ia_ranges;
+	if (!iars)
+		return 0;
+
+	/*
+	 * At this point, iars is the new set of sector access ranges that needs
+	 * to be registered with sysfs.
+	 */
+	WARN_ON(iars->sysfs_registered);
+	ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype,
+				   &q->kobj, "%s", "independent_access_ranges");
+	if (ret) {
+		q->ia_ranges = NULL;
+		kfree(iars);
+		return ret;
+	}
+
+	for (i = 0; i < iars->nr_ia_ranges; i++) {
+		iars->ia_range[i].queue = q;
+		ret = kobject_init_and_add(&iars->ia_range[i].kobj,
+					   &blk_ia_range_ktype, &iars->kobj,
+					   "%d", i);
+		if (ret) {
+			while (--i >= 0)
+				kobject_del(&iars->ia_range[i].kobj);
+			kobject_del(&iars->kobj);
+			kobject_put(&iars->kobj);
+			return ret;
+		}
+	}
+
+	iars->sysfs_registered = true;
+
+	return 0;
+}
+
+void disk_unregister_independent_access_ranges(struct gendisk *disk)
+{
+	struct request_queue *q = disk->queue;
+	struct blk_independent_access_ranges *iars = q->ia_ranges;
+	int i;
+
+	lockdep_assert_held(&q->sysfs_dir_lock);
+	lockdep_assert_held(&q->sysfs_lock);
+
+	if (!iars)
+		return;
+
+	if (iars->sysfs_registered) {
+		for (i = 0; i < iars->nr_ia_ranges; i++)
+			kobject_del(&iars->ia_range[i].kobj);
+		kobject_del(&iars->kobj);
+		kobject_put(&iars->kobj);
+	} else {
+		kfree(iars);
+	}
+
+	q->ia_ranges = NULL;
+}
+
+static struct blk_independent_access_range *
+disk_find_ia_range(struct blk_independent_access_ranges *iars,
+		  sector_t sector)
+{
+	struct blk_independent_access_range *iar;
+	int i;
+
+	for (i = 0; i < iars->nr_ia_ranges; i++) {
+		iar = &iars->ia_range[i];
+		if (sector >= iar->sector &&
+		    sector < iar->sector + iar->nr_sectors)
+			return iar;
+	}
+
+	return NULL;
+}
+
+static bool disk_check_ia_ranges(struct gendisk *disk,
+				struct blk_independent_access_ranges *iars)
+{
+	struct blk_independent_access_range *iar, *tmp;
+	sector_t capacity = get_capacity(disk);
+	sector_t sector = 0;
+	int i;
+
+	/*
+	 * While sorting the ranges in increasing LBA order, check that the
+	 * ranges do not overlap, that there are no sector holes and that all
+	 * sectors belong to one range.
+	 */
+	for (i = 0; i < iars->nr_ia_ranges; i++) {
+		tmp = disk_find_ia_range(iars, sector);
+		if (!tmp || tmp->sector != sector) {
+			pr_warn("Invalid non-contiguous independent access ranges\n");
+			return false;
+		}
+
+		iar = &iars->ia_range[i];
+		if (tmp != iar) {
+			swap(iar->sector, tmp->sector);
+			swap(iar->nr_sectors, tmp->nr_sectors);
+		}
+
+		sector += iar->nr_sectors;
+	}
+
+	if (sector != capacity) {
+		pr_warn("Independent access ranges do not match disk capacity\n");
+		return false;
+	}
+
+	return true;
+}
+
+static bool disk_ia_ranges_changed(struct gendisk *disk,
+				   struct blk_independent_access_ranges *new)
+{
+	struct blk_independent_access_ranges *old = disk->queue->ia_ranges;
+	int i;
+
+	if (!old)
+		return true;
+
+	if (old->nr_ia_ranges != new->nr_ia_ranges)
+		return true;
+
+	for (i = 0; i < old->nr_ia_ranges; i++) {
+		if (new->ia_range[i].sector != old->ia_range[i].sector ||
+		    new->ia_range[i].nr_sectors != old->ia_range[i].nr_sectors)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * disk_alloc_independent_access_ranges - Allocate an independent access ranges
+ *                                        data structure
+ * @disk:		target disk
+ * @nr_ia_ranges:	Number of independent access ranges
+ *
+ * Allocate a struct blk_independent_access_ranges structure with @nr_ia_ranges
+ * access range descriptors.
+ */
+struct blk_independent_access_ranges *
+disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges)
+{
+	struct blk_independent_access_ranges *iars;
+
+	iars = kzalloc_node(struct_size(iars, ia_range, nr_ia_ranges),
+			    GFP_KERNEL, disk->queue->node);
+	if (iars)
+		iars->nr_ia_ranges = nr_ia_ranges;
+	return iars;
+}
+EXPORT_SYMBOL_GPL(disk_alloc_independent_access_ranges);
+
+/**
+ * disk_set_independent_access_ranges - Set a disk independent access ranges
+ * @disk:	target disk
+ * @iars:	independent access ranges structure
+ *
+ * Set the independent access ranges information of the request queue
+ * of @disk to @iars. If @iars is NULL and the independent access ranges
+ * structure already set is cleared. If there are no differences between
+ * @iars and the independent access ranges structure already set, @iars
+ * is freed.
+ */
+void disk_set_independent_access_ranges(struct gendisk *disk,
+				struct blk_independent_access_ranges *iars)
+{
+	struct request_queue *q = disk->queue;
+
+	if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) {
+		kfree(iars);
+		iars = NULL;
+	}
+
+	mutex_lock(&q->sysfs_dir_lock);
+	mutex_lock(&q->sysfs_lock);
+
+	if (iars) {
+		if (!disk_check_ia_ranges(disk, iars)) {
+			kfree(iars);
+			iars = NULL;
+			goto reg;
+		}
+
+		if (!disk_ia_ranges_changed(disk, iars)) {
+			kfree(iars);
+			goto unlock;
+		}
+	}
+
+	/*
+	 * This may be called for a registered queue. E.g. during a device
+	 * revalidation. If that is the case, we need to unregister the old
+	 * set of independent access ranges and register the new set. If the
+	 * queue is not registered, registration of the device request queue
+	 * will register the independent access ranges, so only swap in the
+	 * new set and free the old one.
+	 */
+reg:
+	if (blk_queue_registered(q)) {
+		disk_register_independent_access_ranges(disk, iars);
+	} else {
+		swap(q->ia_ranges, iars);
+		kfree(iars);
+	}
+
+unlock:
+	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->sysfs_dir_lock);
+}
+EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 36f14d658e81..cef1f713370b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -873,16 +873,15 @@ int blk_register_queue(struct gendisk *disk)
 	}
 
 	mutex_lock(&q->sysfs_lock);
+
+	ret = disk_register_independent_access_ranges(disk, NULL);
+	if (ret)
+		goto put_dev;
+
 	if (q->elevator) {
 		ret = elv_register_queue(q, false);
-		if (ret) {
-			mutex_unlock(&q->sysfs_lock);
-			mutex_unlock(&q->sysfs_dir_lock);
-			kobject_del(&q->kobj);
-			blk_trace_remove_sysfs(dev);
-			kobject_put(&dev->kobj);
-			return ret;
-		}
+		if (ret)
+			goto put_dev;
 	}
 
 	blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
@@ -913,6 +912,16 @@ unlock:
 		percpu_ref_switch_to_percpu(&q->q_usage_counter);
 	}
 
+	return ret;
+
+put_dev:
+	disk_unregister_independent_access_ranges(disk);
+	mutex_unlock(&q->sysfs_lock);
+	mutex_unlock(&q->sysfs_dir_lock);
+	kobject_del(&q->kobj);
+	blk_trace_remove_sysfs(dev);
+	kobject_put(&dev->kobj);
+
 	return ret;
 }
 
@@ -958,6 +967,7 @@ void blk_unregister_queue(struct gendisk *disk)
 	mutex_lock(&q->sysfs_lock);
 	if (q->elevator)
 		elv_unregister_queue(q);
+	disk_unregister_independent_access_ranges(disk);
 	mutex_unlock(&q->sysfs_lock);
 	mutex_unlock(&q->sysfs_dir_lock);
 
diff --git a/block/blk.h b/block/blk.h
index 6a039e6c7d07..7afffd548daf 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -454,4 +454,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
 extern const struct address_space_operations def_blk_aops;
 
+int disk_register_independent_access_ranges(struct gendisk *disk,
+				struct blk_independent_access_ranges *new_iars);
+void disk_unregister_independent_access_ranges(struct gendisk *disk);
+
 #endif /* BLK_INTERNAL_H */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f72ccb2829db..6d95a4b36cfa 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -150,6 +150,34 @@ static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev,
 
 #endif /* CONFIG_BLK_DEV_ZONED */
 
+/*
+ * Independent access ranges: struct blk_independent_access_range describes
+ * a range of contiguous sectors that can be accessed using device command
+ * execution resources that are independent from the resources used for
+ * other access ranges. This is typically found with single-LUN multi-actuator
+ * HDDs where each access range is served by a different set of heads.
+ * The set of independent ranges supported by the device is defined using
+ * struct blk_independent_access_ranges. The independent ranges must not overlap
+ * and must include all sectors within the disk capacity (no sector holes
+ * allowed).
+ * For a device with multiple ranges, requests targeting sectors in different
+ * ranges can be executed in parallel. A request can straddle an access range
+ * boundary.
+ */
+struct blk_independent_access_range {
+	struct kobject		kobj;
+	struct request_queue	*queue;
+	sector_t		sector;
+	sector_t		nr_sectors;
+};
+
+struct blk_independent_access_ranges {
+	struct kobject				kobj;
+	bool					sysfs_registered;
+	unsigned int				nr_ia_ranges;
+	struct blk_independent_access_range	ia_range[];
+};
+
 struct request_queue {
 	struct request		*last_merge;
 	struct elevator_queue	*elevator;
@@ -331,6 +359,12 @@ struct request_queue {
 
 #define BLK_MAX_WRITE_HINTS	5
 	u64			write_hints[BLK_MAX_WRITE_HINTS];
+
+	/*
+	 * Independent sector access ranges. This is always NULL for
+	 * devices that do not have multiple independent access ranges.
+	 */
+	struct blk_independent_access_ranges *ia_ranges;
 };
 
 /* Keep blk_queue_flag_name[] in sync with the definitions below */
@@ -698,6 +732,11 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua);
 
+struct blk_independent_access_ranges *
+disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges);
+void disk_set_independent_access_ranges(struct gendisk *disk,
+				struct blk_independent_access_ranges *iars);
+
 /*
  * Elevator features for blk_queue_required_elevator_features:
  */
-- 
cgit v1.2.3


From fe22e1c2f705676a705d821301fc52eecc2fe055 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Wed, 27 Oct 2021 11:22:21 +0900
Subject: libata: support concurrent positioning ranges log

Add support to discover if an ATA device supports the Concurrent
Positioning Ranges data log (address 0x47), indicating that the device
is capable of seeking to multiple different locations in parallel using
multiple actuators serving different LBA ranges.

Also add support to translate the concurrent positioning ranges log
into its equivalent Concurrent Positioning Ranges VPD page B9h in
libata-scsi.c.

The format of the Concurrent Positioning Ranges Log is defined in ACS-5
r9.

Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Link: https://lore.kernel.org/r/20211027022223.183838-4-damien.lemoal@wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/ata/libata-core.c | 57 +++++++++++++++++++++++++++++++++++++++++++++--
 drivers/ata/libata-scsi.c | 48 +++++++++++++++++++++++++++++++--------
 include/linux/ata.h       |  1 +
 include/linux/libata.h    | 15 +++++++++++++
 4 files changed, 110 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index eed65311b5d1..75f1a6cd6621 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2459,18 +2459,70 @@ static void ata_dev_config_devslp(struct ata_device *dev)
 	}
 }
 
+static void ata_dev_config_cpr(struct ata_device *dev)
+{
+	unsigned int err_mask;
+	size_t buf_len;
+	int i, nr_cpr = 0;
+	struct ata_cpr_log *cpr_log = NULL;
+	u8 *desc, *buf = NULL;
+
+	if (!ata_identify_page_supported(dev,
+				 ATA_LOG_CONCURRENT_POSITIONING_RANGES))
+		goto out;
+
+	/*
+	 * Read IDENTIFY DEVICE data log, page 0x47
+	 * (concurrent positioning ranges). We can have at most 255 32B range
+	 * descriptors plus a 64B header.
+	 */
+	buf_len = (64 + 255 * 32 + 511) & ~511;
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		goto out;
+
+	err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE,
+				     ATA_LOG_CONCURRENT_POSITIONING_RANGES,
+				     buf, buf_len >> 9);
+	if (err_mask)
+		goto out;
+
+	nr_cpr = buf[0];
+	if (!nr_cpr)
+		goto out;
+
+	cpr_log = kzalloc(struct_size(cpr_log, cpr, nr_cpr), GFP_KERNEL);
+	if (!cpr_log)
+		goto out;
+
+	cpr_log->nr_cpr = nr_cpr;
+	desc = &buf[64];
+	for (i = 0; i < nr_cpr; i++, desc += 32) {
+		cpr_log->cpr[i].num = desc[0];
+		cpr_log->cpr[i].num_storage_elements = desc[1];
+		cpr_log->cpr[i].start_lba = get_unaligned_le64(&desc[8]);
+		cpr_log->cpr[i].num_lbas = get_unaligned_le64(&desc[16]);
+	}
+
+out:
+	swap(dev->cpr_log, cpr_log);
+	kfree(cpr_log);
+	kfree(buf);
+}
+
 static void ata_dev_print_features(struct ata_device *dev)
 {
 	if (!(dev->flags & ATA_DFLAG_FEATURES_MASK))
 		return;
 
 	ata_dev_info(dev,
-		     "Features:%s%s%s%s%s\n",
+		     "Features:%s%s%s%s%s%s\n",
 		     dev->flags & ATA_DFLAG_TRUSTED ? " Trust" : "",
 		     dev->flags & ATA_DFLAG_DA ? " Dev-Attention" : "",
 		     dev->flags & ATA_DFLAG_DEVSLP ? " Dev-Sleep" : "",
 		     dev->flags & ATA_DFLAG_NCQ_SEND_RECV ? " NCQ-sndrcv" : "",
-		     dev->flags & ATA_DFLAG_NCQ_PRIO ? " NCQ-prio" : "");
+		     dev->flags & ATA_DFLAG_NCQ_PRIO ? " NCQ-prio" : "",
+		     dev->cpr_log ? " CPR" : "");
 }
 
 /**
@@ -2634,6 +2686,7 @@ int ata_dev_configure(struct ata_device *dev)
 		ata_dev_config_sense_reporting(dev);
 		ata_dev_config_zac(dev);
 		ata_dev_config_trusted(dev);
+		ata_dev_config_cpr(dev);
 		dev->cdb_len = 32;
 
 		if (ata_msg_drv(ap) && print_info)
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 1fb4611f7eeb..15a279f773c7 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1895,7 +1895,7 @@ static unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf)
  */
 static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
 {
-	int num_pages;
+	int i, num_pages = 0;
 	static const u8 pages[] = {
 		0x00,	/* page 0x00, this page */
 		0x80,	/* page 0x80, unit serial no page */
@@ -1905,13 +1905,17 @@ static unsigned int ata_scsiop_inq_00(struct ata_scsi_args *args, u8 *rbuf)
 		0xb1,	/* page 0xb1, block device characteristics page */
 		0xb2,	/* page 0xb2, thin provisioning page */
 		0xb6,	/* page 0xb6, zoned block device characteristics */
+		0xb9,	/* page 0xb9, concurrent positioning ranges */
 	};
 
-	num_pages = sizeof(pages);
-	if (!(args->dev->flags & ATA_DFLAG_ZAC))
-		num_pages--;
+	for (i = 0; i < sizeof(pages); i++) {
+		if (pages[i] == 0xb6 &&
+		    !(args->dev->flags & ATA_DFLAG_ZAC))
+			continue;
+		rbuf[num_pages + 4] = pages[i];
+		num_pages++;
+	}
 	rbuf[3] = num_pages;	/* number of supported VPD pages */
-	memcpy(rbuf + 4, pages, num_pages);
 	return 0;
 }
 
@@ -2121,6 +2125,26 @@ static unsigned int ata_scsiop_inq_b6(struct ata_scsi_args *args, u8 *rbuf)
 	return 0;
 }
 
+static unsigned int ata_scsiop_inq_b9(struct ata_scsi_args *args, u8 *rbuf)
+{
+	struct ata_cpr_log *cpr_log = args->dev->cpr_log;
+	u8 *desc = &rbuf[64];
+	int i;
+
+	/* SCSI Concurrent Positioning Ranges VPD page: SBC-5 rev 1 or later */
+	rbuf[1] = 0xb9;
+	put_unaligned_be16(64 + (int)cpr_log->nr_cpr * 32 - 4, &rbuf[3]);
+
+	for (i = 0; i < cpr_log->nr_cpr; i++, desc += 32) {
+		desc[0] = cpr_log->cpr[i].num;
+		desc[1] = cpr_log->cpr[i].num_storage_elements;
+		put_unaligned_be64(cpr_log->cpr[i].start_lba, &desc[8]);
+		put_unaligned_be64(cpr_log->cpr[i].num_lbas, &desc[16]);
+	}
+
+	return 0;
+}
+
 /**
  *	modecpy - Prepare response for MODE SENSE
  *	@dest: output buffer
@@ -4120,11 +4144,17 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd)
 			ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b2);
 			break;
 		case 0xb6:
-			if (dev->flags & ATA_DFLAG_ZAC) {
+			if (dev->flags & ATA_DFLAG_ZAC)
 				ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b6);
-				break;
-			}
-			fallthrough;
+			else
+				ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
+			break;
+		case 0xb9:
+			if (dev->cpr_log)
+				ata_scsi_rbuf_fill(&args, ata_scsiop_inq_b9);
+			else
+				ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
+			break;
 		default:
 			ata_scsi_set_invalid_field(dev, cmd, 2, 0xff);
 			break;
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 1b44f40c7700..199e47e97d64 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -329,6 +329,7 @@ enum {
 	ATA_LOG_SECURITY	  = 0x06,
 	ATA_LOG_SATA_SETTINGS	  = 0x08,
 	ATA_LOG_ZONED_INFORMATION = 0x09,
+	ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47,
 
 	/* Identify device SATA settings log:*/
 	ATA_LOG_DEVSLP_OFFSET	  = 0x30,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c0c64f03e107..236ec689056a 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -676,6 +676,18 @@ struct ata_ering {
 	struct ata_ering_entry	ring[ATA_ERING_SIZE];
 };
 
+struct ata_cpr {
+	u8			num;
+	u8			num_storage_elements;
+	u64			start_lba;
+	u64			num_lbas;
+};
+
+struct ata_cpr_log {
+	u8			nr_cpr;
+	struct ata_cpr		cpr[];
+};
+
 struct ata_device {
 	struct ata_link		*link;
 	unsigned int		devno;		/* 0 or 1 */
@@ -735,6 +747,9 @@ struct ata_device {
 	u32			zac_zones_optimal_nonseq;
 	u32			zac_zones_max_open;
 
+	/* Concurrent positioning ranges */
+	struct ata_cpr_log	*cpr_log;
+
 	/* error history */
 	int			spdn_cnt;
 	/* ering is CLEAR_END, read comment above CLEAR_END */
-- 
cgit v1.2.3


From 785d584c30ffc1224027536fe55bdc15ee509f14 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Mon, 18 Oct 2021 17:21:37 +0200
Subject: nvme: add new discovery log page entry definitions

TP8014 adds a new SUBTYPE value and a new field EFLAGS for the
discovery log page entry.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 357482dedb59..855dd9b3e84b 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -27,8 +27,14 @@
 #define NVME_NSID_ALL		0xffffffff
 
 enum nvme_subsys_type {
-	NVME_NQN_DISC	= 1,		/* Discovery type target subsystem */
-	NVME_NQN_NVME	= 2,		/* NVME type target subsystem */
+	/* Referral to another discovery type target subsystem */
+	NVME_NQN_DISC	= 1,
+
+	/* NVME type target subsystem */
+	NVME_NQN_NVME	= 2,
+
+	/* Current discovery type target subsystem */
+	NVME_NQN_CURR	= 3,
 };
 
 enum nvme_ctrl_type {
@@ -1312,6 +1318,12 @@ struct nvmf_common_command {
 
 #define MAX_DISC_LOGS	255
 
+/* Discovery log page entry flags (EFLAGS): */
+enum {
+	NVME_DISC_EFLAGS_EPCSD		= (1 << 1),
+	NVME_DISC_EFLAGS_DUPRETINFO	= (1 << 0),
+};
+
 /* Discovery log page entry */
 struct nvmf_disc_rsp_page_entry {
 	__u8		trtype;
@@ -1321,7 +1333,8 @@ struct nvmf_disc_rsp_page_entry {
 	__le16		portid;
 	__le16		cntlid;
 	__le16		asqsz;
-	__u8		resv8[22];
+	__le16		eflags;
+	__u8		resv10[20];
 	char		trsvcid[NVMF_TRSVCID_SIZE];
 	__u8		resv64[192];
 	char		subnqn[NVMF_NQN_FIELD_LEN];
-- 
cgit v1.2.3


From 33f98a9798f55fd77c36ce79d8cfa5329e55a789 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 13 Oct 2021 10:57:41 -0700
Subject: x86/boot/compressed: Avoid duplicate malloc() implementations

The early malloc() and free() implementation in include/linux/decompress/mm.h
(which is also included by the static decompressors) is static. This is
fine when the only thing interested in using malloc() is the decompression
code, but the x86 early boot environment may use malloc() in a couple places,
leading to a potential collision when the static copies of the available
memory region ("malloc_ptr") gets reset to the global "free_mem_ptr" value.
As it happened, the existing usage pattern was accidentally safe because each
user did 1 malloc() and 1 free() before returning and were not nested:

extract_kernel() (misc.c)
	choose_random_location() (kaslr.c)
		mem_avoid_init()
			handle_mem_options()
				malloc()
				...
				free()
	...
	parse_elf() (misc.c)
		malloc()
		...
		free()

Once the future FGKASLR series is added, however, it will insert
additional malloc() calls local to fgkaslr.c in the middle of
parse_elf()'s malloc()/free() pair:

	parse_elf() (misc.c)
		malloc()
		if (...) {
			layout_randomized_image(output, &ehdr, phdrs);
				malloc() <- boom
				...
		else
			layout_image(output, &ehdr, phdrs);
		free()

To avoid collisions, there must be a single implementation of malloc().
Adjust include/linux/decompress/mm.h so that visibility can be
controlled, provide prototypes in misc.h, and implement the functions in
misc.c. This also results in a small size savings:

$ size vmlinux.before vmlinux.after
   text    data     bss     dec     hex filename
8842314     468  178320 9021102  89a6ae vmlinux.before
8842240     468  178320 9021028  89a664 vmlinux.after

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20211013175742.1197608-4-keescook@chromium.org
---
 arch/x86/boot/compressed/kaslr.c |  4 ----
 arch/x86/boot/compressed/misc.c  |  3 +++
 arch/x86/boot/compressed/misc.h  |  2 ++
 include/linux/decompress/mm.h    | 12 ++++++++++--
 4 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 67c3208b668a..411b268bc0a2 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -32,10 +32,6 @@
 #include <generated/utsrelease.h>
 #include <asm/efi.h>
 
-/* Macros used by the included decompressor code below. */
-#define STATIC
-#include <linux/decompress/mm.h>
-
 #define _SETUP
 #include <asm/setup.h>	/* For COMMAND_LINE_SIZE */
 #undef _SETUP
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 743f13ea25c1..a4339cb2d247 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -28,6 +28,9 @@
 
 /* Macros used by the included decompressor code below. */
 #define STATIC		static
+/* Define an externally visible malloc()/free(). */
+#define MALLOC_VISIBLE
+#include <linux/decompress/mm.h>
 
 /*
  * Provide definitions of memzero and memmove as some of the decompressors will
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 31139256859f..975ef4ae7395 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -44,6 +44,8 @@ extern char _head[], _end[];
 /* misc.c */
 extern memptr free_mem_ptr;
 extern memptr free_mem_end_ptr;
+void *malloc(int size);
+void free(void *where);
 extern struct boot_params *boot_params;
 void __putstr(const char *s);
 void __puthex(unsigned long value);
diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h
index 868e9eacd69e..9192986b1a73 100644
--- a/include/linux/decompress/mm.h
+++ b/include/linux/decompress/mm.h
@@ -25,13 +25,21 @@
 #define STATIC_RW_DATA static
 #endif
 
+/*
+ * When an architecture needs to share the malloc()/free() implementation
+ * between compilation units, it needs to have non-local visibility.
+ */
+#ifndef MALLOC_VISIBLE
+#define MALLOC_VISIBLE static
+#endif
+
 /* A trivial malloc implementation, adapted from
  *  malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
  */
 STATIC_RW_DATA unsigned long malloc_ptr;
 STATIC_RW_DATA int malloc_count;
 
-static void *malloc(int size)
+MALLOC_VISIBLE void *malloc(int size)
 {
 	void *p;
 
@@ -52,7 +60,7 @@ static void *malloc(int size)
 	return p;
 }
 
-static void free(void *where)
+MALLOC_VISIBLE void free(void *where)
 {
 	malloc_count--;
 	if (!malloc_count)
-- 
cgit v1.2.3


From 9baf93d68bcc3d0a6042283b82603c076e25e4f5 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 25 Oct 2021 16:27:16 -0300
Subject: fsnotify: pass data_type to fsnotify_name()

Align the arguments of fsnotify_name() to those of fsnotify().

Link: https://lore.kernel.org/r/20211025192746.66445-2-krisman@collabora.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify.h | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 12d3a7d308ab..d1144d7c3536 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -26,20 +26,21 @@
  * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only
  * the child is interested and not the parent.
  */
-static inline void fsnotify_name(struct inode *dir, __u32 mask,
-				 struct inode *child,
-				 const struct qstr *name, u32 cookie)
+static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
+				struct inode *dir, const struct qstr *name,
+				u32 cookie)
 {
 	if (atomic_long_read(&dir->i_sb->s_fsnotify_connectors) == 0)
-		return;
+		return 0;
 
-	fsnotify(mask, child, FSNOTIFY_EVENT_INODE, dir, name, NULL, cookie);
+	return fsnotify(mask, data, data_type, dir, name, NULL, cookie);
 }
 
 static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
 				   __u32 mask)
 {
-	fsnotify_name(dir, mask, d_inode(dentry), &dentry->d_name, 0);
+	fsnotify_name(mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
+		      dir, &dentry->d_name, 0);
 }
 
 static inline void fsnotify_inode(struct inode *inode, __u32 mask)
@@ -154,8 +155,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
 		new_dir_mask |= FS_ISDIR;
 	}
 
-	fsnotify_name(old_dir, old_dir_mask, source, old_name, fs_cookie);
-	fsnotify_name(new_dir, new_dir_mask, source, new_name, fs_cookie);
+	fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE,
+		      old_dir, old_name, fs_cookie);
+	fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE,
+		      new_dir, new_name, fs_cookie);
 
 	if (target)
 		fsnotify_link_count(target);
@@ -209,7 +212,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode,
 	fsnotify_link_count(inode);
 	audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0);
+	fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE,
+		      dir, &new_dentry->d_name, 0);
 }
 
 /*
-- 
cgit v1.2.3


From fd5a3ff49a19aa69e2bc1e26e98037c2d778e61a Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 25 Oct 2021 16:27:17 -0300
Subject: fsnotify: pass dentry instead of inode data

Define a new data type to pass for event - FSNOTIFY_EVENT_DENTRY.
Use it to pass the dentry instead of it's ->d_inode where available.

This is needed in preparation to the refactor to retrieve the super
block from the data field.  In some cases (i.e. mkdir in kernfs), the
data inode comes from a negative dentry, such that no super block
information would be available. By receiving the dentry itself, instead
of the inode, fsnotify can derive the super block even on these cases.

Link: https://lore.kernel.org/r/20211025192746.66445-3-krisman@collabora.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
[Expand explanation in commit message]
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify.h         |  5 ++---
 include/linux/fsnotify_backend.h | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index d1144d7c3536..df0fa4687a18 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -39,8 +39,7 @@ static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
 static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
 				   __u32 mask)
 {
-	fsnotify_name(mask, d_inode(dentry), FSNOTIFY_EVENT_INODE,
-		      dir, &dentry->d_name, 0);
+	fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0);
 }
 
 static inline void fsnotify_inode(struct inode *inode, __u32 mask)
@@ -87,7 +86,7 @@ notify_child:
  */
 static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
 {
-	fsnotify_parent(dentry, mask, d_inode(dentry), FSNOTIFY_EVENT_INODE);
+	fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY);
 }
 
 static inline int fsnotify_file(struct file *file, __u32 mask)
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 1ce66748a2d2..a2db821e8a8f 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -248,6 +248,7 @@ enum fsnotify_data_type {
 	FSNOTIFY_EVENT_NONE,
 	FSNOTIFY_EVENT_PATH,
 	FSNOTIFY_EVENT_INODE,
+	FSNOTIFY_EVENT_DENTRY,
 };
 
 static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
@@ -255,6 +256,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
 	switch (data_type) {
 	case FSNOTIFY_EVENT_INODE:
 		return (struct inode *)data;
+	case FSNOTIFY_EVENT_DENTRY:
+		return d_inode(data);
 	case FSNOTIFY_EVENT_PATH:
 		return d_inode(((const struct path *)data)->dentry);
 	default:
@@ -262,6 +265,19 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
 	}
 }
 
+static inline struct dentry *fsnotify_data_dentry(const void *data, int data_type)
+{
+	switch (data_type) {
+	case FSNOTIFY_EVENT_DENTRY:
+		/* Non const is needed for dget() */
+		return (struct dentry *)data;
+	case FSNOTIFY_EVENT_PATH:
+		return ((const struct path *)data)->dentry;
+	default:
+		return NULL;
+	}
+}
+
 static inline const struct path *fsnotify_data_path(const void *data,
 						    int data_type)
 {
-- 
cgit v1.2.3


From dabe729dddca550446e9cc118c96d1f91703345b Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 25 Oct 2021 16:27:18 -0300
Subject: fsnotify: clarify contract for create event hooks

Clarify argument names and contract for fsnotify_create() and
fsnotify_mkdir() to reflect the anomaly of kernfs, which leaves dentries
negavite after mkdir/create.

Remove the WARN_ON(!inode) in audit code that were added by the Fixes
commit under the wrong assumption that dentries cannot be negative after
mkdir/create.

Fixes: aa93bdc5500c ("fsnotify: use helpers to access data by data_type")
Link: https://lore.kernel.org/linux-fsdevel/87mtp5yz0q.fsf@collabora.com/
Link: https://lore.kernel.org/r/20211025192746.66445-4-krisman@collabora.com
Reviewed-by: Jan Kara <jack@suse.cz>
Reported-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify.h | 22 ++++++++++++++++------
 kernel/audit_fsnotify.c  |  3 +--
 kernel/audit_watch.c     |  3 +--
 3 files changed, 18 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index df0fa4687a18..1e5f7435a4b5 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -192,16 +192,22 @@ static inline void fsnotify_inoderemove(struct inode *inode)
 
 /*
  * fsnotify_create - 'name' was linked in
+ *
+ * Caller must make sure that dentry->d_name is stable.
+ * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate
+ * ->d_inode later
  */
-static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
+static inline void fsnotify_create(struct inode *dir, struct dentry *dentry)
 {
-	audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
+	audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify_dirent(inode, dentry, FS_CREATE);
+	fsnotify_dirent(dir, dentry, FS_CREATE);
 }
 
 /*
  * fsnotify_link - new hardlink in 'inode' directory
+ *
+ * Caller must make sure that new_dentry->d_name is stable.
  * Note: We have to pass also the linked inode ptr as some filesystems leave
  *   new_dentry->d_inode NULL and instantiate inode pointer later
  */
@@ -230,12 +236,16 @@ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry)
 
 /*
  * fsnotify_mkdir - directory 'name' was created
+ *
+ * Caller must make sure that dentry->d_name is stable.
+ * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate
+ * ->d_inode later
  */
-static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
+static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry)
 {
-	audit_inode_child(inode, dentry, AUDIT_TYPE_CHILD_CREATE);
+	audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);
 
-	fsnotify_dirent(inode, dentry, FS_CREATE | FS_ISDIR);
+	fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR);
 }
 
 /*
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 60739d5e3373..02348b48447c 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -160,8 +160,7 @@ static int audit_mark_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
 
 	audit_mark = container_of(inode_mark, struct audit_fsnotify_mark, mark);
 
-	if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group) ||
-	    WARN_ON_ONCE(!inode))
+	if (WARN_ON_ONCE(inode_mark->group != audit_fsnotify_group))
 		return 0;
 
 	if (mask & (FS_CREATE|FS_MOVED_TO|FS_DELETE|FS_MOVED_FROM)) {
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 2acf7ca49154..223eed7b39cd 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -472,8 +472,7 @@ static int audit_watch_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
 
 	parent = container_of(inode_mark, struct audit_parent, mark);
 
-	if (WARN_ON_ONCE(inode_mark->group != audit_watch_group) ||
-	    WARN_ON_ONCE(!inode))
+	if (WARN_ON_ONCE(inode_mark->group != audit_watch_group))
 		return 0;
 
 	if (mask & (FS_CREATE|FS_MOVED_TO) && inode)
-- 
cgit v1.2.3


From 808967a0a4d2f4ce6a2005c5692fffbecaf018c1 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:23 -0300
Subject: fsnotify: Add helper to detect overflow_event

Similarly to fanotify_is_perm_event and friends, provide a helper
predicate to say whether a mask is of an overflow event.

Link: https://lore.kernel.org/r/20211025192746.66445-9-krisman@collabora.com
Suggested-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.h    | 3 ++-
 include/linux/fsnotify_backend.h | 5 +++++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 4a5e555dc3d2..c42cf8fd7d79 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -315,7 +315,8 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
  */
 static inline bool fanotify_is_hashed_event(u32 mask)
 {
-	return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
+	return !(fanotify_is_perm_event(mask) ||
+		 fsnotify_is_overflow_event(mask));
 }
 
 static inline unsigned int fanotify_event_hash_bucket(
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index a2db821e8a8f..749bc85e1d1c 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -510,6 +510,11 @@ static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
 	fsnotify_add_event(group, group->overflow_event, NULL, NULL);
 }
 
+static inline bool fsnotify_is_overflow_event(u32 mask)
+{
+	return mask & FS_Q_OVERFLOW;
+}
+
 static inline bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
 {
 	assert_spin_locked(&group->notification_lock);
-- 
cgit v1.2.3


From 1ad03c3a326a86e259389592117252c851873395 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:24 -0300
Subject: fsnotify: Add wrapper around fsnotify_add_event

fsnotify_add_event is growing in number of parameters, which in most
case are just passed a NULL pointer.  So, split out a new
fsnotify_insert_event function to clean things up for users who don't
need an insert hook.

Link: https://lore.kernel.org/r/20211025192746.66445-10-krisman@collabora.com
Suggested-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c        |  4 ++--
 fs/notify/inotify/inotify_fsnotify.c |  2 +-
 fs/notify/notification.c             | 12 ++++++------
 include/linux/fsnotify_backend.h     | 23 ++++++++++++++++-------
 4 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 310246f8d3f1..f82e20228999 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -781,8 +781,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	}
 
 	fsn_event = &event->fse;
-	ret = fsnotify_add_event(group, fsn_event, fanotify_merge,
-				 fanotify_insert_event);
+	ret = fsnotify_insert_event(group, fsn_event, fanotify_merge,
+				    fanotify_insert_event);
 	if (ret) {
 		/* Permission events shouldn't be merged */
 		BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index d1a64daa0171..a96582cbfad1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -116,7 +116,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
 	if (len)
 		strcpy(event->name, name->name);
 
-	ret = fsnotify_add_event(group, fsn_event, inotify_merge, NULL);
+	ret = fsnotify_add_event(group, fsn_event, inotify_merge);
 	if (ret) {
 		/* Our event wasn't used in the end. Free it. */
 		fsnotify_destroy_event(group, fsn_event);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 32f45543b9c6..44bb10f50715 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -78,12 +78,12 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
  * 2 if the event was not queued - either the queue of events has overflown
  *   or the group is shutting down.
  */
-int fsnotify_add_event(struct fsnotify_group *group,
-		       struct fsnotify_event *event,
-		       int (*merge)(struct fsnotify_group *,
-				    struct fsnotify_event *),
-		       void (*insert)(struct fsnotify_group *,
-				      struct fsnotify_event *))
+int fsnotify_insert_event(struct fsnotify_group *group,
+			  struct fsnotify_event *event,
+			  int (*merge)(struct fsnotify_group *,
+				       struct fsnotify_event *),
+			  void (*insert)(struct fsnotify_group *,
+					 struct fsnotify_event *))
 {
 	int ret = 0;
 	struct list_head *list = &group->notification_list;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 749bc85e1d1c..b323d0c4b967 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -498,16 +498,25 @@ extern int fsnotify_fasync(int fd, struct file *file, int on);
 extern void fsnotify_destroy_event(struct fsnotify_group *group,
 				   struct fsnotify_event *event);
 /* attach the event to the group notification queue */
-extern int fsnotify_add_event(struct fsnotify_group *group,
-			      struct fsnotify_event *event,
-			      int (*merge)(struct fsnotify_group *,
-					   struct fsnotify_event *),
-			      void (*insert)(struct fsnotify_group *,
-					     struct fsnotify_event *));
+extern int fsnotify_insert_event(struct fsnotify_group *group,
+				 struct fsnotify_event *event,
+				 int (*merge)(struct fsnotify_group *,
+					      struct fsnotify_event *),
+				 void (*insert)(struct fsnotify_group *,
+						struct fsnotify_event *));
+
+static inline int fsnotify_add_event(struct fsnotify_group *group,
+				     struct fsnotify_event *event,
+				     int (*merge)(struct fsnotify_group *,
+						  struct fsnotify_event *))
+{
+	return fsnotify_insert_event(group, event, merge, NULL);
+}
+
 /* Queue overflow event to a notification group */
 static inline void fsnotify_queue_overflow(struct fsnotify_group *group)
 {
-	fsnotify_add_event(group, group->overflow_event, NULL, NULL);
+	fsnotify_add_event(group, group->overflow_event, NULL);
 }
 
 static inline bool fsnotify_is_overflow_event(u32 mask)
-- 
cgit v1.2.3


From 29335033c574a15334015d8c4e36862cff3d3384 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:25 -0300
Subject: fsnotify: Retrieve super block from the data field

Some file system events (i.e. FS_ERROR) might not be associated with an
inode or directory.  For these, we can retrieve the super block from the
data field.  But, since the super_block is available in the data field
on every event type, simplify the code to always retrieve it from there,
through a new helper.

Link: https://lore.kernel.org/r/20211025192746.66445-11-krisman@collabora.com
Suggested-by: Jan Kara <jack@suse.cz>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fsnotify.c             |  7 +++----
 include/linux/fsnotify_backend.h | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 963e6ce75b96..fde3a1115a17 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -455,16 +455,16 @@ static void fsnotify_iter_next(struct fsnotify_iter_info *iter_info)
  *		@file_name is relative to
  * @file_name:	optional file name associated with event
  * @inode:	optional inode associated with event -
- *		either @dir or @inode must be non-NULL.
- *		if both are non-NULL event may be reported to both.
+ *		If @dir and @inode are both non-NULL, event may be
+ *		reported to both.
  * @cookie:	inotify rename cookie
  */
 int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	     const struct qstr *file_name, struct inode *inode, u32 cookie)
 {
 	const struct path *path = fsnotify_data_path(data, data_type);
+	struct super_block *sb = fsnotify_data_sb(data, data_type);
 	struct fsnotify_iter_info iter_info = {};
-	struct super_block *sb;
 	struct mount *mnt = NULL;
 	struct inode *parent = NULL;
 	int ret = 0;
@@ -483,7 +483,6 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 		 */
 		parent = dir;
 	}
-	sb = inode->i_sb;
 
 	/*
 	 * Optimization: srcu_read_lock() has a memory barrier which can
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b323d0c4b967..035438fe4a43 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -289,6 +289,21 @@ static inline const struct path *fsnotify_data_path(const void *data,
 	}
 }
 
+static inline struct super_block *fsnotify_data_sb(const void *data,
+						   int data_type)
+{
+	switch (data_type) {
+	case FSNOTIFY_EVENT_INODE:
+		return ((struct inode *)data)->i_sb;
+	case FSNOTIFY_EVENT_DENTRY:
+		return ((struct dentry *)data)->d_sb;
+	case FSNOTIFY_EVENT_PATH:
+		return ((const struct path *)data)->dentry->d_sb;
+	default:
+		return NULL;
+	}
+}
+
 enum fsnotify_obj_type {
 	FSNOTIFY_OBJ_TYPE_INODE,
 	FSNOTIFY_OBJ_TYPE_PARENT,
-- 
cgit v1.2.3


From 24dca90590509a7a6cbe0650100c90c5b8a3468a Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:26 -0300
Subject: fsnotify: Protect fsnotify_handle_inode_event from no-inode events

FAN_FS_ERROR allows events without inodes - i.e. for file system-wide
errors.  Even though fsnotify_handle_inode_event is not currently used
by fanotify, this patch protects other backends from cases where neither
inode or dir are provided.  Also document the constraints of the
interface (inode and dir cannot be both NULL).

Link: https://lore.kernel.org/r/20211025192746.66445-12-krisman@collabora.com
Suggested-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/nfsd/filecache.c              | 3 +++
 fs/notify/fsnotify.c             | 3 +++
 include/linux/fsnotify_backend.h | 1 +
 3 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index be3c1aad50ea..fdf89fcf1a0c 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -602,6 +602,9 @@ nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
 				struct inode *inode, struct inode *dir,
 				const struct qstr *name, u32 cookie)
 {
+	if (WARN_ON_ONCE(!inode))
+		return 0;
+
 	trace_nfsd_file_fsnotify_handle_event(inode, mask);
 
 	/* Should be no marks on non-regular files */
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index fde3a1115a17..4034ca566f95 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -252,6 +252,9 @@ static int fsnotify_handle_inode_event(struct fsnotify_group *group,
 	if (WARN_ON_ONCE(!ops->handle_inode_event))
 		return 0;
 
+	if (WARN_ON_ONCE(!inode && !dir))
+		return 0;
+
 	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
 	    path && d_unlinked(path->dentry))
 		return 0;
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 035438fe4a43..b71dc788018e 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -136,6 +136,7 @@ struct mem_cgroup;
  * @dir:	optional directory associated with event -
  *		if @file_name is not NULL, this is the directory that
  *		@file_name is relative to.
+ *		Either @inode or @dir must be non-NULL.
  * @file_name:	optional file name associated with event
  * @cookie:	inotify rename cookie
  *
-- 
cgit v1.2.3


From 330ae77d2a5b0af32c0f29e139bf28ec8591de59 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:27 -0300
Subject: fsnotify: Pass group argument to free_event

For group-wide mempool backed events, like FS_ERROR, the free_event
callback will need to reference the group's mempool to free the memory.
Wire that argument into the current callers.

Link: https://lore.kernel.org/r/20211025192746.66445-13-krisman@collabora.com
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c        | 3 ++-
 fs/notify/group.c                    | 2 +-
 fs/notify/inotify/inotify_fsnotify.c | 3 ++-
 fs/notify/notification.c             | 2 +-
 include/linux/fsnotify_backend.h     | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f82e20228999..c620b4f6fe12 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -835,7 +835,8 @@ static void fanotify_free_name_event(struct fanotify_event *event)
 	kfree(FANOTIFY_NE(event));
 }
 
-static void fanotify_free_event(struct fsnotify_event *fsn_event)
+static void fanotify_free_event(struct fsnotify_group *group,
+				struct fsnotify_event *fsn_event)
 {
 	struct fanotify_event *event;
 
diff --git a/fs/notify/group.c b/fs/notify/group.c
index fb89c351295d..6a297efc4788 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -88,7 +88,7 @@ void fsnotify_destroy_group(struct fsnotify_group *group)
 	 * that deliberately ignores overflow events.
 	 */
 	if (group->overflow_event)
-		group->ops->free_event(group->overflow_event);
+		group->ops->free_event(group, group->overflow_event);
 
 	fsnotify_put_group(group);
 }
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index a96582cbfad1..d92d7b0adc9a 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -177,7 +177,8 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
 		dec_inotify_instances(group->inotify_data.ucounts);
 }
 
-static void inotify_free_event(struct fsnotify_event *fsn_event)
+static void inotify_free_event(struct fsnotify_group *group,
+			       struct fsnotify_event *fsn_event)
 {
 	kfree(INOTIFY_E(fsn_event));
 }
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 44bb10f50715..9022ae650cf8 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -64,7 +64,7 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
 		WARN_ON(!list_empty(&event->list));
 		spin_unlock(&group->notification_lock);
 	}
-	group->ops->free_event(event);
+	group->ops->free_event(group, event);
 }
 
 /*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b71dc788018e..3a7c31436182 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -156,7 +156,7 @@ struct fsnotify_ops {
 			    const struct qstr *file_name, u32 cookie);
 	void (*free_group_priv)(struct fsnotify_group *group);
 	void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
-	void (*free_event)(struct fsnotify_event *event);
+	void (*free_event)(struct fsnotify_group *group, struct fsnotify_event *event);
 	/* called on final put+free to free memory */
 	void (*free_mark)(struct fsnotify_mark *mark);
 };
-- 
cgit v1.2.3


From 4fe595cf1c80e7a5af4d00c4da29def64aff57a2 Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:31 -0300
Subject: fanotify: Require fid_mode for any non-fd event

Like inode events, FAN_FS_ERROR will require fid mode.  Therefore,
convert the verification during fanotify_mark(2) to require fid for any
non-fd event.  This means fid_mode will not only be required for inode
events, but for any event that doesn't provide a descriptor.

Link: https://lore.kernel.org/r/20211025192746.66445-17-krisman@collabora.com
Suggested-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 12 ++++++------
 include/linux/fanotify.h           |  3 +++
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index adeae6d65e35..66ee3c2805c7 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1458,14 +1458,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		goto fput_and_out;
 
 	/*
-	 * Events with data type inode do not carry enough information to report
-	 * event->fd, so we do not allow setting a mask for inode events unless
-	 * group supports reporting fid.
-	 * inode events are not supported on a mount mark, because they do not
-	 * carry enough information (i.e. path) to be filtered by mount point.
+	 * Events that do not carry enough information to report
+	 * event->fd require a group that supports reporting fid.  Those
+	 * events are not supported on a mount mark, because they do not
+	 * carry enough information (i.e. path) to be filtered by mount
+	 * point.
 	 */
 	fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
-	if (mask & FANOTIFY_INODE_EVENTS &&
+	if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) &&
 	    (!fid_mode || mark_type == FAN_MARK_MOUNT))
 		goto fput_and_out;
 
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index eec3b7c40811..52d464802d99 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -84,6 +84,9 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
  */
 #define FANOTIFY_DIRENT_EVENTS	(FAN_MOVE | FAN_CREATE | FAN_DELETE)
 
+/* Events that can be reported with event->fd */
+#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
+
 /* Events that can only be reported with data type FSNOTIFY_EVENT_INODE */
 #define FANOTIFY_INODE_EVENTS	(FANOTIFY_DIRENT_EVENTS | \
 				 FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
-- 
cgit v1.2.3


From 9daa811073fa19c08e8aad3b90f9235fed161acf Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:32 -0300
Subject: fsnotify: Support FS_ERROR event type

Expose a new type of fsnotify event for filesystems to report errors for
userspace monitoring tools.  fanotify will send this type of
notification for FAN_FS_ERROR events.  This also introduce a helper for
generating the new event.

Link: https://lore.kernel.org/r/20211025192746.66445-18-krisman@collabora.com
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fsnotify.h         | 13 +++++++++++++
 include/linux/fsnotify_backend.h | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 1e5f7435a4b5..787545e87eeb 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -339,4 +339,17 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
 		fsnotify_dentry(dentry, mask);
 }
 
+static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode,
+				    int error)
+{
+	struct fs_error_report report = {
+		.error = error,
+		.inode = inode,
+		.sb = sb,
+	};
+
+	return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR,
+			NULL, NULL, NULL, 0);
+}
+
 #endif	/* _LINUX_FS_NOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 3a7c31436182..00dbaafbcf95 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -42,6 +42,12 @@
 
 #define FS_UNMOUNT		0x00002000	/* inode on umount fs */
 #define FS_Q_OVERFLOW		0x00004000	/* Event queued overflowed */
+#define FS_ERROR		0x00008000	/* Filesystem Error (fanotify) */
+
+/*
+ * FS_IN_IGNORED overloads FS_ERROR.  It is only used internally by inotify
+ * which does not support FS_ERROR.
+ */
 #define FS_IN_IGNORED		0x00008000	/* last inotify event here */
 
 #define FS_OPEN_PERM		0x00010000	/* open event in an permission hook */
@@ -95,7 +101,8 @@
 #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \
 			     FS_EVENTS_POSS_ON_CHILD | \
 			     FS_DELETE_SELF | FS_MOVE_SELF | FS_DN_RENAME | \
-			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED)
+			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
+			     FS_ERROR)
 
 /* Extra flags that may be reported with event or control handling of events */
 #define ALL_FSNOTIFY_FLAGS  (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \
@@ -250,6 +257,13 @@ enum fsnotify_data_type {
 	FSNOTIFY_EVENT_PATH,
 	FSNOTIFY_EVENT_INODE,
 	FSNOTIFY_EVENT_DENTRY,
+	FSNOTIFY_EVENT_ERROR,
+};
+
+struct fs_error_report {
+	int error;
+	struct inode *inode;
+	struct super_block *sb;
 };
 
 static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
@@ -261,6 +275,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
 		return d_inode(data);
 	case FSNOTIFY_EVENT_PATH:
 		return d_inode(((const struct path *)data)->dentry);
+	case FSNOTIFY_EVENT_ERROR:
+		return ((struct fs_error_report *)data)->inode;
 	default:
 		return NULL;
 	}
@@ -300,6 +316,20 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
 		return ((struct dentry *)data)->d_sb;
 	case FSNOTIFY_EVENT_PATH:
 		return ((const struct path *)data)->dentry->d_sb;
+	case FSNOTIFY_EVENT_ERROR:
+		return ((struct fs_error_report *) data)->sb;
+	default:
+		return NULL;
+	}
+}
+
+static inline struct fs_error_report *fsnotify_data_error_report(
+							const void *data,
+							int data_type)
+{
+	switch (data_type) {
+	case FSNOTIFY_EVENT_ERROR:
+		return (struct fs_error_report *) data;
 	default:
 		return NULL;
 	}
-- 
cgit v1.2.3


From 734a1a5eccc5f7473002b0669f788e135f1f64aa Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:34 -0300
Subject: fanotify: Pre-allocate pool of error events

Pre-allocate slots for file system errors to have greater chances of
succeeding, since error events can happen in GFP_NOFS context.  This
patch introduces a group-wide mempool of error events, shared by all
FAN_FS_ERROR marks in this group.

Link: https://lore.kernel.org/r/20211025192746.66445-20-krisman@collabora.com
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      |  3 +++
 fs/notify/fanotify/fanotify.h      | 11 +++++++++++
 fs/notify/fanotify/fanotify_user.c | 26 +++++++++++++++++++++++++-
 include/linux/fsnotify_backend.h   |  2 ++
 4 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 8f152445d75c..01d68dfc74aa 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -819,6 +819,9 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
 	if (group->fanotify_data.ucounts)
 		dec_ucount(group->fanotify_data.ucounts,
 			   UCOUNT_FANOTIFY_GROUPS);
+
+	if (mempool_initialized(&group->fanotify_data.error_events_pool))
+		mempool_exit(&group->fanotify_data.error_events_pool);
 }
 
 static void fanotify_free_path_event(struct fanotify_event *event)
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index c42cf8fd7d79..a577e87fac2b 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -141,6 +141,7 @@ enum fanotify_event_type {
 	FANOTIFY_EVENT_TYPE_PATH,
 	FANOTIFY_EVENT_TYPE_PATH_PERM,
 	FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
+	FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
 	__FANOTIFY_EVENT_TYPE_NUM
 };
 
@@ -196,6 +197,16 @@ FANOTIFY_NE(struct fanotify_event *event)
 	return container_of(event, struct fanotify_name_event, fae);
 }
 
+struct fanotify_error_event {
+	struct fanotify_event fae;
+};
+
+static inline struct fanotify_error_event *
+FANOTIFY_EE(struct fanotify_event *event)
+{
+	return container_of(event, struct fanotify_error_event, fae);
+}
+
 static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
 {
 	if (event->type == FANOTIFY_EVENT_TYPE_FID)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 66ee3c2805c7..2f4182b754b2 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -30,6 +30,7 @@
 #define FANOTIFY_DEFAULT_MAX_EVENTS	16384
 #define FANOTIFY_OLD_DEFAULT_MAX_MARKS	8192
 #define FANOTIFY_DEFAULT_MAX_GROUPS	128
+#define FANOTIFY_DEFAULT_FEE_POOL_SIZE	32
 
 /*
  * Legacy fanotify marks limits (8192) is per group and we introduced a tunable
@@ -1054,6 +1055,15 @@ out_dec_ucounts:
 	return ERR_PTR(ret);
 }
 
+static int fanotify_group_init_error_pool(struct fsnotify_group *group)
+{
+	if (mempool_initialized(&group->fanotify_data.error_events_pool))
+		return 0;
+
+	return mempool_init_kmalloc_pool(&group->fanotify_data.error_events_pool,
+					 FANOTIFY_DEFAULT_FEE_POOL_SIZE,
+					 sizeof(struct fanotify_error_event));
+}
 
 static int fanotify_add_mark(struct fsnotify_group *group,
 			     fsnotify_connp_t *connp, unsigned int type,
@@ -1062,6 +1072,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
+	int ret = 0;
 
 	mutex_lock(&group->mark_mutex);
 	fsn_mark = fsnotify_find_mark(connp, group);
@@ -1072,13 +1083,26 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 			return PTR_ERR(fsn_mark);
 		}
 	}
+
+	/*
+	 * Error events are pre-allocated per group, only if strictly
+	 * needed (i.e. FAN_FS_ERROR was requested).
+	 */
+	if (!(flags & FAN_MARK_IGNORED_MASK) && (mask & FAN_FS_ERROR)) {
+		ret = fanotify_group_init_error_pool(group);
+		if (ret)
+			goto out;
+	}
+
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
 	if (added & ~fsnotify_conn_mask(fsn_mark->connector))
 		fsnotify_recalc_mask(fsn_mark->connector);
+
+out:
 	mutex_unlock(&group->mark_mutex);
 
 	fsnotify_put_mark(fsn_mark);
-	return 0;
+	return ret;
 }
 
 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 00dbaafbcf95..51ef2b079bfa 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -19,6 +19,7 @@
 #include <linux/atomic.h>
 #include <linux/user_namespace.h>
 #include <linux/refcount.h>
+#include <linux/mempool.h>
 
 /*
  * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -246,6 +247,7 @@ struct fsnotify_group {
 			int flags;           /* flags from fanotify_init() */
 			int f_flags; /* event_f_flags from fanotify_init() */
 			struct ucounts *ucounts;
+			mempool_t error_events_pool;
 		} fanotify_data;
 #endif /* CONFIG_FANOTIFY */
 	};
-- 
cgit v1.2.3


From 9709bd548f11a092d124698118013f66e1740f9b Mon Sep 17 00:00:00 2001
From: Gabriel Krisman Bertazi <krisman@collabora.com>
Date: Mon, 25 Oct 2021 16:27:43 -0300
Subject: fanotify: Allow users to request FAN_FS_ERROR events

Wire up the FAN_FS_ERROR event in the fanotify_mark syscall, allowing
user space to request the monitoring of FAN_FS_ERROR events.

These events are limited to filesystem marks, so check it is the
case in the syscall handler.

Link: https://lore.kernel.org/r/20211025192746.66445-29-krisman@collabora.com
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      | 2 +-
 fs/notify/fanotify/fanotify_user.c | 4 ++++
 include/linux/fanotify.h           | 6 +++++-
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index af61425e6e3b..b6091775aa6e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -822,7 +822,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 	BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
 	BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
 
 	mask = fanotify_group_event_mask(group, iter_info, mask, data,
 					 data_type, dir);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index fb817028d0b6..559bc1e9926d 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1520,6 +1520,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	    group->priority == FS_PRIO_0)
 		goto fput_and_out;
 
+	if (mask & FAN_FS_ERROR &&
+	    mark_type != FAN_MARK_FILESYSTEM)
+		goto fput_and_out;
+
 	/*
 	 * Events that do not carry enough information to report
 	 * event->fd require a group that supports reporting fid.  Those
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 52d464802d99..616af2ea20f3 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -91,9 +91,13 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
 #define FANOTIFY_INODE_EVENTS	(FANOTIFY_DIRENT_EVENTS | \
 				 FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
 
+/* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */
+#define FANOTIFY_ERROR_EVENTS	(FAN_FS_ERROR)
+
 /* Events that user can request to be notified on */
 #define FANOTIFY_EVENTS		(FANOTIFY_PATH_EVENTS | \
-				 FANOTIFY_INODE_EVENTS)
+				 FANOTIFY_INODE_EVENTS | \
+				 FANOTIFY_ERROR_EVENTS)
 
 /* Events that require a permission response from user */
 #define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM | \
-- 
cgit v1.2.3


From 1bb6b81029456f4e2e6727c5167f43bdfc34bee5 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Wed, 27 Oct 2021 13:21:07 +0100
Subject: block: avoid extra iter advance with async iocb

Nobody cares about iov iterators state if we return -EIOCBQUEUED, so as
the we now have __blkdev_direct_IO_async(), which gets pages only once,
we can skip expensive iov_iter_advance(). It's around 1-2% of all CPU
spent.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/a6158edfbfa2ae3bc24aed29a72f035df18fad2f.1635337135.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         |  2 +-
 block/fops.c        | 20 +++++++++++++++-----
 include/linux/bio.h |  1 +
 3 files changed, 17 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index ead1f8a9ff5e..15ab0d6d1c06 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1046,7 +1046,7 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty)
 }
 EXPORT_SYMBOL_GPL(__bio_release_pages);
 
-static void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 {
 	size_t size = iov_iter_count(iter);
 
diff --git a/block/fops.c b/block/fops.c
index a7b328296912..092e5079e827 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -352,11 +352,21 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	bio->bi_end_io = blkdev_bio_end_io_async;
 	bio->bi_ioprio = iocb->ki_ioprio;
 
-	ret = bio_iov_iter_get_pages(bio, iter);
-	if (unlikely(ret)) {
-		bio->bi_status = BLK_STS_IOERR;
-		bio_endio(bio);
-		return ret;
+	if (iov_iter_is_bvec(iter)) {
+		/*
+		 * Users don't rely on the iterator being in any particular
+		 * state for async I/O returning -EIOCBQUEUED, hence we can
+		 * avoid expensive iov_iter_advance(). Bypass
+		 * bio_iov_iter_get_pages() and set the bvec directly.
+		 */
+		bio_iov_bvec_set(bio, iter);
+	} else {
+		ret = bio_iov_iter_get_pages(bio, iter);
+		if (unlikely(ret)) {
+			bio->bi_status = BLK_STS_IOERR;
+			bio_endio(bio);
+			return ret;
+		}
 	}
 	dio->size = bio->bi_iter.bi_size;
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c88700d1bdc3..fe6bdfbbef66 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -417,6 +417,7 @@ int bio_add_zone_append_page(struct bio *bio, struct page *page,
 void __bio_add_page(struct bio *bio, struct page *page,
 		unsigned int len, unsigned int off);
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter);
+void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter);
 void __bio_release_pages(struct bio *bio, bool mark_dirty);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
-- 
cgit v1.2.3


From 5460601de590158b37619f8e18b678aa18da6345 Mon Sep 17 00:00:00 2001
From: Gal Pressman <galpress@amazon.com>
Date: Tue, 12 Oct 2021 15:09:01 +0300
Subject: dma-buf: Fix pin callback comment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pin callback does not necessarily have to move the memory to system
memory, remove the sentence from the comment.

Link: https://lore.kernel.org/r/20211012120903.96933-2-galpress@amazon.com
Signed-off-by: Gal Pressman <galpress@amazon.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/dma-buf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 8b32b4bdd590..8afe182fe8f2 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -86,8 +86,8 @@ struct dma_buf_ops {
 	 * @pin:
 	 *
 	 * This is called by dma_buf_pin() and lets the exporter know that the
-	 * DMA-buf can't be moved any more. The exporter should pin the buffer
-	 * into system memory to make sure it is generally accessible by other
+	 * DMA-buf can't be moved any more. Ideally, the exporter should
+	 * pin the buffer so that it is generally accessible by all
 	 * devices.
 	 *
 	 * This is called with the &dmabuf.resv object locked and is mutual
-- 
cgit v1.2.3


From 3ab7b6ac5d829e60c3b89d415811ff1c9f358c8e Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Mon, 25 Oct 2021 10:09:23 -0700
Subject: pwm: Introduce single-PWM of_xlate function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing pxa driver and the upcoming addition of PWM support in the
TI sn565dsi86 DSI/eDP bridge driver both has a single PWM channel and
thereby a need for a of_xlate function with the period as its single
argument.

Introduce a common helper function in the core that can be used as
of_xlate by such drivers and migrate the pxa driver to use this.

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Tested-by: Steev Klimaszewski <steev@kali.org>
Tested-By: Steev Klimaszewski <steev@kali.org>
Signed-off-by: Robert Foss <robert.foss@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211025170925.3096444-1-bjorn.andersson@linaro.org
---
 drivers/pwm/core.c    | 26 ++++++++++++++++++++++++++
 drivers/pwm/pwm-pxa.c | 16 +---------------
 include/linux/pwm.h   |  2 ++
 3 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 4527f09a5c50..2c6b155002a2 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -152,6 +152,32 @@ of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
 }
 EXPORT_SYMBOL_GPL(of_pwm_xlate_with_flags);
 
+struct pwm_device *
+of_pwm_single_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
+{
+	struct pwm_device *pwm;
+
+	if (pc->of_pwm_n_cells < 1)
+		return ERR_PTR(-EINVAL);
+
+	/* validate that one cell is specified, optionally with flags */
+	if (args->args_count != 1 && args->args_count != 2)
+		return ERR_PTR(-EINVAL);
+
+	pwm = pwm_request_from_chip(pc, 0, NULL);
+	if (IS_ERR(pwm))
+		return pwm;
+
+	pwm->args.period = args->args[0];
+	pwm->args.polarity = PWM_POLARITY_NORMAL;
+
+	if (args->args_count == 2 && args->args[2] & PWM_POLARITY_INVERTED)
+		pwm->args.polarity = PWM_POLARITY_INVERSED;
+
+	return pwm;
+}
+EXPORT_SYMBOL_GPL(of_pwm_single_xlate);
+
 static void of_pwmchip_add(struct pwm_chip *chip)
 {
 	if (!chip->dev || !chip->dev->of_node)
diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index a9efdcf839ae..238ec88c130b 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -148,20 +148,6 @@ static const struct platform_device_id *pxa_pwm_get_id_dt(struct device *dev)
 	return id ? id->data : NULL;
 }
 
-static struct pwm_device *
-pxa_pwm_of_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
-{
-	struct pwm_device *pwm;
-
-	pwm = pwm_request_from_chip(pc, 0, NULL);
-	if (IS_ERR(pwm))
-		return pwm;
-
-	pwm->args.period = args->args[0];
-
-	return pwm;
-}
-
 static int pwm_probe(struct platform_device *pdev)
 {
 	const struct platform_device_id *id = platform_get_device_id(pdev);
@@ -187,7 +173,7 @@ static int pwm_probe(struct platform_device *pdev)
 	pc->chip.npwm = (id->driver_data & HAS_SECONDARY_PWM) ? 2 : 1;
 
 	if (IS_ENABLED(CONFIG_OF)) {
-		pc->chip.of_xlate = pxa_pwm_of_xlate;
+		pc->chip.of_xlate = of_pwm_single_xlate;
 		pc->chip.of_pwm_n_cells = 1;
 	}
 
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 725c9b784e60..dd51d4931fdc 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -414,6 +414,8 @@ struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
 
 struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
 		const struct of_phandle_args *args);
+struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc,
+				       const struct of_phandle_args *args);
 
 struct pwm_device *pwm_get(struct device *dev, const char *con_id);
 struct pwm_device *of_pwm_get(struct device *dev, struct device_node *np,
-- 
cgit v1.2.3


From ce5e48036c9e76a2a5bd4d9079eac273087a533a Mon Sep 17 00:00:00 2001
From: 王贇 <yun.wang@linux.alibaba.com>
Date: Wed, 27 Oct 2021 11:14:44 +0800
Subject: ftrace: disable preemption when recursion locked

As the documentation explained, ftrace_test_recursion_trylock()
and ftrace_test_recursion_unlock() were supposed to disable and
enable preemption properly, however currently this work is done
outside of the function, which could be missing by mistake.

And since the internal using of trace_test_and_set_recursion()
and trace_clear_recursion() also require preemption disabled, we
can just merge the logical.

This patch will make sure the preemption has been disabled when
trace_test_and_set_recursion() return bit >= 0, and
trace_clear_recursion() will enable the preemption if previously
enabled.

Link: https://lkml.kernel.org/r/13bde807-779c-aa4c-0672-20515ae365ea@linux.alibaba.com

CC: Petr Mladek <pmladek@suse.com>
Cc: Guo Ren <guoren@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Jiri Kosina <jikos@kernel.org>
Cc: Joe Lawrence <joe.lawrence@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Jisheng Zhang <jszhang@kernel.org>
CC: Steven Rostedt <rostedt@goodmis.org>
CC: Miroslav Benes <mbenes@suse.cz>
Reported-by: Abaci <abaci@linux.alibaba.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Michael Wang <yun.wang@linux.alibaba.com>
[ Removed extra line in comment - SDR ]
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/csky/kernel/probes/ftrace.c     |  2 --
 arch/parisc/kernel/ftrace.c          |  2 --
 arch/powerpc/kernel/kprobes-ftrace.c |  2 --
 arch/riscv/kernel/probes/ftrace.c    |  2 --
 arch/x86/kernel/kprobes/ftrace.c     |  2 --
 include/linux/trace_recursion.h      | 11 ++++++++++-
 kernel/livepatch/patch.c             | 12 ++++++------
 kernel/trace/ftrace.c                | 15 +++++----------
 kernel/trace/trace_functions.c       |  5 -----
 9 files changed, 21 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index b388228abbf2..834cffcfbce3 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -17,7 +17,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 		return;
 
 	regs = ftrace_get_regs(fregs);
-	preempt_disable_notrace();
 	p = get_kprobe((kprobe_opcode_t *)ip);
 	if (!p) {
 		p = get_kprobe((kprobe_opcode_t *)(ip - MCOUNT_INSN_SIZE));
@@ -57,7 +56,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 		__this_cpu_write(current_kprobe, NULL);
 	}
 out:
-	preempt_enable_notrace();
 	ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 01581f715737..b14011d3c2f1 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -211,7 +211,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 		return;
 
 	regs = ftrace_get_regs(fregs);
-	preempt_disable_notrace();
 	p = get_kprobe((kprobe_opcode_t *)ip);
 	if (unlikely(!p) || kprobe_disabled(p))
 		goto out;
@@ -240,7 +239,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	}
 	__this_cpu_write(current_kprobe, NULL);
 out:
-	preempt_enable_notrace();
 	ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
index 7154d58338cc..072ebe7f290b 100644
--- a/arch/powerpc/kernel/kprobes-ftrace.c
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -26,7 +26,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
 		return;
 
 	regs = ftrace_get_regs(fregs);
-	preempt_disable_notrace();
 	p = get_kprobe((kprobe_opcode_t *)nip);
 	if (unlikely(!p) || kprobe_disabled(p))
 		goto out;
@@ -61,7 +60,6 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
 		__this_cpu_write(current_kprobe, NULL);
 	}
 out:
-	preempt_enable_notrace();
 	ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
index aab85a82f419..7142ec42e889 100644
--- a/arch/riscv/kernel/probes/ftrace.c
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -15,7 +15,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	if (bit < 0)
 		return;
 
-	preempt_disable_notrace();
 	p = get_kprobe((kprobe_opcode_t *)ip);
 	if (unlikely(!p) || kprobe_disabled(p))
 		goto out;
@@ -52,7 +51,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 		__this_cpu_write(current_kprobe, NULL);
 	}
 out:
-	preempt_enable_notrace();
 	ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 596de2f6d3a5..dd2ec14adb77 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -25,7 +25,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	if (bit < 0)
 		return;
 
-	preempt_disable_notrace();
 	p = get_kprobe((kprobe_opcode_t *)ip);
 	if (unlikely(!p) || kprobe_disabled(p))
 		goto out;
@@ -59,7 +58,6 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 		__this_cpu_write(current_kprobe, NULL);
 	}
 out:
-	preempt_enable_notrace();
 	ftrace_test_recursion_unlock(bit);
 }
 NOKPROBE_SYMBOL(kprobe_ftrace_handler);
diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 24f284eb55a7..a13f23b04d73 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -155,6 +155,9 @@ extern void ftrace_record_recursion(unsigned long ip, unsigned long parent_ip);
 # define do_ftrace_record_recursion(ip, pip)	do { } while (0)
 #endif
 
+/*
+ * Preemption is promised to be disabled when return bit >= 0.
+ */
 static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsigned long pip,
 							int start, int max)
 {
@@ -189,14 +192,20 @@ static __always_inline int trace_test_and_set_recursion(unsigned long ip, unsign
 	current->trace_recursion = val;
 	barrier();
 
+	preempt_disable_notrace();
+
 	return bit + 1;
 }
 
+/*
+ * Preemption will be enabled (if it was previously enabled).
+ */
 static __always_inline void trace_clear_recursion(int bit)
 {
 	if (!bit)
 		return;
 
+	preempt_enable_notrace();
 	barrier();
 	bit--;
 	trace_recursion_clear(bit);
@@ -209,7 +218,7 @@ static __always_inline void trace_clear_recursion(int bit)
  * tracing recursed in the same context (normal vs interrupt),
  *
  * Returns: -1 if a recursion happened.
- *           >= 0 if no recursion
+ *           >= 0 if no recursion.
  */
 static __always_inline int ftrace_test_recursion_trylock(unsigned long ip,
 							 unsigned long parent_ip)
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index e8029aea67f1..fe316c021d73 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -49,14 +49,15 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 
 	ops = container_of(fops, struct klp_ops, fops);
 
+	/*
+	 * The ftrace_test_recursion_trylock() will disable preemption,
+	 * which is required for the variant of synchronize_rcu() that is
+	 * used to allow patching functions where RCU is not watching.
+	 * See klp_synchronize_transition() for more details.
+	 */
 	bit = ftrace_test_recursion_trylock(ip, parent_ip);
 	if (WARN_ON_ONCE(bit < 0))
 		return;
-	/*
-	 * A variant of synchronize_rcu() is used to allow patching functions
-	 * where RCU is not watching, see klp_synchronize_transition().
-	 */
-	preempt_disable_notrace();
 
 	func = list_first_or_null_rcu(&ops->func_stack, struct klp_func,
 				      stack_node);
@@ -120,7 +121,6 @@ static void notrace klp_ftrace_handler(unsigned long ip,
 	klp_arch_set_pc(fregs, (unsigned long)func->new_func);
 
 unlock:
-	preempt_enable_notrace();
 	ftrace_test_recursion_unlock(bit);
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2057ad363772..b4ed1a301232 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7198,16 +7198,15 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 	struct ftrace_ops *op;
 	int bit;
 
+	/*
+	 * The ftrace_test_and_set_recursion() will disable preemption,
+	 * which is required since some of the ops may be dynamically
+	 * allocated, they must be freed after a synchronize_rcu().
+	 */
 	bit = trace_test_and_set_recursion(ip, parent_ip, TRACE_LIST_START, TRACE_LIST_MAX);
 	if (bit < 0)
 		return;
 
-	/*
-	 * Some of the ops may be dynamically allocated,
-	 * they must be freed after a synchronize_rcu().
-	 */
-	preempt_disable_notrace();
-
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
 		/* Stub functions don't need to be called nor tested */
 		if (op->flags & FTRACE_OPS_FL_STUB)
@@ -7231,7 +7230,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		}
 	} while_for_each_ftrace_op(op);
 out:
-	preempt_enable_notrace();
 	trace_clear_recursion(bit);
 }
 
@@ -7279,12 +7277,9 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 	if (bit < 0)
 		return;
 
-	preempt_disable_notrace();
-
 	if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
 		op->func(ip, parent_ip, op, fregs);
 
-	preempt_enable_notrace();
 	trace_clear_recursion(bit);
 }
 NOKPROBE_SYMBOL(ftrace_ops_assist_func);
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 1f0e63f5d1f9..9f1bfbe105e8 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -186,7 +186,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 		return;
 
 	trace_ctx = tracing_gen_ctx();
-	preempt_disable_notrace();
 
 	cpu = smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
@@ -194,7 +193,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
 		trace_function(tr, ip, parent_ip, trace_ctx);
 
 	ftrace_test_recursion_unlock(bit);
-	preempt_enable_notrace();
 }
 
 #ifdef CONFIG_UNWINDER_ORC
@@ -298,8 +296,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
 	if (bit < 0)
 		return;
 
-	preempt_disable_notrace();
-
 	cpu = smp_processor_id();
 	data = per_cpu_ptr(tr->array_buffer.data, cpu);
 	if (atomic_read(&data->disabled))
@@ -324,7 +320,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip,
 
 out:
 	ftrace_test_recursion_unlock(bit);
-	preempt_enable_notrace();
 }
 
 static void
-- 
cgit v1.2.3


From e531e90b5ab0f7ce5ff298e165214c1aec6ed187 Mon Sep 17 00:00:00 2001
From: "Robin H. Johnson" <robbat2@gentoo.org>
Date: Mon, 30 Aug 2021 21:37:23 -0700
Subject: tracing: Increase PERF_MAX_TRACE_SIZE to handle Sentinel1 and docker
 together

Running endpoint security solutions like Sentinel1 that use perf-based
tracing heavily lead to this repeated dump complaining about dockerd.
The default value of 2048 is nowhere near not large enough.

Using the prior patch "tracing: show size of requested buffer", we get
"perf buffer not large enough, wanted 6644, have 6144", after repeated
up-sizing (I did 2/4/6/8K). With 8K, the problem doesn't occur at all,
so below is the trace for 6K.

I'm wondering if this value should be selectable at boot time, but this
is a good starting point.

```
------------[ cut here ]------------
perf buffer not large enough, wanted 6644, have 6144
WARNING: CPU: 1 PID: 4997 at kernel/trace/trace_event_perf.c:402 perf_trace_buf_alloc+0x8c/0xa0
Modules linked in: [..]
CPU: 1 PID: 4997 Comm: sh Tainted: G                T 5.13.13-x86_64-00039-gb3959163488e #63
Hardware name: LENOVO 20KH002JUS/20KH002JUS, BIOS N23ET66W (1.41 ) 09/02/2019
RIP: 0010:perf_trace_buf_alloc+0x8c/0xa0
Code: 80 3d 43 97 d0 01 00 74 07 31 c0 5b 5d 41 5c c3 ba 00 18 00 00 89 ee 48 c7 c7 00 82 7d 91 c6 05 25 97 d0 01 01 e8 22 ee bc 00 <0f> 0b 31 c0 eb db 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 55 89
RSP: 0018:ffffb922026b7d58 EFLAGS: 00010282
RAX: 0000000000000000 RBX: ffff9da5ee012000 RCX: 0000000000000027
RDX: ffff9da881657828 RSI: 0000000000000001 RDI: ffff9da881657820
RBP: 00000000000019f4 R08: 0000000000000000 R09: ffffb922026b7b80
R10: ffffb922026b7b78 R11: ffffffff91dda688 R12: 000000000000000f
R13: ffff9da5ee012108 R14: ffff9da8816570a0 R15: ffffb922026b7e30
FS:  00007f420db1a080(0000) GS:ffff9da881640000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000060 CR3: 00000002504a8006 CR4: 00000000003706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 kprobe_perf_func+0x11e/0x270
 ? do_execveat_common.isra.0+0x1/0x1c0
 ? do_execveat_common.isra.0+0x5/0x1c0
 kprobe_ftrace_handler+0x10e/0x1d0
 0xffffffffc03aa0c8
 ? do_execveat_common.isra.0+0x1/0x1c0
 do_execveat_common.isra.0+0x5/0x1c0
 __x64_sys_execve+0x33/0x40
 do_syscall_64+0x6b/0xc0
 ? do_syscall_64+0x11/0xc0
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f420dc1db37
Code: ff ff 76 e7 f7 d8 64 41 89 00 eb df 0f 1f 80 00 00 00 00 f7 d8 64 41 89 00 eb dc 0f 1f 84 00 00 00 00 00 b8 3b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 01 43 0f 00 f7 d8 64 89 01 48
RSP: 002b:00007ffd4e8b4e38 EFLAGS: 00000246 ORIG_RAX: 000000000000003b
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f420dc1db37
RDX: 0000564338d1e740 RSI: 0000564338d32d50 RDI: 0000564338d28f00
RBP: 0000564338d28f00 R08: 0000564338d32d50 R09: 0000000000000020
R10: 00000000000001b6 R11: 0000000000000246 R12: 0000564338d28f00
R13: 0000564338d32d50 R14: 0000564338d1e740 R15: 0000564338d28c60
---[ end trace 83ab3e8e16275e49 ]---
```

Link: https://lkml.kernel.org/r/20210831043723.13481-2-robbat2@gentoo.org

Signed-off-by: Robin H. Johnson <robbat2@gentoo.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3e475eeb5a99..50453b287615 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -671,7 +671,7 @@ struct trace_event_file {
 	}								\
 	early_initcall(trace_init_perf_perm_##name);
 
-#define PERF_MAX_TRACE_SIZE	2048
+#define PERF_MAX_TRACE_SIZE	8192
 
 #define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
 
-- 
cgit v1.2.3


From f941eadd8d6d4ee2f8c9aeab8e1da5e647533a7d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 Oct 2021 14:41:31 -0700
Subject: bpf: Avoid races in __bpf_prog_run() for 32bit arches

__bpf_prog_run() can run from non IRQ contexts, meaning
it could be re entered if interrupted.

This calls for the irq safe variant of u64_stats_update_{begin|end},
or risk a deadlock.

This patch is a nop on 64bit arches, fortunately.

syzbot report:

WARNING: inconsistent lock state
5.12.0-rc3-syzkaller #0 Not tainted
--------------------------------
inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage.
udevd/4013 [HC0[0]:SC0[0]:HE1:SE1] takes:
ff7c9dec (&(&pstats->syncp)->seq){+.?.}-{0:0}, at: sk_filter include/linux/filter.h:867 [inline]
ff7c9dec (&(&pstats->syncp)->seq){+.?.}-{0:0}, at: do_one_broadcast net/netlink/af_netlink.c:1468 [inline]
ff7c9dec (&(&pstats->syncp)->seq){+.?.}-{0:0}, at: netlink_broadcast_filtered+0x27c/0x4fc net/netlink/af_netlink.c:1520
{IN-SOFTIRQ-W} state was registered at:
  lock_acquire.part.0+0xf0/0x41c kernel/locking/lockdep.c:5510
  lock_acquire+0x6c/0x74 kernel/locking/lockdep.c:5483
  do_write_seqcount_begin_nested include/linux/seqlock.h:520 [inline]
  do_write_seqcount_begin include/linux/seqlock.h:545 [inline]
  u64_stats_update_begin include/linux/u64_stats_sync.h:129 [inline]
  bpf_prog_run_pin_on_cpu include/linux/filter.h:624 [inline]
  bpf_prog_run_clear_cb+0x1bc/0x270 include/linux/filter.h:755
  run_filter+0xa0/0x17c net/packet/af_packet.c:2031
  packet_rcv+0xc0/0x3e0 net/packet/af_packet.c:2104
  dev_queue_xmit_nit+0x2bc/0x39c net/core/dev.c:2387
  xmit_one net/core/dev.c:3588 [inline]
  dev_hard_start_xmit+0x94/0x518 net/core/dev.c:3609
  sch_direct_xmit+0x11c/0x1f0 net/sched/sch_generic.c:313
  qdisc_restart net/sched/sch_generic.c:376 [inline]
  __qdisc_run+0x194/0x7f8 net/sched/sch_generic.c:384
  qdisc_run include/net/pkt_sched.h:136 [inline]
  qdisc_run include/net/pkt_sched.h:128 [inline]
  __dev_xmit_skb net/core/dev.c:3795 [inline]
  __dev_queue_xmit+0x65c/0xf84 net/core/dev.c:4150
  dev_queue_xmit+0x14/0x18 net/core/dev.c:4215
  neigh_resolve_output net/core/neighbour.c:1491 [inline]
  neigh_resolve_output+0x170/0x228 net/core/neighbour.c:1471
  neigh_output include/net/neighbour.h:510 [inline]
  ip6_finish_output2+0x2e4/0x9fc net/ipv6/ip6_output.c:117
  __ip6_finish_output net/ipv6/ip6_output.c:182 [inline]
  __ip6_finish_output+0x164/0x3f8 net/ipv6/ip6_output.c:161
  ip6_finish_output+0x2c/0xb0 net/ipv6/ip6_output.c:192
  NF_HOOK_COND include/linux/netfilter.h:290 [inline]
  ip6_output+0x74/0x294 net/ipv6/ip6_output.c:215
  dst_output include/net/dst.h:448 [inline]
  NF_HOOK include/linux/netfilter.h:301 [inline]
  NF_HOOK include/linux/netfilter.h:295 [inline]
  mld_sendpack+0x2a8/0x7e4 net/ipv6/mcast.c:1679
  mld_send_cr net/ipv6/mcast.c:1975 [inline]
  mld_ifc_timer_expire+0x1e8/0x494 net/ipv6/mcast.c:2474
  call_timer_fn+0xd0/0x570 kernel/time/timer.c:1431
  expire_timers kernel/time/timer.c:1476 [inline]
  __run_timers kernel/time/timer.c:1745 [inline]
  run_timer_softirq+0x2e4/0x384 kernel/time/timer.c:1758
  __do_softirq+0x204/0x7ac kernel/softirq.c:345
  do_softirq_own_stack include/asm-generic/softirq_stack.h:10 [inline]
  invoke_softirq kernel/softirq.c:228 [inline]
  __irq_exit_rcu+0x1d8/0x200 kernel/softirq.c:422
  irq_exit+0x10/0x3c kernel/softirq.c:446
  __handle_domain_irq+0xb4/0x120 kernel/irq/irqdesc.c:692
  handle_domain_irq include/linux/irqdesc.h:176 [inline]
  gic_handle_irq+0x84/0xac drivers/irqchip/irq-gic.c:370
  __irq_svc+0x5c/0x94 arch/arm/kernel/entry-armv.S:205
  debug_smp_processor_id+0x0/0x24 lib/smp_processor_id.c:53
  rcu_read_lock_held_common kernel/rcu/update.c:108 [inline]
  rcu_read_lock_sched_held+0x24/0x7c kernel/rcu/update.c:123
  trace_lock_acquire+0x24c/0x278 include/trace/events/lock.h:13
  lock_acquire+0x3c/0x74 kernel/locking/lockdep.c:5481
  rcu_lock_acquire include/linux/rcupdate.h:267 [inline]
  rcu_read_lock include/linux/rcupdate.h:656 [inline]
  avc_has_perm_noaudit+0x6c/0x260 security/selinux/avc.c:1150
  selinux_inode_permission+0x140/0x220 security/selinux/hooks.c:3141
  security_inode_permission+0x44/0x60 security/security.c:1268
  inode_permission.part.0+0x5c/0x13c fs/namei.c:521
  inode_permission fs/namei.c:494 [inline]
  may_lookup fs/namei.c:1652 [inline]
  link_path_walk.part.0+0xd4/0x38c fs/namei.c:2208
  link_path_walk fs/namei.c:2189 [inline]
  path_lookupat+0x3c/0x1b8 fs/namei.c:2419
  filename_lookup+0xa8/0x1a4 fs/namei.c:2453
  user_path_at_empty+0x74/0x90 fs/namei.c:2733
  do_readlinkat+0x5c/0x12c fs/stat.c:417
  __do_sys_readlink fs/stat.c:450 [inline]
  sys_readlink+0x24/0x28 fs/stat.c:447
  ret_fast_syscall+0x0/0x2c arch/arm/mm/proc-v7.S:64
  0x7eaa4974
irq event stamp: 298277
hardirqs last  enabled at (298277): [<802000d0>] no_work_pending+0x4/0x34
hardirqs last disabled at (298276): [<8020c9b8>] do_work_pending+0x9c/0x648 arch/arm/kernel/signal.c:676
softirqs last  enabled at (298216): [<8020167c>] __do_softirq+0x584/0x7ac kernel/softirq.c:372
softirqs last disabled at (298201): [<8024dff4>] do_softirq_own_stack include/asm-generic/softirq_stack.h:10 [inline]
softirqs last disabled at (298201): [<8024dff4>] invoke_softirq kernel/softirq.c:228 [inline]
softirqs last disabled at (298201): [<8024dff4>] __irq_exit_rcu+0x1d8/0x200 kernel/softirq.c:422

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&pstats->syncp)->seq);
  <Interrupt>
    lock(&(&pstats->syncp)->seq);

 *** DEADLOCK ***

1 lock held by udevd/4013:
 #0: 82b09c5c (rcu_read_lock){....}-{1:2}, at: sk_filter_trim_cap+0x54/0x434 net/core/filter.c:139

stack backtrace:
CPU: 1 PID: 4013 Comm: udevd Not tainted 5.12.0-rc3-syzkaller #0
Hardware name: ARM-Versatile Express
Backtrace:
[<81802550>] (dump_backtrace) from [<818027c4>] (show_stack+0x18/0x1c arch/arm/kernel/traps.c:252)
 r7:00000080 r6:600d0093 r5:00000000 r4:82b58344
[<818027ac>] (show_stack) from [<81809e98>] (__dump_stack lib/dump_stack.c:79 [inline])
[<818027ac>] (show_stack) from [<81809e98>] (dump_stack+0xb8/0xe8 lib/dump_stack.c:120)
[<81809de0>] (dump_stack) from [<81804a00>] (print_usage_bug.part.0+0x228/0x230 kernel/locking/lockdep.c:3806)
 r7:86bcb768 r6:81a0326c r5:830f96a8 r4:86bcb0c0
[<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (print_usage_bug kernel/locking/lockdep.c:3776 [inline])
[<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (valid_state kernel/locking/lockdep.c:3818 [inline])
[<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (mark_lock_irq kernel/locking/lockdep.c:4021 [inline])
[<818047d8>] (print_usage_bug.part.0) from [<802bb1b8>] (mark_lock.part.0+0xc34/0x136c kernel/locking/lockdep.c:4478)
 r10:83278fe8 r9:82c6d748 r8:00000000 r7:82c6d2d4 r6:00000004 r5:86bcb768
 r4:00000006
[<802ba584>] (mark_lock.part.0) from [<802bc644>] (mark_lock kernel/locking/lockdep.c:4442 [inline])
[<802ba584>] (mark_lock.part.0) from [<802bc644>] (mark_usage kernel/locking/lockdep.c:4391 [inline])
[<802ba584>] (mark_lock.part.0) from [<802bc644>] (__lock_acquire+0x9bc/0x3318 kernel/locking/lockdep.c:4854)
 r10:86bcb768 r9:86bcb0c0 r8:00000001 r7:00040000 r6:0000075a r5:830f96a8
 r4:00000000
[<802bbc88>] (__lock_acquire) from [<802bfb90>] (lock_acquire.part.0+0xf0/0x41c kernel/locking/lockdep.c:5510)
 r10:00000000 r9:600d0013 r8:00000000 r7:00000000 r6:828a2680 r5:828a2680
 r4:861e5bc8
[<802bfaa0>] (lock_acquire.part.0) from [<802bff28>] (lock_acquire+0x6c/0x74 kernel/locking/lockdep.c:5483)
 r10:8146137c r9:00000000 r8:00000001 r7:00000000 r6:00000000 r5:00000000
 r4:ff7c9dec
[<802bfebc>] (lock_acquire) from [<81381eb4>] (do_write_seqcount_begin_nested include/linux/seqlock.h:520 [inline])
[<802bfebc>] (lock_acquire) from [<81381eb4>] (do_write_seqcount_begin include/linux/seqlock.h:545 [inline])
[<802bfebc>] (lock_acquire) from [<81381eb4>] (u64_stats_update_begin include/linux/u64_stats_sync.h:129 [inline])
[<802bfebc>] (lock_acquire) from [<81381eb4>] (__bpf_prog_run_save_cb include/linux/filter.h:727 [inline])
[<802bfebc>] (lock_acquire) from [<81381eb4>] (bpf_prog_run_save_cb include/linux/filter.h:741 [inline])
[<802bfebc>] (lock_acquire) from [<81381eb4>] (sk_filter_trim_cap+0x26c/0x434 net/core/filter.c:149)
 r10:a4095dd0 r9:ff7c9dd0 r8:e44be000 r7:8146137c r6:00000001 r5:8611ba80
 r4:00000000
[<81381c48>] (sk_filter_trim_cap) from [<8146137c>] (sk_filter include/linux/filter.h:867 [inline])
[<81381c48>] (sk_filter_trim_cap) from [<8146137c>] (do_one_broadcast net/netlink/af_netlink.c:1468 [inline])
[<81381c48>] (sk_filter_trim_cap) from [<8146137c>] (netlink_broadcast_filtered+0x27c/0x4fc net/netlink/af_netlink.c:1520)
 r10:00000001 r9:833d6b1c r8:00000000 r7:8572f864 r6:8611ba80 r5:8698d800
 r4:8572f800
[<81461100>] (netlink_broadcast_filtered) from [<81463e60>] (netlink_broadcast net/netlink/af_netlink.c:1544 [inline])
[<81461100>] (netlink_broadcast_filtered) from [<81463e60>] (netlink_sendmsg+0x3d0/0x478 net/netlink/af_netlink.c:1925)
 r10:00000000 r9:00000002 r8:8698d800 r7:000000b7 r6:8611b900 r5:861e5f50
 r4:86aa3000
[<81463a90>] (netlink_sendmsg) from [<81321f54>] (sock_sendmsg_nosec net/socket.c:654 [inline])
[<81463a90>] (netlink_sendmsg) from [<81321f54>] (sock_sendmsg+0x3c/0x4c net/socket.c:674)
 r10:00000000 r9:861e5dd4 r8:00000000 r7:86570000 r6:00000000 r5:86570000
 r4:861e5f50
[<81321f18>] (sock_sendmsg) from [<813234d0>] (____sys_sendmsg+0x230/0x29c net/socket.c:2350)
 r5:00000040 r4:861e5f50
[<813232a0>] (____sys_sendmsg) from [<8132549c>] (___sys_sendmsg+0xac/0xe4 net/socket.c:2404)
 r10:00000128 r9:861e4000 r8:00000000 r7:00000000 r6:86570000 r5:861e5f50
 r4:00000000
[<813253f0>] (___sys_sendmsg) from [<81325684>] (__sys_sendmsg net/socket.c:2433 [inline])
[<813253f0>] (___sys_sendmsg) from [<81325684>] (__do_sys_sendmsg net/socket.c:2442 [inline])
[<813253f0>] (___sys_sendmsg) from [<81325684>] (sys_sendmsg+0x58/0xa0 net/socket.c:2440)
 r8:80200224 r7:00000128 r6:00000000 r5:7eaa541c r4:86570000
[<8132562c>] (sys_sendmsg) from [<80200060>] (ret_fast_syscall+0x0/0x2c arch/arm/mm/proc-v7.S:64)
Exception stack(0x861e5fa8 to 0x861e5ff0)
5fa0:                   00000000 00000000 0000000c 7eaa541c 00000000 00000000
5fc0: 00000000 00000000 76fbf840 00000128 00000000 0000008f 7eaa541c 000563f8
5fe0: 00056110 7eaa53e0 00036cec 76c9bf44
 r6:76fbf840 r5:00000000 r4:00000000

Fixes: 492ecee892c2 ("bpf: enable program stats")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211026214133.3114279-2-eric.dumazet@gmail.com
---
 include/linux/filter.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 47f80adbe744..2fffe9cc50f9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -612,13 +612,14 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 	if (static_branch_unlikely(&bpf_stats_enabled_key)) {
 		struct bpf_prog_stats *stats;
 		u64 start = sched_clock();
+		unsigned long flags;
 
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
 		stats = this_cpu_ptr(prog->stats);
-		u64_stats_update_begin(&stats->syncp);
+		flags = u64_stats_update_begin_irqsave(&stats->syncp);
 		stats->cnt++;
 		stats->nsecs += sched_clock() - start;
-		u64_stats_update_end(&stats->syncp);
+		u64_stats_update_end_irqrestore(&stats->syncp, flags);
 	} else {
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
 	}
-- 
cgit v1.2.3


From 61a0abaee2092eee69e44fe60336aa2f5b578938 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 Oct 2021 14:41:33 -0700
Subject: bpf: Use u64_stats_t in struct bpf_prog_stats

Commit 316580b69d0a ("u64_stats: provide u64_stats_t type")
fixed possible load/store tearing on 64bit arches.

For instance the following C code

stats->nsecs += sched_clock() - start;

Could be rightfully implemented like this by a compiler,
confusing concurrent readers a lot:

stats->nsecs += sched_clock();
// arbitrary delay
stats->nsecs -= start;

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211026214133.3114279-4-eric.dumazet@gmail.com
---
 include/linux/filter.h  | 10 +++++-----
 kernel/bpf/syscall.c    | 18 ++++++++++++------
 kernel/bpf/trampoline.c |  6 +++---
 3 files changed, 20 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 2fffe9cc50f9..9782e3245852 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -553,9 +553,9 @@ struct bpf_binary_header {
 };
 
 struct bpf_prog_stats {
-	u64 cnt;
-	u64 nsecs;
-	u64 misses;
+	u64_stats_t cnt;
+	u64_stats_t nsecs;
+	u64_stats_t misses;
 	struct u64_stats_sync syncp;
 } __aligned(2 * sizeof(u64));
 
@@ -617,8 +617,8 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
 		stats = this_cpu_ptr(prog->stats);
 		flags = u64_stats_update_begin_irqsave(&stats->syncp);
-		stats->cnt++;
-		stats->nsecs += sched_clock() - start;
+		u64_stats_inc(&stats->cnt);
+		u64_stats_add(&stats->nsecs, sched_clock() - start);
 		u64_stats_update_end_irqrestore(&stats->syncp, flags);
 	} else {
 		ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5beb321b3b3b..3e1c024ce3ed 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1804,8 +1804,14 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+struct bpf_prog_kstats {
+	u64 nsecs;
+	u64 cnt;
+	u64 misses;
+};
+
 static void bpf_prog_get_stats(const struct bpf_prog *prog,
-			       struct bpf_prog_stats *stats)
+			       struct bpf_prog_kstats *stats)
 {
 	u64 nsecs = 0, cnt = 0, misses = 0;
 	int cpu;
@@ -1818,9 +1824,9 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
 		st = per_cpu_ptr(prog->stats, cpu);
 		do {
 			start = u64_stats_fetch_begin_irq(&st->syncp);
-			tnsecs = st->nsecs;
-			tcnt = st->cnt;
-			tmisses = st->misses;
+			tnsecs = u64_stats_read(&st->nsecs);
+			tcnt = u64_stats_read(&st->cnt);
+			tmisses = u64_stats_read(&st->misses);
 		} while (u64_stats_fetch_retry_irq(&st->syncp, start));
 		nsecs += tnsecs;
 		cnt += tcnt;
@@ -1836,7 +1842,7 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_prog *prog = filp->private_data;
 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
-	struct bpf_prog_stats stats;
+	struct bpf_prog_kstats stats;
 
 	bpf_prog_get_stats(prog, &stats);
 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
@@ -3577,7 +3583,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
 	struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
 	struct bpf_prog_info info;
 	u32 info_len = attr->info.info_len;
-	struct bpf_prog_stats stats;
+	struct bpf_prog_kstats stats;
 	char __user *uinsns;
 	u32 ulen;
 	int err;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index e5963de368ed..e98de5e73ba5 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -545,7 +545,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
 
 	stats = this_cpu_ptr(prog->stats);
 	u64_stats_update_begin(&stats->syncp);
-	stats->misses++;
+	u64_stats_inc(&stats->misses);
 	u64_stats_update_end(&stats->syncp);
 }
 
@@ -590,8 +590,8 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
 
 		stats = this_cpu_ptr(prog->stats);
 		flags = u64_stats_update_begin_irqsave(&stats->syncp);
-		stats->cnt++;
-		stats->nsecs += sched_clock() - start;
+		u64_stats_inc(&stats->cnt);
+		u64_stats_add(&stats->nsecs, sched_clock() - start);
 		u64_stats_update_end_irqrestore(&stats->syncp, flags);
 	}
 }
-- 
cgit v1.2.3


From 259714100d98b50bf04d36a21bf50ca8b829fc11 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Mon, 25 Oct 2021 15:01:53 +0800
Subject: PM / wakeirq: support enabling wake-up irq after runtime_suspend
 called

When the dedicated wake IRQ is level trigger, and it uses the
device's low-power status as the wakeup source, that means if the
device is not in low-power state, the wake IRQ will be triggered
if enabled; For this case, need enable the wake IRQ after running
the device's ->runtime_suspend() which make it enter low-power state.

e.g.
Assume the wake IRQ is a low level trigger type, and the wakeup
signal comes from the low-power status of the device.
The wakeup signal is low level at running time (0), and becomes
high level when the device enters low-power state (runtime_suspend
(1) is called), a wakeup event at (2) make the device exit low-power
state, then the wakeup signal also becomes low level.

                ------------------
               |           ^     ^|
----------------           |     | --------------
 |<---(0)--->|<--(1)--|   (3)   (2)    (4)

if enable the wake IRQ before running runtime_suspend during (0),
a wake IRQ will arise, it causes resume immediately;
it works if enable wake IRQ ( e.g. at (3) or (4)) after running
->runtime_suspend().

This patch introduces a new status WAKE_IRQ_DEDICATED_REVERSE to
optionally support enabling wake IRQ after running ->runtime_suspend().

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/power.h   |   7 ++-
 drivers/base/power/runtime.c |   6 ++-
 drivers/base/power/wakeirq.c | 101 ++++++++++++++++++++++++++++++++++---------
 include/linux/pm_wakeirq.h   |   9 +++-
 4 files changed, 96 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 54292cdd7808..0eb7f02b3ad5 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -25,8 +25,10 @@ extern u64 pm_runtime_active_time(struct device *dev);
 
 #define WAKE_IRQ_DEDICATED_ALLOCATED	BIT(0)
 #define WAKE_IRQ_DEDICATED_MANAGED	BIT(1)
+#define WAKE_IRQ_DEDICATED_REVERSE	BIT(2)
 #define WAKE_IRQ_DEDICATED_MASK		(WAKE_IRQ_DEDICATED_ALLOCATED | \
-					 WAKE_IRQ_DEDICATED_MANAGED)
+					 WAKE_IRQ_DEDICATED_MANAGED | \
+					 WAKE_IRQ_DEDICATED_REVERSE)
 
 struct wake_irq {
 	struct device *dev;
@@ -39,7 +41,8 @@ extern void dev_pm_arm_wake_irq(struct wake_irq *wirq);
 extern void dev_pm_disarm_wake_irq(struct wake_irq *wirq);
 extern void dev_pm_enable_wake_irq_check(struct device *dev,
 					 bool can_change_status);
-extern void dev_pm_disable_wake_irq_check(struct device *dev);
+extern void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable);
+extern void dev_pm_enable_wake_irq_complete(struct device *dev);
 
 #ifdef CONFIG_PM_SLEEP
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index ec94049442b9..d504cd4ab3cb 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -645,6 +645,8 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 	if (retval)
 		goto fail;
 
+	dev_pm_enable_wake_irq_complete(dev);
+
  no_callback:
 	__update_runtime_status(dev, RPM_SUSPENDED);
 	pm_runtime_deactivate_timer(dev);
@@ -690,7 +692,7 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 	return retval;
 
  fail:
-	dev_pm_disable_wake_irq_check(dev);
+	dev_pm_disable_wake_irq_check(dev, true);
 	__update_runtime_status(dev, RPM_ACTIVE);
 	dev->power.deferred_resume = false;
 	wake_up_all(&dev->power.wait_queue);
@@ -873,7 +875,7 @@ static int rpm_resume(struct device *dev, int rpmflags)
 
 	callback = RPM_GET_CALLBACK(dev, runtime_resume);
 
-	dev_pm_disable_wake_irq_check(dev);
+	dev_pm_disable_wake_irq_check(dev, false);
 	retval = rpm_callback(callback, dev);
 	if (retval) {
 		__update_runtime_status(dev, RPM_SUSPENDED);
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index b91a3a9bf9f6..0004db4a9d3b 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -142,24 +142,7 @@ static irqreturn_t handle_threaded_wake_irq(int irq, void *_wirq)
 	return IRQ_HANDLED;
 }
 
-/**
- * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt
- * @dev: Device entry
- * @irq: Device wake-up interrupt
- *
- * Unless your hardware has separate wake-up interrupts in addition
- * to the device IO interrupts, you don't need this.
- *
- * Sets up a threaded interrupt handler for a device that has
- * a dedicated wake-up interrupt in addition to the device IO
- * interrupt.
- *
- * The interrupt starts disabled, and needs to be managed for
- * the device by the bus code or the device driver using
- * dev_pm_enable_wake_irq() and dev_pm_disable_wake_irq()
- * functions.
- */
-int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
+static int __dev_pm_set_dedicated_wake_irq(struct device *dev, int irq, unsigned int flag)
 {
 	struct wake_irq *wirq;
 	int err;
@@ -197,7 +180,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 	if (err)
 		goto err_free_irq;
 
-	wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED;
+	wirq->status = WAKE_IRQ_DEDICATED_ALLOCATED | flag;
 
 	return err;
 
@@ -210,8 +193,57 @@ err_free:
 
 	return err;
 }
+
+
+/**
+ * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt
+ * @dev: Device entry
+ * @irq: Device wake-up interrupt
+ *
+ * Unless your hardware has separate wake-up interrupts in addition
+ * to the device IO interrupts, you don't need this.
+ *
+ * Sets up a threaded interrupt handler for a device that has
+ * a dedicated wake-up interrupt in addition to the device IO
+ * interrupt.
+ *
+ * The interrupt starts disabled, and needs to be managed for
+ * the device by the bus code or the device driver using
+ * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
+ * functions.
+ */
+int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
+{
+	return __dev_pm_set_dedicated_wake_irq(dev, irq, 0);
+}
 EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq);
 
+/**
+ * dev_pm_set_dedicated_wake_irq_reverse - Request a dedicated wake-up interrupt
+ *                                         with reverse enable ordering
+ * @dev: Device entry
+ * @irq: Device wake-up interrupt
+ *
+ * Unless your hardware has separate wake-up interrupts in addition
+ * to the device IO interrupts, you don't need this.
+ *
+ * Sets up a threaded interrupt handler for a device that has a dedicated
+ * wake-up interrupt in addition to the device IO interrupt. It sets
+ * the status of WAKE_IRQ_DEDICATED_REVERSE to tell rpm_suspend()
+ * to enable dedicated wake-up interrupt after running the runtime suspend
+ * callback for @dev.
+ *
+ * The interrupt starts disabled, and needs to be managed for
+ * the device by the bus code or the device driver using
+ * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
+ * functions.
+ */
+int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
+{
+	return __dev_pm_set_dedicated_wake_irq(dev, irq, WAKE_IRQ_DEDICATED_REVERSE);
+}
+EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse);
+
 /**
  * dev_pm_enable_wake_irq - Enable device wake-up interrupt
  * @dev: Device
@@ -282,27 +314,54 @@ void dev_pm_enable_wake_irq_check(struct device *dev,
 	return;
 
 enable:
-	enable_irq(wirq->irq);
+	if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE))
+		enable_irq(wirq->irq);
 }
 
 /**
  * dev_pm_disable_wake_irq_check - Checks and disables wake-up interrupt
  * @dev: Device
+ * @cond_disable: if set, also check WAKE_IRQ_DEDICATED_REVERSE
  *
  * Disables wake-up interrupt conditionally based on status.
  * Should be only called from rpm_suspend() and rpm_resume() path.
  */
-void dev_pm_disable_wake_irq_check(struct device *dev)
+void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable)
 {
 	struct wake_irq *wirq = dev->power.wakeirq;
 
 	if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
 		return;
 
+	if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE))
+		return;
+
 	if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED)
 		disable_irq_nosync(wirq->irq);
 }
 
+/**
+ * dev_pm_enable_wake_irq_complete - enable wake IRQ not enabled before
+ * @dev: Device using the wake IRQ
+ *
+ * Enable wake IRQ conditionally based on status, mainly used if want to
+ * enable wake IRQ after running ->runtime_suspend() which depends on
+ * WAKE_IRQ_DEDICATED_REVERSE.
+ *
+ * Should be only called from rpm_suspend() path.
+ */
+void dev_pm_enable_wake_irq_complete(struct device *dev)
+{
+	struct wake_irq *wirq = dev->power.wakeirq;
+
+	if (!wirq || !(wirq->status & WAKE_IRQ_DEDICATED_MASK))
+		return;
+
+	if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED &&
+	    wirq->status & WAKE_IRQ_DEDICATED_REVERSE)
+		enable_irq(wirq->irq);
+}
+
 /**
  * dev_pm_arm_wake_irq - Arm device wake-up
  * @wirq: Device wake-up interrupt
diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
index cd5b62db9084..e63a63aa47a3 100644
--- a/include/linux/pm_wakeirq.h
+++ b/include/linux/pm_wakeirq.h
@@ -17,8 +17,8 @@
 #ifdef CONFIG_PM
 
 extern int dev_pm_set_wake_irq(struct device *dev, int irq);
-extern int dev_pm_set_dedicated_wake_irq(struct device *dev,
-					 int irq);
+extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq);
+extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq);
 extern void dev_pm_clear_wake_irq(struct device *dev);
 extern void dev_pm_enable_wake_irq(struct device *dev);
 extern void dev_pm_disable_wake_irq(struct device *dev);
@@ -35,6 +35,11 @@ static inline int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 	return 0;
 }
 
+static inline int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
+{
+	return 0;
+}
+
 static inline void dev_pm_clear_wake_irq(struct device *dev)
 {
 }
-- 
cgit v1.2.3


From 570b1cac477643cbf01a45fa5d018430a1fddbce Mon Sep 17 00:00:00 2001
From: Xie Yongji <xieyongji@bytedance.com>
Date: Tue, 26 Oct 2021 22:40:12 +0800
Subject: block: Add a helper to validate the block size

There are some duplicated codes to validate the block
size in block drivers. This limitation actually comes
from block layer, so this patch tries to add a new block
layer helper for that.

Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
Link: https://lore.kernel.org/r/20211026144015.188-2-xieyongji@bytedance.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6d95a4b36cfa..d2d627e2c782 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -44,6 +44,14 @@ struct blk_crypto_profile;
  */
 #define BLKCG_MAX_POLS		6
 
+static inline int blk_validate_block_size(unsigned int bsize)
+{
+	if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+		return -EINVAL;
+
+	return 0;
+}
+
 static inline bool blk_op_is_passthrough(unsigned int op)
 {
 	op &= REQ_OP_MASK;
-- 
cgit v1.2.3


From 9dfc685e0262d4c5e44e13302f89841fa75173ca Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 Oct 2021 14:30:14 -0700
Subject: inet: remove races in inet{6}_getname()

syzbot reported data-races in inet_getname() multiple times,
it is time we fix this instead of pretending applications
should not trigger them.

getsockname() and getpeername() are not really considered fast path.

v2: added the missing BPF_CGROUP_RUN_SA_PROG() declaration
    needed when CONFIG_CGROUP_BPF=n, as reported by
    kernel test robot <lkp@intel.com>

syzbot typical report:

BUG: KCSAN: data-race in __inet_hash_connect / inet_getname

write to 0xffff888136d66cf8 of 2 bytes by task 14374 on cpu 1:
 __inet_hash_connect+0x7ec/0x950 net/ipv4/inet_hashtables.c:831
 inet_hash_connect+0x85/0x90 net/ipv4/inet_hashtables.c:853
 tcp_v4_connect+0x782/0xbb0 net/ipv4/tcp_ipv4.c:275
 __inet_stream_connect+0x156/0x6e0 net/ipv4/af_inet.c:664
 inet_stream_connect+0x44/0x70 net/ipv4/af_inet.c:728
 __sys_connect_file net/socket.c:1896 [inline]
 __sys_connect+0x254/0x290 net/socket.c:1913
 __do_sys_connect net/socket.c:1923 [inline]
 __se_sys_connect net/socket.c:1920 [inline]
 __x64_sys_connect+0x3d/0x50 net/socket.c:1920
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xa0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

read to 0xffff888136d66cf8 of 2 bytes by task 14408 on cpu 0:
 inet_getname+0x11f/0x170 net/ipv4/af_inet.c:790
 __sys_getsockname+0x11d/0x1b0 net/socket.c:1946
 __do_sys_getsockname net/socket.c:1961 [inline]
 __se_sys_getsockname net/socket.c:1958 [inline]
 __x64_sys_getsockname+0x3e/0x50 net/socket.c:1958
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x44/0xa0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x0000 -> 0xdee0

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 14408 Comm: syz-executor.3 Not tainted 5.15.0-rc3-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20211026213014.3026708-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/bpf-cgroup.h |  1 +
 net/ipv4/af_inet.c         | 16 +++++++++-------
 net/ipv6/af_inet6.c        | 21 +++++++++++----------
 3 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2746fd804216..3536ab432b30 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -517,6 +517,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 
 #define cgroup_bpf_enabled(atype) (0)
 #define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, atype, t_ctx) ({ 0; })
+#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, atype) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8eb428387bac..31d5cefa9979 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -769,26 +769,28 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 	DECLARE_SOCKADDR(struct sockaddr_in *, sin, uaddr);
 
 	sin->sin_family = AF_INET;
+	lock_sock(sk);
 	if (peer) {
 		if (!inet->inet_dport ||
 		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
-		     peer == 1))
+		     peer == 1)) {
+			release_sock(sk);
 			return -ENOTCONN;
+		}
 		sin->sin_port = inet->inet_dport;
 		sin->sin_addr.s_addr = inet->inet_daddr;
-		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-					    CGROUP_INET4_GETPEERNAME,
-					    NULL);
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+				       CGROUP_INET4_GETPEERNAME);
 	} else {
 		__be32 addr = inet->inet_rcv_saddr;
 		if (!addr)
 			addr = inet->inet_saddr;
 		sin->sin_port = inet->inet_sport;
 		sin->sin_addr.s_addr = addr;
-		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-					    CGROUP_INET4_GETSOCKNAME,
-					    NULL);
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+				       CGROUP_INET4_GETSOCKNAME);
 	}
+	release_sock(sk);
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 	return sizeof(*sin);
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b5878bb8e419..0c4da163535a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -521,31 +521,32 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 	sin->sin6_family = AF_INET6;
 	sin->sin6_flowinfo = 0;
 	sin->sin6_scope_id = 0;
+	lock_sock(sk);
 	if (peer) {
-		if (!inet->inet_dport)
-			return -ENOTCONN;
-		if (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
-		    peer == 1)
+		if (!inet->inet_dport ||
+		    (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) &&
+		    peer == 1)) {
+			release_sock(sk);
 			return -ENOTCONN;
+		}
 		sin->sin6_port = inet->inet_dport;
 		sin->sin6_addr = sk->sk_v6_daddr;
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
-		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-					    CGROUP_INET6_GETPEERNAME,
-					    NULL);
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+				       CGROUP_INET6_GETPEERNAME);
 	} else {
 		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
 			sin->sin6_addr = np->saddr;
 		else
 			sin->sin6_addr = sk->sk_v6_rcv_saddr;
 		sin->sin6_port = inet->inet_sport;
-		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
-					    CGROUP_INET6_GETSOCKNAME,
-					    NULL);
+		BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin,
+				       CGROUP_INET6_GETSOCKNAME);
 	}
 	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
 						 sk->sk_bound_dev_if);
+	release_sock(sk);
 	return sizeof(*sin);
 }
 EXPORT_SYMBOL(inet6_getname);
-- 
cgit v1.2.3


From d4dccf353db80e209f262e3973c834e6e48ba9a9 Mon Sep 17 00:00:00 2001
From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Date: Mon, 25 Oct 2021 08:21:09 -0400
Subject: Drivers: hv: vmbus: Mark vmbus ring buffer visible to host in
 Isolation VM

Mark vmbus ring buffer visible with set_memory_decrypted() when
establish gpadl handle.

Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
Link: https://lore.kernel.org/r/20211025122116.264793-5-ltykernel@gmail.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/channel.c            | 53 +++++++++++++++++++++++++++++------------
 drivers/net/hyperv/hyperv_net.h |  5 ++--
 drivers/net/hyperv/netvsc.c     | 15 ++++++------
 drivers/uio/uio_hv_generic.c    | 18 +++++++-------
 include/linux/hyperv.h          | 12 +++++++---
 5 files changed, 65 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index f3761c73b074..b37ff4a39224 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -17,6 +17,7 @@
 #include <linux/hyperv.h>
 #include <linux/uio.h>
 #include <linux/interrupt.h>
+#include <linux/set_memory.h>
 #include <asm/page.h>
 #include <asm/mshyperv.h>
 
@@ -456,7 +457,7 @@ nomem:
 static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
 				   enum hv_gpadl_type type, void *kbuffer,
 				   u32 size, u32 send_offset,
-				   u32 *gpadl_handle)
+				   struct vmbus_gpadl *gpadl)
 {
 	struct vmbus_channel_gpadl_header *gpadlmsg;
 	struct vmbus_channel_gpadl_body *gpadl_body;
@@ -474,6 +475,15 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
 	if (ret)
 		return ret;
 
+	ret = set_memory_decrypted((unsigned long)kbuffer,
+				   PFN_UP(size));
+	if (ret) {
+		dev_warn(&channel->device_obj->device,
+			 "Failed to set host visibility for new GPADL %d.\n",
+			 ret);
+		return ret;
+	}
+
 	init_completion(&msginfo->waitevent);
 	msginfo->waiting_channel = channel;
 
@@ -537,7 +547,10 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
 	}
 
 	/* At this point, we received the gpadl created msg */
-	*gpadl_handle = gpadlmsg->gpadl;
+	gpadl->gpadl_handle = gpadlmsg->gpadl;
+	gpadl->buffer = kbuffer;
+	gpadl->size = size;
+
 
 cleanup:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
@@ -549,6 +562,11 @@ cleanup:
 	}
 
 	kfree(msginfo);
+
+	if (ret)
+		set_memory_encrypted((unsigned long)kbuffer,
+				     PFN_UP(size));
+
 	return ret;
 }
 
@@ -561,10 +579,10 @@ cleanup:
  * @gpadl_handle: some funky thing
  */
 int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer,
-			  u32 size, u32 *gpadl_handle)
+			  u32 size, struct vmbus_gpadl *gpadl)
 {
 	return __vmbus_establish_gpadl(channel, HV_GPADL_BUFFER, kbuffer, size,
-				       0U, gpadl_handle);
+				       0U, gpadl);
 }
 EXPORT_SYMBOL_GPL(vmbus_establish_gpadl);
 
@@ -675,7 +693,7 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
 		goto error_clean_ring;
 
 	/* Establish the gpadl for the ring buffer */
-	newchannel->ringbuffer_gpadlhandle = 0;
+	newchannel->ringbuffer_gpadlhandle.gpadl_handle = 0;
 
 	err = __vmbus_establish_gpadl(newchannel, HV_GPADL_RING,
 				      page_address(newchannel->ringbuffer_page),
@@ -701,7 +719,8 @@ static int __vmbus_open(struct vmbus_channel *newchannel,
 	open_msg->header.msgtype = CHANNELMSG_OPENCHANNEL;
 	open_msg->openid = newchannel->offermsg.child_relid;
 	open_msg->child_relid = newchannel->offermsg.child_relid;
-	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
+	open_msg->ringbuffer_gpadlhandle
+		= newchannel->ringbuffer_gpadlhandle.gpadl_handle;
 	/*
 	 * The unit of ->downstream_ringbuffer_pageoffset is HV_HYP_PAGE and
 	 * the unit of ->ringbuffer_send_offset (i.e. send_pages) is PAGE, so
@@ -759,8 +778,7 @@ error_clean_msglist:
 error_free_info:
 	kfree(open_info);
 error_free_gpadl:
-	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-	newchannel->ringbuffer_gpadlhandle = 0;
+	vmbus_teardown_gpadl(newchannel, &newchannel->ringbuffer_gpadlhandle);
 error_clean_ring:
 	hv_ringbuffer_cleanup(&newchannel->outbound);
 	hv_ringbuffer_cleanup(&newchannel->inbound);
@@ -806,7 +824,7 @@ EXPORT_SYMBOL_GPL(vmbus_open);
 /*
  * vmbus_teardown_gpadl -Teardown the specified GPADL handle
  */
-int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
+int vmbus_teardown_gpadl(struct vmbus_channel *channel, struct vmbus_gpadl *gpadl)
 {
 	struct vmbus_channel_gpadl_teardown *msg;
 	struct vmbus_channel_msginfo *info;
@@ -825,7 +843,7 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 
 	msg->header.msgtype = CHANNELMSG_GPADL_TEARDOWN;
 	msg->child_relid = channel->offermsg.child_relid;
-	msg->gpadl = gpadl_handle;
+	msg->gpadl = gpadl->gpadl_handle;
 
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_add_tail(&info->msglistentry,
@@ -845,6 +863,8 @@ int vmbus_teardown_gpadl(struct vmbus_channel *channel, u32 gpadl_handle)
 
 	wait_for_completion(&info->waitevent);
 
+	gpadl->gpadl_handle = 0;
+
 post_msg_err:
 	/*
 	 * If the channel has been rescinded;
@@ -859,6 +879,12 @@ post_msg_err:
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
 
 	kfree(info);
+
+	ret = set_memory_encrypted((unsigned long)gpadl->buffer,
+				   PFN_UP(gpadl->size));
+	if (ret)
+		pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
@@ -933,9 +959,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	}
 
 	/* Tear down the gpadl for the channel's ring buffer */
-	else if (channel->ringbuffer_gpadlhandle) {
-		ret = vmbus_teardown_gpadl(channel,
-					   channel->ringbuffer_gpadlhandle);
+	else if (channel->ringbuffer_gpadlhandle.gpadl_handle) {
+		ret = vmbus_teardown_gpadl(channel, &channel->ringbuffer_gpadlhandle);
 		if (ret) {
 			pr_err("Close failed: teardown gpadl return %d\n", ret);
 			/*
@@ -943,8 +968,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 			 * it is perhaps better to leak memory.
 			 */
 		}
-
-		channel->ringbuffer_gpadlhandle = 0;
 	}
 
 	if (!ret)
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index bc48855dff10..315278a7cf88 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -1075,14 +1075,15 @@ struct netvsc_device {
 	/* Receive buffer allocated by us but manages by NetVSP */
 	void *recv_buf;
 	u32 recv_buf_size; /* allocated bytes */
-	u32 recv_buf_gpadl_handle;
+	struct vmbus_gpadl recv_buf_gpadl_handle;
 	u32 recv_section_cnt;
 	u32 recv_section_size;
 	u32 recv_completion_cnt;
 
 	/* Send buffer allocated by us */
 	void *send_buf;
-	u32 send_buf_gpadl_handle;
+	u32 send_buf_size;
+	struct vmbus_gpadl send_buf_gpadl_handle;
 	u32 send_section_cnt;
 	u32 send_section_size;
 	unsigned long *send_section_map;
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 7bd935412853..396bc1c204e6 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -278,9 +278,9 @@ static void netvsc_teardown_recv_gpadl(struct hv_device *device,
 {
 	int ret;
 
-	if (net_device->recv_buf_gpadl_handle) {
+	if (net_device->recv_buf_gpadl_handle.gpadl_handle) {
 		ret = vmbus_teardown_gpadl(device->channel,
-					   net_device->recv_buf_gpadl_handle);
+					   &net_device->recv_buf_gpadl_handle);
 
 		/* If we failed here, we might as well return and have a leak
 		 * rather than continue and a bugchk
@@ -290,7 +290,6 @@ static void netvsc_teardown_recv_gpadl(struct hv_device *device,
 				   "unable to teardown receive buffer's gpadl\n");
 			return;
 		}
-		net_device->recv_buf_gpadl_handle = 0;
 	}
 }
 
@@ -300,9 +299,9 @@ static void netvsc_teardown_send_gpadl(struct hv_device *device,
 {
 	int ret;
 
-	if (net_device->send_buf_gpadl_handle) {
+	if (net_device->send_buf_gpadl_handle.gpadl_handle) {
 		ret = vmbus_teardown_gpadl(device->channel,
-					   net_device->send_buf_gpadl_handle);
+					   &net_device->send_buf_gpadl_handle);
 
 		/* If we failed here, we might as well return and have a leak
 		 * rather than continue and a bugchk
@@ -312,7 +311,6 @@ static void netvsc_teardown_send_gpadl(struct hv_device *device,
 				   "unable to teardown send buffer's gpadl\n");
 			return;
 		}
-		net_device->send_buf_gpadl_handle = 0;
 	}
 }
 
@@ -380,7 +378,7 @@ static int netvsc_init_buf(struct hv_device *device,
 	memset(init_packet, 0, sizeof(struct nvsp_message));
 	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
 	init_packet->msg.v1_msg.send_recv_buf.
-		gpadl_handle = net_device->recv_buf_gpadl_handle;
+		gpadl_handle = net_device->recv_buf_gpadl_handle.gpadl_handle;
 	init_packet->msg.v1_msg.
 		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
 
@@ -463,6 +461,7 @@ static int netvsc_init_buf(struct hv_device *device,
 		ret = -ENOMEM;
 		goto cleanup;
 	}
+	net_device->send_buf_size = buf_size;
 
 	/* Establish the gpadl handle for this buffer on this
 	 * channel.  Note: This call uses the vmbus connection rather
@@ -482,7 +481,7 @@ static int netvsc_init_buf(struct hv_device *device,
 	memset(init_packet, 0, sizeof(struct nvsp_message));
 	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
 	init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
-		net_device->send_buf_gpadl_handle;
+		net_device->send_buf_gpadl_handle.gpadl_handle;
 	init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
 
 	trace_nvsp_send(ndev, init_packet);
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index 652fe2547587..c08a6cfd119f 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -58,11 +58,11 @@ struct hv_uio_private_data {
 	atomic_t refcnt;
 
 	void	*recv_buf;
-	u32	recv_gpadl;
+	struct vmbus_gpadl recv_gpadl;
 	char	recv_name[32];	/* "recv_4294967295" */
 
 	void	*send_buf;
-	u32	send_gpadl;
+	struct vmbus_gpadl send_gpadl;
 	char	send_name[32];
 };
 
@@ -179,15 +179,13 @@ hv_uio_new_channel(struct vmbus_channel *new_sc)
 static void
 hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
 {
-	if (pdata->send_gpadl) {
-		vmbus_teardown_gpadl(dev->channel, pdata->send_gpadl);
-		pdata->send_gpadl = 0;
+	if (pdata->send_gpadl.gpadl_handle) {
+		vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
 		vfree(pdata->send_buf);
 	}
 
-	if (pdata->recv_gpadl) {
-		vmbus_teardown_gpadl(dev->channel, pdata->recv_gpadl);
-		pdata->recv_gpadl = 0;
+	if (pdata->recv_gpadl.gpadl_handle) {
+		vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
 		vfree(pdata->recv_buf);
 	}
 }
@@ -303,7 +301,7 @@ hv_uio_probe(struct hv_device *dev,
 
 	/* put Global Physical Address Label in name */
 	snprintf(pdata->recv_name, sizeof(pdata->recv_name),
-		 "recv:%u", pdata->recv_gpadl);
+		 "recv:%u", pdata->recv_gpadl.gpadl_handle);
 	pdata->info.mem[RECV_BUF_MAP].name = pdata->recv_name;
 	pdata->info.mem[RECV_BUF_MAP].addr
 		= (uintptr_t)pdata->recv_buf;
@@ -324,7 +322,7 @@ hv_uio_probe(struct hv_device *dev,
 	}
 
 	snprintf(pdata->send_name, sizeof(pdata->send_name),
-		 "send:%u", pdata->send_gpadl);
+		 "send:%u", pdata->send_gpadl.gpadl_handle);
 	pdata->info.mem[SEND_BUF_MAP].name = pdata->send_name;
 	pdata->info.mem[SEND_BUF_MAP].addr
 		= (uintptr_t)pdata->send_buf;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index ddc8713ce57b..a9e0bc3b1511 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -803,6 +803,12 @@ struct vmbus_device {
 
 #define VMBUS_DEFAULT_MAX_PKT_SIZE 4096
 
+struct vmbus_gpadl {
+	u32 gpadl_handle;
+	u32 size;
+	void *buffer;
+};
+
 struct vmbus_channel {
 	struct list_head listentry;
 
@@ -822,7 +828,7 @@ struct vmbus_channel {
 	bool rescind_ref; /* got rescind msg, got channel reference */
 	struct completion rescind_event;
 
-	u32 ringbuffer_gpadlhandle;
+	struct vmbus_gpadl ringbuffer_gpadlhandle;
 
 	/* Allocated memory for ring buffer */
 	struct page *ringbuffer_page;
@@ -1192,10 +1198,10 @@ extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel,
 extern int vmbus_establish_gpadl(struct vmbus_channel *channel,
 				      void *kbuffer,
 				      u32 size,
-				      u32 *gpadl_handle);
+				      struct vmbus_gpadl *gpadl);
 
 extern int vmbus_teardown_gpadl(struct vmbus_channel *channel,
-				     u32 gpadl_handle);
+				     struct vmbus_gpadl *gpadl);
 
 void vmbus_reset_channel_cb(struct vmbus_channel *channel);
 
-- 
cgit v1.2.3


From 20cf6616ccd50256a14fb2a7a3cc730080c90cd0 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Mon, 25 Oct 2021 12:54:34 -0700
Subject: Drivers: hv: vmbus: Remove unused code to check for subchannels

The last caller of vmbus_are_subchannels_present() was removed in commit
c967590457ca ("scsi: storvsc: Fix a race in sub-channel creation that can cause panic").

Remove this dead code, and the utility function invoke_sc_cb() that it is
the only caller of.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/1635191674-34407-1-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 drivers/hv/channel_mgmt.c | 34 ----------------------------------
 include/linux/hyperv.h    | 13 -------------
 2 files changed, 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 142308526ec6..2829575fd9b7 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -1581,21 +1581,6 @@ cleanup:
 	return ret;
 }
 
-static void invoke_sc_cb(struct vmbus_channel *primary_channel)
-{
-	struct list_head *cur, *tmp;
-	struct vmbus_channel *cur_channel;
-
-	if (primary_channel->sc_creation_callback == NULL)
-		return;
-
-	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
-		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
-
-		primary_channel->sc_creation_callback(cur_channel);
-	}
-}
-
 void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
 				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
 {
@@ -1603,25 +1588,6 @@ void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
 }
 EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
 
-bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
-{
-	bool ret;
-
-	ret = !list_empty(&primary->sc_list);
-
-	if (ret) {
-		/*
-		 * Invoke the callback on sub-channel creation.
-		 * This will present a uniform interface to the
-		 * clients.
-		 */
-		invoke_sc_cb(primary);
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
-
 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
 		void (*chn_rescind_cb)(struct vmbus_channel *))
 {
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index a9e0bc3b1511..b823311eac79 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1106,19 +1106,6 @@ void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
 void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
 		void (*chn_rescind_cb)(struct vmbus_channel *));
 
-/*
- * Check if sub-channels have already been offerred. This API will be useful
- * when the driver is unloaded after establishing sub-channels. In this case,
- * when the driver is re-loaded, the driver would have to check if the
- * subchannels have already been established before attempting to request
- * the creation of sub-channels.
- * This function returns TRUE to indicate that subchannels have already been
- * created.
- * This function should be invoked after setting the callback function for
- * sub-channel creation.
- */
-bool vmbus_are_subchannels_present(struct vmbus_channel *primary);
-
 /* The format must be the same as struct vmdata_gpa_direct */
 struct vmbus_channel_packet_page_buffer {
 	u16 type;
-- 
cgit v1.2.3


From 9330986c03006ab1d33d243b7cfe598a7a3c1baa Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannekoong@fb.com>
Date: Wed, 27 Oct 2021 16:45:00 -0700
Subject: bpf: Add bloom filter map implementation

This patch adds the kernel-side changes for the implementation of
a bpf bloom filter map.

The bloom filter map supports peek (determining whether an element
is present in the map) and push (adding an element to the map)
operations.These operations are exposed to userspace applications
through the already existing syscalls in the following way:

BPF_MAP_LOOKUP_ELEM -> peek
BPF_MAP_UPDATE_ELEM -> push

The bloom filter map does not have keys, only values. In light of
this, the bloom filter map's API matches that of queue stack maps:
user applications use BPF_MAP_LOOKUP_ELEM/BPF_MAP_UPDATE_ELEM
which correspond internally to bpf_map_peek_elem/bpf_map_push_elem,
and bpf programs must use the bpf_map_peek_elem and bpf_map_push_elem
APIs to query or add an element to the bloom filter map. When the
bloom filter map is created, it must be created with a key_size of 0.

For updates, the user will pass in the element to add to the map
as the value, with a NULL key. For lookups, the user will pass in the
element to query in the map as the value, with a NULL key. In the
verifier layer, this requires us to modify the argument type of
a bloom filter's BPF_FUNC_map_peek_elem call to ARG_PTR_TO_MAP_VALUE;
as well, in the syscall layer, we need to copy over the user value
so that in bpf_map_peek_elem, we know which specific value to query.

A few things to please take note of:
 * If there are any concurrent lookups + updates, the user is
responsible for synchronizing this to ensure no false negative lookups
occur.
 * The number of hashes to use for the bloom filter is configurable from
userspace. If no number is specified, the default used will be 5 hash
functions. The benchmarks later in this patchset can help compare the
performance of using different number of hashes on different entry
sizes. In general, using more hashes decreases both the false positive
rate and the speed of a lookup.
 * Deleting an element in the bloom filter map is not supported.
 * The bloom filter map may be used as an inner map.
 * The "max_entries" size that is specified at map creation time is used
to approximate a reasonable bitmap size for the bloom filter, and is not
otherwise strictly enforced. If the user wishes to insert more entries
into the bloom filter than "max_entries", they may do so but they should
be aware that this may lead to a higher false positive rate.

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211027234504.30744-2-joannekoong@fb.com
---
 include/linux/bpf.h            |   1 +
 include/linux/bpf_types.h      |   1 +
 include/uapi/linux/bpf.h       |   9 ++
 kernel/bpf/Makefile            |   2 +-
 kernel/bpf/bloom_filter.c      | 195 +++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  24 ++++-
 kernel/bpf/verifier.c          |  19 +++-
 tools/include/uapi/linux/bpf.h |   9 ++
 8 files changed, 253 insertions(+), 7 deletions(-)
 create mode 100644 kernel/bpf/bloom_filter.c

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 31421c74ba08..50105e0b8fcc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -169,6 +169,7 @@ struct bpf_map {
 	u32 value_size;
 	u32 max_entries;
 	u32 map_flags;
+	u64 map_extra; /* any per-map-type extra fields */
 	int spin_lock_off; /* >=0 valid offset, <0 error */
 	int timer_off; /* >=0 valid offset, <0 error */
 	u32 id;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 9c81724e4b98..c4424ac2fa02 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -125,6 +125,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
 
 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c10820037883..8bead4aa3ad0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_BLOOM_FILTER,
 };
 
 /* Note that tracing related programs such as
@@ -1274,6 +1275,13 @@ union bpf_attr {
 						   * struct stored as the
 						   * map value
 						   */
+		/* Any per-map-type extra fields
+		 *
+		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
+		 * number of hash functions (if 0, the bloom filter will default
+		 * to using 5 hash functions).
+		 */
+		__u64	map_extra;
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -5638,6 +5646,7 @@ struct bpf_map_info {
 	__u32 btf_id;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u64 map_extra;
 } __attribute__((aligned(8)));
 
 struct bpf_btf_info {
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 7f33098ca63f..cf6ca339f3cd 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -7,7 +7,7 @@ endif
 CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
 obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
new file mode 100644
index 000000000000..7c50232b7571
--- /dev/null
+++ b/kernel/bpf/bloom_filter.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bitmap.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
+
+#define BLOOM_CREATE_FLAG_MASK \
+	(BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK)
+
+struct bpf_bloom_filter {
+	struct bpf_map map;
+	u32 bitset_mask;
+	u32 hash_seed;
+	/* If the size of the values in the bloom filter is u32 aligned,
+	 * then it is more performant to use jhash2 as the underlying hash
+	 * function, else we use jhash. This tracks the number of u32s
+	 * in an u32-aligned value size. If the value size is not u32 aligned,
+	 * this will be 0.
+	 */
+	u32 aligned_u32_count;
+	u32 nr_hash_funcs;
+	unsigned long bitset[];
+};
+
+static u32 hash(struct bpf_bloom_filter *bloom, void *value,
+		u32 value_size, u32 index)
+{
+	u32 h;
+
+	if (bloom->aligned_u32_count)
+		h = jhash2(value, bloom->aligned_u32_count,
+			   bloom->hash_seed + index);
+	else
+		h = jhash(value, value_size, bloom->hash_seed + index);
+
+	return h & bloom->bitset_mask;
+}
+
+static int peek_elem(struct bpf_map *map, void *value)
+{
+	struct bpf_bloom_filter *bloom =
+		container_of(map, struct bpf_bloom_filter, map);
+	u32 i, h;
+
+	for (i = 0; i < bloom->nr_hash_funcs; i++) {
+		h = hash(bloom, value, map->value_size, i);
+		if (!test_bit(h, bloom->bitset))
+			return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int push_elem(struct bpf_map *map, void *value, u64 flags)
+{
+	struct bpf_bloom_filter *bloom =
+		container_of(map, struct bpf_bloom_filter, map);
+	u32 i, h;
+
+	if (flags != BPF_ANY)
+		return -EINVAL;
+
+	for (i = 0; i < bloom->nr_hash_funcs; i++) {
+		h = hash(bloom, value, map->value_size, i);
+		set_bit(h, bloom->bitset);
+	}
+
+	return 0;
+}
+
+static int pop_elem(struct bpf_map *map, void *value)
+{
+	return -EOPNOTSUPP;
+}
+
+static struct bpf_map *map_alloc(union bpf_attr *attr)
+{
+	u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
+	int numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_bloom_filter *bloom;
+
+	if (!bpf_capable())
+		return ERR_PTR(-EPERM);
+
+	if (attr->key_size != 0 || attr->value_size == 0 ||
+	    attr->max_entries == 0 ||
+	    attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
+	    !bpf_map_flags_access_ok(attr->map_flags) ||
+	    (attr->map_extra & ~0xF))
+		return ERR_PTR(-EINVAL);
+
+	/* The lower 4 bits of map_extra specify the number of hash functions */
+	nr_hash_funcs = attr->map_extra & 0xF;
+	if (nr_hash_funcs == 0)
+		/* Default to using 5 hash functions if unspecified */
+		nr_hash_funcs = 5;
+
+	/* For the bloom filter, the optimal bit array size that minimizes the
+	 * false positive probability is n * k / ln(2) where n is the number of
+	 * expected entries in the bloom filter and k is the number of hash
+	 * functions. We use 7 / 5 to approximate 1 / ln(2).
+	 *
+	 * We round this up to the nearest power of two to enable more efficient
+	 * hashing using bitmasks. The bitmask will be the bit array size - 1.
+	 *
+	 * If this overflows a u32, the bit array size will have 2^32 (4
+	 * GB) bits.
+	 */
+	if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) ||
+	    check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) ||
+	    nr_bits > (1UL << 31)) {
+		/* The bit array size is 2^32 bits but to avoid overflowing the
+		 * u32, we use U32_MAX, which will round up to the equivalent
+		 * number of bytes
+		 */
+		bitset_bytes = BITS_TO_BYTES(U32_MAX);
+		bitset_mask = U32_MAX;
+	} else {
+		if (nr_bits <= BITS_PER_LONG)
+			nr_bits = BITS_PER_LONG;
+		else
+			nr_bits = roundup_pow_of_two(nr_bits);
+		bitset_bytes = BITS_TO_BYTES(nr_bits);
+		bitset_mask = nr_bits - 1;
+	}
+
+	bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long));
+	bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node);
+
+	if (!bloom)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&bloom->map, attr);
+
+	bloom->nr_hash_funcs = nr_hash_funcs;
+	bloom->bitset_mask = bitset_mask;
+
+	/* Check whether the value size is u32-aligned */
+	if ((attr->value_size & (sizeof(u32) - 1)) == 0)
+		bloom->aligned_u32_count =
+			attr->value_size / sizeof(u32);
+
+	if (!(attr->map_flags & BPF_F_ZERO_SEED))
+		bloom->hash_seed = get_random_int();
+
+	return &bloom->map;
+}
+
+static void map_free(struct bpf_map *map)
+{
+	struct bpf_bloom_filter *bloom =
+		container_of(map, struct bpf_bloom_filter, map);
+
+	bpf_map_area_free(bloom);
+}
+
+static void *lookup_elem(struct bpf_map *map, void *key)
+{
+	/* The eBPF program should use map_peek_elem instead */
+	return ERR_PTR(-EINVAL);
+}
+
+static int update_elem(struct bpf_map *map, void *key,
+		       void *value, u64 flags)
+{
+	/* The eBPF program should use map_push_elem instead */
+	return -EINVAL;
+}
+
+static int check_btf(const struct bpf_map *map, const struct btf *btf,
+		     const struct btf_type *key_type,
+		     const struct btf_type *value_type)
+{
+	/* Bloom filter maps are keyless */
+	return btf_type_is_void(key_type) ? 0 : -EINVAL;
+}
+
+static int bpf_bloom_btf_id;
+const struct bpf_map_ops bloom_filter_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
+	.map_alloc = map_alloc,
+	.map_free = map_free,
+	.map_push_elem = push_elem,
+	.map_peek_elem = peek_elem,
+	.map_pop_elem = pop_elem,
+	.map_lookup_elem = lookup_elem,
+	.map_update_elem = update_elem,
+	.map_check_btf = check_btf,
+	.map_btf_name = "bpf_bloom_filter",
+	.map_btf_id = &bpf_bloom_btf_id,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3e1c024ce3ed..f7c2c6354add 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -199,7 +199,8 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
 		err = bpf_fd_reuseport_array_update_elem(map, key, value,
 							 flags);
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-		   map->map_type == BPF_MAP_TYPE_STACK) {
+		   map->map_type == BPF_MAP_TYPE_STACK ||
+		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
 		err = map->ops->map_push_elem(map, value, flags);
 	} else {
 		rcu_read_lock();
@@ -238,7 +239,8 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
-		   map->map_type == BPF_MAP_TYPE_STACK) {
+		   map->map_type == BPF_MAP_TYPE_STACK ||
+		   map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
 		err = map->ops->map_peek_elem(map, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
 		/* struct_ops map requires directly updating "value" */
@@ -348,6 +350,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr)
 	map->max_entries = attr->max_entries;
 	map->map_flags = bpf_map_flags_retain_permanent(attr->map_flags);
 	map->numa_node = bpf_map_attr_numa_node(attr);
+	map->map_extra = attr->map_extra;
 }
 
 static int bpf_map_alloc_id(struct bpf_map *map)
@@ -553,6 +556,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "value_size:\t%u\n"
 		   "max_entries:\t%u\n"
 		   "map_flags:\t%#x\n"
+		   "map_extra:\t%#llx\n"
 		   "memlock:\t%lu\n"
 		   "map_id:\t%u\n"
 		   "frozen:\t%u\n",
@@ -561,6 +565,7 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
 		   map->value_size,
 		   map->max_entries,
 		   map->map_flags,
+		   (unsigned long long)map->map_extra,
 		   bpf_map_memory_footprint(map),
 		   map->id,
 		   READ_ONCE(map->frozen));
@@ -810,7 +815,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 	return ret;
 }
 
-#define BPF_MAP_CREATE_LAST_FIELD btf_vmlinux_value_type_id
+#define BPF_MAP_CREATE_LAST_FIELD map_extra
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -831,6 +836,10 @@ static int map_create(union bpf_attr *attr)
 		return -EINVAL;
 	}
 
+	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
+	    attr->map_extra != 0)
+		return -EINVAL;
+
 	f_flags = bpf_get_file_flag(attr->map_flags);
 	if (f_flags < 0)
 		return f_flags;
@@ -1080,6 +1089,14 @@ static int map_lookup_elem(union bpf_attr *attr)
 	if (!value)
 		goto free_key;
 
+	if (map->map_type == BPF_MAP_TYPE_BLOOM_FILTER) {
+		if (copy_from_user(value, uvalue, value_size))
+			err = -EFAULT;
+		else
+			err = bpf_map_copy_value(map, key, value, attr->flags);
+		goto free_value;
+	}
+
 	err = bpf_map_copy_value(map, key, value, attr->flags);
 	if (err)
 		goto free_value;
@@ -3881,6 +3898,7 @@ static int bpf_map_get_info_by_fd(struct file *file,
 	info.value_size = map->value_size;
 	info.max_entries = map->max_entries;
 	info.map_flags = map->map_flags;
+	info.map_extra = map->map_extra;
 	memcpy(info.name, map->name, sizeof(map->name));
 
 	if (map->btf) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c6616e325803..3c8aa7df1773 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5002,7 +5002,10 @@ static int resolve_map_arg_type(struct bpf_verifier_env *env,
 			return -EINVAL;
 		}
 		break;
-
+	case BPF_MAP_TYPE_BLOOM_FILTER:
+		if (meta->func_id == BPF_FUNC_map_peek_elem)
+			*arg_type = ARG_PTR_TO_MAP_VALUE;
+		break;
 	default:
 		break;
 	}
@@ -5577,6 +5580,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_task_storage_delete)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_BLOOM_FILTER:
+		if (func_id != BPF_FUNC_map_peek_elem &&
+		    func_id != BPF_FUNC_map_push_elem)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -5644,13 +5652,18 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
 			goto error;
 		break;
-	case BPF_FUNC_map_peek_elem:
 	case BPF_FUNC_map_pop_elem:
-	case BPF_FUNC_map_push_elem:
 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
 		    map->map_type != BPF_MAP_TYPE_STACK)
 			goto error;
 		break;
+	case BPF_FUNC_map_peek_elem:
+	case BPF_FUNC_map_push_elem:
+		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+		    map->map_type != BPF_MAP_TYPE_STACK &&
+		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
+			goto error;
+		break;
 	case BPF_FUNC_sk_storage_get:
 	case BPF_FUNC_sk_storage_delete:
 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c10820037883..8bead4aa3ad0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -906,6 +906,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_RINGBUF,
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
+	BPF_MAP_TYPE_BLOOM_FILTER,
 };
 
 /* Note that tracing related programs such as
@@ -1274,6 +1275,13 @@ union bpf_attr {
 						   * struct stored as the
 						   * map value
 						   */
+		/* Any per-map-type extra fields
+		 *
+		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
+		 * number of hash functions (if 0, the bloom filter will default
+		 * to using 5 hash functions).
+		 */
+		__u64	map_extra;
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -5638,6 +5646,7 @@ struct bpf_map_info {
 	__u32 btf_id;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u64 map_extra;
 } __attribute__((aligned(8)));
 
 struct bpf_btf_info {
-- 
cgit v1.2.3


From d6aef08a872b9e23eecc92d0e92393473b13c497 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Thu, 28 Oct 2021 12:04:54 +0530
Subject: bpf: Add bpf_kallsyms_lookup_name helper

This helper allows us to get the address of a kernel symbol from inside
a BPF_PROG_TYPE_SYSCALL prog (used by gen_loader), so that we can
relocate typeless ksym vars.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20211028063501.2239335-2-memxor@gmail.com
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 16 ++++++++++++++++
 kernel/bpf/syscall.c           | 27 +++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 16 ++++++++++++++++
 4 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 50105e0b8fcc..6deebf8bf78f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2110,6 +2110,7 @@ extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
 extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto;
 extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
+extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8bead4aa3ad0..bd0c9f0487f6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4923,6 +4923,21 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *unix_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or **NULL** otherwise.
+ *
+ * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res)
+ *	Description
+ *		Get the address of a kernel symbol, returned in *res*. *res* is
+ *		set to 0 if the symbol is not found.
+ *	Return
+ *		On success, zero. On error, a negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-EINVAL** if string *name* is not the same size as *name_sz*.
+ *
+ *		**-ENOENT** if symbol is not found.
+ *
+ *		**-EPERM** if caller does not have permission to obtain kernel address.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5104,6 +5119,7 @@ union bpf_attr {
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
 	FN(skc_to_unix_sock),		\
+	FN(kallsyms_lookup_name),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f7c2c6354add..e12a217ead34 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4781,6 +4781,31 @@ static const struct bpf_func_proto bpf_sys_close_proto = {
 	.arg1_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_kallsyms_lookup_name, const char *, name, int, name_sz, int, flags, u64 *, res)
+{
+	if (flags)
+		return -EINVAL;
+
+	if (name_sz <= 1 || name[name_sz - 1])
+		return -EINVAL;
+
+	if (!bpf_dump_raw_ok(current_cred()))
+		return -EPERM;
+
+	*res = kallsyms_lookup_name(name);
+	return *res ? 0 : -ENOENT;
+}
+
+const struct bpf_func_proto bpf_kallsyms_lookup_name_proto = {
+	.func		= bpf_kallsyms_lookup_name,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_LONG,
+};
+
 static const struct bpf_func_proto *
 syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -4791,6 +4816,8 @@ syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_btf_find_by_name_kind_proto;
 	case BPF_FUNC_sys_close:
 		return &bpf_sys_close_proto;
+	case BPF_FUNC_kallsyms_lookup_name:
+		return &bpf_kallsyms_lookup_name_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8bead4aa3ad0..bd0c9f0487f6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4923,6 +4923,21 @@ union bpf_attr {
  *		Dynamically cast a *sk* pointer to a *unix_sock* pointer.
  *	Return
  *		*sk* if casting is valid, or **NULL** otherwise.
+ *
+ * long bpf_kallsyms_lookup_name(const char *name, int name_sz, int flags, u64 *res)
+ *	Description
+ *		Get the address of a kernel symbol, returned in *res*. *res* is
+ *		set to 0 if the symbol is not found.
+ *	Return
+ *		On success, zero. On error, a negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-EINVAL** if string *name* is not the same size as *name_sz*.
+ *
+ *		**-ENOENT** if symbol is not found.
+ *
+ *		**-EPERM** if caller does not have permission to obtain kernel address.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5104,6 +5119,7 @@ union bpf_attr {
 	FN(get_branch_snapshot),	\
 	FN(trace_vprintk),		\
 	FN(skc_to_unix_sock),		\
+	FN(kallsyms_lookup_name),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From eac96c3efdb593df1a57bb5b95dbe037bfa9a522 Mon Sep 17 00:00:00 2001
From: Yang Shi <shy828301@gmail.com>
Date: Thu, 28 Oct 2021 14:36:11 -0700
Subject: mm: filemap: check if THP has hwpoisoned subpage for PMD page fault

When handling shmem page fault the THP with corrupted subpage could be
PMD mapped if certain conditions are satisfied.  But kernel is supposed
to send SIGBUS when trying to map hwpoisoned page.

There are two paths which may do PMD map: fault around and regular
fault.

Before commit f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault()
codepaths") the thing was even worse in fault around path.  The THP
could be PMD mapped as long as the VMA fits regardless what subpage is
accessed and corrupted.  After this commit as long as head page is not
corrupted the THP could be PMD mapped.

In the regular fault path the THP could be PMD mapped as long as the
corrupted page is not accessed and the VMA fits.

This loophole could be fixed by iterating every subpage to check if any
of them is hwpoisoned or not, but it is somewhat costly in page fault
path.

So introduce a new page flag called HasHWPoisoned on the first tail
page.  It indicates the THP has hwpoisoned subpage(s).  It is set if any
subpage of THP is found hwpoisoned by memory failure and after the
refcount is bumped successfully, then cleared when the THP is freed or
split.

The soft offline path doesn't need this since soft offline handler just
marks a subpage hwpoisoned when the subpage is migrated successfully.
But shmem THP didn't get split then migrated at all.

Link: https://lkml.kernel.org/r/20211020210755.23964-3-shy828301@gmail.com
Fixes: 800d8c63b2e9 ("shmem: add huge pages support")
Signed-off-by: Yang Shi <shy828301@gmail.com>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Suggested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 23 +++++++++++++++++++++++
 mm/huge_memory.c           |  2 ++
 mm/memory-failure.c        | 14 ++++++++++++++
 mm/memory.c                |  9 +++++++++
 mm/page_alloc.c            |  4 +++-
 5 files changed, 51 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index a558d67ee86f..fbfd3fad48f2 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -171,6 +171,15 @@ enum pageflags {
 	/* Compound pages. Stored in first tail page's flags */
 	PG_double_map = PG_workingset,
 
+#ifdef CONFIG_MEMORY_FAILURE
+	/*
+	 * Compound pages. Stored in first tail page's flags.
+	 * Indicates that at least one subpage is hwpoisoned in the
+	 * THP.
+	 */
+	PG_has_hwpoisoned = PG_mappedtodisk,
+#endif
+
 	/* non-lru isolated movable page */
 	PG_isolated = PG_reclaim,
 
@@ -668,6 +677,20 @@ PAGEFLAG_FALSE(DoubleMap)
 	TESTSCFLAG_FALSE(DoubleMap)
 #endif
 
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+/*
+ * PageHasHWPoisoned indicates that at least one subpage is hwpoisoned in the
+ * compound page.
+ *
+ * This flag is set by hwpoison handler.  Cleared by THP split or free page.
+ */
+PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+	TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
+#else
+PAGEFLAG_FALSE(HasHWPoisoned)
+	TESTSCFLAG_FALSE(HasHWPoisoned)
+#endif
+
 /*
  * Check if a page is currently marked HWPoisoned. Note that this check is
  * best effort only and inherently racy: there is no way to synchronize with
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 92192cb086c7..c5142d237e48 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2426,6 +2426,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 	/* lock lru list/PageCompound, ref frozen by page_ref_freeze */
 	lruvec = lock_page_lruvec(head);
 
+	ClearPageHasHWPoisoned(head);
+
 	for (i = nr - 1; i >= 1; i--) {
 		__split_huge_page_tail(head, i, lruvec, list);
 		/* Some pages can be beyond EOF: drop them from page cache */
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 73f68699e7ab..bdbbb32211a5 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1694,6 +1694,20 @@ try_again:
 	}
 
 	if (PageTransHuge(hpage)) {
+		/*
+		 * The flag must be set after the refcount is bumped
+		 * otherwise it may race with THP split.
+		 * And the flag can't be set in get_hwpoison_page() since
+		 * it is called by soft offline too and it is just called
+		 * for !MF_COUNT_INCREASE.  So here seems to be the best
+		 * place.
+		 *
+		 * Don't need care about the above error handling paths for
+		 * get_hwpoison_page() since they handle either free page
+		 * or unhandlable page.  The refcount is bumped iff the
+		 * page is a valid handlable page.
+		 */
+		SetPageHasHWPoisoned(hpage);
 		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
 			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
 			res = -EBUSY;
diff --git a/mm/memory.c b/mm/memory.c
index adf9b9ef8277..c52be6d6b605 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3906,6 +3906,15 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	if (compound_order(page) != HPAGE_PMD_ORDER)
 		return ret;
 
+	/*
+	 * Just backoff if any subpage of a THP is corrupted otherwise
+	 * the corrupted page may mapped by PMD silently to escape the
+	 * check.  This kind of THP just can be PTE mapped.  Access to
+	 * the corrupted subpage should trigger SIGBUS as expected.
+	 */
+	if (unlikely(PageHasHWPoisoned(page)))
+		return ret;
+
 	/*
 	 * Archs like ppc64 need additional space to store information
 	 * related to pte entry. Use the preallocated table for that.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3ec39552d00f..23d3339ac4e8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1312,8 +1312,10 @@ static __always_inline bool free_pages_prepare(struct page *page,
 
 		VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
 
-		if (compound)
+		if (compound) {
 			ClearPageDoubleMap(page);
+			ClearPageHasHWPoisoned(page);
+		}
 		for (i = 1; i < (1 << order); i++) {
 			if (compound)
 				bad += free_tail_pages_check(page, page + i);
-- 
cgit v1.2.3


From 78476d315e190533757ab894255c4f2c2f254bce Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Fri, 29 Oct 2021 11:01:44 +0800
Subject: mctp: Add flow extension to skb

This change adds a new skb extension for MCTP, to represent a
request/response flow.

The intention is to use this in a later change to allow i2c controllers
to correctly configure a multiplexer over a flow.

Since we have a cleanup function in the core path (if an extension is
present), we'll need to make CONFIG_MCTP a bool, rather than a tristate.

Includes a fix for a build warning with clang:
Reported-by: kernel test robot <lkp@intel.com>

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  3 +++
 include/net/mctp.h     |  7 +++++++
 net/core/skbuff.c      | 19 +++++++++++++++++++
 net/mctp/Kconfig       |  7 ++++++-
 4 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cb96f1e6460c..0bd6520329f6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4243,6 +4243,9 @@ enum skb_ext_id {
 #endif
 #if IS_ENABLED(CONFIG_MPTCP)
 	SKB_EXT_MPTCP,
+#endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+	SKB_EXT_MCTP,
 #endif
 	SKB_EXT_NUM, /* must be last */
 };
diff --git a/include/net/mctp.h b/include/net/mctp.h
index 23bec708f4c7..7a5ba801703c 100644
--- a/include/net/mctp.h
+++ b/include/net/mctp.h
@@ -189,6 +189,13 @@ static inline struct mctp_skb_cb *mctp_cb(struct sk_buff *skb)
 	return (void *)(skb->cb);
 }
 
+/* If CONFIG_MCTP_FLOWS, we may add one of these as a SKB extension,
+ * indicating the flow to the device driver.
+ */
+struct mctp_flow {
+	struct mctp_sk_key *key;
+};
+
 /* Route definition.
  *
  * These are held in the pernet->mctp.routes list, with RCU protection for
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 09b8cf8ab234..67a9188d8a49 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -70,6 +70,7 @@
 #include <net/xfrm.h>
 #include <net/mpls.h>
 #include <net/mptcp.h>
+#include <net/mctp.h>
 #include <net/page_pool.h>
 
 #include <linux/uaccess.h>
@@ -4440,6 +4441,9 @@ static const u8 skb_ext_type_len[] = {
 #if IS_ENABLED(CONFIG_MPTCP)
 	[SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
 #endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+	[SKB_EXT_MCTP] = SKB_EXT_CHUNKSIZEOF(struct mctp_flow),
+#endif
 };
 
 static __always_inline unsigned int skb_ext_total_length(void)
@@ -4456,6 +4460,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
 #endif
 #if IS_ENABLED(CONFIG_MPTCP)
 		skb_ext_type_len[SKB_EXT_MPTCP] +
+#endif
+#if IS_ENABLED(CONFIG_MCTP_FLOWS)
+		skb_ext_type_len[SKB_EXT_MCTP] +
 #endif
 		0;
 }
@@ -6529,6 +6536,14 @@ static void skb_ext_put_sp(struct sec_path *sp)
 }
 #endif
 
+#ifdef CONFIG_MCTP_FLOWS
+static void skb_ext_put_mctp(struct mctp_flow *flow)
+{
+	if (flow->key)
+		mctp_key_unref(flow->key);
+}
+#endif
+
 void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
 {
 	struct skb_ext *ext = skb->extensions;
@@ -6564,6 +6579,10 @@ free_now:
 	if (__skb_ext_exist(ext, SKB_EXT_SEC_PATH))
 		skb_ext_put_sp(skb_ext_get_ptr(ext, SKB_EXT_SEC_PATH));
 #endif
+#ifdef CONFIG_MCTP_FLOWS
+	if (__skb_ext_exist(ext, SKB_EXT_MCTP))
+		skb_ext_put_mctp(skb_ext_get_ptr(ext, SKB_EXT_MCTP));
+#endif
 
 	kmem_cache_free(skbuff_ext_cache, ext);
 }
diff --git a/net/mctp/Kconfig b/net/mctp/Kconfig
index 868c92272cbd..3a5c0e70da77 100644
--- a/net/mctp/Kconfig
+++ b/net/mctp/Kconfig
@@ -1,7 +1,7 @@
 
 menuconfig MCTP
 	depends on NET
-	tristate "MCTP core protocol support"
+	bool "MCTP core protocol support"
 	help
 	  Management Component Transport Protocol (MCTP) is an in-system
 	  protocol for communicating between management controllers and
@@ -16,3 +16,8 @@ config MCTP_TEST
         bool "MCTP core tests" if !KUNIT_ALL_TESTS
         depends on MCTP=y && KUNIT=y
         default KUNIT_ALL_TESTS
+
+config MCTP_FLOWS
+	bool
+	depends on MCTP
+	select SKB_EXTENSIONS
-- 
cgit v1.2.3


From 0bf6d96cb8294094ce1e44cbe8cf65b0899d0a3a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 25 Oct 2021 09:05:07 +0200
Subject: block: remove blk_{get,put}_request

These are now pointless wrappers around blk_mq_{alloc,free}_request,
so remove them.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20211025070517.1548584-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c                   | 21 ---------------------
 drivers/block/paride/pd.c          |  4 ++--
 drivers/block/pktcdvd.c            |  2 +-
 drivers/block/virtio_blk.c         |  4 ++--
 drivers/md/dm-mpath.c              |  4 ++--
 drivers/mmc/core/block.c           | 20 ++++++++++----------
 drivers/scsi/scsi_bsg.c            |  2 +-
 drivers/scsi/scsi_error.c          |  2 +-
 drivers/scsi/scsi_ioctl.c          |  4 ++--
 drivers/scsi/scsi_lib.c            |  4 ++--
 drivers/scsi/sg.c                  |  6 +++---
 drivers/scsi/sr.c                  |  2 +-
 drivers/scsi/st.c                  |  4 ++--
 drivers/scsi/ufs/ufshcd.c          | 20 ++++++++++----------
 drivers/scsi/ufs/ufshpb.c          |  8 ++++----
 drivers/target/target_core_pscsi.c |  4 ++--
 include/linux/blk-mq.h             |  3 ---
 17 files changed, 45 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 5ffe05b1d17c..ac1de7d73a45 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -597,27 +597,6 @@ bool blk_get_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_get_queue);
 
-/**
- * blk_get_request - allocate a request
- * @q: request queue to allocate a request for
- * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC.
- * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT.
- */
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-				blk_mq_req_flags_t flags)
-{
-	WARN_ON_ONCE(op & REQ_NOWAIT);
-	WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM));
-	return blk_mq_alloc_request(q, op, flags);
-}
-EXPORT_SYMBOL(blk_get_request);
-
-void blk_put_request(struct request *req)
-{
-	blk_mq_free_request(req);
-}
-EXPORT_SYMBOL(blk_put_request);
-
 static void handle_bad_sector(struct bio *bio, sector_t maxsector)
 {
 	char b[BDEVNAME_SIZE];
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index 675327df6aff..9cd0bd509b88 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c
@@ -775,14 +775,14 @@ static int pd_special_command(struct pd_unit *disk,
 	struct request *rq;
 	struct pd_req *req;
 
-	rq = blk_get_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
+	rq = blk_mq_alloc_request(disk->gd->queue, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 	req = blk_mq_rq_to_pdu(rq);
 
 	req->func = func;
 	blk_execute_rq(disk->gd, rq, 0);
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 	return 0;
 }
 
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index cacf64eedad8..40e7a45e3347 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -726,7 +726,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *
 	if (scsi_req(rq)->result)
 		ret = -EIO;
 out:
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 	return ret;
 }
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 303caf2d17d0..f81a768943e1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -312,7 +312,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 	struct request *req;
 	int err;
 
-	req = blk_get_request(q, REQ_OP_DRV_IN, 0);
+	req = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -323,7 +323,7 @@ static int virtblk_get_id(struct gendisk *disk, char *id_str)
 	blk_execute_rq(vblk->disk, req, false);
 	err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
 out:
-	blk_put_request(req);
+	blk_mq_free_request(req);
 	return err;
 }
 
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 694aaca4eea2..510f6c3ab98d 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -530,7 +530,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 
 	bdev = pgpath->path.dev->bdev;
 	q = bdev_get_queue(bdev);
-	clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
+	clone = blk_mq_alloc_request(q, rq->cmd_flags | REQ_NOMERGE,
 			BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(clone)) {
 		/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
@@ -579,7 +579,7 @@ static void multipath_release_clone(struct request *clone,
 						    clone->io_start_time_ns);
 	}
 
-	blk_put_request(clone);
+	blk_mq_free_request(clone);
 }
 
 /*
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index 431af5e8be2f..74882fa0f86d 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -258,7 +258,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
 	mq = &md->queue;
 
 	/* Dispatch locking to the block layer */
-	req = blk_get_request(mq->queue, REQ_OP_DRV_OUT, 0);
+	req = blk_mq_alloc_request(mq->queue, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req)) {
 		count = PTR_ERR(req);
 		goto out_put;
@@ -266,7 +266,7 @@ static ssize_t power_ro_lock_store(struct device *dev,
 	req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_BOOT_WP;
 	blk_execute_rq(NULL, req, 0);
 	ret = req_to_mmc_queue_req(req)->drv_op_result;
-	blk_put_request(req);
+	blk_mq_free_request(req);
 
 	if (!ret) {
 		pr_info("%s: Locking boot partition ro until next power on\n",
@@ -646,7 +646,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
 	 * Dispatch the ioctl() into the block request queue.
 	 */
 	mq = &md->queue;
-	req = blk_get_request(mq->queue,
+	req = blk_mq_alloc_request(mq->queue,
 		idata->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -660,7 +660,7 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md,
 	blk_execute_rq(NULL, req, 0);
 	ioc_err = req_to_mmc_queue_req(req)->drv_op_result;
 	err = mmc_blk_ioctl_copy_to_user(ic_ptr, idata);
-	blk_put_request(req);
+	blk_mq_free_request(req);
 
 cmd_done:
 	kfree(idata->buf);
@@ -716,7 +716,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
 	 * Dispatch the ioctl()s into the block request queue.
 	 */
 	mq = &md->queue;
-	req = blk_get_request(mq->queue,
+	req = blk_mq_alloc_request(mq->queue,
 		idata[0]->ic.write_flag ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -733,7 +733,7 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md,
 	for (i = 0; i < num_of_cmds && !err; i++)
 		err = mmc_blk_ioctl_copy_to_user(&cmds[i], idata[i]);
 
-	blk_put_request(req);
+	blk_mq_free_request(req);
 
 cmd_err:
 	for (i = 0; i < num_of_cmds; i++) {
@@ -2730,7 +2730,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
 	int ret;
 
 	/* Ask the block layer about the card status */
-	req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0);
+	req = blk_mq_alloc_request(mq->queue, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_GET_CARD_STATUS;
@@ -2740,7 +2740,7 @@ static int mmc_dbg_card_status_get(void *data, u64 *val)
 		*val = ret;
 		ret = 0;
 	}
-	blk_put_request(req);
+	blk_mq_free_request(req);
 
 	return ret;
 }
@@ -2766,7 +2766,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
 		return -ENOMEM;
 
 	/* Ask the block layer for the EXT CSD */
-	req = blk_get_request(mq->queue, REQ_OP_DRV_IN, 0);
+	req = blk_mq_alloc_request(mq->queue, REQ_OP_DRV_IN, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto out_free;
@@ -2775,7 +2775,7 @@ static int mmc_ext_csd_open(struct inode *inode, struct file *filp)
 	req_to_mmc_queue_req(req)->drv_op_data = &ext_csd;
 	blk_execute_rq(NULL, req, 0);
 	err = req_to_mmc_queue_req(req)->drv_op_result;
-	blk_put_request(req);
+	blk_mq_free_request(req);
 	if (err) {
 		pr_err("FAILED %d\n", err);
 		goto out_free;
diff --git a/drivers/scsi/scsi_bsg.c b/drivers/scsi/scsi_bsg.c
index 551727a6f694..081b84bb7985 100644
--- a/drivers/scsi/scsi_bsg.c
+++ b/drivers/scsi/scsi_bsg.c
@@ -95,7 +95,7 @@ static int scsi_bsg_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr,
 out_free_cmd:
 	scsi_req_free_cmd(scsi_req(rq));
 out_put_request:
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 	return ret;
 }
 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 71d027b94be4..36870b41c888 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1979,7 +1979,7 @@ maybe_retry:
 
 static void eh_lock_door_done(struct request *req, blk_status_t status)
 {
-	blk_put_request(req);
+	blk_mq_free_request(req);
 }
 
 /**
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 0078975e3c07..34412eac4566 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -490,7 +490,7 @@ static int sg_io(struct scsi_device *sdev, struct gendisk *disk,
 out_free_cdb:
 	scsi_req_free_cmd(req);
 out_put_request:
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 	return ret;
 }
 
@@ -634,7 +634,7 @@ static int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk,
 	}
 
 error:
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 
 error_free_buffer:
 	kfree(buffer);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9823b65d1536..9c2b99e12ce3 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -260,7 +260,7 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 		scsi_normalize_sense(rq->sense, rq->sense_len, sshdr);
 	ret = rq->result;
  out:
-	blk_put_request(req);
+	blk_mq_free_request(req);
 
 	return ret;
 }
@@ -1100,7 +1100,7 @@ struct request *scsi_alloc_request(struct request_queue *q,
 {
 	struct request *rq;
 
-	rq = blk_get_request(q, op, flags);
+	rq = blk_mq_alloc_request(q, op, flags);
 	if (!IS_ERR(rq))
 		scsi_initialize_rq(rq);
 	return rq;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 85f57ac0b844..141099ab9092 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -815,7 +815,7 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp,
 	if (atomic_read(&sdp->detaching)) {
 		if (srp->bio) {
 			scsi_req_free_cmd(scsi_req(srp->rq));
-			blk_put_request(srp->rq);
+			blk_mq_free_request(srp->rq);
 			srp->rq = NULL;
 		}
 
@@ -1390,7 +1390,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status)
 	 */
 	srp->rq = NULL;
 	scsi_req_free_cmd(scsi_req(rq));
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 
 	write_lock_irqsave(&sfp->rq_list_lock, iflags);
 	if (unlikely(srp->orphan)) {
@@ -1830,7 +1830,7 @@ sg_finish_rem_req(Sg_request *srp)
 
 	if (srp->rq) {
 		scsi_req_free_cmd(scsi_req(srp->rq));
-		blk_put_request(srp->rq);
+		blk_mq_free_request(srp->rq);
 	}
 
 	if (srp->res_used)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 7c4d9a964799..3009b986d1d7 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -1003,7 +1003,7 @@ static int sr_read_cdda_bpc(struct cdrom_device_info *cdi, void __user *ubuf,
 	if (blk_rq_unmap_user(bio))
 		ret = -EFAULT;
 out_put_request:
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 	return ret;
 }
 
diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c
index 1275299f6159..c2d5608f6b1a 100644
--- a/drivers/scsi/st.c
+++ b/drivers/scsi/st.c
@@ -530,7 +530,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status)
 		complete(SRpnt->waiting);
 
 	blk_rq_unmap_user(tmp);
-	blk_put_request(req);
+	blk_mq_free_request(req);
 }
 
 static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
@@ -557,7 +557,7 @@ static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
 		err = blk_rq_map_user(req->q, req, mdata, NULL, bufflen,
 				      GFP_KERNEL);
 		if (err) {
-			blk_put_request(req);
+			blk_mq_free_request(req);
 			return err;
 		}
 	}
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index bf81da2ecf98..c85f540f6af4 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2925,7 +2925,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 	 * Even though we use wait_event() which sleeps indefinitely,
 	 * the maximum wait time is bounded by SCSI request timeout.
 	 */
-	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
+	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto out_unlock;
@@ -2952,7 +2952,7 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba,
 				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
 
 out:
-	blk_put_request(req);
+	blk_mq_free_request(req);
 out_unlock:
 	up_read(&hba->clk_scaling_lock);
 	return err;
@@ -6517,9 +6517,9 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	int task_tag, err;
 
 	/*
-	 * blk_get_request() is used here only to get a free tag.
+	 * blk_mq_alloc_request() is used here only to get a free tag.
 	 */
-	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
+	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
@@ -6575,7 +6575,7 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	ufshcd_release(hba);
-	blk_put_request(req);
+	blk_mq_free_request(req);
 
 	return err;
 }
@@ -6660,7 +6660,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 
 	down_read(&hba->clk_scaling_lock);
 
-	req = blk_get_request(q, REQ_OP_DRV_OUT, 0);
+	req = blk_mq_alloc_request(q, REQ_OP_DRV_OUT, 0);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto out_unlock;
@@ -6741,7 +6741,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba,
 				    (struct utp_upiu_req *)lrbp->ucd_rsp_ptr);
 
 out:
-	blk_put_request(req);
+	blk_mq_free_request(req);
 out_unlock:
 	up_read(&hba->clk_scaling_lock);
 	return err;
@@ -7912,7 +7912,7 @@ static void ufshcd_request_sense_done(struct request *rq, blk_status_t error)
 	if (error != BLK_STS_OK)
 		pr_err("%s: REQUEST SENSE failed (%d)\n", __func__, error);
 	kfree(rq->end_io_data);
-	blk_put_request(rq);
+	blk_mq_free_request(rq);
 }
 
 static int
@@ -7932,7 +7932,7 @@ ufshcd_request_sense_async(struct ufs_hba *hba, struct scsi_device *sdev)
 	if (!buffer)
 		return -ENOMEM;
 
-	req = blk_get_request(sdev->request_queue, REQ_OP_DRV_IN,
+	req = blk_mq_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
 			      /*flags=*/BLK_MQ_REQ_PM);
 	if (IS_ERR(req)) {
 		ret = PTR_ERR(req);
@@ -7957,7 +7957,7 @@ ufshcd_request_sense_async(struct ufs_hba *hba, struct scsi_device *sdev)
 	return 0;
 
 out_put:
-	blk_put_request(req);
+	blk_mq_free_request(req);
 out_free:
 	kfree(buffer);
 	return ret;
diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c
index 589af5f6b940..a2a3080071e9 100644
--- a/drivers/scsi/ufs/ufshpb.c
+++ b/drivers/scsi/ufs/ufshpb.c
@@ -564,7 +564,7 @@ static int ufshpb_issue_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd,
 	int _read_id;
 	int ret = 0;
 
-	req = blk_get_request(cmd->device->request_queue,
+	req = blk_mq_alloc_request(cmd->device->request_queue,
 			      REQ_OP_DRV_OUT | REQ_SYNC, BLK_MQ_REQ_NOWAIT);
 	if (IS_ERR(req))
 		return -EAGAIN;
@@ -592,7 +592,7 @@ free_pre_req:
 	ufshpb_put_pre_req(hpb, pre_req);
 unlock_out:
 	spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
-	blk_put_request(req);
+	blk_mq_free_request(req);
 	return ret;
 }
 
@@ -721,7 +721,7 @@ static struct ufshpb_req *ufshpb_get_req(struct ufshpb_lu *hpb,
 		return NULL;
 
 retry:
-	req = blk_get_request(hpb->sdev_ufs_lu->request_queue, dir,
+	req = blk_mq_alloc_request(hpb->sdev_ufs_lu->request_queue, dir,
 			      BLK_MQ_REQ_NOWAIT);
 
 	if (!atomic && (PTR_ERR(req) == -EWOULDBLOCK) && (--retries > 0)) {
@@ -745,7 +745,7 @@ free_rq:
 
 static void ufshpb_put_req(struct ufshpb_lu *hpb, struct ufshpb_req *rq)
 {
-	blk_put_request(rq->req);
+	blk_mq_free_request(rq->req);
 	kmem_cache_free(hpb->map_req_cache, rq);
 }
 
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index b5705a2bd761..7fa57fb57bf2 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -1011,7 +1011,7 @@ pscsi_execute_cmd(struct se_cmd *cmd)
 	return 0;
 
 fail_put_request:
-	blk_put_request(req);
+	blk_mq_free_request(req);
 fail:
 	kfree(pt);
 	return ret;
@@ -1066,7 +1066,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
 		break;
 	}
 
-	blk_put_request(req);
+	blk_mq_free_request(req);
 	kfree(pt);
 }
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ebc45cf0450b..8682663e7368 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -892,9 +892,6 @@ static inline bool rq_is_sync(struct request *rq)
 }
 
 void blk_rq_init(struct request_queue *q, struct request *rq);
-void blk_put_request(struct request *rq);
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-		blk_mq_req_flags_t flags);
 int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 		struct bio_set *bs, gfp_t gfp_mask,
 		int (*bio_ctr)(struct bio *, struct bio *, void *), void *data);
-- 
cgit v1.2.3


From ee12203746e5cec678c7d126c9901548bfec1dd5 Mon Sep 17 00:00:00 2001
From: Ben Widawsky <ben.widawsky@intel.com>
Date: Sat, 9 Oct 2021 09:44:39 -0700
Subject: PCI: Add pci_find_dvsec_capability to find designated VSEC

Add pci_find_dvsec_capability to locate a Designated Vendor-Specific
Extended Capability with the specified Vendor ID and Capability ID.

The Designated Vendor-Specific Extended Capability (DVSEC) allows one or
more "vendor" specific capabilities that are not tied to the Vendor ID
of the PCI component. Where the DVSEC Vendor may be a standards body
like CXL.

Cc: David E. Box <david.e.box@linux.intel.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-pci@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Andrew Donnellan <ajd@linux.ibm.com>
Cc: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/163379787943.692348.6814373487017444007.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/pci/pci.c   | 32 ++++++++++++++++++++++++++++++++
 include/linux/pci.h |  1 +
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index ce2ab62b64cf..94ac86ff28b0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -732,6 +732,38 @@ u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap)
 }
 EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
 
+/**
+ * pci_find_dvsec_capability - Find DVSEC for vendor
+ * @dev: PCI device to query
+ * @vendor: Vendor ID to match for the DVSEC
+ * @dvsec: Designated Vendor-specific capability ID
+ *
+ * If DVSEC has Vendor ID @vendor and DVSEC ID @dvsec return the capability
+ * offset in config space; otherwise return 0.
+ */
+u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec)
+{
+	int pos;
+
+	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_DVSEC);
+	if (!pos)
+		return 0;
+
+	while (pos) {
+		u16 v, id;
+
+		pci_read_config_word(dev, pos + PCI_DVSEC_HEADER1, &v);
+		pci_read_config_word(dev, pos + PCI_DVSEC_HEADER2, &id);
+		if (vendor == v && dvsec == id)
+			return pos;
+
+		pos = pci_find_next_ext_capability(dev, pos, PCI_EXT_CAP_ID_DVSEC);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_find_dvsec_capability);
+
 /**
  * pci_find_parent_resource - return resource region of parent bus of given
  *			      region
diff --git a/include/linux/pci.h b/include/linux/pci.h
index cd8aa6fce204..c93ccfa4571b 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1130,6 +1130,7 @@ u16 pci_find_ext_capability(struct pci_dev *dev, int cap);
 u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap);
 struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
 u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap);
+u16 pci_find_dvsec_capability(struct pci_dev *dev, u16 vendor, u16 dvsec);
 
 u64 pci_get_dsn(struct pci_dev *dev);
 
-- 
cgit v1.2.3


From 26d5badbccddcc063dc5174a2baffd13a23322aa Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 20 Oct 2021 12:43:59 -0500
Subject: signal: Implement force_fatal_sig

Add a simple helper force_fatal_sig that causes a signal to be
delivered to a process as if the signal handler was set to SIG_DFL.

Reimplement force_sigsegv based upon this new helper.  This fixes
force_sigsegv so that when it forces the default signal handler
to be used the code now forces the signal to be unblocked as well.

Reusing the tested logic in force_sig_info_to_task that was built for
force_sig_seccomp this makes the implementation trivial.

This is interesting both because it makes force_sigsegv simpler and
because there are a couple of buggy places in the kernel that call
do_exit(SIGILL) or do_exit(SIGSYS) because there is no straight
forward way today for those places to simply force the exit of a
process with the chosen signal.  Creating force_fatal_sig allows
those places to be implemented with normal signal exits.

Link: https://lkml.kernel.org/r/20211020174406.17889-13-ebiederm@xmission.com
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/sched/signal.h |  1 +
 kernel/signal.c              | 26 +++++++++++++++++---------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index e5f4ce622ee6..e2dc9f119ada 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -338,6 +338,7 @@ extern int kill_pid(struct pid *pid, int sig, int priv);
 extern __must_check bool do_notify_parent(struct task_struct *, int);
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int);
+extern void force_fatal_sig(int);
 extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
diff --git a/kernel/signal.c b/kernel/signal.c
index 952741f6d0f9..6a5e1802b9a2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1662,6 +1662,19 @@ void force_sig(int sig)
 }
 EXPORT_SYMBOL(force_sig);
 
+void force_fatal_sig(int sig)
+{
+	struct kernel_siginfo info;
+
+	clear_siginfo(&info);
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = 0;
+	info.si_uid = 0;
+	force_sig_info_to_task(&info, current, true);
+}
+
 /*
  * When things go south during signal handling, we
  * will force a SIGSEGV. And if the signal that caused
@@ -1670,15 +1683,10 @@ EXPORT_SYMBOL(force_sig);
  */
 void force_sigsegv(int sig)
 {
-	struct task_struct *p = current;
-
-	if (sig == SIGSEGV) {
-		unsigned long flags;
-		spin_lock_irqsave(&p->sighand->siglock, flags);
-		p->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
-		spin_unlock_irqrestore(&p->sighand->siglock, flags);
-	}
-	force_sig(SIGSEGV);
+	if (sig == SIGSEGV)
+		force_fatal_sig(SIGSEGV);
+	else
+		force_sig(SIGSEGV);
 }
 
 int force_sig_fault_to_task(int sig, int code, void __user *addr
-- 
cgit v1.2.3


From 5bf84b29938579a9350a4a9c2c4f8b5da2aa6992 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 7 May 2021 15:09:10 -0700
Subject: virtchnl: Remove unused VIRTCHNL_VF_OFFLOAD_RSVD define

Remove unused define that is currently marked as reserved. This will
open up space for a new feature if/when it's introduced. Also, there is
no reason to keep unused defines around.

Suggested-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Tony Brelinski <tony.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/avf/virtchnl.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index db0e099c2399..2e1e1379b569 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -240,7 +240,6 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
  */
 #define VIRTCHNL_VF_OFFLOAD_L2			0x00000001
 #define VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
-#define VIRTCHNL_VF_OFFLOAD_RSVD		0x00000004
 #define VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
 #define VIRTCHNL_VF_OFFLOAD_RSS_REG		0x00000010
 #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR		0x00000020
-- 
cgit v1.2.3


From 4a15022f82ee0f2e14a7f953b7bbc0f5c983b663 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 7 May 2021 15:09:11 -0700
Subject: virtchnl: Use the BIT() macro for capability/offload flags

Currently raw hex values are used to define specific bits for each
capability/offload in virtchnl.h. Using raw hex values makes it
unclear which bits are used/available. Fix this by using the BIT()
macro so it's immediately obvious which bits are used/available.

Also, move the VIRTCHNL_VF_CAP_ADV_LINK_SPEED define in the correct
place to line up with the other bit values and add a comment for its
purpose.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Tony Brelinski <tony.brelinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 include/linux/avf/virtchnl.h | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 2e1e1379b569..b30a1bc74fc7 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -238,26 +238,26 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
  * VIRTCHNL_VF_OFFLOAD_L2 flag is inclusive of base mode L2 offloads including
  * TX/RX Checksum offloading and TSO for non-tunnelled packets.
  */
-#define VIRTCHNL_VF_OFFLOAD_L2			0x00000001
-#define VIRTCHNL_VF_OFFLOAD_IWARP		0x00000002
-#define VIRTCHNL_VF_OFFLOAD_RSS_AQ		0x00000008
-#define VIRTCHNL_VF_OFFLOAD_RSS_REG		0x00000010
-#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR		0x00000020
-#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES		0x00000040
-#define VIRTCHNL_VF_OFFLOAD_VLAN		0x00010000
-#define VIRTCHNL_VF_OFFLOAD_RX_POLLING		0x00020000
-#define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	0x00040000
-#define VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
-#define VIRTCHNL_VF_OFFLOAD_ENCAP		0X00100000
-#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		0X00200000
-#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
-#define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
-#define VIRTCHNL_VF_OFFLOAD_USO			0X02000000
-#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		0X08000000
-#define VIRTCHNL_VF_OFFLOAD_FDIR_PF		0X10000000
-
-/* Define below the capability flags that are not offloads */
-#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		0x00000080
+#define VIRTCHNL_VF_OFFLOAD_L2			BIT(0)
+#define VIRTCHNL_VF_OFFLOAD_IWARP		BIT(1)
+#define VIRTCHNL_VF_OFFLOAD_RSS_AQ		BIT(3)
+#define VIRTCHNL_VF_OFFLOAD_RSS_REG		BIT(4)
+#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR		BIT(5)
+#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES		BIT(6)
+/* used to negotiate communicating link speeds in Mbps */
+#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		BIT(7)
+#define VIRTCHNL_VF_OFFLOAD_VLAN		BIT(16)
+#define VIRTCHNL_VF_OFFLOAD_RX_POLLING		BIT(17)
+#define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2	BIT(18)
+#define VIRTCHNL_VF_OFFLOAD_RSS_PF		BIT(19)
+#define VIRTCHNL_VF_OFFLOAD_ENCAP		BIT(20)
+#define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		BIT(21)
+#define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	BIT(22)
+#define VIRTCHNL_VF_OFFLOAD_ADQ			BIT(23)
+#define VIRTCHNL_VF_OFFLOAD_USO			BIT(25)
+#define VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF		BIT(27)
+#define VIRTCHNL_VF_OFFLOAD_FDIR_PF		BIT(28)
+
 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
 			       VIRTCHNL_VF_OFFLOAD_VLAN | \
 			       VIRTCHNL_VF_OFFLOAD_RSS_PF)
-- 
cgit v1.2.3


From 504e15724893a839213fad5eedfbd511d9ba75cc Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Sun, 11 Jul 2021 16:56:54 +0300
Subject: net/mlx5: Allow skipping counter refresh on creation

CT creates a counter for each CT rule, and for each such counter,
fs_counters tries to queue mlx5_fc_stats_work() work again via
mod_delayed_work(0) call to refresh all counters. This call has a
large performance impact when reaching high insertion rate and
accounts for ~8% of the insertion time when using software steering.

Allow skipping the refresh of all counters during counter creation.
Change CT to use this refresh skipping for it's counters.

Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c    |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 14 +++++++++++---
 include/linux/mlx5/fs.h                               |  4 ++++
 3 files changed, 16 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index f44e5de25037..c1c6e74c79c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -889,7 +889,7 @@ mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
 		return ERR_PTR(-ENOMEM);
 
 	counter->is_shared = false;
-	counter->counter = mlx5_fc_create(ct_priv->dev, true);
+	counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
 	if (IS_ERR(counter->counter)) {
 		ct_dbg("Failed to create counter for ct entry");
 		ret = PTR_ERR(counter->counter);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 60c9df1bc912..31c99d53faf7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -301,7 +301,7 @@ static struct mlx5_fc *mlx5_fc_acquire(struct mlx5_core_dev *dev, bool aging)
 	return mlx5_fc_single_alloc(dev);
 }
 
-struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging)
 {
 	struct mlx5_fc *counter = mlx5_fc_acquire(dev, aging);
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
@@ -332,8 +332,6 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
 			goto err_out_alloc;
 
 		llist_add(&counter->addlist, &fc_stats->addlist);
-
-		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
 	}
 
 	return counter;
@@ -342,6 +340,16 @@ err_out_alloc:
 	mlx5_fc_release(dev, counter);
 	return ERR_PTR(err);
 }
+
+struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
+{
+	struct mlx5_fc *counter = mlx5_fc_create_ex(dev, aging);
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+
+	if (aging)
+		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
+	return counter;
+}
 EXPORT_SYMBOL(mlx5_fc_create);
 
 u32 mlx5_fc_id(struct mlx5_fc *counter)
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index a7e1155bc4da..cd2d4c572367 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -245,6 +245,10 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
 				 struct mlx5_flow_destination *old_dest);
 
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
+
+/* As mlx5_fc_create() but doesn't queue stats refresh thread. */
+struct mlx5_fc *mlx5_fc_create_ex(struct mlx5_core_dev *dev, bool aging);
+
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
-- 
cgit v1.2.3


From f89f9c56e7372b2dda144f83dce61311b298c559 Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 25 Oct 2021 08:22:04 +0200
Subject: mailbox: apple: Add driver for Apple mailboxes

Apple SoCs such as the M1 come with various co-processors. Mailboxes
are used to communicate with those. This driver adds support for
two variants of those mailboxes.

Signed-off-by: Sven Peter <sven@svenpeter.dev>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/Kconfig         |  12 ++
 drivers/mailbox/Makefile        |   2 +
 drivers/mailbox/apple-mailbox.c | 384 ++++++++++++++++++++++++++++++++++++++++
 include/linux/apple-mailbox.h   |  19 ++
 4 files changed, 417 insertions(+)
 create mode 100644 drivers/mailbox/apple-mailbox.c
 create mode 100644 include/linux/apple-mailbox.h

(limited to 'include/linux')

diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig
index c9fc06c7e685..d9cd3606040e 100644
--- a/drivers/mailbox/Kconfig
+++ b/drivers/mailbox/Kconfig
@@ -8,6 +8,18 @@ menuconfig MAILBOX
 
 if MAILBOX
 
+config APPLE_MAILBOX
+	tristate "Apple Mailbox driver"
+	depends on ARCH_APPLE || (ARM64 && COMPILE_TEST)
+	default ARCH_APPLE
+	help
+	  Apple SoCs have various co-processors required for certain
+	  peripherals to work (NVMe, display controller, etc.). This
+	  driver adds support for the mailbox controller used to
+	  communicate with those.
+
+	  Say Y here if you have a Apple SoC.
+
 config ARM_MHU
 	tristate "ARM MHU Mailbox"
 	depends on ARM_AMBA
diff --git a/drivers/mailbox/Makefile b/drivers/mailbox/Makefile
index c2089f04887e..338cc05e5431 100644
--- a/drivers/mailbox/Makefile
+++ b/drivers/mailbox/Makefile
@@ -58,3 +58,5 @@ obj-$(CONFIG_SUN6I_MSGBOX)	+= sun6i-msgbox.o
 obj-$(CONFIG_SPRD_MBOX)		+= sprd-mailbox.o
 
 obj-$(CONFIG_QCOM_IPCC)		+= qcom-ipcc.o
+
+obj-$(CONFIG_APPLE_MAILBOX)	+= apple-mailbox.o
diff --git a/drivers/mailbox/apple-mailbox.c b/drivers/mailbox/apple-mailbox.c
new file mode 100644
index 000000000000..72942002a54a
--- /dev/null
+++ b/drivers/mailbox/apple-mailbox.c
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+/*
+ * Apple mailbox driver
+ *
+ * Copyright (C) 2021 The Asahi Linux Contributors
+ *
+ * This driver adds support for two mailbox variants (called ASC and M3 by
+ * Apple) found in Apple SoCs such as the M1. It consists of two FIFOs used to
+ * exchange 64+32 bit messages between the main CPU and a co-processor.
+ * Various coprocessors implement different IPC protocols based on these simple
+ * messages and shared memory buffers.
+ *
+ * Both the main CPU and the co-processor see the same set of registers but
+ * the first FIFO (A2I) is always used to transfer messages from the application
+ * processor (us) to the I/O processor and the second one (I2A) for the
+ * other direction.
+ */
+
+#include <linux/apple-mailbox.h>
+#include <linux/device.h>
+#include <linux/gfp.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mailbox_controller.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#define APPLE_ASC_MBOX_CONTROL_FULL  BIT(16)
+#define APPLE_ASC_MBOX_CONTROL_EMPTY BIT(17)
+
+#define APPLE_ASC_MBOX_A2I_CONTROL 0x110
+#define APPLE_ASC_MBOX_A2I_SEND0   0x800
+#define APPLE_ASC_MBOX_A2I_SEND1   0x808
+#define APPLE_ASC_MBOX_A2I_RECV0   0x810
+#define APPLE_ASC_MBOX_A2I_RECV1   0x818
+
+#define APPLE_ASC_MBOX_I2A_CONTROL 0x114
+#define APPLE_ASC_MBOX_I2A_SEND0   0x820
+#define APPLE_ASC_MBOX_I2A_SEND1   0x828
+#define APPLE_ASC_MBOX_I2A_RECV0   0x830
+#define APPLE_ASC_MBOX_I2A_RECV1   0x838
+
+#define APPLE_M3_MBOX_CONTROL_FULL  BIT(16)
+#define APPLE_M3_MBOX_CONTROL_EMPTY BIT(17)
+
+#define APPLE_M3_MBOX_A2I_CONTROL 0x50
+#define APPLE_M3_MBOX_A2I_SEND0	  0x60
+#define APPLE_M3_MBOX_A2I_SEND1	  0x68
+#define APPLE_M3_MBOX_A2I_RECV0	  0x70
+#define APPLE_M3_MBOX_A2I_RECV1	  0x78
+
+#define APPLE_M3_MBOX_I2A_CONTROL 0x80
+#define APPLE_M3_MBOX_I2A_SEND0	  0x90
+#define APPLE_M3_MBOX_I2A_SEND1	  0x98
+#define APPLE_M3_MBOX_I2A_RECV0	  0xa0
+#define APPLE_M3_MBOX_I2A_RECV1	  0xa8
+
+#define APPLE_M3_MBOX_IRQ_ENABLE	0x48
+#define APPLE_M3_MBOX_IRQ_ACK		0x4c
+#define APPLE_M3_MBOX_IRQ_A2I_EMPTY	BIT(0)
+#define APPLE_M3_MBOX_IRQ_A2I_NOT_EMPTY BIT(1)
+#define APPLE_M3_MBOX_IRQ_I2A_EMPTY	BIT(2)
+#define APPLE_M3_MBOX_IRQ_I2A_NOT_EMPTY BIT(3)
+
+#define APPLE_MBOX_MSG1_OUTCNT GENMASK(56, 52)
+#define APPLE_MBOX_MSG1_INCNT  GENMASK(51, 48)
+#define APPLE_MBOX_MSG1_OUTPTR GENMASK(47, 44)
+#define APPLE_MBOX_MSG1_INPTR  GENMASK(43, 40)
+#define APPLE_MBOX_MSG1_MSG    GENMASK(31, 0)
+
+struct apple_mbox_hw {
+	unsigned int control_full;
+	unsigned int control_empty;
+
+	unsigned int a2i_control;
+	unsigned int a2i_send0;
+	unsigned int a2i_send1;
+
+	unsigned int i2a_control;
+	unsigned int i2a_recv0;
+	unsigned int i2a_recv1;
+
+	bool has_irq_controls;
+	unsigned int irq_enable;
+	unsigned int irq_ack;
+	unsigned int irq_bit_recv_not_empty;
+	unsigned int irq_bit_send_empty;
+};
+
+struct apple_mbox {
+	void __iomem *regs;
+	const struct apple_mbox_hw *hw;
+
+	int irq_recv_not_empty;
+	int irq_send_empty;
+
+	struct mbox_chan chan;
+
+	struct device *dev;
+	struct mbox_controller controller;
+};
+
+static const struct of_device_id apple_mbox_of_match[];
+
+static bool apple_mbox_hw_can_send(struct apple_mbox *apple_mbox)
+{
+	u32 mbox_ctrl =
+		readl_relaxed(apple_mbox->regs + apple_mbox->hw->a2i_control);
+
+	return !(mbox_ctrl & apple_mbox->hw->control_full);
+}
+
+static int apple_mbox_hw_send(struct apple_mbox *apple_mbox,
+			      struct apple_mbox_msg *msg)
+{
+	if (!apple_mbox_hw_can_send(apple_mbox))
+		return -EBUSY;
+
+	dev_dbg(apple_mbox->dev, "> TX %016llx %08x\n", msg->msg0, msg->msg1);
+
+	writeq_relaxed(msg->msg0, apple_mbox->regs + apple_mbox->hw->a2i_send0);
+	writeq_relaxed(FIELD_PREP(APPLE_MBOX_MSG1_MSG, msg->msg1),
+		       apple_mbox->regs + apple_mbox->hw->a2i_send1);
+
+	return 0;
+}
+
+static bool apple_mbox_hw_can_recv(struct apple_mbox *apple_mbox)
+{
+	u32 mbox_ctrl =
+		readl_relaxed(apple_mbox->regs + apple_mbox->hw->i2a_control);
+
+	return !(mbox_ctrl & apple_mbox->hw->control_empty);
+}
+
+static int apple_mbox_hw_recv(struct apple_mbox *apple_mbox,
+			      struct apple_mbox_msg *msg)
+{
+	if (!apple_mbox_hw_can_recv(apple_mbox))
+		return -ENOMSG;
+
+	msg->msg0 = readq_relaxed(apple_mbox->regs + apple_mbox->hw->i2a_recv0);
+	msg->msg1 = FIELD_GET(
+		APPLE_MBOX_MSG1_MSG,
+		readq_relaxed(apple_mbox->regs + apple_mbox->hw->i2a_recv1));
+
+	dev_dbg(apple_mbox->dev, "< RX %016llx %08x\n", msg->msg0, msg->msg1);
+
+	return 0;
+}
+
+static int apple_mbox_chan_send_data(struct mbox_chan *chan, void *data)
+{
+	struct apple_mbox *apple_mbox = chan->con_priv;
+	struct apple_mbox_msg *msg = data;
+	int ret;
+
+	ret = apple_mbox_hw_send(apple_mbox, msg);
+	if (ret)
+		return ret;
+
+	/*
+	 * The interrupt is level triggered and will keep firing as long as the
+	 * FIFO is empty. It will also keep firing if the FIFO was empty
+	 * at any point in the past until it has been acknowledged at the
+	 * mailbox level. By acknowledging it here we can ensure that we will
+	 * only get the interrupt once the FIFO has been cleared again.
+	 * If the FIFO is already empty before the ack it will fire again
+	 * immediately after the ack.
+	 */
+	if (apple_mbox->hw->has_irq_controls) {
+		writel_relaxed(apple_mbox->hw->irq_bit_send_empty,
+			       apple_mbox->regs + apple_mbox->hw->irq_ack);
+	}
+	enable_irq(apple_mbox->irq_send_empty);
+
+	return 0;
+}
+
+static irqreturn_t apple_mbox_send_empty_irq(int irq, void *data)
+{
+	struct apple_mbox *apple_mbox = data;
+
+	/*
+	 * We don't need to acknowledge the interrupt at the mailbox level
+	 * here even if supported by the hardware. It will keep firing but that
+	 * doesn't matter since it's disabled at the main interrupt controller.
+	 * apple_mbox_chan_send_data will acknowledge it before enabling
+	 * it at the main controller again.
+	 */
+	disable_irq_nosync(apple_mbox->irq_send_empty);
+	mbox_chan_txdone(&apple_mbox->chan, 0);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t apple_mbox_recv_irq(int irq, void *data)
+{
+	struct apple_mbox *apple_mbox = data;
+	struct apple_mbox_msg msg;
+
+	while (apple_mbox_hw_recv(apple_mbox, &msg) == 0)
+		mbox_chan_received_data(&apple_mbox->chan, (void *)&msg);
+
+	/*
+	 * The interrupt will keep firing even if there are no more messages
+	 * unless we also acknowledge it at the mailbox level here.
+	 * There's no race if a message comes in between the check in the while
+	 * loop above and the ack below: If a new messages arrives inbetween
+	 * those two the interrupt will just fire again immediately after the
+	 * ack since it's level triggered.
+	 */
+	if (apple_mbox->hw->has_irq_controls) {
+		writel_relaxed(apple_mbox->hw->irq_bit_recv_not_empty,
+			       apple_mbox->regs + apple_mbox->hw->irq_ack);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int apple_mbox_chan_startup(struct mbox_chan *chan)
+{
+	struct apple_mbox *apple_mbox = chan->con_priv;
+
+	/*
+	 * Only some variants of this mailbox HW provide interrupt control
+	 * at the mailbox level. We therefore need to handle enabling/disabling
+	 * interrupts at the main interrupt controller anyway for hardware that
+	 * doesn't. Just always keep the interrupts we care about enabled at
+	 * the mailbox level so that both hardware revisions behave almost
+	 * the same.
+	 */
+	if (apple_mbox->hw->has_irq_controls) {
+		writel_relaxed(apple_mbox->hw->irq_bit_recv_not_empty |
+				       apple_mbox->hw->irq_bit_send_empty,
+			       apple_mbox->regs + apple_mbox->hw->irq_enable);
+	}
+
+	enable_irq(apple_mbox->irq_recv_not_empty);
+	return 0;
+}
+
+static void apple_mbox_chan_shutdown(struct mbox_chan *chan)
+{
+	struct apple_mbox *apple_mbox = chan->con_priv;
+
+	disable_irq(apple_mbox->irq_recv_not_empty);
+}
+
+static const struct mbox_chan_ops apple_mbox_ops = {
+	.send_data = apple_mbox_chan_send_data,
+	.startup = apple_mbox_chan_startup,
+	.shutdown = apple_mbox_chan_shutdown,
+};
+
+static struct mbox_chan *apple_mbox_of_xlate(struct mbox_controller *mbox,
+					     const struct of_phandle_args *args)
+{
+	if (args->args_count != 0)
+		return ERR_PTR(-EINVAL);
+
+	return &mbox->chans[0];
+}
+
+static int apple_mbox_probe(struct platform_device *pdev)
+{
+	int ret;
+	const struct of_device_id *match;
+	char *irqname;
+	struct apple_mbox *mbox;
+	struct device *dev = &pdev->dev;
+
+	match = of_match_node(apple_mbox_of_match, pdev->dev.of_node);
+	if (!match)
+		return -EINVAL;
+	if (!match->data)
+		return -EINVAL;
+
+	mbox = devm_kzalloc(dev, sizeof(*mbox), GFP_KERNEL);
+	if (!mbox)
+		return -ENOMEM;
+	platform_set_drvdata(pdev, mbox);
+
+	mbox->dev = dev;
+	mbox->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(mbox->regs))
+		return PTR_ERR(mbox->regs);
+
+	mbox->hw = match->data;
+	mbox->irq_recv_not_empty =
+		platform_get_irq_byname(pdev, "recv-not-empty");
+	if (mbox->irq_recv_not_empty < 0)
+		return -ENODEV;
+
+	mbox->irq_send_empty = platform_get_irq_byname(pdev, "send-empty");
+	if (mbox->irq_send_empty < 0)
+		return -ENODEV;
+
+	mbox->controller.dev = mbox->dev;
+	mbox->controller.num_chans = 1;
+	mbox->controller.chans = &mbox->chan;
+	mbox->controller.ops = &apple_mbox_ops;
+	mbox->controller.txdone_irq = true;
+	mbox->controller.of_xlate = apple_mbox_of_xlate;
+	mbox->chan.con_priv = mbox;
+
+	irqname = devm_kasprintf(dev, GFP_KERNEL, "%s-recv", dev_name(dev));
+	if (!irqname)
+		return -ENOMEM;
+
+	ret = devm_request_threaded_irq(dev, mbox->irq_recv_not_empty, NULL,
+					apple_mbox_recv_irq,
+					IRQF_NO_AUTOEN | IRQF_ONESHOT, irqname,
+					mbox);
+	if (ret)
+		return ret;
+
+	irqname = devm_kasprintf(dev, GFP_KERNEL, "%s-send", dev_name(dev));
+	if (!irqname)
+		return -ENOMEM;
+
+	ret = devm_request_irq(dev, mbox->irq_send_empty,
+			       apple_mbox_send_empty_irq, IRQF_NO_AUTOEN,
+			       irqname, mbox);
+	if (ret)
+		return ret;
+
+	return devm_mbox_controller_register(dev, &mbox->controller);
+}
+
+static const struct apple_mbox_hw apple_mbox_asc_hw = {
+	.control_full = APPLE_ASC_MBOX_CONTROL_FULL,
+	.control_empty = APPLE_ASC_MBOX_CONTROL_EMPTY,
+
+	.a2i_control = APPLE_ASC_MBOX_A2I_CONTROL,
+	.a2i_send0 = APPLE_ASC_MBOX_A2I_SEND0,
+	.a2i_send1 = APPLE_ASC_MBOX_A2I_SEND1,
+
+	.i2a_control = APPLE_ASC_MBOX_I2A_CONTROL,
+	.i2a_recv0 = APPLE_ASC_MBOX_I2A_RECV0,
+	.i2a_recv1 = APPLE_ASC_MBOX_I2A_RECV1,
+
+	.has_irq_controls = false,
+};
+
+static const struct apple_mbox_hw apple_mbox_m3_hw = {
+	.control_full = APPLE_M3_MBOX_CONTROL_FULL,
+	.control_empty = APPLE_M3_MBOX_CONTROL_EMPTY,
+
+	.a2i_control = APPLE_M3_MBOX_A2I_CONTROL,
+	.a2i_send0 = APPLE_M3_MBOX_A2I_SEND0,
+	.a2i_send1 = APPLE_M3_MBOX_A2I_SEND1,
+
+	.i2a_control = APPLE_M3_MBOX_I2A_CONTROL,
+	.i2a_recv0 = APPLE_M3_MBOX_I2A_RECV0,
+	.i2a_recv1 = APPLE_M3_MBOX_I2A_RECV1,
+
+	.has_irq_controls = true,
+	.irq_enable = APPLE_M3_MBOX_IRQ_ENABLE,
+	.irq_ack = APPLE_M3_MBOX_IRQ_ACK,
+	.irq_bit_recv_not_empty = APPLE_M3_MBOX_IRQ_I2A_NOT_EMPTY,
+	.irq_bit_send_empty = APPLE_M3_MBOX_IRQ_A2I_EMPTY,
+};
+
+static const struct of_device_id apple_mbox_of_match[] = {
+	{ .compatible = "apple,t8103-asc-mailbox", .data = &apple_mbox_asc_hw },
+	{ .compatible = "apple,t8103-m3-mailbox", .data = &apple_mbox_m3_hw },
+	{}
+};
+MODULE_DEVICE_TABLE(of, apple_mbox_of_match);
+
+static struct platform_driver apple_mbox_driver = {
+	.driver = {
+		.name = "apple-mailbox",
+		.of_match_table = apple_mbox_of_match,
+	},
+	.probe = apple_mbox_probe,
+};
+module_platform_driver(apple_mbox_driver);
+
+MODULE_LICENSE("Dual MIT/GPL");
+MODULE_AUTHOR("Sven Peter <sven@svenpeter.dev>");
+MODULE_DESCRIPTION("Apple Mailbox driver");
diff --git a/include/linux/apple-mailbox.h b/include/linux/apple-mailbox.h
new file mode 100644
index 000000000000..720fbb70294a
--- /dev/null
+++ b/include/linux/apple-mailbox.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR MIT */
+/*
+ * Apple mailbox message format
+ *
+ * Copyright (C) 2021 The Asahi Linux Contributors
+ */
+
+#ifndef _LINUX_APPLE_MAILBOX_H_
+#define _LINUX_APPLE_MAILBOX_H_
+
+#include <linux/types.h>
+
+/* encodes a single 96bit message sent over the single channel */
+struct apple_mbox_msg {
+	u64 msg0;
+	u32 msg1;
+};
+
+#endif
-- 
cgit v1.2.3


From 97961f78e8bc7f50ff7113fec030af6fa5f004d0 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 22 Oct 2021 18:18:56 +0800
Subject: mailbox: imx: support i.MX8ULP S4 MU

Like i.MX8 SCU, i.MX8ULP S4 also has vendor specific protocol.
 - bind SCU/S4 MU part to share one tx/rx/init API to make code simple.
 - S4 msg max size is very large, so alloc the space at driver probe,
   not use local on stack variable.
 - S4 MU has 8 TR and 4 RR which is different with i.MX8 MU, so adapt
   code to reflect this.

   Tested on i.MX8MP, i.MX8ULP

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>
---
 drivers/mailbox/imx-mailbox.c   | 124 ++++++++++++++++++++++++++++------------
 include/linux/firmware/imx/s4.h |  20 +++++++
 2 files changed, 107 insertions(+), 37 deletions(-)
 create mode 100644 include/linux/firmware/imx/s4.h

(limited to 'include/linux')

diff --git a/drivers/mailbox/imx-mailbox.c b/drivers/mailbox/imx-mailbox.c
index 0ce75c6b36b6..ffe36a6bef9e 100644
--- a/drivers/mailbox/imx-mailbox.c
+++ b/drivers/mailbox/imx-mailbox.c
@@ -5,6 +5,7 @@
 
 #include <linux/clk.h>
 #include <linux/firmware/imx/ipc.h>
+#include <linux/firmware/imx/s4.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -18,6 +19,8 @@
 #define IMX_MU_CHANS		16
 /* TX0/RX0/RXDB[0-3] */
 #define IMX_MU_SCU_CHANS	6
+/* TX0/RX0 */
+#define IMX_MU_S4_CHANS		2
 #define IMX_MU_CHAN_NAME_SIZE	20
 
 enum imx_mu_chan_type {
@@ -47,6 +50,11 @@ struct imx_sc_rpc_msg_max {
 	u32 data[7];
 };
 
+struct imx_s4_rpc_msg_max {
+	struct imx_s4_rpc_msg hdr;
+	u32 data[254];
+};
+
 struct imx_mu_con_priv {
 	unsigned int		idx;
 	char			irq_desc[IMX_MU_CHAN_NAME_SIZE];
@@ -58,6 +66,7 @@ struct imx_mu_con_priv {
 struct imx_mu_priv {
 	struct device		*dev;
 	void __iomem		*base;
+	void			*msg;
 	spinlock_t		xcr_lock; /* control register lock */
 
 	struct mbox_controller	mbox;
@@ -75,7 +84,8 @@ struct imx_mu_priv {
 
 enum imx_mu_type {
 	IMX_MU_V1,
-	IMX_MU_V2,
+	IMX_MU_V2 = BIT(1),
+	IMX_MU_V2_S4 = BIT(15),
 };
 
 struct imx_mu_dcfg {
@@ -89,18 +99,18 @@ struct imx_mu_dcfg {
 	u32	xCR[4];		/* Control Registers */
 };
 
-#define IMX_MU_xSR_GIPn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(28 + (3 - (x))))
-#define IMX_MU_xSR_RFn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
-#define IMX_MU_xSR_TEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(20 + (3 - (x))))
+#define IMX_MU_xSR_GIPn(type, x) (type & IMX_MU_V2 ? BIT(x) : BIT(28 + (3 - (x))))
+#define IMX_MU_xSR_RFn(type, x) (type & IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
+#define IMX_MU_xSR_TEn(type, x) (type & IMX_MU_V2 ? BIT(x) : BIT(20 + (3 - (x))))
 
 /* General Purpose Interrupt Enable */
-#define IMX_MU_xCR_GIEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(28 + (3 - (x))))
+#define IMX_MU_xCR_GIEn(type, x) (type & IMX_MU_V2 ? BIT(x) : BIT(28 + (3 - (x))))
 /* Receive Interrupt Enable */
-#define IMX_MU_xCR_RIEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
+#define IMX_MU_xCR_RIEn(type, x) (type & IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
 /* Transmit Interrupt Enable */
-#define IMX_MU_xCR_TIEn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(20 + (3 - (x))))
+#define IMX_MU_xCR_TIEn(type, x) (type & IMX_MU_V2 ? BIT(x) : BIT(20 + (3 - (x))))
 /* General Purpose Interrupt Request */
-#define IMX_MU_xCR_GIRn(type, x) (type == IMX_MU_V2 ? BIT(x) : BIT(16 + (3 - (x))))
+#define IMX_MU_xCR_GIRn(type, x) (type & IMX_MU_V2 ? BIT(x) : BIT(16 + (3 - (x))))
 
 
 static struct imx_mu_priv *to_imx_mu_priv(struct mbox_controller *mbox)
@@ -167,14 +177,22 @@ static int imx_mu_generic_rx(struct imx_mu_priv *priv,
 	return 0;
 }
 
-static int imx_mu_scu_tx(struct imx_mu_priv *priv,
-			 struct imx_mu_con_priv *cp,
-			 void *data)
+static int imx_mu_specific_tx(struct imx_mu_priv *priv, struct imx_mu_con_priv *cp, void *data)
 {
-	struct imx_sc_rpc_msg_max *msg = data;
 	u32 *arg = data;
 	int i, ret;
 	u32 xsr;
+	u32 size, max_size, num_tr;
+
+	if (priv->dcfg->type & IMX_MU_V2_S4) {
+		size = ((struct imx_s4_rpc_msg_max *)data)->hdr.size;
+		max_size = sizeof(struct imx_s4_rpc_msg_max);
+		num_tr = 8;
+	} else {
+		size = ((struct imx_sc_rpc_msg_max *)data)->hdr.size;
+		max_size = sizeof(struct imx_sc_rpc_msg_max);
+		num_tr = 4;
+	}
 
 	switch (cp->type) {
 	case IMX_MU_TYPE_TX:
@@ -183,27 +201,27 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 		 * sizeof yields bytes.
 		 */
 
-		if (msg->hdr.size > sizeof(*msg) / 4) {
+		if (size > max_size / 4) {
 			/*
 			 * The real message size can be different to
-			 * struct imx_sc_rpc_msg_max size
+			 * struct imx_sc_rpc_msg_max/imx_s4_rpc_msg_max size
 			 */
-			dev_err(priv->dev, "Maximal message size (%zu bytes) exceeded on TX; got: %i bytes\n", sizeof(*msg), msg->hdr.size << 2);
+			dev_err(priv->dev, "Maximal message size (%u bytes) exceeded on TX; got: %i bytes\n", max_size, size << 2);
 			return -EINVAL;
 		}
 
-		for (i = 0; i < 4 && i < msg->hdr.size; i++)
-			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % 4) * 4);
-		for (; i < msg->hdr.size; i++) {
+		for (i = 0; i < num_tr && i < size; i++)
+			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % num_tr) * 4);
+		for (; i < size; i++) {
 			ret = readl_poll_timeout(priv->base + priv->dcfg->xSR[IMX_MU_TSR],
 						 xsr,
-						 xsr & IMX_MU_xSR_TEn(priv->dcfg->type, i % 4),
+						 xsr & IMX_MU_xSR_TEn(priv->dcfg->type, i % num_tr),
 						 0, 100);
 			if (ret) {
 				dev_err(priv->dev, "Send data index: %d timeout\n", i);
 				return ret;
 			}
-			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % 4) * 4);
+			imx_mu_write(priv, *arg++, priv->dcfg->xTR + (i % num_tr) * 4);
 		}
 
 		imx_mu_xcr_rmw(priv, IMX_MU_TCR, IMX_MU_xCR_TIEn(priv->dcfg->type, cp->idx), 0);
@@ -216,23 +234,32 @@ static int imx_mu_scu_tx(struct imx_mu_priv *priv,
 	return 0;
 }
 
-static int imx_mu_scu_rx(struct imx_mu_priv *priv,
-			 struct imx_mu_con_priv *cp)
+static int imx_mu_specific_rx(struct imx_mu_priv *priv, struct imx_mu_con_priv *cp)
 {
-	struct imx_sc_rpc_msg_max msg;
-	u32 *data = (u32 *)&msg;
+	u32 *data;
 	int i, ret;
 	u32 xsr;
+	u32 size, max_size;
+
+	data = (u32 *)priv->msg;
 
 	imx_mu_xcr_rmw(priv, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(priv->dcfg->type, 0));
 	*data++ = imx_mu_read(priv, priv->dcfg->xRR);
 
-	if (msg.hdr.size > sizeof(msg) / 4) {
-		dev_err(priv->dev, "Maximal message size (%zu bytes) exceeded on RX; got: %i bytes\n", sizeof(msg), msg.hdr.size << 2);
+	if (priv->dcfg->type & IMX_MU_V2_S4) {
+		size = ((struct imx_s4_rpc_msg_max *)priv->msg)->hdr.size;
+		max_size = sizeof(struct imx_s4_rpc_msg_max);
+	} else {
+		size = ((struct imx_sc_rpc_msg_max *)priv->msg)->hdr.size;
+		max_size = sizeof(struct imx_sc_rpc_msg_max);
+	}
+
+	if (size > max_size / 4) {
+		dev_err(priv->dev, "Maximal message size (%u bytes) exceeded on RX; got: %i bytes\n", max_size, size << 2);
 		return -EINVAL;
 	}
 
-	for (i = 1; i < msg.hdr.size; i++) {
+	for (i = 1; i < size; i++) {
 		ret = readl_poll_timeout(priv->base + priv->dcfg->xSR[IMX_MU_RSR], xsr,
 					 xsr & IMX_MU_xSR_RFn(priv->dcfg->type, i % 4), 0, 100);
 		if (ret) {
@@ -243,7 +270,7 @@ static int imx_mu_scu_rx(struct imx_mu_priv *priv,
 	}
 
 	imx_mu_xcr_rmw(priv, IMX_MU_RCR, IMX_MU_xCR_RIEn(priv->dcfg->type, 0), 0);
-	mbox_chan_received_data(cp->chan, (void *)&msg);
+	mbox_chan_received_data(cp->chan, (void *)priv->msg);
 
 	return 0;
 }
@@ -394,8 +421,8 @@ static const struct mbox_chan_ops imx_mu_ops = {
 	.shutdown = imx_mu_shutdown,
 };
 
-static struct mbox_chan *imx_mu_scu_xlate(struct mbox_controller *mbox,
-					  const struct of_phandle_args *sp)
+static struct mbox_chan *imx_mu_specific_xlate(struct mbox_controller *mbox,
+					       const struct of_phandle_args *sp)
 {
 	u32 type, idx, chan;
 
@@ -478,11 +505,12 @@ static void imx_mu_init_generic(struct imx_mu_priv *priv)
 		imx_mu_write(priv, 0, priv->dcfg->xCR[i]);
 }
 
-static void imx_mu_init_scu(struct imx_mu_priv *priv)
+static void imx_mu_init_specific(struct imx_mu_priv *priv)
 {
 	unsigned int i;
+	int num_chans = priv->dcfg->type & IMX_MU_V2_S4 ? IMX_MU_S4_CHANS : IMX_MU_SCU_CHANS;
 
-	for (i = 0; i < IMX_MU_SCU_CHANS; i++) {
+	for (i = 0; i < num_chans; i++) {
 		struct imx_mu_con_priv *cp = &priv->con_priv[i];
 
 		cp->idx = i < 2 ? 0 : i - 2;
@@ -493,8 +521,8 @@ static void imx_mu_init_scu(struct imx_mu_priv *priv)
 			 "imx_mu_chan[%i-%i]", cp->type, cp->idx);
 	}
 
-	priv->mbox.num_chans = IMX_MU_SCU_CHANS;
-	priv->mbox.of_xlate = imx_mu_scu_xlate;
+	priv->mbox.num_chans = num_chans;
+	priv->mbox.of_xlate = imx_mu_specific_xlate;
 
 	/* Set default MU configuration */
 	for (i = 0; i < IMX_MU_xCR_MAX; i++)
@@ -508,6 +536,7 @@ static int imx_mu_probe(struct platform_device *pdev)
 	struct imx_mu_priv *priv;
 	const struct imx_mu_dcfg *dcfg;
 	int ret;
+	u32 size;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -528,6 +557,15 @@ static int imx_mu_probe(struct platform_device *pdev)
 		return -EINVAL;
 	priv->dcfg = dcfg;
 
+	if (priv->dcfg->type & IMX_MU_V2_S4)
+		size = sizeof(struct imx_s4_rpc_msg_max);
+	else
+		size = sizeof(struct imx_sc_rpc_msg_max);
+
+	priv->msg = devm_kzalloc(dev, size, GFP_KERNEL);
+	if (IS_ERR(priv->msg))
+		return PTR_ERR(priv->msg);
+
 	priv->clk = devm_clk_get(dev, NULL);
 	if (IS_ERR(priv->clk)) {
 		if (PTR_ERR(priv->clk) != -ENOENT)
@@ -623,10 +661,21 @@ static const struct imx_mu_dcfg imx_mu_cfg_imx8ulp = {
 	.xCR	= {0x110, 0x114, 0x120, 0x128},
 };
 
+static const struct imx_mu_dcfg imx_mu_cfg_imx8ulp_s4 = {
+	.tx	= imx_mu_specific_tx,
+	.rx	= imx_mu_specific_rx,
+	.init	= imx_mu_init_specific,
+	.type	= IMX_MU_V2 | IMX_MU_V2_S4,
+	.xTR	= 0x200,
+	.xRR	= 0x280,
+	.xSR	= {0xC, 0x118, 0x124, 0x12C},
+	.xCR	= {0x110, 0x114, 0x120, 0x128},
+};
+
 static const struct imx_mu_dcfg imx_mu_cfg_imx8_scu = {
-	.tx	= imx_mu_scu_tx,
-	.rx	= imx_mu_scu_rx,
-	.init	= imx_mu_init_scu,
+	.tx	= imx_mu_specific_tx,
+	.rx	= imx_mu_specific_rx,
+	.init	= imx_mu_init_specific,
 	.xTR	= 0x0,
 	.xRR	= 0x10,
 	.xSR	= {0x20, 0x20, 0x20, 0x20},
@@ -637,6 +686,7 @@ static const struct of_device_id imx_mu_dt_ids[] = {
 	{ .compatible = "fsl,imx7ulp-mu", .data = &imx_mu_cfg_imx7ulp },
 	{ .compatible = "fsl,imx6sx-mu", .data = &imx_mu_cfg_imx6sx },
 	{ .compatible = "fsl,imx8ulp-mu", .data = &imx_mu_cfg_imx8ulp },
+	{ .compatible = "fsl,imx8ulp-mu-s4", .data = &imx_mu_cfg_imx8ulp_s4 },
 	{ .compatible = "fsl,imx8-mu-scu", .data = &imx_mu_cfg_imx8_scu },
 	{ },
 };
diff --git a/include/linux/firmware/imx/s4.h b/include/linux/firmware/imx/s4.h
new file mode 100644
index 000000000000..9e34923ae1d6
--- /dev/null
+++ b/include/linux/firmware/imx/s4.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2021 NXP
+ *
+ * Header file for the IPC implementation.
+ */
+
+#ifndef _S4_IPC_H
+#define _S4_IPC_H
+
+struct imx_s4_ipc;
+
+struct imx_s4_rpc_msg {
+	uint8_t ver;
+	uint8_t size;
+	uint8_t cmd;
+	uint8_t tag;
+} __packed;
+
+#endif /* _S4_IPC_H */
-- 
cgit v1.2.3


From cc8d7b4aea79df7cb45b74f9bc5b8a8bd2ed4c07 Mon Sep 17 00:00:00 2001
From: Anssi Hannula <anssi.hannula@bitwise.fi>
Date: Wed, 27 Oct 2021 13:21:24 +0300
Subject: tty: Fix extra "not" in TTY_DRIVER_REAL_RAW description

TTY_DRIVER_REAL_RAW flag (which is always set for e.g. serial ports)
documentation says that driver must always set special character
handling flags in certain conditions.

However, as the following sentence makes clear, what is actually
intended is the opposite.

Fix that by removing the unintended double negation.

Acked-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Anssi Hannula <anssi.hannula@bitwise.fi>
Link: https://lore.kernel.org/r/20211027102124.3049414-1-anssi.hannula@bitwise.fi
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/tty_driver.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 29e1cf178afb..795b94ccdeb6 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -360,7 +360,7 @@ static inline void tty_set_operations(struct tty_driver *driver,
  * 	Used for PTY's, in particular.
  * 
  * TTY_DRIVER_REAL_RAW --- if set, indicates that the driver will
- * 	guarantee never not to set any special character handling
+ * 	guarantee never to set any special character handling
  * 	flags if ((IGNBRK || (!BRKINT && !PARMRK)) && (IGNPAR ||
  * 	!INPCK)).  That is, if there is no reason for the driver to
  * 	send notifications of parity and break characters up to the
-- 
cgit v1.2.3


From f98a3dccfcb0b9b9c3bef8df9edd61cda80ad937 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 22 Oct 2021 13:59:38 +0200
Subject: locking: Remove spin_lock_flags() etc

parisc, ia64 and powerpc32 are the only remaining architectures that
provide custom arch_{spin,read,write}_lock_flags() functions, which are
meant to re-enable interrupts while waiting for a spinlock.

However, none of these can actually run into this codepath, because
it is only called on architectures without CONFIG_GENERIC_LOCKBREAK,
or when CONFIG_DEBUG_LOCK_ALLOC is set without CONFIG_LOCKDEP, and none
of those combinations are possible on the three architectures.

Going back in the git history, it appears that arch/mn10300 may have
been able to run into this code path, but there is a good chance that
it never worked. On the architectures that still exist, it was
already impossible to hit back in 2008 after the introduction of
CONFIG_GENERIC_LOCKBREAK, and possibly earlier.

As this is all dead code, just remove it and the helper functions built
around it. For arch/ia64, the inline asm could be cleaned up, but
it seems safer to leave it untouched.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Helge Deller <deller@gmx.de>  # parisc
Link: https://lore.kernel.org/r/20211022120058.1031690-1-arnd@kernel.org
---
 arch/ia64/include/asm/spinlock.h           | 23 ++++++-----------------
 arch/openrisc/include/asm/spinlock.h       |  3 ---
 arch/parisc/include/asm/spinlock.h         | 15 ---------------
 arch/powerpc/include/asm/simple_spinlock.h | 21 ---------------------
 arch/s390/include/asm/spinlock.h           |  8 --------
 include/linux/lockdep.h                    | 17 -----------------
 include/linux/rwlock.h                     | 15 ---------------
 include/linux/rwlock_api_smp.h             |  6 ++----
 include/linux/spinlock.h                   | 13 -------------
 include/linux/spinlock_api_smp.h           |  9 ---------
 include/linux/spinlock_up.h                |  1 -
 kernel/locking/spinlock.c                  |  3 +--
 12 files changed, 9 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index 864775970c50..0e5c1ad3239c 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -124,18 +124,13 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 	__ticket_spin_unlock(lock);
 }
 
-static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
-						  unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-#define arch_spin_lock_flags	arch_spin_lock_flags
-
 #ifdef ASM_SUPPORTED
 
 static __always_inline void
-arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+arch_read_lock(arch_rwlock_t *lock)
 {
+	unsigned long flags = 0;
+
 	__asm__ __volatile__ (
 		"tbit.nz p6, p0 = %1,%2\n"
 		"br.few 3f\n"
@@ -157,13 +152,8 @@ arch_read_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 		: "p6", "p7", "r2", "memory");
 }
 
-#define arch_read_lock_flags arch_read_lock_flags
-#define arch_read_lock(lock) arch_read_lock_flags(lock, 0)
-
 #else /* !ASM_SUPPORTED */
 
-#define arch_read_lock_flags(rw, flags) arch_read_lock(rw)
-
 #define arch_read_lock(rw)								\
 do {											\
 	arch_rwlock_t *__read_lock_ptr = (rw);						\
@@ -186,8 +176,10 @@ do {								\
 #ifdef ASM_SUPPORTED
 
 static __always_inline void
-arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
+arch_write_lock(arch_rwlock_t *lock)
 {
+	unsigned long flags = 0;
+
 	__asm__ __volatile__ (
 		"tbit.nz p6, p0 = %1, %2\n"
 		"mov ar.ccv = r0\n"
@@ -210,9 +202,6 @@ arch_write_lock_flags(arch_rwlock_t *lock, unsigned long flags)
 		: "ar.ccv", "p6", "p7", "r2", "r29", "memory");
 }
 
-#define arch_write_lock_flags arch_write_lock_flags
-#define arch_write_lock(rw) arch_write_lock_flags(rw, 0)
-
 #define arch_write_trylock(rw)							\
 ({										\
 	register long result;							\
diff --git a/arch/openrisc/include/asm/spinlock.h b/arch/openrisc/include/asm/spinlock.h
index a8940bdfcb7e..264944a71535 100644
--- a/arch/openrisc/include/asm/spinlock.h
+++ b/arch/openrisc/include/asm/spinlock.h
@@ -19,9 +19,6 @@
 
 #include <asm/qrwlock.h>
 
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
 #define arch_spin_relax(lock)	cpu_relax()
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index fa5ee8a45dbd..a6e5d66a7656 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -23,21 +23,6 @@ static inline void arch_spin_lock(arch_spinlock_t *x)
 			continue;
 }
 
-static inline void arch_spin_lock_flags(arch_spinlock_t *x,
-					unsigned long flags)
-{
-	volatile unsigned int *a;
-
-	a = __ldcw_align(x);
-	while (__ldcw(a) == 0)
-		while (*a == 0)
-			if (flags & PSW_SM_I) {
-				local_irq_enable();
-				local_irq_disable();
-			}
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
 static inline void arch_spin_unlock(arch_spinlock_t *x)
 {
 	volatile unsigned int *a;
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
index 8985791a2ba5..7ae6aeef8464 100644
--- a/arch/powerpc/include/asm/simple_spinlock.h
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -123,27 +123,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	}
 }
 
-static inline
-void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	unsigned long flags_dis;
-
-	while (1) {
-		if (likely(__arch_spin_trylock(lock) == 0))
-			break;
-		local_save_flags(flags_dis);
-		local_irq_restore(flags);
-		do {
-			HMT_low();
-			if (is_shared_processor())
-				splpar_spin_yield(lock);
-		} while (unlikely(lock->slock != 0));
-		HMT_medium();
-		local_irq_restore(flags_dis);
-	}
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
 static inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	__asm__ __volatile__("# arch_spin_unlock\n\t"
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index ef59588a3042..888a2f1c9ee3 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -67,14 +67,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lp)
 		arch_spin_lock_wait(lp);
 }
 
-static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
-					unsigned long flags)
-{
-	if (!arch_spin_trylock_once(lp))
-		arch_spin_lock_wait(lp);
-}
-#define arch_spin_lock_flags	arch_spin_lock_flags
-
 static inline int arch_spin_trylock(arch_spinlock_t *lp)
 {
 	if (!arch_spin_trylock_once(lp))
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 9fe165beb0f9..467b94257105 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -481,23 +481,6 @@ do {								\
 
 #endif /* CONFIG_LOCK_STAT */
 
-#ifdef CONFIG_LOCKDEP
-
-/*
- * On lockdep we dont want the hand-coded irq-enable of
- * _raw_*_lock_flags() code, because lockdep assumes
- * that interrupts are not re-enabled during lock-acquire:
- */
-#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \
-	LOCK_CONTENDED((_lock), (try), (lock))
-
-#else /* CONFIG_LOCKDEP */
-
-#define LOCK_CONTENDED_FLAGS(_lock, try, lock, lockfl, flags) \
-	lockfl((_lock), (flags))
-
-#endif /* CONFIG_LOCKDEP */
-
 #ifdef CONFIG_PROVE_LOCKING
 extern void print_irqtrace_events(struct task_struct *curr);
 #else
diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h
index 7ce9a51ae5c0..2c0ad417ce3c 100644
--- a/include/linux/rwlock.h
+++ b/include/linux/rwlock.h
@@ -30,31 +30,16 @@ do {								\
 
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void do_raw_read_lock(rwlock_t *lock) __acquires(lock);
-#define do_raw_read_lock_flags(lock, flags) do_raw_read_lock(lock)
  extern int do_raw_read_trylock(rwlock_t *lock);
  extern void do_raw_read_unlock(rwlock_t *lock) __releases(lock);
  extern void do_raw_write_lock(rwlock_t *lock) __acquires(lock);
-#define do_raw_write_lock_flags(lock, flags) do_raw_write_lock(lock)
  extern int do_raw_write_trylock(rwlock_t *lock);
  extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock);
 #else
-
-#ifndef arch_read_lock_flags
-# define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
-#endif
-
-#ifndef arch_write_lock_flags
-# define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
-#endif
-
 # define do_raw_read_lock(rwlock)	do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0)
-# define do_raw_read_lock_flags(lock, flags) \
-		do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
 # define do_raw_read_trylock(rwlock)	arch_read_trylock(&(rwlock)->raw_lock)
 # define do_raw_read_unlock(rwlock)	do {arch_read_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
 # define do_raw_write_lock(rwlock)	do {__acquire(lock); arch_write_lock(&(rwlock)->raw_lock); } while (0)
-# define do_raw_write_lock_flags(lock, flags) \
-		do {__acquire(lock); arch_write_lock_flags(&(lock)->raw_lock, *(flags)); } while (0)
 # define do_raw_write_trylock(rwlock)	arch_write_trylock(&(rwlock)->raw_lock)
 # define do_raw_write_unlock(rwlock)	do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0)
 #endif
diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h
index abfb53ab11be..f1db6f17c4fb 100644
--- a/include/linux/rwlock_api_smp.h
+++ b/include/linux/rwlock_api_smp.h
@@ -157,8 +157,7 @@ static inline unsigned long __raw_read_lock_irqsave(rwlock_t *lock)
 	local_irq_save(flags);
 	preempt_disable();
 	rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, do_raw_read_trylock, do_raw_read_lock,
-			     do_raw_read_lock_flags, &flags);
+	LOCK_CONTENDED(lock, do_raw_read_trylock, do_raw_read_lock);
 	return flags;
 }
 
@@ -184,8 +183,7 @@ static inline unsigned long __raw_write_lock_irqsave(rwlock_t *lock)
 	local_irq_save(flags);
 	preempt_disable();
 	rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, do_raw_write_trylock, do_raw_write_lock,
-			     do_raw_write_lock_flags, &flags);
+	LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock);
 	return flags;
 }
 
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 45310ea1b1d7..f0447062eecd 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -177,7 +177,6 @@ do {									\
 
 #ifdef CONFIG_DEBUG_SPINLOCK
  extern void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock);
-#define do_raw_spin_lock_flags(lock, flags) do_raw_spin_lock(lock)
  extern int do_raw_spin_trylock(raw_spinlock_t *lock);
  extern void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock);
 #else
@@ -188,18 +187,6 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
 	mmiowb_spin_lock();
 }
 
-#ifndef arch_spin_lock_flags
-#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
-#endif
-
-static inline void
-do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
-{
-	__acquire(lock);
-	arch_spin_lock_flags(&lock->raw_lock, *flags);
-	mmiowb_spin_lock();
-}
-
 static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
 {
 	int ret = arch_spin_trylock(&(lock)->raw_lock);
diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h
index 6b8e1a0b137b..51fa0dab68c4 100644
--- a/include/linux/spinlock_api_smp.h
+++ b/include/linux/spinlock_api_smp.h
@@ -108,16 +108,7 @@ static inline unsigned long __raw_spin_lock_irqsave(raw_spinlock_t *lock)
 	local_irq_save(flags);
 	preempt_disable();
 	spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
-	/*
-	 * On lockdep we dont want the hand-coded irq-enable of
-	 * do_raw_spin_lock_flags() code, because lockdep assumes
-	 * that interrupts are not re-enabled during lock-acquire:
-	 */
-#ifdef CONFIG_LOCKDEP
 	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
-#else
-	do_raw_spin_lock_flags(lock, &flags);
-#endif
 	return flags;
 }
 
diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h
index 0ac9112c1bbe..16521074b6f7 100644
--- a/include/linux/spinlock_up.h
+++ b/include/linux/spinlock_up.h
@@ -62,7 +62,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
 #define arch_spin_is_locked(lock)	((void)(lock), 0)
 /* for sched/core.c and kernel_lock.c: */
 # define arch_spin_lock(lock)		do { barrier(); (void)(lock); } while (0)
-# define arch_spin_lock_flags(lock, flags)	do { barrier(); (void)(lock); } while (0)
 # define arch_spin_unlock(lock)	do { barrier(); (void)(lock); } while (0)
 # define arch_spin_trylock(lock)	({ barrier(); (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index c5830cfa379a..b562f9289372 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -378,8 +378,7 @@ unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock,
 	local_irq_save(flags);
 	preempt_disable();
 	spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
-	LOCK_CONTENDED_FLAGS(lock, do_raw_spin_trylock, do_raw_spin_lock,
-				do_raw_spin_lock_flags, &flags);
+	LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
 	return flags;
 }
 EXPORT_SYMBOL(_raw_spin_lock_irqsave_nested);
-- 
cgit v1.2.3


From 9cc2fa4f4a92ccc6760d764e7341be46ee8aaaa1 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Tue, 5 Oct 2021 00:05:43 +0200
Subject: task_stack: Fix end_of_stack() for architectures with upwards-growing
 stack

The function end_of_stack() returns a pointer to the last entry of a
stack. For architectures like parisc where the stack grows upwards
return the pointer to the highest address in the stack.

Without this change I faced a crash on parisc, because the stackleak
functionality wrote STACKLEAK_POISON to the lowest address and thus
overwrote the first 4 bytes of the task_struct which included the
TIF_FLAGS.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 include/linux/sched/task_stack.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h
index 2413427e439c..d10150587d81 100644
--- a/include/linux/sched/task_stack.h
+++ b/include/linux/sched/task_stack.h
@@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task)
 
 static inline unsigned long *end_of_stack(const struct task_struct *task)
 {
+#ifdef CONFIG_STACK_GROWSUP
+	return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1;
+#else
 	return task->stack;
+#endif
 }
 
 #elif !defined(__HAVE_THREAD_FUNCTIONS)
-- 
cgit v1.2.3


From e60b56e46b384cee1ad34e6adc164d883049c6c3 Mon Sep 17 00:00:00 2001
From: Vincent Guittot <vincent.guittot@linaro.org>
Date: Tue, 19 Oct 2021 14:35:35 +0200
Subject: sched/fair: Wait before decaying max_newidle_lb_cost

Decay max_newidle_lb_cost only when it has not been updated for a while
and ensure to not decay a recently changed value.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lore.kernel.org/r/20211019123537.17146-4-vincent.guittot@linaro.org
---
 include/linux/sched/topology.h |  2 +-
 kernel/sched/fair.c            | 36 +++++++++++++++++++++++++++---------
 kernel/sched/topology.c        |  2 +-
 3 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 2f9166f6dec8..c07bfa2d80f2 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -105,7 +105,7 @@ struct sched_domain {
 
 	/* idle_balance() stats */
 	u64 max_newidle_lb_cost;
-	unsigned long next_decay_max_lb_cost;
+	unsigned long last_decay_max_lb_cost;
 
 	u64 avg_scan_cost;		/* select_idle_sibling */
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c4c36865321b..e50fd751e1df 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10239,6 +10239,30 @@ void update_max_interval(void)
 	max_load_balance_interval = HZ*num_online_cpus()/10;
 }
 
+static inline bool update_newidle_cost(struct sched_domain *sd, u64 cost)
+{
+	if (cost > sd->max_newidle_lb_cost) {
+		/*
+		 * Track max cost of a domain to make sure to not delay the
+		 * next wakeup on the CPU.
+		 */
+		sd->max_newidle_lb_cost = cost;
+		sd->last_decay_max_lb_cost = jiffies;
+	} else if (time_after(jiffies, sd->last_decay_max_lb_cost + HZ)) {
+		/*
+		 * Decay the newidle max times by ~1% per second to ensure that
+		 * it is not outdated and the current max cost is actually
+		 * shorter.
+		 */
+		sd->max_newidle_lb_cost = (sd->max_newidle_lb_cost * 253) / 256;
+		sd->last_decay_max_lb_cost = jiffies;
+
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * It checks each scheduling domain to see if it is due to be balanced,
  * and initiates a balancing operation if so.
@@ -10262,14 +10286,9 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 	for_each_domain(cpu, sd) {
 		/*
 		 * Decay the newidle max times here because this is a regular
-		 * visit to all the domains. Decay ~1% per second.
+		 * visit to all the domains.
 		 */
-		if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
-			sd->max_newidle_lb_cost =
-				(sd->max_newidle_lb_cost * 253) / 256;
-			sd->next_decay_max_lb_cost = jiffies + HZ;
-			need_decay = 1;
-		}
+		need_decay = update_newidle_cost(sd, 0);
 		max_cost += sd->max_newidle_lb_cost;
 
 		/*
@@ -10911,8 +10930,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
 
 			t1 = sched_clock_cpu(this_cpu);
 			domain_cost = t1 - t0;
-			if (domain_cost > sd->max_newidle_lb_cost)
-				sd->max_newidle_lb_cost = domain_cost;
+			update_newidle_cost(sd, domain_cost);
 
 			curr_cost += domain_cost;
 			t0 = t1;
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index e81246787560..30169c7685b6 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1568,7 +1568,7 @@ sd_init(struct sched_domain_topology_level *tl,
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
 		.max_newidle_lb_cost	= 0,
-		.next_decay_max_lb_cost	= jiffies,
+		.last_decay_max_lb_cost	= jiffies,
 		.child			= child,
 #ifdef CONFIG_SCHED_DEBUG
 		.name			= tl->name,
-- 
cgit v1.2.3


From 7ff22787ba49c2e66dcec92f3e2b79ef6b6a0d71 Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Mon, 4 Oct 2021 10:07:09 -0700
Subject: platform/chrome: cros_ec_proto: Use EC struct for features

The Chrome EC's features are returned through an
ec_response_get_features struct, but they are stored in an independent
array. Although the two are effectively the same at present (2 unsigned
32 bit ints), there is the possibility that they could go out of sync.
Avoid this by only using the EC struct to store the features.

Signed-off-by: Prashant Malani <pmalani@chromium.org>
Link: https://lore.kernel.org/r/20211004170716.86601-1-pmalani@chromium.org
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/mfd/cros_ec_dev.c                   |  4 ++--
 drivers/platform/chrome/cros_ec_proto.c     | 15 ++++++++-------
 include/linux/platform_data/cros_ec_proto.h |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec_dev.c b/drivers/mfd/cros_ec_dev.c
index 8c08d1c55726..6ee1f410eb53 100644
--- a/drivers/mfd/cros_ec_dev.c
+++ b/drivers/mfd/cros_ec_dev.c
@@ -146,8 +146,8 @@ static int ec_device_probe(struct platform_device *pdev)
 	ec->ec_dev = dev_get_drvdata(dev->parent);
 	ec->dev = dev;
 	ec->cmd_offset = ec_platform->cmd_offset;
-	ec->features[0] = -1U; /* Not cached yet */
-	ec->features[1] = -1U; /* Not cached yet */
+	ec->features.flags[0] = -1U; /* Not cached yet */
+	ec->features.flags[1] = -1U; /* Not cached yet */
 	device_initialize(&ec->class_dev);
 
 	for (i = 0; i < ARRAY_SIZE(cros_mcu_devices); i++) {
diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c
index a9f1867e5d8f..b908cdd680e3 100644
--- a/drivers/platform/chrome/cros_ec_proto.c
+++ b/drivers/platform/chrome/cros_ec_proto.c
@@ -812,36 +812,37 @@ EXPORT_SYMBOL(cros_ec_get_host_event);
  */
 bool cros_ec_check_features(struct cros_ec_dev *ec, int feature)
 {
+	struct ec_response_get_features *features = &ec->features;
 	struct cros_ec_command *msg;
 	int ret;
 
-	if (ec->features[0] == -1U && ec->features[1] == -1U) {
+	if (features->flags[0] == -1U && features->flags[1] == -1U) {
 		/* features bitmap not read yet */
-		msg = kzalloc(sizeof(*msg) + sizeof(ec->features), GFP_KERNEL);
+		msg = kzalloc(sizeof(*msg) + sizeof(*features), GFP_KERNEL);
 		if (!msg) {
 			dev_err(ec->dev, "failed to allocate memory to get EC features\n");
 			return false;
 		}
 
 		msg->command = EC_CMD_GET_FEATURES + ec->cmd_offset;
-		msg->insize = sizeof(ec->features);
+		msg->insize = sizeof(*features);
 
 		ret = cros_ec_cmd_xfer_status(ec->ec_dev, msg);
 		if (ret < 0) {
 			dev_warn(ec->dev, "cannot get EC features: %d/%d\n",
 				 ret, msg->result);
-			memset(ec->features, 0, sizeof(ec->features));
+			memset(features, 0, sizeof(*features));
 		} else {
-			memcpy(ec->features, msg->data, sizeof(ec->features));
+			memcpy(features, msg->data, sizeof(*features));
 		}
 
 		dev_dbg(ec->dev, "EC features %08x %08x\n",
-			ec->features[0], ec->features[1]);
+			features->flags[0], features->flags[1]);
 
 		kfree(msg);
 	}
 
-	return !!(ec->features[feature / 32] & EC_FEATURE_MASK_0(feature));
+	return !!(features->flags[feature / 32] & EC_FEATURE_MASK_0(feature));
 }
 EXPORT_SYMBOL_GPL(cros_ec_check_features);
 
diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h
index 9d370816a419..df3c78c92ca2 100644
--- a/include/linux/platform_data/cros_ec_proto.h
+++ b/include/linux/platform_data/cros_ec_proto.h
@@ -205,7 +205,7 @@ struct cros_ec_dev {
 	struct cros_ec_debugfs *debug_info;
 	bool has_kb_wake_angle;
 	u16 cmd_offset;
-	u32 features[2];
+	struct ec_response_get_features features;
 };
 
 #define to_cros_ec_dev(dev)  container_of(dev, struct cros_ec_dev, class_dev)
-- 
cgit v1.2.3


From d89c8169bd7052c78731137da4c4c06986409c62 Mon Sep 17 00:00:00 2001
From: Wu Zongyong <wuzongyong@linux.alibaba.com>
Date: Fri, 29 Oct 2021 17:14:42 +0800
Subject: virtio-pci: introduce legacy device module

Split common codes from virtio-pci-legacy so vDPA driver can reuse it
later.

Signed-off-by: Wu Zongyong <wuzongyong@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/71605acde5e97fcb2760a6973e406279fb1bbd33.1635493219.git.wuzongyong@linux.alibaba.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/Kconfig                 |  10 ++
 drivers/virtio/Makefile                |   1 +
 drivers/virtio/virtio_pci_common.c     |  10 +-
 drivers/virtio/virtio_pci_common.h     |   9 +-
 drivers/virtio/virtio_pci_legacy.c     | 101 ++++-----------
 drivers/virtio/virtio_pci_legacy_dev.c | 220 +++++++++++++++++++++++++++++++++
 include/linux/virtio_pci_legacy.h      |  42 +++++++
 7 files changed, 310 insertions(+), 83 deletions(-)
 create mode 100644 drivers/virtio/virtio_pci_legacy_dev.c
 create mode 100644 include/linux/virtio_pci_legacy.h

(limited to 'include/linux')

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index ce1b3f6ec325..8fcf94cd2c96 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -20,6 +20,15 @@ config VIRTIO_PCI_LIB
 	  PCI device with possible vendor specific extensions. Any
 	  module that selects this module must depend on PCI.
 
+config VIRTIO_PCI_LIB_LEGACY
+	tristate
+	help
+	  Legacy PCI device (Virtio PCI Card 0.9.x Draft and older device)
+	  implementation.
+	  This module implements the basic probe and control for devices
+	  which are based on legacy PCI device. Any module that selects this
+	  module must depend on PCI.
+
 menuconfig VIRTIO_MENU
 	bool "Virtio drivers"
 	default y
@@ -43,6 +52,7 @@ config VIRTIO_PCI_LEGACY
 	bool "Support for legacy virtio draft 0.9.X and older devices"
 	default y
 	depends on VIRTIO_PCI
+	select VIRTIO_PCI_LIB_LEGACY
 	help
           Virtio PCI Card 0.9.X Draft (circa 2014) and older device support.
 
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 699bbea0465f..0a82d0873248 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
 obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o
+obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index b35bb2d57f62..d724f676608b 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -549,6 +549,8 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 
 	pci_set_master(pci_dev);
 
+	vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false;
+
 	rc = register_virtio_device(&vp_dev->vdev);
 	reg_dev = vp_dev;
 	if (rc)
@@ -557,10 +559,10 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 	return 0;
 
 err_register:
-	if (vp_dev->ioaddr)
-	     virtio_pci_legacy_remove(vp_dev);
+	if (vp_dev->is_legacy)
+		virtio_pci_legacy_remove(vp_dev);
 	else
-	     virtio_pci_modern_remove(vp_dev);
+		virtio_pci_modern_remove(vp_dev);
 err_probe:
 	pci_disable_device(pci_dev);
 err_enable_device:
@@ -587,7 +589,7 @@ static void virtio_pci_remove(struct pci_dev *pci_dev)
 
 	unregister_virtio_device(&vp_dev->vdev);
 
-	if (vp_dev->ioaddr)
+	if (vp_dev->is_legacy)
 		virtio_pci_legacy_remove(vp_dev);
 	else
 		virtio_pci_modern_remove(vp_dev);
diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index beec047a8f8d..eb17a29fc7ef 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h
@@ -25,6 +25,7 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/virtio_pci.h>
+#include <linux/virtio_pci_legacy.h>
 #include <linux/virtio_pci_modern.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
@@ -44,16 +45,14 @@ struct virtio_pci_vq_info {
 struct virtio_pci_device {
 	struct virtio_device vdev;
 	struct pci_dev *pci_dev;
+	struct virtio_pci_legacy_device ldev;
 	struct virtio_pci_modern_device mdev;
 
-	/* In legacy mode, these two point to within ->legacy. */
+	bool is_legacy;
+
 	/* Where to read and clear interrupt */
 	u8 __iomem *isr;
 
-	/* Legacy only field */
-	/* the IO mapping for the PCI config space */
-	void __iomem *ioaddr;
-
 	/* a list of queues so we can dispatch IRQs */
 	spinlock_t lock;
 	struct list_head virtqueues;
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index d62e9835aeec..82eb437ad920 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -14,6 +14,7 @@
  *  Michael S. Tsirkin <mst@redhat.com>
  */
 
+#include "linux/virtio_pci_legacy.h"
 #include "virtio_pci_common.h"
 
 /* virtio config->get_features() implementation */
@@ -23,7 +24,7 @@ static u64 vp_get_features(struct virtio_device *vdev)
 
 	/* When someone needs more than 32 feature bits, we'll need to
 	 * steal a bit to indicate that the rest are somewhere else. */
-	return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+	return vp_legacy_get_features(&vp_dev->ldev);
 }
 
 /* virtio config->finalize_features() implementation */
@@ -38,7 +39,7 @@ static int vp_finalize_features(struct virtio_device *vdev)
 	BUG_ON((u32)vdev->features != vdev->features);
 
 	/* We only support 32 feature bits. */
-	iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+	vp_legacy_set_features(&vp_dev->ldev, vdev->features);
 
 	return 0;
 }
@@ -48,7 +49,7 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
 		   void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr +
+	void __iomem *ioaddr = vp_dev->ldev.ioaddr +
 			VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
 			offset;
 	u8 *ptr = buf;
@@ -64,7 +65,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
 		   const void *buf, unsigned len)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	void __iomem *ioaddr = vp_dev->ioaddr +
+	void __iomem *ioaddr = vp_dev->ldev.ioaddr +
 			VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) +
 			offset;
 	const u8 *ptr = buf;
@@ -78,7 +79,7 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
 static u8 vp_get_status(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-	return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	return vp_legacy_get_status(&vp_dev->ldev);
 }
 
 static void vp_set_status(struct virtio_device *vdev, u8 status)
@@ -86,28 +87,24 @@ static void vp_set_status(struct virtio_device *vdev, u8 status)
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* We should never be setting status to 0. */
 	BUG_ON(status == 0);
-	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	vp_legacy_set_status(&vp_dev->ldev, status);
 }
 
 static void vp_reset(struct virtio_device *vdev)
 {
 	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 	/* 0 status means a reset. */
-	iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	vp_legacy_set_status(&vp_dev->ldev, 0);
 	/* Flush out the status write, and flush in device writes,
 	 * including MSi-X interrupts, if any. */
-	ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
+	vp_legacy_get_status(&vp_dev->ldev);
 	/* Flush pending VQ/configuration callbacks. */
 	vp_synchronize_vectors(vdev);
 }
 
 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
 {
-	/* Setup the vector used for configuration events */
-	iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-	/* Verify we had enough resources to assign the vector */
-	/* Will also flush the write out to device */
-	return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+	return vp_legacy_config_vector(&vp_dev->ldev, vector);
 }
 
 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
@@ -123,12 +120,9 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	int err;
 	u64 q_pfn;
 
-	/* Select the queue we're interested in */
-	iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
-
 	/* Check if queue is either not available or already active. */
-	num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
-	if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
+	num = vp_legacy_get_queue_size(&vp_dev->ldev, index);
+	if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index))
 		return ERR_PTR(-ENOENT);
 
 	info->msix_vector = msix_vec;
@@ -151,13 +145,12 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	}
 
 	/* activate the queue */
-	iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+	vp_legacy_set_queue_address(&vp_dev->ldev, index, q_pfn);
 
-	vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
+	vq->priv = (void __force *)vp_dev->ldev.ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
 
 	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
-		iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
-		msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		msix_vec = vp_legacy_queue_vector(&vp_dev->ldev, index, msix_vec);
 		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
 			err = -EBUSY;
 			goto out_deactivate;
@@ -167,7 +160,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 	return vq;
 
 out_deactivate:
-	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+	vp_legacy_set_queue_address(&vp_dev->ldev, index, 0);
 out_del_vq:
 	vring_del_virtqueue(vq);
 	return ERR_PTR(err);
@@ -178,17 +171,15 @@ static void del_vq(struct virtio_pci_vq_info *info)
 	struct virtqueue *vq = info->vq;
 	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 
-	iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
-
 	if (vp_dev->msix_enabled) {
-		iowrite16(VIRTIO_MSI_NO_VECTOR,
-			  vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+		vp_legacy_queue_vector(&vp_dev->ldev, vq->index,
+				VIRTIO_MSI_NO_VECTOR);
 		/* Flush the write out to device */
-		ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+		ioread8(vp_dev->ldev.ioaddr + VIRTIO_PCI_ISR);
 	}
 
 	/* Select and deactivate the queue */
-	iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+	vp_legacy_set_queue_address(&vp_dev->ldev, vq->index, 0);
 
 	vring_del_virtqueue(vq);
 }
@@ -211,51 +202,18 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
 /* the PCI probing function */
 int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
 {
+	struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
 	struct pci_dev *pci_dev = vp_dev->pci_dev;
 	int rc;
 
-	/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
-	if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
-		return -ENODEV;
-
-	if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
-		printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
-		       VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
-		return -ENODEV;
-	}
-
-	rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64));
-	if (rc) {
-		rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
-	} else {
-		/*
-		 * The virtio ring base address is expressed as a 32-bit PFN,
-		 * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT.
-		 */
-		dma_set_coherent_mask(&pci_dev->dev,
-				DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT));
-	}
-
-	if (rc)
-		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+	ldev->pci_dev = pci_dev;
 
-	rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
+	rc = vp_legacy_probe(ldev);
 	if (rc)
 		return rc;
 
-	rc = -ENOMEM;
-	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
-	if (!vp_dev->ioaddr)
-		goto err_iomap;
-
-	vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR;
-
-	/* we use the subsystem vendor/device id as the virtio vendor/device
-	 * id.  this allows us to use the same PCI vendor/device id for all
-	 * virtio devices and to identify the particular virtio driver by
-	 * the subsystem ids */
-	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
-	vp_dev->vdev.id.device = pci_dev->subsystem_device;
+	vp_dev->isr = ldev->isr;
+	vp_dev->vdev.id = ldev->id;
 
 	vp_dev->vdev.config = &virtio_pci_config_ops;
 
@@ -264,16 +222,11 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
 	vp_dev->del_vq = del_vq;
 
 	return 0;
-
-err_iomap:
-	pci_release_region(pci_dev, 0);
-	return rc;
 }
 
 void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev)
 {
-	struct pci_dev *pci_dev = vp_dev->pci_dev;
+	struct virtio_pci_legacy_device *ldev = &vp_dev->ldev;
 
-	pci_iounmap(pci_dev, vp_dev->ioaddr);
-	pci_release_region(pci_dev, 0);
+	vp_legacy_remove(ldev);
 }
diff --git a/drivers/virtio/virtio_pci_legacy_dev.c b/drivers/virtio/virtio_pci_legacy_dev.c
new file mode 100644
index 000000000000..9b97680dd02b
--- /dev/null
+++ b/drivers/virtio/virtio_pci_legacy_dev.c
@@ -0,0 +1,220 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "linux/virtio_pci.h"
+#include <linux/virtio_pci_legacy.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+
+/*
+ * vp_legacy_probe: probe the legacy virtio pci device, note that the
+ * caller is required to enable PCI device before calling this function.
+ * @ldev: the legacy virtio-pci device
+ *
+ * Return 0 on succeed otherwise fail
+ */
+int vp_legacy_probe(struct virtio_pci_legacy_device *ldev)
+{
+	struct pci_dev *pci_dev = ldev->pci_dev;
+	int rc;
+
+	/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
+	if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
+		return -ENODEV;
+
+	if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION)
+		return -ENODEV;
+
+	rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64));
+	if (rc) {
+		rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32));
+	} else {
+		/*
+		 * The virtio ring base address is expressed as a 32-bit PFN,
+		 * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT.
+		 */
+		dma_set_coherent_mask(&pci_dev->dev,
+				DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT));
+	}
+
+	if (rc)
+		dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+
+	rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
+	if (rc)
+		return rc;
+
+	ldev->ioaddr = pci_iomap(pci_dev, 0, 0);
+	if (!ldev->ioaddr)
+		goto err_iomap;
+
+	ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR;
+
+	ldev->id.vendor = pci_dev->subsystem_vendor;
+	ldev->id.device = pci_dev->subsystem_device;
+
+	return 0;
+err_iomap:
+	pci_release_region(pci_dev, 0);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(vp_legacy_probe);
+
+/*
+ * vp_legacy_probe: remove and cleanup the legacy virtio pci device
+ * @ldev: the legacy virtio-pci device
+ */
+void vp_legacy_remove(struct virtio_pci_legacy_device *ldev)
+{
+	struct pci_dev *pci_dev = ldev->pci_dev;
+
+	pci_iounmap(pci_dev, ldev->ioaddr);
+	pci_release_region(pci_dev, 0);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_remove);
+
+/*
+ * vp_legacy_get_features - get features from device
+ * @ldev: the legacy virtio-pci device
+ *
+ * Returns the features read from the device
+ */
+u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev)
+{
+
+	return ioread32(ldev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_get_features);
+
+/*
+ * vp_legacy_get_driver_features - get driver features from device
+ * @ldev: the legacy virtio-pci device
+ *
+ * Returns the driver features read from the device
+ */
+u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev)
+{
+	return ioread32(ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_get_driver_features);
+
+/*
+ * vp_legacy_set_features - set features to device
+ * @ldev: the legacy virtio-pci device
+ * @features: the features set to device
+ */
+void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev,
+			    u32 features)
+{
+	iowrite32(features, ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_set_features);
+
+/*
+ * vp_legacy_get_status - get the device status
+ * @ldev: the legacy virtio-pci device
+ *
+ * Returns the status read from device
+ */
+u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev)
+{
+	return ioread8(ldev->ioaddr + VIRTIO_PCI_STATUS);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_get_status);
+
+/*
+ * vp_legacy_set_status - set status to device
+ * @ldev: the legacy virtio-pci device
+ * @status: the status set to device
+ */
+void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev,
+				 u8 status)
+{
+	iowrite8(status, ldev->ioaddr + VIRTIO_PCI_STATUS);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_set_status);
+
+/*
+ * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue
+ * @ldev: the legacy virtio-pci device
+ * @index: queue index
+ * @vector: the config vector
+ *
+ * Returns the config vector read from the device
+ */
+u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev,
+			   u16 index, u16 vector)
+{
+	iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+	/* Flush the write out to device */
+	return ioread16(ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_queue_vector);
+
+/*
+ * vp_legacy_config_vector - set the vector for config interrupt
+ * @ldev: the legacy virtio-pci device
+ * @vector: the config vector
+ *
+ * Returns the config vector read from the device
+ */
+u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev,
+			    u16 vector)
+{
+	/* Setup the vector used for configuration events */
+	iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+	/* Verify we had enough resources to assign the vector */
+	/* Will also flush the write out to device */
+	return ioread16(ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_config_vector);
+
+/*
+ * vp_legacy_set_queue_address - set the virtqueue address
+ * @ldev: the legacy virtio-pci device
+ * @index: the queue index
+ * @queue_pfn: pfn of the virtqueue
+ */
+void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev,
+			     u16 index, u32 queue_pfn)
+{
+	iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	iowrite32(queue_pfn, ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_set_queue_address);
+
+/*
+ * vp_legacy_get_queue_enable - enable a virtqueue
+ * @ldev: the legacy virtio-pci device
+ * @index: the queue index
+ *
+ * Returns whether a virtqueue is enabled or not
+ */
+bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev,
+				u16 index)
+{
+	iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	return ioread32(ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_get_queue_enable);
+
+/*
+ * vp_legacy_get_queue_size - get size for a virtqueue
+ * @ldev: the legacy virtio-pci device
+ * @index: the queue index
+ *
+ * Returns the size of the virtqueue
+ */
+u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev,
+			     u16 index)
+{
+	iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+	return ioread16(ldev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
+}
+EXPORT_SYMBOL_GPL(vp_legacy_get_queue_size);
+
+MODULE_VERSION("0.1");
+MODULE_DESCRIPTION("Legacy Virtio PCI Device");
+MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/virtio_pci_legacy.h b/include/linux/virtio_pci_legacy.h
new file mode 100644
index 000000000000..e5d665faf00e
--- /dev/null
+++ b/include/linux/virtio_pci_legacy.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VIRTIO_PCI_LEGACY_H
+#define _LINUX_VIRTIO_PCI_LEGACY_H
+
+#include "linux/mod_devicetable.h"
+#include <linux/pci.h>
+#include <linux/virtio_pci.h>
+
+struct virtio_pci_legacy_device {
+	struct pci_dev *pci_dev;
+
+	/* Where to read and clear interrupt */
+	u8 __iomem *isr;
+	/* The IO mapping for the PCI config space (legacy mode only) */
+	void __iomem *ioaddr;
+
+	struct virtio_device_id id;
+};
+
+u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev);
+u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev);
+void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev,
+			u32 features);
+u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev);
+void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev,
+			u8 status);
+u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev,
+			   u16 idx, u16 vector);
+u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev,
+		     u16 vector);
+void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev,
+			     u16 index, u32 queue_pfn);
+bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev,
+				u16 idx);
+void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev,
+			      u16 idx, u16 size);
+u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev,
+			     u16 idx);
+int vp_legacy_probe(struct virtio_pci_legacy_device *ldev);
+void vp_legacy_remove(struct virtio_pci_legacy_device *ldev);
+
+#endif
-- 
cgit v1.2.3


From d0ae1fbfcff48e889bf993ba16890e30f6615593 Mon Sep 17 00:00:00 2001
From: Wu Zongyong <wuzongyong@linux.alibaba.com>
Date: Fri, 29 Oct 2021 17:14:43 +0800
Subject: vdpa: fix typo

Signed-off-by: Wu Zongyong <wuzongyong@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/4b5153262e4ba64986bb567d7425ad4829ca7bcc.1635493219.git.wuzongyong@linux.alibaba.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 3972ab765de1..a896ee021e5f 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -257,7 +257,7 @@ struct vdpa_config_ops {
 	struct vdpa_notification_area
 	(*get_vq_notification)(struct vdpa_device *vdev, u16 idx);
 	/* vq irq is not expected to be changed once DRIVER_OK is set */
-	int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx);
+	int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx);
 
 	/* Device ops */
 	u32 (*get_vq_align)(struct vdpa_device *vdev);
-- 
cgit v1.2.3


From 3b970a5842c9114c82e60744c84a7d06ee51b6f9 Mon Sep 17 00:00:00 2001
From: Wu Zongyong <wuzongyong@linux.alibaba.com>
Date: Fri, 29 Oct 2021 17:14:45 +0800
Subject: vdpa: add new callback get_vq_num_min in vdpa_config_ops

This callback is optional. For vdpa devices that not support to change
virtqueue size, get_vq_num_min and get_vq_num_max will return the same
value, so that users can choose a correct value for that device.

Suggested-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Wu Zongyong <wuzongyong@linux.alibaba.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/f4af5b0abd660d9a29ab6b2f67bd6df10284a230.1635493219.git.wuzongyong@linux.alibaba.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index a896ee021e5f..30864848950b 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -171,6 +171,9 @@ struct vdpa_map_file {
  * @get_vq_num_max:		Get the max size of virtqueue
  *				@vdev: vdpa device
  *				Returns u16: max size of virtqueue
+ * @get_vq_num_min:		Get the min size of virtqueue (optional)
+ *				@vdev: vdpa device
+ *				Returns u16: min size of virtqueue
  * @get_device_id:		Get virtio device id
  *				@vdev: vdpa device
  *				Returns u32: virtio device id
@@ -266,6 +269,7 @@ struct vdpa_config_ops {
 	void (*set_config_cb)(struct vdpa_device *vdev,
 			      struct vdpa_callback *cb);
 	u16 (*get_vq_num_max)(struct vdpa_device *vdev);
+	u16 (*get_vq_num_min)(struct vdpa_device *vdev);
 	u32 (*get_device_id)(struct vdpa_device *vdev);
 	u32 (*get_vendor_id)(struct vdpa_device *vdev);
 	u8 (*get_status)(struct vdpa_device *vdev);
-- 
cgit v1.2.3


From d50497eb4e554e1f0351e1836ee7241c059592e6 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 19 Oct 2021 15:01:45 +0800
Subject: virtio_config: introduce a new .enable_cbs method

This patch introduces a new method to enable the callbacks for config
and virtqueues. This will be used for making sure the virtqueue
callbacks are only enabled after virtio_device_ready() if transport
implements this method.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20211019070152.8236-4-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/virtio_config.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 8519b3ae5d52..4d107ad31149 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -23,6 +23,8 @@ struct virtio_shm_region {
  *       any of @get/@set, @get_status/@set_status, or @get_features/
  *       @finalize_features are NOT safe to be called from an atomic
  *       context.
+ * @enable_cbs: enable the callbacks
+ *      vdev: the virtio_device
  * @get: read the value of a configuration field
  *	vdev: the virtio_device
  *	offset: the offset of the configuration field
@@ -75,6 +77,7 @@ struct virtio_shm_region {
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
+	void (*enable_cbs)(struct virtio_device *vdev);
 	void (*get)(struct virtio_device *vdev, unsigned offset,
 		    void *buf, unsigned len);
 	void (*set)(struct virtio_device *vdev, unsigned offset,
@@ -229,6 +232,9 @@ void virtio_device_ready(struct virtio_device *dev)
 {
 	unsigned status = dev->config->get_status(dev);
 
+	if (dev->config->enable_cbs)
+                  dev->config->enable_cbs(dev);
+
 	BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
 	dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
 }
-- 
cgit v1.2.3


From 939779f5152d161b34f612af29e7dc1ac4472fcf Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 27 Oct 2021 10:21:04 +0800
Subject: virtio_ring: validate used buffer length

This patch validate the used buffer length provided by the device
before trying to use it. This is done by record the in buffer length
in a new field in desc_state structure during virtqueue_add(), then we
can fail the virtqueue_get_buf() when we find the device is trying to
give us a used buffer length which is greater than the in buffer
length.

Since some drivers have already done the validation by themselves,
this patch tries to makes the core validation optional. For the driver
that doesn't want the validation, it can set the
suppress_used_validation to be true (which could be overridden by
force_used_validation module parameter). To be more efficient, a
dedicate array is used for storing the validate used length, this
helps to eliminate the cache stress if validation is done by the
driver.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20211027022107.14357-2-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_ring.c | 60 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/virtio.h       |  2 ++
 2 files changed, 62 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 6d2614e34470..00f64f2f8b72 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -14,6 +14,9 @@
 #include <linux/spinlock.h>
 #include <xen/xen.h>
 
+static bool force_used_validation = false;
+module_param(force_used_validation, bool, 0444);
+
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
 #define BAD_RING(_vq, fmt, args...)				\
@@ -182,6 +185,9 @@ struct vring_virtqueue {
 		} packed;
 	};
 
+	/* Per-descriptor in buffer length */
+	u32 *buflen;
+
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
@@ -490,6 +496,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	unsigned int i, n, avail, descs_used, prev, err_idx;
 	int head;
 	bool indirect;
+	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -571,6 +578,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
 						     indirect);
+			buflen += sg->length;
 		}
 	}
 	/* Last one doesn't continue. */
@@ -610,6 +618,10 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	else
 		vq->split.desc_state[head].indir_desc = ctx;
 
+	/* Store in buffer length if necessary */
+	if (vq->buflen)
+		vq->buflen[head] = buflen;
+
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -784,6 +796,11 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
+	if (vq->buflen && unlikely(*len > vq->buflen[i])) {
+		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
+			*len, vq->buflen[i]);
+		return NULL;
+	}
 
 	/* detach_buf_split clears data, so grab it now. */
 	ret = vq->split.desc_state[i].data;
@@ -1062,6 +1079,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	unsigned int i, n, err_idx;
 	u16 head, id;
 	dma_addr_t addr;
+	u32 buflen = 0;
 
 	head = vq->packed.next_avail_idx;
 	desc = alloc_indirect_packed(total_sg, gfp);
@@ -1091,6 +1109,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 			desc[i].addr = cpu_to_le64(addr);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
+			if (n >= out_sgs)
+				buflen += sg->length;
 		}
 	}
 
@@ -1144,6 +1164,10 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
 
+	/* Store in buffer length if necessary */
+	if (vq->buflen)
+		vq->buflen[id] = buflen;
+
 	vq->num_added += 1;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -1179,6 +1203,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	__le16 head_flags, flags;
 	u16 head, id, prev, curr, avail_used_flags;
 	int err;
+	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -1258,6 +1283,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 					1 << VRING_PACKED_DESC_F_AVAIL |
 					1 << VRING_PACKED_DESC_F_USED;
 			}
+			if (n >= out_sgs)
+				buflen += sg->length;
 		}
 	}
 
@@ -1277,6 +1304,10 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
 
+	/* Store in buffer length if necessary */
+	if (vq->buflen)
+		vq->buflen[id] = buflen;
+
 	/*
 	 * A driver MUST NOT make the first descriptor in the list
 	 * available before all subsequent descriptors comprising
@@ -1463,6 +1494,11 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 		BAD_RING(vq, "id %u is not a head!\n", id);
 		return NULL;
 	}
+	if (vq->buflen && unlikely(*len > vq->buflen[id])) {
+		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
+			*len, vq->buflen[id]);
+		return NULL;
+	}
 
 	/* detach_buf_packed clears data, so grab it now. */
 	ret = vq->packed.desc_state[id].data;
@@ -1668,6 +1704,7 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	struct vring_virtqueue *vq;
 	struct vring_packed_desc *ring;
 	struct vring_packed_desc_event *driver, *device;
+	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
 	size_t ring_size_in_bytes, event_size_in_bytes;
 
@@ -1757,6 +1794,15 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	if (!vq->packed.desc_extra)
 		goto err_desc_extra;
 
+	if (!drv->suppress_used_validation || force_used_validation) {
+		vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
+					   GFP_KERNEL);
+		if (!vq->buflen)
+			goto err_buflen;
+	} else {
+		vq->buflen = NULL;
+	}
+
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@@ -1769,6 +1815,8 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
+err_buflen:
+	kfree(vq->packed.desc_extra);
 err_desc_extra:
 	kfree(vq->packed.desc_state);
 err_desc_state:
@@ -2176,6 +2224,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					void (*callback)(struct virtqueue *),
 					const char *name)
 {
+	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	struct vring_virtqueue *vq;
 
 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@@ -2235,6 +2284,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	if (!vq->split.desc_extra)
 		goto err_extra;
 
+	if (!drv->suppress_used_validation || force_used_validation) {
+		vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
+					   GFP_KERNEL);
+		if (!vq->buflen)
+			goto err_buflen;
+	} else {
+		vq->buflen = NULL;
+	}
+
 	/* Put everything in free lists. */
 	vq->free_head = 0;
 	memset(vq->split.desc_state, 0, vring.num *
@@ -2245,6 +2303,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
+err_buflen:
+	kfree(vq->split.desc_extra);
 err_extra:
 	kfree(vq->split.desc_state);
 err_state:
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 41edbc01ffa4..44d0e09da2d9 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -152,6 +152,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
+ * @suppress_used_validation: set to not have core validate used length
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *    for virtio-scsi to invoke a scan.
@@ -168,6 +169,7 @@ struct virtio_driver {
 	unsigned int feature_table_size;
 	const unsigned int *feature_table_legacy;
 	unsigned int feature_table_size_legacy;
+	bool suppress_used_validation;
 	int (*validate)(struct virtio_device *dev);
 	int (*probe)(struct virtio_device *dev);
 	void (*scan)(struct virtio_device *dev);
-- 
cgit v1.2.3


From 6dbb1f1687a2ccdfc5b84b0a35bbc6dfefc4de3b Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 26 Oct 2021 20:55:12 +0300
Subject: vdpa: Introduce and use vdpa device get, set config helpers

Subsequent patches enable get and set configuration either
via management device or via vdpa device' config ops.

This requires synchronization between multiple callers to get and set
config callbacks. Features setting also influence the layout of the
configuration fields endianness.

To avoid exposing synchronization primitives to callers, introduce
helper for setting the configuration and use it.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Eli Cohen <elic@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20211026175519.87795-2-parav@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vdpa/vdpa.c          | 36 ++++++++++++++++++++++++++++++++++++
 drivers/vhost/vdpa.c         |  3 +--
 drivers/virtio/virtio_vdpa.c |  3 +--
 include/linux/vdpa.h         | 19 ++++---------------
 4 files changed, 42 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index fcf02a364878..cbc8fc69cf9b 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -297,6 +297,42 @@ void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
 }
 EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
 
+/**
+ * vdpa_get_config - Get one or more device configuration fields.
+ * @vdev: vdpa device to operate on
+ * @offset: starting byte offset of the field
+ * @buf: buffer pointer to read to
+ * @len: length of the configuration fields in bytes
+ */
+void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
+		     void *buf, unsigned int len)
+{
+	const struct vdpa_config_ops *ops = vdev->config;
+
+	/*
+	 * Config accesses aren't supposed to trigger before features are set.
+	 * If it does happen we assume a legacy guest.
+	 */
+	if (!vdev->features_valid)
+		vdpa_set_features(vdev, 0);
+	ops->get_config(vdev, offset, buf, len);
+}
+EXPORT_SYMBOL_GPL(vdpa_get_config);
+
+/**
+ * vdpa_set_config - Set one or more device configuration fields.
+ * @vdev: vdpa device to operate on
+ * @offset: starting byte offset of the field
+ * @buf: buffer pointer to read from
+ * @length: length of the configuration fields in bytes
+ */
+void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
+		     const void *buf, unsigned int length)
+{
+	vdev->config->set_config(vdev, offset, buf, length);
+}
+EXPORT_SYMBOL_GPL(vdpa_set_config);
+
 static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev,
 				 const char *busname, const char *devname)
 {
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 39039e046117..01c59ce7e250 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -237,7 +237,6 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 				  struct vhost_vdpa_config __user *c)
 {
 	struct vdpa_device *vdpa = v->vdpa;
-	const struct vdpa_config_ops *ops = vdpa->config;
 	struct vhost_vdpa_config config;
 	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 	u8 *buf;
@@ -251,7 +250,7 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 	if (IS_ERR(buf))
 		return PTR_ERR(buf);
 
-	ops->set_config(vdpa, config.off, buf, config.len);
+	vdpa_set_config(vdpa, config.off, buf, config.len);
 
 	kvfree(buf);
 	return 0;
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 6b62aaf08cc5..f85f860bc10b 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -65,9 +65,8 @@ static void virtio_vdpa_set(struct virtio_device *vdev, unsigned offset,
 			    const void *buf, unsigned len)
 {
 	struct vdpa_device *vdpa = vd_get_vdpa(vdev);
-	const struct vdpa_config_ops *ops = vdpa->config;
 
-	ops->set_config(vdpa, offset, buf, len);
+	vdpa_set_config(vdpa, offset, buf, len);
 }
 
 static u32 virtio_vdpa_generation(struct virtio_device *vdev)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 30864848950b..267236aab34c 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -386,21 +386,10 @@ static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
 	return ops->set_features(vdev, features);
 }
 
-static inline void vdpa_get_config(struct vdpa_device *vdev,
-				   unsigned int offset, void *buf,
-				   unsigned int len)
-{
-	const struct vdpa_config_ops *ops = vdev->config;
-
-	/*
-	 * Config accesses aren't supposed to trigger before features are set.
-	 * If it does happen we assume a legacy guest.
-	 */
-	if (!vdev->features_valid)
-		vdpa_set_features(vdev, 0);
-	ops->get_config(vdev, offset, buf, len);
-}
-
+void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
+		     void *buf, unsigned int len);
+void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
+		     const void *buf, unsigned int length);
 /**
  * struct vdpa_mgmtdev_ops - vdpa device ops
  * @dev_add: Add a vdpa device using alloc and register
-- 
cgit v1.2.3


From ad69dd0bf26b88ec6ab26f8bbe5cd74fbed7672a Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 26 Oct 2021 20:55:13 +0300
Subject: vdpa: Introduce query of device config layout

Introduce a command to query a device config layout.

An example query of network vdpa device:

$ vdpa dev add name bar mgmtdev vdpasim_net

$ vdpa dev config show
bar: mac 00:35:09:19:48:05 link up link_announce false mtu 1500

$ vdpa dev config show -jp
{
    "config": {
        "bar": {
            "mac": "00:35:09:19:48:05",
            "link ": "up",
            "link_announce ": false,
            "mtu": 1500,
        }
    }
}

Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Eli Cohen <elic@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20211026175519.87795-3-parav@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vdpa/vdpa.c       | 176 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/vdpa.h      |   2 +
 include/uapi/linux/vdpa.h |   6 ++
 3 files changed, 184 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index cbc8fc69cf9b..8fcbdda8590c 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -14,6 +14,8 @@
 #include <uapi/linux/vdpa.h>
 #include <net/genetlink.h>
 #include <linux/mod_devicetable.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ids.h>
 
 static LIST_HEAD(mdev_head);
 /* A global mutex that protects vdpa management device and device level operations. */
@@ -66,6 +68,7 @@ static void vdpa_release_dev(struct device *d)
 		ops->free(vdev);
 
 	ida_simple_remove(&vdpa_index_ida, vdev->index);
+	mutex_destroy(&vdev->cf_mutex);
 	kfree(vdev);
 }
 
@@ -127,6 +130,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
 	if (err)
 		goto err_name;
 
+	mutex_init(&vdev->cf_mutex);
 	device_initialize(&vdev->dev);
 
 	return vdev;
@@ -309,6 +313,7 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
 {
 	const struct vdpa_config_ops *ops = vdev->config;
 
+	mutex_lock(&vdev->cf_mutex);
 	/*
 	 * Config accesses aren't supposed to trigger before features are set.
 	 * If it does happen we assume a legacy guest.
@@ -316,6 +321,7 @@ void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
 	if (!vdev->features_valid)
 		vdpa_set_features(vdev, 0);
 	ops->get_config(vdev, offset, buf, len);
+	mutex_unlock(&vdev->cf_mutex);
 }
 EXPORT_SYMBOL_GPL(vdpa_get_config);
 
@@ -329,7 +335,9 @@ EXPORT_SYMBOL_GPL(vdpa_get_config);
 void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
 		     const void *buf, unsigned int length)
 {
+	mutex_lock(&vdev->cf_mutex);
 	vdev->config->set_config(vdev, offset, buf, length);
+	mutex_unlock(&vdev->cf_mutex);
 }
 EXPORT_SYMBOL_GPL(vdpa_set_config);
 
@@ -661,6 +669,168 @@ static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callba
 	return msg->len;
 }
 
+static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev,
+				       struct sk_buff *msg, u64 features,
+				       const struct virtio_net_config *config)
+{
+	u16 val_u16;
+
+	if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0)
+		return 0;
+
+	val_u16 = le16_to_cpu(config->max_virtqueue_pairs);
+	return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16);
+}
+
+static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg)
+{
+	struct virtio_net_config config = {};
+	u64 features;
+	u16 val_u16;
+
+	vdpa_get_config(vdev, 0, &config, sizeof(config));
+
+	if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac),
+		    config.mac))
+		return -EMSGSIZE;
+
+	val_u16 = le16_to_cpu(config.status);
+	if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16))
+		return -EMSGSIZE;
+
+	val_u16 = le16_to_cpu(config.mtu);
+	if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16))
+		return -EMSGSIZE;
+
+	features = vdev->config->get_features(vdev);
+
+	return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config);
+}
+
+static int
+vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
+		     int flags, struct netlink_ext_ack *extack)
+{
+	u32 device_id;
+	void *hdr;
+	int err;
+
+	hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags,
+			  VDPA_CMD_DEV_CONFIG_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) {
+		err = -EMSGSIZE;
+		goto msg_err;
+	}
+
+	device_id = vdev->config->get_device_id(vdev);
+	if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) {
+		err = -EMSGSIZE;
+		goto msg_err;
+	}
+
+	switch (device_id) {
+	case VIRTIO_ID_NET:
+		err = vdpa_dev_net_config_fill(vdev, msg);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+	if (err)
+		goto msg_err;
+
+	genlmsg_end(msg, hdr);
+	return 0;
+
+msg_err:
+	genlmsg_cancel(msg, hdr);
+	return err;
+}
+
+static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct vdpa_device *vdev;
+	struct sk_buff *msg;
+	const char *devname;
+	struct device *dev;
+	int err;
+
+	if (!info->attrs[VDPA_ATTR_DEV_NAME])
+		return -EINVAL;
+	devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	mutex_lock(&vdpa_dev_mutex);
+	dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
+	if (!dev) {
+		NL_SET_ERR_MSG_MOD(info->extack, "device not found");
+		err = -ENODEV;
+		goto dev_err;
+	}
+	vdev = container_of(dev, struct vdpa_device, dev);
+	if (!vdev->mdev) {
+		NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device");
+		err = -EINVAL;
+		goto mdev_err;
+	}
+	err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq,
+				   0, info->extack);
+	if (!err)
+		err = genlmsg_reply(msg, info);
+
+mdev_err:
+	put_device(dev);
+dev_err:
+	mutex_unlock(&vdpa_dev_mutex);
+	if (err)
+		nlmsg_free(msg);
+	return err;
+}
+
+static int vdpa_dev_config_dump(struct device *dev, void *data)
+{
+	struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
+	struct vdpa_dev_dump_info *info = data;
+	int err;
+
+	if (!vdev->mdev)
+		return 0;
+	if (info->idx < info->start_idx) {
+		info->idx++;
+		return 0;
+	}
+	err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
+				   info->cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				   info->cb->extack);
+	if (err)
+		return err;
+
+	info->idx++;
+	return 0;
+}
+
+static int
+vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+	struct vdpa_dev_dump_info info;
+
+	info.msg = msg;
+	info.cb = cb;
+	info.start_idx = cb->args[0];
+	info.idx = 0;
+
+	mutex_lock(&vdpa_dev_mutex);
+	bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump);
+	mutex_unlock(&vdpa_dev_mutex);
+	cb->args[0] = info.idx;
+	return msg->len;
+}
+
 static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
 	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
@@ -692,6 +862,12 @@ static const struct genl_ops vdpa_nl_ops[] = {
 		.doit = vdpa_nl_cmd_dev_get_doit,
 		.dumpit = vdpa_nl_cmd_dev_get_dumpit,
 	},
+	{
+		.cmd = VDPA_CMD_DEV_CONFIG_GET,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = vdpa_nl_cmd_dev_config_get_doit,
+		.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
+	},
 };
 
 static struct genl_family vdpa_nl_family __ro_after_init = {
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 267236aab34c..5cc5e501397f 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -63,6 +63,7 @@ struct vdpa_mgmt_dev;
  * @dev: underlying device
  * @dma_dev: the actual device that is performing DMA
  * @config: the configuration ops for this device.
+ * @cf_mutex: Protects get and set access to configuration layout.
  * @index: device index
  * @features_valid: were features initialized? for legacy guests
  * @use_va: indicate whether virtual address must be used by this device
@@ -74,6 +75,7 @@ struct vdpa_device {
 	struct device dev;
 	struct device *dma_dev;
 	const struct vdpa_config_ops *config;
+	struct mutex cf_mutex; /* Protects get/set config */
 	unsigned int index;
 	bool features_valid;
 	bool use_va;
diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index e3b87879514c..a252f06f9dfd 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -17,6 +17,7 @@ enum vdpa_command {
 	VDPA_CMD_DEV_NEW,
 	VDPA_CMD_DEV_DEL,
 	VDPA_CMD_DEV_GET,		/* can dump */
+	VDPA_CMD_DEV_CONFIG_GET,	/* can dump */
 };
 
 enum vdpa_attr {
@@ -34,6 +35,11 @@ enum vdpa_attr {
 	VDPA_ATTR_DEV_MAX_VQ_SIZE,		/* u16 */
 	VDPA_ATTR_DEV_MIN_VQ_SIZE,		/* u16 */
 
+	VDPA_ATTR_DEV_NET_CFG_MACADDR,		/* binary */
+	VDPA_ATTR_DEV_NET_STATUS,		/* u8 */
+	VDPA_ATTR_DEV_NET_CFG_MAX_VQP,		/* u16 */
+	VDPA_ATTR_DEV_NET_CFG_MTU,		/* u16 */
+
 	/* new attributes must be added above here */
 	VDPA_ATTR_MAX,
 };
-- 
cgit v1.2.3


From 960deb33be3d08e55a39e40e0286a51c7448e053 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 26 Oct 2021 20:55:14 +0300
Subject: vdpa: Use kernel coding style for structure comments

As subsequent patch adds new structure field with comment, move the
structure comment to follow kernel coding style.

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Eli Cohen <elic@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://lore.kernel.org/r/20211026175519.87795-4-parav@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/linux/vdpa.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index 5cc5e501397f..fafb7202482c 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -411,10 +411,17 @@ struct vdpa_mgmtdev_ops {
 	void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
 };
 
+/**
+ * struct vdpa_mgmt_dev - vdpa management device
+ * @device: Management parent device
+ * @ops: operations supported by management device
+ * @id_table: Pointer to device id table of supported ids
+ * @list: list entry
+ */
 struct vdpa_mgmt_dev {
 	struct device *device;
 	const struct vdpa_mgmtdev_ops *ops;
-	const struct virtio_device_id *id_table; /* supported ids */
+	const struct virtio_device_id *id_table;
 	struct list_head list;
 };
 
-- 
cgit v1.2.3


From d8ca2fa5be1bdb9d08cfe1f831cddb622a01dfd4 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Tue, 26 Oct 2021 20:55:15 +0300
Subject: vdpa: Enable user to set mac and mtu of vdpa device

$ vdpa dev add name bar mgmtdev vdpasim_net mac 00:11:22:33:44:55 mtu 9000

$ vdpa dev config show
bar: mac 00:11:22:33:44:55 link up link_announce false mtu 9000

$ vdpa dev config show -jp
{
    "config": {
        "bar": {
            "mac": "00:11:22:33:44:55",
            "link ": "up",
            "link_announce ": false,
            "mtu": 9000,
        }
    }
}

Signed-off-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Eli Cohen <elic@nvidia.com>
Acked-by: Jason Wang <jasowang@redhat.com>

Link: https://lore.kernel.org/r/20211026175519.87795-5-parav@nvidia.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
---
 drivers/vdpa/ifcvf/ifcvf_main.c      |  3 ++-
 drivers/vdpa/mlx5/net/mlx5_vnet.c    |  3 ++-
 drivers/vdpa/vdpa.c                  | 38 ++++++++++++++++++++++++++++++++++--
 drivers/vdpa/vdpa_sim/vdpa_sim_blk.c |  3 ++-
 drivers/vdpa/vdpa_sim/vdpa_sim_net.c |  3 ++-
 drivers/vdpa/vdpa_user/vduse_dev.c   |  3 ++-
 include/linux/vdpa.h                 | 17 +++++++++++++++-
 7 files changed, 62 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index dcd648e1f7e7..6dc75ca70b37 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -499,7 +499,8 @@ static u32 get_dev_type(struct pci_dev *pdev)
 	return dev_type;
 }
 
-static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+			      const struct vdpa_dev_set_config *config)
 {
 	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
 	struct ifcvf_adapter *adapter;
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index b5bd1a553256..6bbdc0ece707 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2482,7 +2482,8 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p
 	return ret;
 }
 
-static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
+static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
+			     const struct vdpa_dev_set_config *add_config)
 {
 	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
 	struct virtio_net_config *config;
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 8fcbdda8590c..7332a74a4b00 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -14,7 +14,6 @@
 #include <uapi/linux/vdpa.h>
 #include <net/genetlink.h>
 #include <linux/mod_devicetable.h>
-#include <linux/virtio_net.h>
 #include <linux/virtio_ids.h>
 
 static LIST_HEAD(mdev_head);
@@ -480,9 +479,15 @@ out:
 	return msg->len;
 }
 
+#define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \
+				 (1 << VDPA_ATTR_DEV_NET_CFG_MTU))
+
 static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
 {
+	struct vdpa_dev_set_config config = {};
+	struct nlattr **nl_attrs = info->attrs;
 	struct vdpa_mgmt_dev *mdev;
+	const u8 *macaddr;
 	const char *name;
 	int err = 0;
 
@@ -491,6 +496,26 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
 
 	name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
 
+	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) {
+		macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]);
+		memcpy(config.net.mac, macaddr, sizeof(config.net.mac));
+		config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR);
+	}
+	if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) {
+		config.net.mtu =
+			nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]);
+		config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU);
+	}
+
+	/* Skip checking capability if user didn't prefer to configure any
+	 * device networking attributes. It is likely that user might have used
+	 * a device specific method to configure such attributes or using device
+	 * default attributes.
+	 */
+	if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) &&
+	    !netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+
 	mutex_lock(&vdpa_dev_mutex);
 	mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
 	if (IS_ERR(mdev)) {
@@ -498,8 +523,14 @@ static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *i
 		err = PTR_ERR(mdev);
 		goto err;
 	}
+	if ((config.mask & mdev->config_attr_mask) != config.mask) {
+		NL_SET_ERR_MSG_MOD(info->extack,
+				   "All provided attributes are not supported");
+		err = -EOPNOTSUPP;
+		goto err;
+	}
 
-	err = mdev->ops->dev_add(mdev, name);
+	err = mdev->ops->dev_add(mdev, name, &config);
 err:
 	mutex_unlock(&vdpa_dev_mutex);
 	return err;
@@ -835,6 +866,9 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
 	[VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
 	[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
 	[VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
+	[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
+	/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
+	[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
 };
 
 static const struct genl_ops vdpa_nl_ops[] = {
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
index a790903f243e..42d401d43911 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
@@ -248,7 +248,8 @@ static struct device vdpasim_blk_mgmtdev = {
 	.release = vdpasim_blk_mgmtdev_release,
 };
 
-static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+			       const struct vdpa_dev_set_config *config)
 {
 	struct vdpasim_dev_attr dev_attr = {};
 	struct vdpasim *simdev;
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
index a1ab6163f7d1..d681e423e64f 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
@@ -126,7 +126,8 @@ static struct device vdpasim_net_mgmtdev = {
 	.release = vdpasim_net_mgmtdev_release,
 };
 
-static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+			       const struct vdpa_dev_set_config *config)
 {
 	struct vdpasim_dev_attr dev_attr = {};
 	struct vdpasim *simdev;
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 841667a896dd..c9204c62f339 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1503,7 +1503,8 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
 	return 0;
 }
 
-static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
+			const struct vdpa_dev_set_config *config)
 {
 	struct vduse_dev *dev;
 	int ret;
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h
index fafb7202482c..c3011ccda430 100644
--- a/include/linux/vdpa.h
+++ b/include/linux/vdpa.h
@@ -6,6 +6,8 @@
 #include <linux/device.h>
 #include <linux/interrupt.h>
 #include <linux/vhost_iotlb.h>
+#include <linux/virtio_net.h>
+#include <linux/if_ether.h>
 
 /**
  * struct vdpa_calllback - vDPA callback definition.
@@ -93,6 +95,14 @@ struct vdpa_iova_range {
 	u64 last;
 };
 
+struct vdpa_dev_set_config {
+	struct {
+		u8 mac[ETH_ALEN];
+		u16 mtu;
+	} net;
+	u64 mask;
+};
+
 /**
  * Corresponding file area for device memory mapping
  * @file: vma->vm_file for the mapping
@@ -397,6 +407,7 @@ void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
  * @dev_add: Add a vdpa device using alloc and register
  *	     @mdev: parent device to use for device addition
  *	     @name: name of the new vdpa device
+ *	     @config: config attributes to apply to the device under creation
  *	     Driver need to add a new device using _vdpa_register_device()
  *	     after fully initializing the vdpa device. Driver must return 0
  *	     on success or appropriate error code.
@@ -407,7 +418,8 @@ void vdpa_set_config(struct vdpa_device *dev, unsigned int offset,
  *	     _vdpa_unregister_device().
  */
 struct vdpa_mgmtdev_ops {
-	int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
+	int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name,
+		       const struct vdpa_dev_set_config *config);
 	void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
 };
 
@@ -416,12 +428,15 @@ struct vdpa_mgmtdev_ops {
  * @device: Management parent device
  * @ops: operations supported by management device
  * @id_table: Pointer to device id table of supported ids
+ * @config_attr_mask: bit mask of attributes of type enum vdpa_attr that
+ *		      management device support during dev_add callback
  * @list: list entry
  */
 struct vdpa_mgmt_dev {
 	struct device *device;
 	const struct vdpa_mgmtdev_ops *ops;
 	const struct virtio_device_id *id_table;
+	u64 config_attr_mask;
 	struct list_head list;
 };
 
-- 
cgit v1.2.3


From 7303524e04af49a47991e19f895c3b8cdc3796c7 Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Fri, 29 Oct 2021 22:12:14 +0800
Subject: skmsg: Lose offset info in sk_psock_skb_ingress

If sockmap enable strparser, there are lose offset info in
sk_psock_skb_ingress(). If the length determined by parse_msg function is not
skb->len, the skb will be converted to sk_msg multiple times, and userspace
app will get the data multiple times.

Fix this by get the offset and length from strp_msg. And as Cong suggested,
add one bit in skb->_sk_redir to distinguish enable or disable strparser.

Fixes: 604326b41a6fb ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20211029141216.211899-1-liujian56@huawei.com
---
 include/linux/skmsg.h | 18 ++++++++++++++++--
 net/core/skmsg.c      | 43 +++++++++++++++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 14ab0c0bc924..94e2a1f6e58d 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -508,8 +508,22 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
-/* We only have one bit so far. */
-#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
+#define BPF_F_STRPARSER	(1UL << 1)
+
+/* We only have two bits so far. */
+#define BPF_F_PTR_MASK ~(BPF_F_INGRESS | BPF_F_STRPARSER)
+
+static inline bool skb_bpf_strparser(const struct sk_buff *skb)
+{
+	unsigned long sk_redir = skb->_sk_redir;
+
+	return sk_redir & BPF_F_STRPARSER;
+}
+
+static inline void skb_bpf_set_strparser(struct sk_buff *skb)
+{
+	skb->_sk_redir |= BPF_F_STRPARSER;
+}
 
 static inline bool skb_bpf_ingress(const struct sk_buff *skb)
 {
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 2d6249b28928..9701a1404ccb 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -494,6 +494,7 @@ static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
 }
 
 static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
+					u32 off, u32 len,
 					struct sk_psock *psock,
 					struct sock *sk,
 					struct sk_msg *msg)
@@ -507,11 +508,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	 */
 	if (skb_linearize(skb))
 		return -EAGAIN;
-	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, off, len);
 	if (unlikely(num_sge < 0))
 		return num_sge;
 
-	copied = skb->len;
+	copied = len;
 	msg->sg.start = 0;
 	msg->sg.size = copied;
 	msg->sg.end = num_sge;
@@ -522,9 +523,11 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb,
 	return copied;
 }
 
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb);
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len);
 
-static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb,
+				u32 off, u32 len)
 {
 	struct sock *sk = psock->sk;
 	struct sk_msg *msg;
@@ -535,7 +538,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * correctly.
 	 */
 	if (unlikely(skb->sk == sk))
-		return sk_psock_skb_ingress_self(psock, skb);
+		return sk_psock_skb_ingress_self(psock, skb, off, len);
 	msg = sk_psock_create_ingress_msg(sk, skb);
 	if (!msg)
 		return -EAGAIN;
@@ -547,7 +550,7 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
 	 * into user buffers.
 	 */
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -557,7 +560,8 @@ static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
  * skb. In this case we do not need to check memory limits or skb_set_owner_r
  * because the skb is already accounted for here.
  */
-static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb)
+static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb,
+				     u32 off, u32 len)
 {
 	struct sk_msg *msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 	struct sock *sk = psock->sk;
@@ -567,7 +571,7 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
 		return -EAGAIN;
 	sk_msg_init(msg);
 	skb_set_owner_r(skb, sk);
-	err = sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
+	err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg);
 	if (err < 0)
 		kfree(msg);
 	return err;
@@ -581,7 +585,7 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 			return -EAGAIN;
 		return skb_send_sock(psock->sk, skb, off, len);
 	}
-	return sk_psock_skb_ingress(psock, skb);
+	return sk_psock_skb_ingress(psock, skb, off, len);
 }
 
 static void sk_psock_skb_state(struct sk_psock *psock,
@@ -624,6 +628,12 @@ static void sk_psock_backlog(struct work_struct *work)
 	while ((skb = skb_dequeue(&psock->ingress_skb))) {
 		len = skb->len;
 		off = 0;
+		if (skb_bpf_strparser(skb)) {
+			struct strp_msg *stm = strp_msg(skb);
+
+			off = stm->offset;
+			len = stm->full_len;
+		}
 start:
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
@@ -863,6 +873,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
 	 * return code, but then didn't set a redirect interface.
 	 */
 	if (unlikely(!sk_other)) {
+		skb_bpf_redirect_clear(skb);
 		sock_drop(from->sk, skb);
 		return -EIO;
 	}
@@ -930,6 +941,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 {
 	struct sock *sk_other;
 	int err = 0;
+	u32 len, off;
 
 	switch (verdict) {
 	case __SK_PASS:
@@ -937,6 +949,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		sk_other = psock->sk;
 		if (sock_flag(sk_other, SOCK_DEAD) ||
 		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+			skb_bpf_redirect_clear(skb);
 			goto out_free;
 		}
 
@@ -949,7 +962,15 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		 * retrying later from workqueue.
 		 */
 		if (skb_queue_empty(&psock->ingress_skb)) {
-			err = sk_psock_skb_ingress_self(psock, skb);
+			len = skb->len;
+			off = 0;
+			if (skb_bpf_strparser(skb)) {
+				struct strp_msg *stm = strp_msg(skb);
+
+				off = stm->offset;
+				len = stm->full_len;
+			}
+			err = sk_psock_skb_ingress_self(psock, skb, off, len);
 		}
 		if (err < 0) {
 			spin_lock_bh(&psock->ingress_lock);
@@ -1015,6 +1036,8 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
 		ret = bpf_prog_run_pin_on_cpu(prog, skb);
+		if (ret == SK_PASS)
+			skb_bpf_set_strparser(skb);
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
-- 
cgit v1.2.3


From 588e5d8766486e52ee332a4bb097b016a355b465 Mon Sep 17 00:00:00 2001
From: He Fengqing <hefengqing@huawei.com>
Date: Fri, 29 Oct 2021 02:39:06 +0000
Subject: cgroup: bpf: Move wrapper for __cgroup_bpf_*() to kernel/bpf/cgroup.c

In commit 324bda9e6c5a("bpf: multi program support for cgroup+bpf")
cgroup_bpf_*() called from kernel/bpf/syscall.c, but now they are only
used in kernel/bpf/cgroup.c, so move these function to
kernel/bpf/cgroup.c, like cgroup_bpf_replace().

Signed-off-by: He Fengqing <hefengqing@huawei.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/bpf-cgroup.h | 20 -----------------
 kernel/bpf/cgroup.c        | 54 ++++++++++++++++++++++++++++++++++++++--------
 kernel/cgroup/cgroup.c     | 38 --------------------------------
 3 files changed, 45 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 2746fd804216..9aad4e3ca29b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -157,26 +157,6 @@ struct cgroup_bpf {
 int cgroup_bpf_inherit(struct cgroup *cgrp);
 void cgroup_bpf_offline(struct cgroup *cgrp);
 
-int __cgroup_bpf_attach(struct cgroup *cgrp,
-			struct bpf_prog *prog, struct bpf_prog *replace_prog,
-			struct bpf_cgroup_link *link,
-			enum bpf_attach_type type, u32 flags);
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			struct bpf_cgroup_link *link,
-			enum bpf_attach_type type);
-int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
-		       union bpf_attr __user *uattr);
-
-/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
-int cgroup_bpf_attach(struct cgroup *cgrp,
-		      struct bpf_prog *prog, struct bpf_prog *replace_prog,
-		      struct bpf_cgroup_link *link, enum bpf_attach_type type,
-		      u32 flags);
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type);
-int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
-		     union bpf_attr __user *uattr);
-
 int __cgroup_bpf_run_filter_skb(struct sock *sk,
 				struct sk_buff *skb,
 				enum cgroup_bpf_attach_type atype);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 03145d45e3d5..2ca643af9a54 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -430,10 +430,10 @@ static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
  * Exactly one of @prog or @link can be non-null.
  * Must be called with cgroup_mutex held.
  */
-int __cgroup_bpf_attach(struct cgroup *cgrp,
-			struct bpf_prog *prog, struct bpf_prog *replace_prog,
-			struct bpf_cgroup_link *link,
-			enum bpf_attach_type type, u32 flags)
+static int __cgroup_bpf_attach(struct cgroup *cgrp,
+			       struct bpf_prog *prog, struct bpf_prog *replace_prog,
+			       struct bpf_cgroup_link *link,
+			       enum bpf_attach_type type, u32 flags)
 {
 	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
 	struct bpf_prog *old_prog = NULL;
@@ -523,6 +523,20 @@ cleanup:
 	return err;
 }
 
+static int cgroup_bpf_attach(struct cgroup *cgrp,
+			     struct bpf_prog *prog, struct bpf_prog *replace_prog,
+			     struct bpf_cgroup_link *link,
+			     enum bpf_attach_type type,
+			     u32 flags)
+{
+	int ret;
+
+	mutex_lock(&cgroup_mutex);
+	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
+	mutex_unlock(&cgroup_mutex);
+	return ret;
+}
+
 /* Swap updated BPF program for given link in effective program arrays across
  * all descendant cgroups. This function is guaranteed to succeed.
  */
@@ -672,14 +686,14 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
  *                         propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
  * @prog: A program to detach or NULL
- * @prog: A link to detach or NULL
+ * @link: A link to detach or NULL
  * @type: Type of detach operation
  *
  * At most one of @prog or @link can be non-NULL.
  * Must be called with cgroup_mutex held.
  */
-int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-			struct bpf_cgroup_link *link, enum bpf_attach_type type)
+static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+			       struct bpf_cgroup_link *link, enum bpf_attach_type type)
 {
 	enum cgroup_bpf_attach_type atype;
 	struct bpf_prog *old_prog;
@@ -730,9 +744,20 @@ cleanup:
 	return err;
 }
 
+static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+			     enum bpf_attach_type type)
+{
+	int ret;
+
+	mutex_lock(&cgroup_mutex);
+	ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
+	mutex_unlock(&cgroup_mutex);
+	return ret;
+}
+
 /* Must be called with cgroup_mutex held to avoid races. */
-int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
-		       union bpf_attr __user *uattr)
+static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+			      union bpf_attr __user *uattr)
 {
 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
 	enum bpf_attach_type type = attr->query.attach_type;
@@ -789,6 +814,17 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	return ret;
 }
 
+static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+			    union bpf_attr __user *uattr)
+{
+	int ret;
+
+	mutex_lock(&cgroup_mutex);
+	ret = __cgroup_bpf_query(cgrp, attr, uattr);
+	mutex_unlock(&cgroup_mutex);
+	return ret;
+}
+
 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 			   enum bpf_prog_type ptype, struct bpf_prog *prog)
 {
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 003204c85893..c73f634c8328 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6676,44 +6676,6 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
 
 #endif	/* CONFIG_SOCK_CGROUP_DATA */
 
-#ifdef CONFIG_CGROUP_BPF
-int cgroup_bpf_attach(struct cgroup *cgrp,
-		      struct bpf_prog *prog, struct bpf_prog *replace_prog,
-		      struct bpf_cgroup_link *link,
-		      enum bpf_attach_type type,
-		      u32 flags)
-{
-	int ret;
-
-	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
-	mutex_unlock(&cgroup_mutex);
-	return ret;
-}
-
-int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type)
-{
-	int ret;
-
-	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
-	mutex_unlock(&cgroup_mutex);
-	return ret;
-}
-
-int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
-		     union bpf_attr __user *uattr)
-{
-	int ret;
-
-	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_query(cgrp, attr, uattr);
-	mutex_unlock(&cgroup_mutex);
-	return ret;
-}
-#endif /* CONFIG_CGROUP_BPF */
-
 #ifdef CONFIG_SYSFS
 static ssize_t show_delegatable_files(struct cftype *files, char *buf,
 				      ssize_t size, const char *prefix)
-- 
cgit v1.2.3


From ebe4560ed5c8cbfe3759f16c23ca5a6df090c6b5 Mon Sep 17 00:00:00 2001
From: Oscar Carter <oscar.carter@gmx.com>
Date: Sat, 30 May 2020 11:08:39 +0200
Subject: firewire: Remove function callback casts

In 1394 OHCI specification, Isochronous Receive DMA context has several
modes. One of mode is 'BufferFill' and Linux FireWire stack uses it to
receive isochronous packets for multiple isochronous channel as
FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL.

The mode is not used by in-kernel driver, while it's available for
userspace. The character device driver in firewire-core includes
cast of function callback for the mode since the type of callback
function is different from the other modes. The case is inconvenient
to effort of Control Flow Integrity builds due to
-Wcast-function-type warning.

This commit removes the cast. A static helper function is newly added
to initialize isochronous context for the mode. The helper function
arranges isochronous context to assign specific callback function
after call of existent kernel API. It's noticeable that the number of
isochronous channel, speed, and the size of header are not required for
the mode. The helper function is used for the mode by character device
driver instead of direct call of existent kernel API.

The same goal can be achieved (in the ioctl_create_iso_context function)
without this helper function as follows:
- Call the fw_iso_context_create function passing NULL to the callback
  parameter.
- Then setting the context->callback.sc or context->callback.mc
  variables based on the a->type value.

However using the helper function created in this patch makes code more
clear and declarative. This way avoid the call to a function with one
purpose to achieved another one.

Co-developed-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Signed-off-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Co-developed-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Oscar Carter <oscar.carter@gmx.com>
Reviewed-by: Takashi Sakamoto <o-takashi@sakamocchi.jp>
Testeb-by: Takashi Sakamoto<o-takashi@sakamocchi.jp>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
---
 drivers/firewire/core-cdev.c | 32 ++++++++++++++++++++++++++------
 include/linux/firewire.h     | 11 +++++++----
 2 files changed, 33 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index fb6c651214f3..9f89c17730b1 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -10,6 +10,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/firewire.h>
 #include <linux/firewire-cdev.h>
@@ -953,11 +954,25 @@ static enum dma_data_direction iso_dma_direction(struct fw_iso_context *context)
 			return DMA_FROM_DEVICE;
 }
 
+static struct fw_iso_context *fw_iso_mc_context_create(struct fw_card *card,
+						fw_iso_mc_callback_t callback,
+						void *callback_data)
+{
+	struct fw_iso_context *ctx;
+
+	ctx = fw_iso_context_create(card, FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL,
+				    0, 0, 0, NULL, callback_data);
+	if (!IS_ERR(ctx))
+		ctx->callback.mc = callback;
+
+	return ctx;
+}
+
 static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg)
 {
 	struct fw_cdev_create_iso_context *a = &arg->create_iso_context;
 	struct fw_iso_context *context;
-	fw_iso_callback_t cb;
+	union fw_iso_callback cb;
 	int ret;
 
 	BUILD_BUG_ON(FW_CDEV_ISO_CONTEXT_TRANSMIT != FW_ISO_CONTEXT_TRANSMIT ||
@@ -970,7 +985,7 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg)
 		if (a->speed > SCODE_3200 || a->channel > 63)
 			return -EINVAL;
 
-		cb = iso_callback;
+		cb.sc = iso_callback;
 		break;
 
 	case FW_ISO_CONTEXT_RECEIVE:
@@ -978,19 +993,24 @@ static int ioctl_create_iso_context(struct client *client, union ioctl_arg *arg)
 		    a->channel > 63)
 			return -EINVAL;
 
-		cb = iso_callback;
+		cb.sc = iso_callback;
 		break;
 
 	case FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL:
-		cb = (fw_iso_callback_t)iso_mc_callback;
+		cb.mc = iso_mc_callback;
 		break;
 
 	default:
 		return -EINVAL;
 	}
 
-	context = fw_iso_context_create(client->device->card, a->type,
-			a->channel, a->speed, a->header_size, cb, client);
+	if (a->type == FW_ISO_CONTEXT_RECEIVE_MULTICHANNEL)
+		context = fw_iso_mc_context_create(client->device->card, cb.mc,
+						   client);
+	else
+		context = fw_iso_context_create(client->device->card, a->type,
+						a->channel, a->speed,
+						a->header_size, cb.sc, client);
 	if (IS_ERR(context))
 		return PTR_ERR(context);
 	if (client->version < FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW)
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index aec8f30ab200..07967a450eaa 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -436,6 +436,12 @@ typedef void (*fw_iso_callback_t)(struct fw_iso_context *context,
 				  void *header, void *data);
 typedef void (*fw_iso_mc_callback_t)(struct fw_iso_context *context,
 				     dma_addr_t completed, void *data);
+
+union fw_iso_callback {
+	fw_iso_callback_t sc;
+	fw_iso_mc_callback_t mc;
+};
+
 struct fw_iso_context {
 	struct fw_card *card;
 	int type;
@@ -443,10 +449,7 @@ struct fw_iso_context {
 	int speed;
 	bool drop_overflow_headers;
 	size_t header_size;
-	union {
-		fw_iso_callback_t sc;
-		fw_iso_mc_callback_t mc;
-	} callback;
+	union fw_iso_callback callback;
 	void *callback_data;
 };
 
-- 
cgit v1.2.3


From 74128d801b5165650ae6222e8cf23780fbc55e67 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 23 Sep 2021 01:09:45 +0200
Subject: watchdog: ux500_wdt: Drop platform data

Drop the platform data passing from the PRCMU driver. This platform
data was part of the ambition to support more SoCs, which in turn
were never mass produced.

Only a name remains of the MFD cell so switch to MFD_CELL_NAME().

Cc: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20210922230947.1864357-1-linus.walleij@linaro.org
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/mfd/db8500-prcmu.c              | 13 +------------
 drivers/watchdog/ux500_wdt.c            | 13 ++-----------
 include/linux/platform_data/ux500_wdt.h | 18 ------------------
 3 files changed, 3 insertions(+), 41 deletions(-)
 delete mode 100644 include/linux/platform_data/ux500_wdt.h

(limited to 'include/linux')

diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c
index c1d3e7c116cf..ccf6be922b39 100644
--- a/drivers/mfd/db8500-prcmu.c
+++ b/drivers/mfd/db8500-prcmu.c
@@ -36,7 +36,6 @@
 #include <linux/mfd/abx500/ab8500.h>
 #include <linux/regulator/db8500-prcmu.h>
 #include <linux/regulator/machine.h>
-#include <linux/platform_data/ux500_wdt.h>
 #include "db8500-prcmu-regs.h"
 
 /* Index of different voltages to be used when accessing AVSData */
@@ -2939,18 +2938,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = {
 	},
 };
 
-static struct ux500_wdt_data db8500_wdt_pdata = {
-	.timeout = 600, /* 10 minutes */
-	.has_28_bits_resolution = true,
-};
-
 static const struct mfd_cell common_prcmu_devs[] = {
-	{
-		.name = "ux500_wdt",
-		.platform_data = &db8500_wdt_pdata,
-		.pdata_size = sizeof(db8500_wdt_pdata),
-		.id = -1,
-	},
+	MFD_CELL_NAME("ux500_wdt"),
 	MFD_CELL_NAME("db8500-cpuidle"),
 };
 
diff --git a/drivers/watchdog/ux500_wdt.c b/drivers/watchdog/ux500_wdt.c
index 072758106865..40f8cf1cb234 100644
--- a/drivers/watchdog/ux500_wdt.c
+++ b/drivers/watchdog/ux500_wdt.c
@@ -15,7 +15,6 @@
 #include <linux/uaccess.h>
 #include <linux/watchdog.h>
 #include <linux/platform_device.h>
-#include <linux/platform_data/ux500_wdt.h>
 
 #include <linux/mfd/dbx500-prcmu.h>
 
@@ -23,7 +22,6 @@
 
 #define WATCHDOG_MIN	0
 #define WATCHDOG_MAX28	268435  /* 28 bit resolution in ms == 268435.455 s */
-#define WATCHDOG_MAX32	4294967 /* 32 bit resolution in ms == 4294967.295 s */
 
 static unsigned int timeout = WATCHDOG_TIMEOUT;
 module_param(timeout, uint, 0);
@@ -80,22 +78,15 @@ static struct watchdog_device ux500_wdt = {
 	.info = &ux500_wdt_info,
 	.ops = &ux500_wdt_ops,
 	.min_timeout = WATCHDOG_MIN,
-	.max_timeout = WATCHDOG_MAX32,
+	.max_timeout = WATCHDOG_MAX28,
 };
 
 static int ux500_wdt_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	int ret;
-	struct ux500_wdt_data *pdata = dev_get_platdata(dev);
-
-	if (pdata) {
-		if (pdata->timeout > 0)
-			timeout = pdata->timeout;
-		if (pdata->has_28_bits_resolution)
-			ux500_wdt.max_timeout = WATCHDOG_MAX28;
-	}
 
+	timeout = 600; /* Default to 10 minutes */
 	ux500_wdt.parent = dev;
 	watchdog_set_nowayout(&ux500_wdt, nowayout);
 
diff --git a/include/linux/platform_data/ux500_wdt.h b/include/linux/platform_data/ux500_wdt.h
deleted file mode 100644
index de6a4ad41e76..000000000000
--- a/include/linux/platform_data/ux500_wdt.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST Ericsson SA 2011
- *
- * STE Ux500 Watchdog platform data
- */
-#ifndef __UX500_WDT_H
-#define __UX500_WDT_H
-
-/**
- * struct ux500_wdt_data
- */
-struct ux500_wdt_data {
-	unsigned int timeout;
-	bool has_28_bits_resolution;
-};
-
-#endif /* __UX500_WDT_H */
-- 
cgit v1.2.3


From 31a645aea4f8da5bb190ce322c6e5aacaef13855 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Mon, 25 Oct 2021 14:40:22 +0800
Subject: bpf: Factor out a helper to prepare trampoline for struct_ops prog

Factor out a helper bpf_struct_ops_prepare_trampoline() to prepare
trampoline for BPF_PROG_TYPE_STRUCT_OPS prog. It will be used by
.test_run callback in following patch.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20211025064025.2567443-2-houtao1@huawei.com
---
 include/linux/bpf.h         |  4 ++++
 kernel/bpf/bpf_struct_ops.c | 29 +++++++++++++++++++----------
 2 files changed, 23 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6deebf8bf78f..aabd3540aaaf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1000,6 +1000,10 @@ bool bpf_struct_ops_get(const void *kdata);
 void bpf_struct_ops_put(const void *kdata);
 int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
 				       void *value);
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs,
+				      struct bpf_prog *prog,
+				      const struct btf_func_model *model,
+				      void *image, void *image_end);
 static inline bool bpf_try_module_get(const void *data, struct module *owner)
 {
 	if (owner == BPF_MODULE_OWNER)
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 9abcc33f02cf..44be101f2562 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -312,6 +312,20 @@ static int check_zero_holes(const struct btf_type *t, void *data)
 	return 0;
 }
 
+int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs,
+				      struct bpf_prog *prog,
+				      const struct btf_func_model *model,
+				      void *image, void *image_end)
+{
+	u32 flags;
+
+	tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
+	tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+	flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
+	return arch_prepare_bpf_trampoline(NULL, image, image_end,
+					   model, flags, tprogs, NULL);
+}
+
 static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 					  void *value, u64 flags)
 {
@@ -323,7 +337,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	struct bpf_tramp_progs *tprogs = NULL;
 	void *udata, *kdata;
 	int prog_fd, err = 0;
-	void *image;
+	void *image, *image_end;
 	u32 i;
 
 	if (flags)
@@ -363,12 +377,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	udata = &uvalue->data;
 	kdata = &kvalue->data;
 	image = st_map->image;
+	image_end = st_map->image + PAGE_SIZE;
 
 	for_each_member(i, t, member) {
 		const struct btf_type *mtype, *ptype;
 		struct bpf_prog *prog;
 		u32 moff;
-		u32 flags;
 
 		moff = btf_member_bit_offset(t, member) / 8;
 		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
@@ -430,14 +444,9 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 			goto reset_unlock;
 		}
 
-		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
-		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
-		flags = st_ops->func_models[i].ret_size > 0 ?
-			BPF_TRAMP_F_RET_FENTRY_RET : 0;
-		err = arch_prepare_bpf_trampoline(NULL, image,
-						  st_map->image + PAGE_SIZE,
-						  &st_ops->func_models[i],
-						  flags, tprogs, NULL);
+		err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+							&st_ops->func_models[i],
+							image, image_end);
 		if (err < 0)
 			goto reset_unlock;
 
-- 
cgit v1.2.3


From 35346ab64132d0f5919b06932d708c0d10360553 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Mon, 25 Oct 2021 14:40:23 +0800
Subject: bpf: Factor out helpers for ctx access checking

Factor out two helpers to check the read access of ctx for raw tp
and BTF function. bpf_tracing_ctx_access() is used to check
the read access to argument is valid, and bpf_tracing_btf_ctx_access()
checks whether the btf type of argument is valid besides the checking
of argument read. bpf_tracing_btf_ctx_access() will be used by the
following patch.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20211025064025.2567443-3-houtao1@huawei.com
---
 include/linux/bpf.h      | 23 +++++++++++++++++++++++
 kernel/trace/bpf_trace.c | 16 ++--------------
 net/ipv4/bpf_tcp_ca.c    |  9 +--------
 3 files changed, 26 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index aabd3540aaaf..67f71e7def56 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1650,6 +1650,29 @@ bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner);
 bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		    const struct bpf_prog *prog,
 		    struct bpf_insn_access_aux *info);
+
+static inline bool bpf_tracing_ctx_access(int off, int size,
+					  enum bpf_access_type type)
+{
+	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
+		return false;
+	if (type != BPF_READ)
+		return false;
+	if (off % size != 0)
+		return false;
+	return true;
+}
+
+static inline bool bpf_tracing_btf_ctx_access(int off, int size,
+					      enum bpf_access_type type,
+					      const struct bpf_prog *prog,
+					      struct bpf_insn_access_aux *info)
+{
+	if (!bpf_tracing_ctx_access(off, size, type))
+		return false;
+	return btf_ctx_access(off, size, type, prog, info);
+}
+
 int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
 		      const struct btf_type *t, int off, int size,
 		      enum bpf_access_type atype,
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index cbcd0d6fca7c..7396488793ff 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1646,13 +1646,7 @@ static bool raw_tp_prog_is_valid_access(int off, int size,
 					const struct bpf_prog *prog,
 					struct bpf_insn_access_aux *info)
 {
-	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
-		return false;
-	if (type != BPF_READ)
-		return false;
-	if (off % size != 0)
-		return false;
-	return true;
+	return bpf_tracing_ctx_access(off, size, type);
 }
 
 static bool tracing_prog_is_valid_access(int off, int size,
@@ -1660,13 +1654,7 @@ static bool tracing_prog_is_valid_access(int off, int size,
 					 const struct bpf_prog *prog,
 					 struct bpf_insn_access_aux *info)
 {
-	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
-		return false;
-	if (type != BPF_READ)
-		return false;
-	if (off % size != 0)
-		return false;
-	return btf_ctx_access(off, size, type, prog, info);
+	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
 }
 
 int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 57709ac09fb2..2cf02b4d77fb 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -81,14 +81,7 @@ static bool bpf_tcp_ca_is_valid_access(int off, int size,
 				       const struct bpf_prog *prog,
 				       struct bpf_insn_access_aux *info)
 {
-	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
-		return false;
-	if (type != BPF_READ)
-		return false;
-	if (off % size != 0)
-		return false;
-
-	if (!btf_ctx_access(off, size, type, prog, info))
+	if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info))
 		return false;
 
 	if (info->reg_type == PTR_TO_BTF_ID && info->btf_id == sock_id)
-- 
cgit v1.2.3


From c196906d50e360d82ed9aa5596a9d0ce89b7ab78 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Mon, 25 Oct 2021 14:40:24 +0800
Subject: bpf: Add dummy BPF STRUCT_OPS for test purpose

Currently the test of BPF STRUCT_OPS depends on the specific bpf
implementation of tcp_congestion_ops, but it can not cover all
basic functionalities (e.g, return value handling), so introduce
a dummy BPF STRUCT_OPS for test purpose.

Loading a bpf_dummy_ops implementation from userspace is prohibited,
and its only purpose is to run BPF_PROG_TYPE_STRUCT_OPS program
through bpf(BPF_PROG_TEST_RUN). Now programs for test_1() & test_2()
are supported. The following three cases are exercised in
bpf_dummy_struct_ops_test_run():

(1) test and check the value returned from state arg in test_1(state)
The content of state is copied from userspace pointer and copied back
after calling test_1(state). The user pointer is saved in an u64 array
and the array address is passed through ctx_in.

(2) test and check the return value of test_1(NULL)
Just simulate the case in which an invalid input argument is passed in.

(3) test multiple arguments passing in test_2(state, ...)
5 arguments are passed through ctx_in in form of u64 array. The first
element of array is userspace pointer of state and others 4 arguments
follow.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20211025064025.2567443-4-houtao1@huawei.com
---
 include/linux/bpf.h               |  16 +++
 kernel/bpf/bpf_struct_ops.c       |   3 +
 kernel/bpf/bpf_struct_ops_types.h |   3 +
 net/bpf/Makefile                  |   3 +
 net/bpf/bpf_dummy_struct_ops.c    | 200 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 225 insertions(+)
 create mode 100644 net/bpf/bpf_dummy_struct_ops.c

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 67f71e7def56..c098089c1b54 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1018,6 +1018,22 @@ static inline void bpf_module_put(const void *data, struct module *owner)
 	else
 		module_put(owner);
 }
+
+#ifdef CONFIG_NET
+/* Define it here to avoid the use of forward declaration */
+struct bpf_dummy_ops_state {
+	int val;
+};
+
+struct bpf_dummy_ops {
+	int (*test_1)(struct bpf_dummy_ops_state *cb);
+	int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
+		      char a3, unsigned long a4);
+};
+
+int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
+			    union bpf_attr __user *uattr);
+#endif
 #else
 static inline const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
 {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 44be101f2562..8ecfe4752769 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -93,6 +93,9 @@ const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
 };
 
 const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
+#ifdef CONFIG_NET
+	.test_run = bpf_struct_ops_test_run,
+#endif
 };
 
 static const struct btf_type *module_type;
diff --git a/kernel/bpf/bpf_struct_ops_types.h b/kernel/bpf/bpf_struct_ops_types.h
index 066d83ea1c99..5678a9ddf817 100644
--- a/kernel/bpf/bpf_struct_ops_types.h
+++ b/kernel/bpf/bpf_struct_ops_types.h
@@ -2,6 +2,9 @@
 /* internal file - do not include directly */
 
 #ifdef CONFIG_BPF_JIT
+#ifdef CONFIG_NET
+BPF_STRUCT_OPS_TYPE(bpf_dummy_ops)
+#endif
 #ifdef CONFIG_INET
 #include <net/tcp.h>
 BPF_STRUCT_OPS_TYPE(tcp_congestion_ops)
diff --git a/net/bpf/Makefile b/net/bpf/Makefile
index 1c0a98d8c28f..1ebe270bde23 100644
--- a/net/bpf/Makefile
+++ b/net/bpf/Makefile
@@ -1,2 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_BPF_SYSCALL)	:= test_run.o
+ifeq ($(CONFIG_BPF_JIT),y)
+obj-$(CONFIG_BPF_SYSCALL)	+= bpf_dummy_struct_ops.o
+endif
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
new file mode 100644
index 000000000000..fbc896323bec
--- /dev/null
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021. Huawei Technologies Co., Ltd
+ */
+#include <linux/kernel.h>
+#include <linux/bpf_verifier.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+
+extern struct bpf_struct_ops bpf_bpf_dummy_ops;
+
+/* A common type for test_N with return value in bpf_dummy_ops */
+typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
+
+struct bpf_dummy_ops_test_args {
+	u64 args[MAX_BPF_FUNC_ARGS];
+	struct bpf_dummy_ops_state state;
+};
+
+static struct bpf_dummy_ops_test_args *
+dummy_ops_init_args(const union bpf_attr *kattr, unsigned int nr)
+{
+	__u32 size_in;
+	struct bpf_dummy_ops_test_args *args;
+	void __user *ctx_in;
+	void __user *u_state;
+
+	size_in = kattr->test.ctx_size_in;
+	if (size_in != sizeof(u64) * nr)
+		return ERR_PTR(-EINVAL);
+
+	args = kzalloc(sizeof(*args), GFP_KERNEL);
+	if (!args)
+		return ERR_PTR(-ENOMEM);
+
+	ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
+	if (copy_from_user(args->args, ctx_in, size_in))
+		goto out;
+
+	/* args[0] is 0 means state argument of test_N will be NULL */
+	u_state = u64_to_user_ptr(args->args[0]);
+	if (u_state && copy_from_user(&args->state, u_state,
+				      sizeof(args->state)))
+		goto out;
+
+	return args;
+out:
+	kfree(args);
+	return ERR_PTR(-EFAULT);
+}
+
+static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args)
+{
+	void __user *u_state;
+
+	u_state = u64_to_user_ptr(args->args[0]);
+	if (u_state && copy_to_user(u_state, &args->state, sizeof(args->state)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
+{
+	dummy_ops_test_ret_fn test = (void *)image;
+	struct bpf_dummy_ops_state *state = NULL;
+
+	/* state needs to be NULL if args[0] is 0 */
+	if (args->args[0])
+		state = &args->state;
+	return test(state, args->args[1], args->args[2],
+		    args->args[3], args->args[4]);
+}
+
+int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
+			    union bpf_attr __user *uattr)
+{
+	const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops;
+	const struct btf_type *func_proto;
+	struct bpf_dummy_ops_test_args *args;
+	struct bpf_tramp_progs *tprogs;
+	void *image = NULL;
+	unsigned int op_idx;
+	int prog_ret;
+	int err;
+
+	if (prog->aux->attach_btf_id != st_ops->type_id)
+		return -EOPNOTSUPP;
+
+	func_proto = prog->aux->attach_func_proto;
+	args = dummy_ops_init_args(kattr, btf_type_vlen(func_proto));
+	if (IS_ERR(args))
+		return PTR_ERR(args);
+
+	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
+	if (!tprogs) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	if (!image) {
+		err = -ENOMEM;
+		goto out;
+	}
+	set_vm_flush_reset_perms(image);
+
+	op_idx = prog->expected_attach_type;
+	err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
+						&st_ops->func_models[op_idx],
+						image, image + PAGE_SIZE);
+	if (err < 0)
+		goto out;
+
+	set_memory_ro((long)image, 1);
+	set_memory_x((long)image, 1);
+	prog_ret = dummy_ops_call_op(image, args);
+
+	err = dummy_ops_copy_args(args);
+	if (err)
+		goto out;
+	if (put_user(prog_ret, &uattr->test.retval))
+		err = -EFAULT;
+out:
+	kfree(args);
+	bpf_jit_free_exec(image);
+	kfree(tprogs);
+	return err;
+}
+
+static int bpf_dummy_init(struct btf *btf)
+{
+	return 0;
+}
+
+static bool bpf_dummy_ops_is_valid_access(int off, int size,
+					  enum bpf_access_type type,
+					  const struct bpf_prog *prog,
+					  struct bpf_insn_access_aux *info)
+{
+	return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
+					   const struct btf *btf,
+					   const struct btf_type *t, int off,
+					   int size, enum bpf_access_type atype,
+					   u32 *next_btf_id)
+{
+	const struct btf_type *state;
+	s32 type_id;
+	int err;
+
+	type_id = btf_find_by_name_kind(btf, "bpf_dummy_ops_state",
+					BTF_KIND_STRUCT);
+	if (type_id < 0)
+		return -EINVAL;
+
+	state = btf_type_by_id(btf, type_id);
+	if (t != state) {
+		bpf_log(log, "only access to bpf_dummy_ops_state is supported\n");
+		return -EACCES;
+	}
+
+	err = btf_struct_access(log, btf, t, off, size, atype, next_btf_id);
+	if (err < 0)
+		return err;
+
+	return atype == BPF_READ ? err : NOT_INIT;
+}
+
+static const struct bpf_verifier_ops bpf_dummy_verifier_ops = {
+	.is_valid_access = bpf_dummy_ops_is_valid_access,
+	.btf_struct_access = bpf_dummy_ops_btf_struct_access,
+};
+
+static int bpf_dummy_init_member(const struct btf_type *t,
+				 const struct btf_member *member,
+				 void *kdata, const void *udata)
+{
+	return -EOPNOTSUPP;
+}
+
+static int bpf_dummy_reg(void *kdata)
+{
+	return -EOPNOTSUPP;
+}
+
+static void bpf_dummy_unreg(void *kdata)
+{
+}
+
+struct bpf_struct_ops bpf_bpf_dummy_ops = {
+	.verifier_ops = &bpf_dummy_verifier_ops,
+	.init = bpf_dummy_init,
+	.init_member = bpf_dummy_init_member,
+	.reg = bpf_dummy_reg,
+	.unreg = bpf_dummy_unreg,
+	.name = "bpf_dummy_ops",
+};
-- 
cgit v1.2.3


From 8845b4681bf44b9d2d2badf2c67cf476e42a86bd Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannekoong@fb.com>
Date: Fri, 29 Oct 2021 15:49:08 -0700
Subject: bpf: Add alignment padding for "map_extra" + consolidate holes

This patch makes 2 changes regarding alignment padding
for the "map_extra" field.

1) In the kernel header, "map_extra" and "btf_value_type_id"
are rearranged to consolidate the hole.

Before:
struct bpf_map {
	...
        u32		max_entries;	/*    36     4	*/
        u32		map_flags;	/*    40     4	*/

        /* XXX 4 bytes hole, try to pack */

        u64		map_extra;	/*    48     8	*/
        int		spin_lock_off;	/*    56     4	*/
        int		timer_off;	/*    60     4	*/
        /* --- cacheline 1 boundary (64 bytes) --- */
        u32		id;		/*    64     4	*/
        int		numa_node;	/*    68     4	*/
	...
        bool		frozen;		/*   117     1	*/

        /* XXX 10 bytes hole, try to pack */

        /* --- cacheline 2 boundary (128 bytes) --- */
	...
        struct work_struct	work;	/*   144    72	*/

        /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */
	struct mutex	freeze_mutex;	/*   216   144 	*/

        /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */
        u64		writecnt; 	/*   360     8	*/

    /* size: 384, cachelines: 6, members: 26 */
    /* sum members: 354, holes: 2, sum holes: 14 */
    /* padding: 16 */
    /* forced alignments: 2, forced holes: 1, sum forced holes: 10 */

} __attribute__((__aligned__(64)));

After:
struct bpf_map {
	...
        u32		max_entries;	/*    36     4	*/
        u64		map_extra;	/*    40     8 	*/
        u32		map_flags;	/*    48     4	*/
        int		spin_lock_off;	/*    52     4	*/
        int		timer_off;	/*    56     4	*/
        u32		id;		/*    60     4	*/

        /* --- cacheline 1 boundary (64 bytes) --- */
        int		numa_node;	/*    64     4	*/
	...
	bool		frozen		/*   113     1  */

        /* XXX 14 bytes hole, try to pack */

        /* --- cacheline 2 boundary (128 bytes) --- */
	...
        struct work_struct	work;	/*   144    72	*/

        /* --- cacheline 3 boundary (192 bytes) was 24 bytes ago --- */
        struct mutex	freeze_mutex;	/*   216   144	*/

        /* --- cacheline 5 boundary (320 bytes) was 40 bytes ago --- */
        u64		writecnt;       /*   360     8	*/

    /* size: 384, cachelines: 6, members: 26 */
    /* sum members: 354, holes: 1, sum holes: 14 */
    /* padding: 16 */
    /* forced alignments: 2, forced holes: 1, sum forced holes: 14 */

} __attribute__((__aligned__(64)));

2) Add alignment padding to the bpf_map_info struct
More details can be found in commit 36f9814a494a ("bpf: fix uapi hole
for 32 bit compat applications")

Signed-off-by: Joanne Koong <joannekoong@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20211029224909.1721024-3-joannekoong@fb.com
---
 include/linux/bpf.h            | 6 +++---
 include/uapi/linux/bpf.h       | 1 +
 tools/include/uapi/linux/bpf.h | 1 +
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c098089c1b54..f6743d4bb531 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -168,23 +168,23 @@ struct bpf_map {
 	u32 key_size;
 	u32 value_size;
 	u32 max_entries;
-	u32 map_flags;
 	u64 map_extra; /* any per-map-type extra fields */
+	u32 map_flags;
 	int spin_lock_off; /* >=0 valid offset, <0 error */
 	int timer_off; /* >=0 valid offset, <0 error */
 	u32 id;
 	int numa_node;
 	u32 btf_key_type_id;
 	u32 btf_value_type_id;
+	u32 btf_vmlinux_value_type_id;
 	struct btf *btf;
 #ifdef CONFIG_MEMCG_KMEM
 	struct mem_cgroup *memcg;
 #endif
 	char name[BPF_OBJ_NAME_LEN];
-	u32 btf_vmlinux_value_type_id;
 	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
-	/* 22 bytes hole */
+	/* 14 bytes hole */
 
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bd0c9f0487f6..ba5af15e25f5 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5662,6 +5662,7 @@ struct bpf_map_info {
 	__u32 btf_id;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u32 :32;	/* alignment pad */
 	__u64 map_extra;
 } __attribute__((aligned(8)));
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bd0c9f0487f6..ba5af15e25f5 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5662,6 +5662,7 @@ struct bpf_map_info {
 	__u32 btf_id;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
+	__u32 :32;	/* alignment pad */
 	__u64 map_extra;
 } __attribute__((aligned(8)));
 
-- 
cgit v1.2.3


From e66435936756d9bce96433be183358a8994a0f0d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 1 Nov 2021 14:56:37 -0700
Subject: mm: fix mismerge of folio page flag manipulators

I had missed a semantic conflict between commit d389a4a81155 ("mm: Add
folio flag manipulation functions") from the folio tree, and commit
eac96c3efdb5 ("mm: filemap: check if THP has hwpoisoned subpage for PMD
page fault") that added a new set of page flags.

My build tests had too many options enabled, which hid this issue.  But
if you didn't have MEMORY_FAILURE or TRANSPARENT_HUGEPAGE enabled, you'd
end up with build errors like this:

  include/linux/page-flags.h:806:29: error: macro "PAGEFLAG_FALSE" requires 2 arguments, but only 1 given
    806 | PAGEFLAG_FALSE(HasHWPoisoned)
        |                             ^

due to the missing lowercase name used for folio function naming.

Fixes: 49f8275c7d92 ("Merge tag 'folio-5.16' of git://git.infradead.org/users/willy/pagecache")
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Reported-by: Yang Shi <shy828301@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d8623d6e1141..981341a3c3c4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -803,8 +803,8 @@ PAGEFLAG_FALSE(DoubleMap, double_map)
 PAGEFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
 	TESTSCFLAG(HasHWPoisoned, has_hwpoisoned, PF_SECOND)
 #else
-PAGEFLAG_FALSE(HasHWPoisoned)
-	TESTSCFLAG_FALSE(HasHWPoisoned)
+PAGEFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
+	TESTSCFLAG_FALSE(HasHWPoisoned, has_hwpoisoned)
 #endif
 
 /*
-- 
cgit v1.2.3


From f1a456f8f3fc5828d8abcad941860380ae147b1d Mon Sep 17 00:00:00 2001
From: Talal Ahmad <talalahmad@google.com>
Date: Fri, 29 Oct 2021 22:05:42 -0400
Subject: net: avoid double accounting for pure zerocopy skbs

Track skbs with only zerocopy data and avoid charging them to kernel
memory to correctly account the memory utilization for msg_zerocopy.
All of the data in such skbs is held in user pages which are already
accounted to user. Before this change, they are charged again in
kernel in __zerocopy_sg_from_iter. The charging in kernel is
excessive because data is not being copied into skb frags. This
excessive charging can lead to kernel going into memory pressure
state which impacts all sockets in the system adversely. Mark pure
zerocopy skbs with a SKBFL_PURE_ZEROCOPY flag and remove
charge/uncharge for data in such skbs.

Initially, an skb is marked pure zerocopy when it is empty and in
zerocopy path. skb can then change from a pure zerocopy skb to mixed
data skb (zerocopy and copy data) if it is at tail of write queue and
there is room available in it and non-zerocopy data is being sent in
the next sendmsg call. At this time sk_mem_charge is done for the pure
zerocopied data and the pure zerocopy flag is unmarked. We found that
this happens very rarely on workloads that pass MSG_ZEROCOPY.

A pure zerocopy skb can later be coalesced into normal skb if they are
next to each other in queue but this patch prevents coalescing from
happening. This avoids complexity of charging when skb downgrades from
pure zerocopy to mixed. This is also rare.

In sk_wmem_free_skb, if it is a pure zerocopy skb, an sk_mem_uncharge
for SKB_TRUESIZE(MAX_TCP_HEADER) is done for sk_mem_charge in
tcp_skb_entail for an skb without data.

Testing with the msg_zerocopy.c benchmark between two hosts(100G nics)
with zerocopy showed that before this patch the 'sock' variable in
memory.stat for cgroup2 that tracks sum of sk_forward_alloc,
sk_rmem_alloc and sk_wmem_queued is around 1822720 and with this
change it is 0. This is due to no charge to sk_forward_alloc for
zerocopy data and shows memory utilization for kernel is lowered.

Signed-off-by: Talal Ahmad <talalahmad@google.com>
Acked-by: Arjun Roy <arjunroy@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 19 ++++++++++++++++++-
 include/net/tcp.h      |  8 ++++++--
 net/core/datagram.c    |  3 ++-
 net/core/skbuff.c      |  3 ++-
 net/ipv4/tcp.c         | 22 ++++++++++++++++++++--
 net/ipv4/tcp_output.c  |  7 +++++--
 6 files changed, 53 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0bd6520329f6..10869906cc57 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -454,9 +454,15 @@ enum {
 	 * all frags to avoid possible bad checksum
 	 */
 	SKBFL_SHARED_FRAG = BIT(1),
+
+	/* segment contains only zerocopy data and should not be
+	 * charged to the kernel memory.
+	 */
+	SKBFL_PURE_ZEROCOPY = BIT(2),
 };
 
 #define SKBFL_ZEROCOPY_FRAG	(SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
+#define SKBFL_ALL_ZEROCOPY	(SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY)
 
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
@@ -1464,6 +1470,17 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
 	return is_zcopy ? skb_uarg(skb) : NULL;
 }
 
+static inline bool skb_zcopy_pure(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
+}
+
+static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
+				       const struct sk_buff *skb2)
+{
+	return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2);
+}
+
 static inline void net_zcopy_get(struct ubuf_info *uarg)
 {
 	refcount_inc(&uarg->refcnt);
@@ -1528,7 +1545,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
 		if (!skb_zcopy_is_nouarg(skb))
 			uarg->callback(skb, uarg, zerocopy_success);
 
-		skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG;
+		skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
 	}
 }
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70972f3ac8fa..af91f370432e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -293,7 +293,10 @@ static inline bool tcp_out_of_memory(struct sock *sk)
 static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
 	sk_wmem_queued_add(sk, -skb->truesize);
-	sk_mem_uncharge(sk, skb->truesize);
+	if (!skb_zcopy_pure(skb))
+		sk_mem_uncharge(sk, skb->truesize);
+	else
+		sk_mem_uncharge(sk, SKB_TRUESIZE(MAX_TCP_HEADER));
 	__kfree_skb(skb);
 }
 
@@ -974,7 +977,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
 					const struct sk_buff *from)
 {
 	return likely(tcp_skb_can_collapse_to(to) &&
-		      mptcp_skb_can_collapse(to, from));
+		      mptcp_skb_can_collapse(to, from) &&
+		      skb_pure_zcopy_same(to, from));
 }
 
 /* Events passed to congestion control interface */
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 15ab9ffb27fe..ee290776c661 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -646,7 +646,8 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 		skb->truesize += truesize;
 		if (sk && sk->sk_type == SOCK_STREAM) {
 			sk_wmem_queued_add(sk, truesize);
-			sk_mem_charge(sk, truesize);
+			if (!skb_zcopy_pure(skb))
+				sk_mem_charge(sk, truesize);
 		} else {
 			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 		}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 67a9188d8a49..29e617d8d7fb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3433,8 +3433,9 @@ static inline void skb_split_no_header(struct sk_buff *skb,
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
+	const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
 
-	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
 	skb_zerocopy_clone(skb1, skb, 0);
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bc7f419184aa..2561c14a6e63 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -863,6 +863,7 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 	if (likely(skb)) {
 		bool mem_scheduled;
 
+		skb->truesize = SKB_TRUESIZE(size + MAX_TCP_HEADER);
 		if (force_schedule) {
 			mem_scheduled = true;
 			sk_forced_mem_schedule(sk, skb->truesize);
@@ -1319,6 +1320,15 @@ new_segment:
 
 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
+			/* skb changing from pure zc to mixed, must charge zc */
+			if (unlikely(skb_zcopy_pure(skb))) {
+				if (!sk_wmem_schedule(sk, skb->data_len))
+					goto wait_for_space;
+
+				sk_mem_charge(sk, skb->data_len);
+				skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+			}
+
 			if (!sk_wmem_schedule(sk, copy))
 				goto wait_for_space;
 
@@ -1339,8 +1349,16 @@ new_segment:
 			}
 			pfrag->offset += copy;
 		} else {
-			if (!sk_wmem_schedule(sk, copy))
-				goto wait_for_space;
+			/* First append to a fragless skb builds initial
+			 * pure zerocopy skb
+			 */
+			if (!skb->len)
+				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
+
+			if (!skb_zcopy_pure(skb)) {
+				if (!sk_wmem_schedule(sk, copy))
+					goto wait_for_space;
+			}
 
 			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
 			if (err == -EMSGSIZE || err == -EEXIST) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6fbbf1558033..287b57aadc37 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1677,7 +1677,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	if (delta_truesize) {
 		skb->truesize	   -= delta_truesize;
 		sk_wmem_queued_add(sk, -delta_truesize);
-		sk_mem_uncharge(sk, delta_truesize);
+		if (!skb_zcopy_pure(skb))
+			sk_mem_uncharge(sk, delta_truesize);
 	}
 
 	/* Any change of skb->len requires recalculation of tso factor. */
@@ -2295,7 +2296,9 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
 		if (len <= skb->len)
 			break;
 
-		if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
+		if (unlikely(TCP_SKB_CB(skb)->eor) ||
+		    tcp_has_tx_tstamp(skb) ||
+		    !skb_pure_zcopy_same(skb, next))
 			return false;
 
 		len -= skb->len;
-- 
cgit v1.2.3


From fcdb44d08a95003c3d040aecdee286156ec6f34e Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Mon, 1 Nov 2021 10:36:28 -0700
Subject: net: arp: introduce arp_evict_nocarrier sysctl parameter

This change introduces a new sysctl parameter, arp_evict_nocarrier.
When set (default) the ARP cache will be cleared on a NOCARRIER event.
This new option has been defaulted to '1' which maintains existing
behavior.

Clearing the ARP cache on NOCARRIER is relatively new, introduced by:

commit 859bd2ef1fc1110a8031b967ee656c53a6260a76
Author: David Ahern <dsahern@gmail.com>
Date:   Thu Oct 11 20:33:49 2018 -0700

    net: Evict neighbor entries on carrier down

The reason for this changes is to prevent the ARP cache from being
cleared when a wireless device roams. Specifically for wireless roams
the ARP cache should not be cleared because the underlying network has not
changed. Clearing the ARP cache in this case can introduce significant
delays sending out packets after a roam.

A user reported such a situation here:

https://lore.kernel.org/linux-wireless/CACsRnHWa47zpx3D1oDq9JYnZWniS8yBwW1h0WAVZ6vrbwL_S0w@mail.gmail.com/

After some investigation it was found that the kernel was holding onto
packets until ARP finished which resulted in this 1 second delay. It
was also found that the first ARP who-has was never responded to,
which is actually what caues the delay. This change is more or less
working around this behavior, but again, there is no reason to clear
the cache on a roam anyways.

As for the unanswered who-has, we know the packet made it OTA since
it was seen while monitoring. Why it never received a response is
unknown. In any case, since this is a problem on the AP side of things
all that can be done is to work around it until it is solved.

Some background on testing/reproducing the packet delay:

Hardware:
 - 2 access points configured for Fast BSS Transition (Though I don't
   see why regular reassociation wouldn't have the same behavior)
 - Wireless station running IWD as supplicant
 - A device on network able to respond to pings (I used one of the APs)

Procedure:
 - Connect to first AP
 - Ping once to establish an ARP entry
 - Start a tcpdump
 - Roam to second AP
 - Wait for operstate UP event, and note the timestamp
 - Start pinging

Results:

Below is the tcpdump after UP. It was recorded the interface went UP at
10:42:01.432875.

10:42:01.461871 ARP, Request who-has 192.168.254.1 tell 192.168.254.71, length 28
10:42:02.497976 ARP, Request who-has 192.168.254.1 tell 192.168.254.71, length 28
10:42:02.507162 ARP, Reply 192.168.254.1 is-at ac:86:74:55:b0:20, length 46
10:42:02.507185 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 1, length 64
10:42:02.507205 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 2, length 64
10:42:02.507212 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 3, length 64
10:42:02.507219 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 4, length 64
10:42:02.507225 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 5, length 64
10:42:02.507232 IP 192.168.254.71 > 192.168.254.1: ICMP echo request, id 52792, seq 6, length 64
10:42:02.515373 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 1, length 64
10:42:02.521399 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 2, length 64
10:42:02.521612 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 3, length 64
10:42:02.521941 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 4, length 64
10:42:02.522419 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 5, length 64
10:42:02.523085 IP 192.168.254.1 > 192.168.254.71: ICMP echo reply, id 52792, seq 6, length 64

You can see the first ARP who-has went out very quickly after UP, but
was never responded to. Nearly a second later the kernel retries and
gets a response. Only then do the ping packets go out. If an ARP entry
is manually added prior to UP (after the cache is cleared) it is seen
that the first ping is never responded to, so its not only an issue with
ARP but with data packets in general.

As mentioned prior, the wireless interface was also monitored to verify
the ping/ARP packet made it OTA which was observed to be true.

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ip-sysctl.rst |  9 +++++++++
 include/linux/inetdevice.h             |  2 ++
 include/uapi/linux/ip.h                |  1 +
 include/uapi/linux/sysctl.h            |  1 +
 net/ipv4/arp.c                         | 11 ++++++++++-
 net/ipv4/devinet.c                     |  4 ++++
 6 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 16b8bf72feaf..18fde4ed7a5e 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -1611,6 +1611,15 @@ arp_accept - BOOLEAN
 	gratuitous arp frame, the arp table will be updated regardless
 	if this setting is on or off.
 
+arp_evict_nocarrier - BOOLEAN
+	Clears the ARP cache on NOCARRIER events. This option is important for
+	wireless devices where the ARP cache should not be cleared when roaming
+	between access points on the same network. In most cases this should
+	remain as the default (1).
+
+	- 1 - (default): Clear the ARP cache on NOCARRIER events
+	- 0 - Do not clear ARP cache on NOCARRIER events
+
 mcast_solicit - INTEGER
 	The maximum number of multicast probes in INCOMPLETE state,
 	when the associated hardware address is unknown.  Defaults
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index a038feb63f23..518b484a7f07 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -133,6 +133,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 #define IN_DEV_ARP_ANNOUNCE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE)
 #define IN_DEV_ARP_IGNORE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_IGNORE)
 #define IN_DEV_ARP_NOTIFY(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_NOTIFY)
+#define IN_DEV_ARP_EVICT_NOCARRIER(in_dev) IN_DEV_ANDCONF((in_dev), \
+							  ARP_EVICT_NOCARRIER)
 
 struct in_ifaddr {
 	struct hlist_node	hash;
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index e42d13b55cf3..e00bbb9c47bb 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -169,6 +169,7 @@ enum
 	IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
 	IPV4_DEVCONF_DROP_GRATUITOUS_ARP,
 	IPV4_DEVCONF_BC_FORWARDING,
+	IPV4_DEVCONF_ARP_EVICT_NOCARRIER,
 	__IPV4_DEVCONF_MAX
 };
 
diff --git a/include/uapi/linux/sysctl.h b/include/uapi/linux/sysctl.h
index 1e05d3caa712..6a3b194c50fe 100644
--- a/include/uapi/linux/sysctl.h
+++ b/include/uapi/linux/sysctl.h
@@ -482,6 +482,7 @@ enum
 	NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
 	NET_IPV4_CONF_ARP_ACCEPT=21,
 	NET_IPV4_CONF_ARP_NOTIFY=22,
+	NET_IPV4_CONF_ARP_EVICT_NOCARRIER=23,
 };
 
 /* /proc/sys/net/ipv4/netfilter */
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 922dd73e5740..857a144b1ea9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1247,6 +1247,8 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct netdev_notifier_change_info *change_info;
+	struct in_device *in_dev;
+	bool evict_nocarrier;
 
 	switch (event) {
 	case NETDEV_CHANGEADDR:
@@ -1257,7 +1259,14 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
 		change_info = ptr;
 		if (change_info->flags_changed & IFF_NOARP)
 			neigh_changeaddr(&arp_tbl, dev);
-		if (!netif_carrier_ok(dev))
+
+		in_dev = __in_dev_get_rtnl(dev);
+		if (!in_dev)
+			evict_nocarrier = true;
+		else
+			evict_nocarrier = IN_DEV_ARP_EVICT_NOCARRIER(in_dev);
+
+		if (evict_nocarrier && !netif_carrier_ok(dev))
 			neigh_carrier_down(&arp_tbl, dev);
 		break;
 	default:
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index f4468980b675..ec73a0d52d3e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -75,6 +75,7 @@ static struct ipv4_devconf ipv4_devconf = {
 		[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
+		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
 	},
 };
 
@@ -87,6 +88,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
 		[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
 		[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
 		[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] =  1000 /*ms*/,
+		[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
 	},
 };
 
@@ -2532,6 +2534,8 @@ static struct devinet_sysctl_table {
 		DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
 		DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
+		DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
+					"arp_evict_nocarrier"),
 		DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
 		DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
 					"force_igmp_version"),
-- 
cgit v1.2.3


From 18ac597af25e9760b76471524096f5b29eb820e6 Mon Sep 17 00:00:00 2001
From: James Prestwood <prestwoj@gmail.com>
Date: Mon, 1 Nov 2021 10:36:29 -0700
Subject: net: ndisc: introduce ndisc_evict_nocarrier sysctl parameter

In most situations the neighbor discovery cache should be cleared on a
NOCARRIER event which is currently done unconditionally. But for wireless
roams the neighbor discovery cache can and should remain intact since
the underlying network has not changed.

This patch introduces a sysctl option ndisc_evict_nocarrier which can
be disabled by a wireless supplicant during a roam. This allows packets
to be sent after a roam immediately without having to wait for
neighbor discovery.

A user reported roughly a 1 second delay after a roam before packets
could be sent out (note, on IPv4). This delay was due to the ARP
cache being cleared. During testing of this same scenario using IPv6
no delay was noticed, but regardless there is no reason to clear
the ndisc cache for wireless roams.

Signed-off-by: James Prestwood <prestwoj@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ip-sysctl.rst |  9 +++++++++
 include/linux/ipv6.h                   |  1 +
 include/uapi/linux/ipv6.h              |  1 +
 net/ipv6/addrconf.c                    | 12 ++++++++++++
 net/ipv6/ndisc.c                       | 12 +++++++++++-
 5 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 18fde4ed7a5e..c61cc0219f4c 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2350,6 +2350,15 @@ ndisc_tclass - INTEGER
 
 	* 0 - (default)
 
+ndisc_evict_nocarrier - BOOLEAN
+	Clears the neighbor discovery table on NOCARRIER events. This option is
+	important for wireless devices where the neighbor discovery cache should
+	not be cleared when roaming between access points on the same network.
+	In most cases this should remain as the default (1).
+
+	- 1 - (default): Clear neighbor discover cache on NOCARRIER events.
+	- 0 - Do not clear neighbor discovery cache on NOCARRIER events.
+
 mldv1_unsolicited_report_interval - INTEGER
 	The interval in milliseconds in which the next unsolicited
 	MLDv1 report retransmit will take place.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index c383630d3f06..20c1f968da7c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -79,6 +79,7 @@ struct ipv6_devconf {
 	__u32		ioam6_id;
 	__u32		ioam6_id_wide;
 	__u8		ioam6_enabled;
+	__u8		ndisc_evict_nocarrier;
 
 	struct ctl_table_header *sysctl_header;
 };
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index b243a53fa985..d4178dace0bf 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -193,6 +193,7 @@ enum {
 	DEVCONF_IOAM6_ENABLED,
 	DEVCONF_IOAM6_ID,
 	DEVCONF_IOAM6_ID_WIDE,
+	DEVCONF_NDISC_EVICT_NOCARRIER,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9e1463a2acae..3445f8017430 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -241,6 +241,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.ioam6_enabled		= 0,
 	.ioam6_id               = IOAM6_DEFAULT_IF_ID,
 	.ioam6_id_wide		= IOAM6_DEFAULT_IF_ID_WIDE,
+	.ndisc_evict_nocarrier	= 1,
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -300,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.ioam6_enabled		= 0,
 	.ioam6_id               = IOAM6_DEFAULT_IF_ID,
 	.ioam6_id_wide		= IOAM6_DEFAULT_IF_ID_WIDE,
+	.ndisc_evict_nocarrier	= 1,
 };
 
 /* Check if link is ready: is it up and is a valid qdisc available */
@@ -5545,6 +5547,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
 	array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
 	array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
+	array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -6986,6 +6989,15 @@ static const struct ctl_table addrconf_sysctl[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_douintvec,
 	},
+	{
+		.procname	= "ndisc_evict_nocarrier",
+		.data		= &ipv6_devconf.ndisc_evict_nocarrier,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= (void *)SYSCTL_ZERO,
+		.extra2		= (void *)SYSCTL_ONE,
+	},
 	{
 		/* sentinel */
 	}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 184190b9ea25..f03b597e4121 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1794,6 +1794,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 	struct netdev_notifier_change_info *change_info;
 	struct net *net = dev_net(dev);
 	struct inet6_dev *idev;
+	bool evict_nocarrier;
 
 	switch (event) {
 	case NETDEV_CHANGEADDR:
@@ -1810,10 +1811,19 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
 		in6_dev_put(idev);
 		break;
 	case NETDEV_CHANGE:
+		idev = in6_dev_get(dev);
+		if (!idev)
+			evict_nocarrier = true;
+		else {
+			evict_nocarrier = idev->cnf.ndisc_evict_nocarrier &&
+					  net->ipv6.devconf_all->ndisc_evict_nocarrier;
+			in6_dev_put(idev);
+		}
+
 		change_info = ptr;
 		if (change_info->flags_changed & IFF_NOARP)
 			neigh_changeaddr(&nd_tbl, dev);
-		if (!netif_carrier_ok(dev))
+		if (evict_nocarrier && !netif_carrier_ok(dev))
 			neigh_carrier_down(&nd_tbl, dev);
 		break;
 	case NETDEV_DOWN:
-- 
cgit v1.2.3


From 4a08e3271c55f8b5d56906a8aa5bd041911cf897 Mon Sep 17 00:00:00 2001
From: "Hector.Yuan" <hector.yuan@mediatek.com>
Date: Mon, 4 Oct 2021 22:42:33 +0800
Subject: cpufreq: Fix parameter in parse_perf_domain()

Pass cpu to parse_perf_domain() instead of pcpu.

Fixes: 8486a32dd484 ("cpufreq: Add of_perf_domain_get_sharing_cpumask")
Signed-off-by: Hector.Yuan <hector.yuan@mediatek.com>
[ Viresh: Massaged changelog ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 include/linux/cpufreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index ff88bb3e44fc..66a1f495f01a 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -1041,7 +1041,7 @@ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_
 		if (cpu == pcpu)
 			continue;
 
-		ret = parse_perf_domain(pcpu, list_name, cell_name);
+		ret = parse_perf_domain(cpu, list_name, cell_name);
 		if (ret < 0)
 			continue;
 
-- 
cgit v1.2.3


From 84882cf72cd774cf16fd338bdbf00f69ac9f9194 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 1 Nov 2021 22:26:08 -0700
Subject: Revert "net: avoid double accounting for pure zerocopy skbs"

This reverts commit f1a456f8f3fc5828d8abcad941860380ae147b1d.

  WARNING: CPU: 1 PID: 6819 at net/core/skbuff.c:5429 skb_try_coalesce+0x78b/0x7e0
  CPU: 1 PID: 6819 Comm: xxxxxxx Kdump: loaded Tainted: G S                5.15.0-04194-gd852503f7711 #16
  RIP: 0010:skb_try_coalesce+0x78b/0x7e0
  Code: e8 2a bf 41 ff 44 8b b3 bc 00 00 00 48 8b 7c 24 30 e8 19 c0 41 ff 44 89 f0 48 03 83 c0 00 00 00 48 89 44 24 40 e9 47 fb ff ff <0f> 0b e9 ca fc ff ff 4c 8d 70 ff 48 83 c0 07 48 89 44 24 38 e9 61
  RSP: 0018:ffff88881f449688 EFLAGS: 00010282
  RAX: 00000000fffffe96 RBX: ffff8881566e4460 RCX: ffffffff82079f7e
  RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8881566e47b0
  RBP: ffff8881566e46e0 R08: ffffed102619235d R09: ffffed102619235d
  R10: ffff888130c91ae3 R11: ffffed102619235c R12: ffff88881f4498a0
  R13: 0000000000000056 R14: 0000000000000009 R15: ffff888130c91ac0
  FS:  00007fec2cbb9700(0000) GS:ffff88881f440000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007fec1b060d80 CR3: 00000003acf94005 CR4: 00000000003706e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   <IRQ>
   tcp_try_coalesce+0xeb/0x290
   ? tcp_parse_options+0x610/0x610
   ? mark_held_locks+0x79/0xa0
   tcp_queue_rcv+0x69/0x2f0
   tcp_rcv_established+0xa49/0xd40
   ? tcp_data_queue+0x18a0/0x18a0
   tcp_v6_do_rcv+0x1c9/0x880
   ? rt6_mtu_change_route+0x100/0x100
   tcp_v6_rcv+0x1624/0x1830

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 19 +------------------
 include/net/tcp.h      |  8 ++------
 net/core/datagram.c    |  3 +--
 net/core/skbuff.c      |  3 +--
 net/ipv4/tcp.c         | 22 ++--------------------
 net/ipv4/tcp_output.c  |  7 ++-----
 6 files changed, 9 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 10869906cc57..0bd6520329f6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -454,15 +454,9 @@ enum {
 	 * all frags to avoid possible bad checksum
 	 */
 	SKBFL_SHARED_FRAG = BIT(1),
-
-	/* segment contains only zerocopy data and should not be
-	 * charged to the kernel memory.
-	 */
-	SKBFL_PURE_ZEROCOPY = BIT(2),
 };
 
 #define SKBFL_ZEROCOPY_FRAG	(SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
-#define SKBFL_ALL_ZEROCOPY	(SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY)
 
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
@@ -1470,17 +1464,6 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
 	return is_zcopy ? skb_uarg(skb) : NULL;
 }
 
-static inline bool skb_zcopy_pure(const struct sk_buff *skb)
-{
-	return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
-}
-
-static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
-				       const struct sk_buff *skb2)
-{
-	return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2);
-}
-
 static inline void net_zcopy_get(struct ubuf_info *uarg)
 {
 	refcount_inc(&uarg->refcnt);
@@ -1545,7 +1528,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
 		if (!skb_zcopy_is_nouarg(skb))
 			uarg->callback(skb, uarg, zerocopy_success);
 
-		skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
+		skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG;
 	}
 }
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index af91f370432e..70972f3ac8fa 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -293,10 +293,7 @@ static inline bool tcp_out_of_memory(struct sock *sk)
 static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
 	sk_wmem_queued_add(sk, -skb->truesize);
-	if (!skb_zcopy_pure(skb))
-		sk_mem_uncharge(sk, skb->truesize);
-	else
-		sk_mem_uncharge(sk, SKB_TRUESIZE(MAX_TCP_HEADER));
+	sk_mem_uncharge(sk, skb->truesize);
 	__kfree_skb(skb);
 }
 
@@ -977,8 +974,7 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
 					const struct sk_buff *from)
 {
 	return likely(tcp_skb_can_collapse_to(to) &&
-		      mptcp_skb_can_collapse(to, from) &&
-		      skb_pure_zcopy_same(to, from));
+		      mptcp_skb_can_collapse(to, from));
 }
 
 /* Events passed to congestion control interface */
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee290776c661..15ab9ffb27fe 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -646,8 +646,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 		skb->truesize += truesize;
 		if (sk && sk->sk_type == SOCK_STREAM) {
 			sk_wmem_queued_add(sk, truesize);
-			if (!skb_zcopy_pure(skb))
-				sk_mem_charge(sk, truesize);
+			sk_mem_charge(sk, truesize);
 		} else {
 			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 		}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 29e617d8d7fb..67a9188d8a49 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3433,9 +3433,8 @@ static inline void skb_split_no_header(struct sk_buff *skb,
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
-	const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
 
-	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
+	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
 	skb_zerocopy_clone(skb1, skb, 0);
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2561c14a6e63..bc7f419184aa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -863,7 +863,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 	if (likely(skb)) {
 		bool mem_scheduled;
 
-		skb->truesize = SKB_TRUESIZE(size + MAX_TCP_HEADER);
 		if (force_schedule) {
 			mem_scheduled = true;
 			sk_forced_mem_schedule(sk, skb->truesize);
@@ -1320,15 +1319,6 @@ new_segment:
 
 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
-			/* skb changing from pure zc to mixed, must charge zc */
-			if (unlikely(skb_zcopy_pure(skb))) {
-				if (!sk_wmem_schedule(sk, skb->data_len))
-					goto wait_for_space;
-
-				sk_mem_charge(sk, skb->data_len);
-				skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
-			}
-
 			if (!sk_wmem_schedule(sk, copy))
 				goto wait_for_space;
 
@@ -1349,16 +1339,8 @@ new_segment:
 			}
 			pfrag->offset += copy;
 		} else {
-			/* First append to a fragless skb builds initial
-			 * pure zerocopy skb
-			 */
-			if (!skb->len)
-				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
-
-			if (!skb_zcopy_pure(skb)) {
-				if (!sk_wmem_schedule(sk, copy))
-					goto wait_for_space;
-			}
+			if (!sk_wmem_schedule(sk, copy))
+				goto wait_for_space;
 
 			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
 			if (err == -EMSGSIZE || err == -EEXIST) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 287b57aadc37..6fbbf1558033 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1677,8 +1677,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	if (delta_truesize) {
 		skb->truesize	   -= delta_truesize;
 		sk_wmem_queued_add(sk, -delta_truesize);
-		if (!skb_zcopy_pure(skb))
-			sk_mem_uncharge(sk, delta_truesize);
+		sk_mem_uncharge(sk, delta_truesize);
 	}
 
 	/* Any change of skb->len requires recalculation of tso factor. */
@@ -2296,9 +2295,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
 		if (len <= skb->len)
 			break;
 
-		if (unlikely(TCP_SKB_CB(skb)->eor) ||
-		    tcp_has_tx_tstamp(skb) ||
-		    !skb_pure_zcopy_same(skb, next))
+		if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
 			return false;
 
 		len -= skb->len;
-- 
cgit v1.2.3


From ca7752caeaa70bd31d1714af566c9809688544af Mon Sep 17 00:00:00 2001
From: Michael Pratt <mpratt@google.com>
Date: Mon, 1 Nov 2021 17:06:15 -0400
Subject: posix-cpu-timers: Clear task::posix_cputimers_work in copy_process()

copy_process currently copies task_struct.posix_cputimers_work as-is. If a
timer interrupt arrives while handling clone and before dup_task_struct
completes then the child task will have:

1. posix_cputimers_work.scheduled = true
2. posix_cputimers_work.work queued.

copy_process clears task_struct.task_works, so (2) will have no effect and
posix_cpu_timers_work will never run (not to mention it doesn't make sense
for two tasks to share a common linked list).

Since posix_cpu_timers_work never runs, posix_cputimers_work.scheduled is
never cleared. Since scheduled is set, future timer interrupts will skip
scheduling work, with the ultimate result that the task will never receive
timer expirations.

Together, the complete flow is:

1. Task 1 calls clone(), enters kernel.
2. Timer interrupt fires, schedules task work on Task 1.
   2a. task_struct.posix_cputimers_work.scheduled = true
   2b. task_struct.posix_cputimers_work.work added to
       task_struct.task_works.
3. dup_task_struct() copies Task 1 to Task 2.
4. copy_process() clears task_struct.task_works for Task 2.
5. Future timer interrupts on Task 2 see
   task_struct.posix_cputimers_work.scheduled = true and skip scheduling
   work.

Fix this by explicitly clearing contents of task_struct.posix_cputimers_work
in copy_process(). This was never meant to be shared or inherited across
tasks in the first place.

Fixes: 1fb497dd0030 ("posix-cpu-timers: Provide mechanisms to defer timer handling to task_work")
Reported-by: Rhys Hiltner <rhys@justin.tv>
Signed-off-by: Michael Pratt <mpratt@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20211101210615.716522-1-mpratt@google.com
---
 include/linux/posix-timers.h   |  2 ++
 kernel/fork.c                  |  1 +
 kernel/time/posix-cpu-timers.c | 19 +++++++++++++++++--
 3 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 00fef0064355..5bbcd280bfd2 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -184,8 +184,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
 #endif
 
 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void clear_posix_cputimers_work(struct task_struct *p);
 void posix_cputimers_init_work(void);
 #else
+static inline void clear_posix_cputimers_work(struct task_struct *p) { }
 static inline void posix_cputimers_init_work(void) { }
 #endif
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e9feeef555e..8269ae2e5d7c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2279,6 +2279,7 @@ static __latent_entropy struct task_struct *copy_process(
 	p->pdeath_signal = 0;
 	INIT_LIST_HEAD(&p->thread_group);
 	p->task_works = NULL;
+	clear_posix_cputimers_work(p);
 
 #ifdef CONFIG_KRETPROBES
 	p->kretprobe_instances.first = NULL;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 643d412ac623..96b4e7810426 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1158,14 +1158,29 @@ static void posix_cpu_timers_work(struct callback_head *work)
 	handle_posix_cpu_timers(current);
 }
 
+/*
+ * Clear existing posix CPU timers task work.
+ */
+void clear_posix_cputimers_work(struct task_struct *p)
+{
+	/*
+	 * A copied work entry from the old task is not meaningful, clear it.
+	 * N.B. init_task_work will not do this.
+	 */
+	memset(&p->posix_cputimers_work.work, 0,
+	       sizeof(p->posix_cputimers_work.work));
+	init_task_work(&p->posix_cputimers_work.work,
+		       posix_cpu_timers_work);
+	p->posix_cputimers_work.scheduled = false;
+}
+
 /*
  * Initialize posix CPU timers task work in init task. Out of line to
  * keep the callback static and to avoid header recursion hell.
  */
 void __init posix_cputimers_init_work(void)
 {
-	init_task_work(&current->posix_cputimers_work.work,
-		       posix_cpu_timers_work);
+	clear_posix_cputimers_work(current);
 }
 
 /*
-- 
cgit v1.2.3


From 8791545eda52e8f3bc48e3cd902e38bf4ba4c9de Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 22 Oct 2021 16:17:03 -0400
Subject: NFS: Move NFS protocol display macros to global header

Refactor: surface useful show_ macros so they can be shared between
the client and server trace code.

Additional clean up:
- Housekeeping: ensure the correct #include files are pulled in
  and add proper TRACE_DEFINE_ENUM where they are missing
- Use a consistent naming scheme for the helpers
- Store values to be displayed symbolically as unsigned long, as
  that is the type that the __print_yada() functions take

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4trace.h         | 403 ++++-----------------------------------------
 fs/nfs/nfstrace.h          | 117 +++----------
 fs/nfs/pnfs.h              |   4 -
 fs/nfsd/trace.h            |   1 +
 include/linux/nfs4.h       |   4 +
 include/trace/events/nfs.h | 375 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 433 insertions(+), 471 deletions(-)
 create mode 100644 include/trace/events/nfs.h

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 424d9cd4c196..18f149f72160 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -12,309 +12,7 @@
 #include <trace/events/sunrpc_base.h>
 
 #include <trace/events/fs.h>
-
-TRACE_DEFINE_ENUM(EPERM);
-TRACE_DEFINE_ENUM(ENOENT);
-TRACE_DEFINE_ENUM(EIO);
-TRACE_DEFINE_ENUM(ENXIO);
-TRACE_DEFINE_ENUM(EACCES);
-TRACE_DEFINE_ENUM(EEXIST);
-TRACE_DEFINE_ENUM(EXDEV);
-TRACE_DEFINE_ENUM(ENOTDIR);
-TRACE_DEFINE_ENUM(EISDIR);
-TRACE_DEFINE_ENUM(EFBIG);
-TRACE_DEFINE_ENUM(ENOSPC);
-TRACE_DEFINE_ENUM(EROFS);
-TRACE_DEFINE_ENUM(EMLINK);
-TRACE_DEFINE_ENUM(ENAMETOOLONG);
-TRACE_DEFINE_ENUM(ENOTEMPTY);
-TRACE_DEFINE_ENUM(EDQUOT);
-TRACE_DEFINE_ENUM(ESTALE);
-TRACE_DEFINE_ENUM(EBADHANDLE);
-TRACE_DEFINE_ENUM(EBADCOOKIE);
-TRACE_DEFINE_ENUM(ENOTSUPP);
-TRACE_DEFINE_ENUM(ETOOSMALL);
-TRACE_DEFINE_ENUM(EREMOTEIO);
-TRACE_DEFINE_ENUM(EBADTYPE);
-TRACE_DEFINE_ENUM(EAGAIN);
-TRACE_DEFINE_ENUM(ELOOP);
-TRACE_DEFINE_ENUM(EOPNOTSUPP);
-TRACE_DEFINE_ENUM(EDEADLK);
-TRACE_DEFINE_ENUM(ENOMEM);
-TRACE_DEFINE_ENUM(EKEYEXPIRED);
-TRACE_DEFINE_ENUM(ETIMEDOUT);
-TRACE_DEFINE_ENUM(ERESTARTSYS);
-TRACE_DEFINE_ENUM(ECONNREFUSED);
-TRACE_DEFINE_ENUM(ECONNRESET);
-TRACE_DEFINE_ENUM(ENETUNREACH);
-TRACE_DEFINE_ENUM(EHOSTUNREACH);
-TRACE_DEFINE_ENUM(EHOSTDOWN);
-TRACE_DEFINE_ENUM(EPIPE);
-TRACE_DEFINE_ENUM(EPFNOSUPPORT);
-TRACE_DEFINE_ENUM(EPROTONOSUPPORT);
-
-TRACE_DEFINE_ENUM(NFS4_OK);
-TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
-TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
-TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
-TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
-TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
-TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
-TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
-TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
-TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
-TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
-TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
-TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
-TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
-TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
-TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
-TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
-TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
-TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
-TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
-TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
-TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
-TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
-TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
-TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
-TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
-TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
-TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
-TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
-TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
-TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
-TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
-TRACE_DEFINE_ENUM(NFS4ERR_IO);
-TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
-TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
-TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
-TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
-TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
-TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
-TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
-TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
-TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
-TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
-TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
-TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
-TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
-TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
-TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
-TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
-TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
-TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
-TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
-TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
-TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
-TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
-TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
-TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
-TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
-TRACE_DEFINE_ENUM(NFS4ERR_PERM);
-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
-TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
-TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
-TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
-TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
-TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
-TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
-TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
-TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
-TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
-TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
-TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
-TRACE_DEFINE_ENUM(NFS4ERR_SAME);
-TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
-TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
-TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
-TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
-TRACE_DEFINE_ENUM(NFS4ERR_STALE);
-TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
-TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
-TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
-TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
-TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
-TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
-TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
-TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
-TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
-TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
-
-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
-TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
-
-#define show_nfsv4_errors(error) \
-	__print_symbolic(error, \
-		{ NFS4_OK, "OK" }, \
-		/* Mapped by nfs4_stat_to_errno() */ \
-		{ EPERM, "EPERM" }, \
-		{ ENOENT, "ENOENT" }, \
-		{ EIO, "EIO" }, \
-		{ ENXIO, "ENXIO" }, \
-		{ EACCES, "EACCES" }, \
-		{ EEXIST, "EEXIST" }, \
-		{ EXDEV, "EXDEV" }, \
-		{ ENOTDIR, "ENOTDIR" }, \
-		{ EISDIR, "EISDIR" }, \
-		{ EFBIG, "EFBIG" }, \
-		{ ENOSPC, "ENOSPC" }, \
-		{ EROFS, "EROFS" }, \
-		{ EMLINK, "EMLINK" }, \
-		{ ENAMETOOLONG, "ENAMETOOLONG" }, \
-		{ ENOTEMPTY, "ENOTEMPTY" }, \
-		{ EDQUOT, "EDQUOT" }, \
-		{ ESTALE, "ESTALE" }, \
-		{ EBADHANDLE, "EBADHANDLE" }, \
-		{ EBADCOOKIE, "EBADCOOKIE" }, \
-		{ ENOTSUPP, "ENOTSUPP" }, \
-		{ ETOOSMALL, "ETOOSMALL" }, \
-		{ EREMOTEIO, "EREMOTEIO" }, \
-		{ EBADTYPE, "EBADTYPE" }, \
-		{ EAGAIN, "EAGAIN" }, \
-		{ ELOOP, "ELOOP" }, \
-		{ EOPNOTSUPP, "EOPNOTSUPP" }, \
-		{ EDEADLK, "EDEADLK" }, \
-		/* RPC errors */ \
-		{ ENOMEM, "ENOMEM" }, \
-		{ EKEYEXPIRED, "EKEYEXPIRED" }, \
-		{ ETIMEDOUT, "ETIMEDOUT" }, \
-		{ ERESTARTSYS, "ERESTARTSYS" }, \
-		{ ECONNREFUSED, "ECONNREFUSED" }, \
-		{ ECONNRESET, "ECONNRESET" }, \
-		{ ENETUNREACH, "ENETUNREACH" }, \
-		{ EHOSTUNREACH, "EHOSTUNREACH" }, \
-		{ EHOSTDOWN, "EHOSTDOWN" }, \
-		{ EPIPE, "EPIPE" }, \
-		{ EPFNOSUPPORT, "EPFNOSUPPORT" }, \
-		{ EPROTONOSUPPORT, "EPROTONOSUPPORT" }, \
-		/* NFSv4 native errors */ \
-		{ NFS4ERR_ACCESS, "ACCESS" }, \
-		{ NFS4ERR_ATTRNOTSUPP, "ATTRNOTSUPP" }, \
-		{ NFS4ERR_ADMIN_REVOKED, "ADMIN_REVOKED" }, \
-		{ NFS4ERR_BACK_CHAN_BUSY, "BACK_CHAN_BUSY" }, \
-		{ NFS4ERR_BADCHAR, "BADCHAR" }, \
-		{ NFS4ERR_BADHANDLE, "BADHANDLE" }, \
-		{ NFS4ERR_BADIOMODE, "BADIOMODE" }, \
-		{ NFS4ERR_BADLAYOUT, "BADLAYOUT" }, \
-		{ NFS4ERR_BADLABEL, "BADLABEL" }, \
-		{ NFS4ERR_BADNAME, "BADNAME" }, \
-		{ NFS4ERR_BADOWNER, "BADOWNER" }, \
-		{ NFS4ERR_BADSESSION, "BADSESSION" }, \
-		{ NFS4ERR_BADSLOT, "BADSLOT" }, \
-		{ NFS4ERR_BADTYPE, "BADTYPE" }, \
-		{ NFS4ERR_BADXDR, "BADXDR" }, \
-		{ NFS4ERR_BAD_COOKIE, "BAD_COOKIE" }, \
-		{ NFS4ERR_BAD_HIGH_SLOT, "BAD_HIGH_SLOT" }, \
-		{ NFS4ERR_BAD_RANGE, "BAD_RANGE" }, \
-		{ NFS4ERR_BAD_SEQID, "BAD_SEQID" }, \
-		{ NFS4ERR_BAD_SESSION_DIGEST, "BAD_SESSION_DIGEST" }, \
-		{ NFS4ERR_BAD_STATEID, "BAD_STATEID" }, \
-		{ NFS4ERR_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
-		{ NFS4ERR_CLID_INUSE, "CLID_INUSE" }, \
-		{ NFS4ERR_CLIENTID_BUSY, "CLIENTID_BUSY" }, \
-		{ NFS4ERR_COMPLETE_ALREADY, "COMPLETE_ALREADY" }, \
-		{ NFS4ERR_CONN_NOT_BOUND_TO_SESSION, \
-			"CONN_NOT_BOUND_TO_SESSION" }, \
-		{ NFS4ERR_DEADLOCK, "DEADLOCK" }, \
-		{ NFS4ERR_DEADSESSION, "DEAD_SESSION" }, \
-		{ NFS4ERR_DELAY, "DELAY" }, \
-		{ NFS4ERR_DELEG_ALREADY_WANTED, \
-			"DELEG_ALREADY_WANTED" }, \
-		{ NFS4ERR_DELEG_REVOKED, "DELEG_REVOKED" }, \
-		{ NFS4ERR_DENIED, "DENIED" }, \
-		{ NFS4ERR_DIRDELEG_UNAVAIL, "DIRDELEG_UNAVAIL" }, \
-		{ NFS4ERR_DQUOT, "DQUOT" }, \
-		{ NFS4ERR_ENCR_ALG_UNSUPP, "ENCR_ALG_UNSUPP" }, \
-		{ NFS4ERR_EXIST, "EXIST" }, \
-		{ NFS4ERR_EXPIRED, "EXPIRED" }, \
-		{ NFS4ERR_FBIG, "FBIG" }, \
-		{ NFS4ERR_FHEXPIRED, "FHEXPIRED" }, \
-		{ NFS4ERR_FILE_OPEN, "FILE_OPEN" }, \
-		{ NFS4ERR_GRACE, "GRACE" }, \
-		{ NFS4ERR_HASH_ALG_UNSUPP, "HASH_ALG_UNSUPP" }, \
-		{ NFS4ERR_INVAL, "INVAL" }, \
-		{ NFS4ERR_IO, "IO" }, \
-		{ NFS4ERR_ISDIR, "ISDIR" }, \
-		{ NFS4ERR_LAYOUTTRYLATER, "LAYOUTTRYLATER" }, \
-		{ NFS4ERR_LAYOUTUNAVAILABLE, "LAYOUTUNAVAILABLE" }, \
-		{ NFS4ERR_LEASE_MOVED, "LEASE_MOVED" }, \
-		{ NFS4ERR_LOCKED, "LOCKED" }, \
-		{ NFS4ERR_LOCKS_HELD, "LOCKS_HELD" }, \
-		{ NFS4ERR_LOCK_RANGE, "LOCK_RANGE" }, \
-		{ NFS4ERR_MINOR_VERS_MISMATCH, "MINOR_VERS_MISMATCH" }, \
-		{ NFS4ERR_MLINK, "MLINK" }, \
-		{ NFS4ERR_MOVED, "MOVED" }, \
-		{ NFS4ERR_NAMETOOLONG, "NAMETOOLONG" }, \
-		{ NFS4ERR_NOENT, "NOENT" }, \
-		{ NFS4ERR_NOFILEHANDLE, "NOFILEHANDLE" }, \
-		{ NFS4ERR_NOMATCHING_LAYOUT, "NOMATCHING_LAYOUT" }, \
-		{ NFS4ERR_NOSPC, "NOSPC" }, \
-		{ NFS4ERR_NOTDIR, "NOTDIR" }, \
-		{ NFS4ERR_NOTEMPTY, "NOTEMPTY" }, \
-		{ NFS4ERR_NOTSUPP, "NOTSUPP" }, \
-		{ NFS4ERR_NOT_ONLY_OP, "NOT_ONLY_OP" }, \
-		{ NFS4ERR_NOT_SAME, "NOT_SAME" }, \
-		{ NFS4ERR_NO_GRACE, "NO_GRACE" }, \
-		{ NFS4ERR_NXIO, "NXIO" }, \
-		{ NFS4ERR_OLD_STATEID, "OLD_STATEID" }, \
-		{ NFS4ERR_OPENMODE, "OPENMODE" }, \
-		{ NFS4ERR_OP_ILLEGAL, "OP_ILLEGAL" }, \
-		{ NFS4ERR_OP_NOT_IN_SESSION, "OP_NOT_IN_SESSION" }, \
-		{ NFS4ERR_PERM, "PERM" }, \
-		{ NFS4ERR_PNFS_IO_HOLE, "PNFS_IO_HOLE" }, \
-		{ NFS4ERR_PNFS_NO_LAYOUT, "PNFS_NO_LAYOUT" }, \
-		{ NFS4ERR_RECALLCONFLICT, "RECALLCONFLICT" }, \
-		{ NFS4ERR_RECLAIM_BAD, "RECLAIM_BAD" }, \
-		{ NFS4ERR_RECLAIM_CONFLICT, "RECLAIM_CONFLICT" }, \
-		{ NFS4ERR_REJECT_DELEG, "REJECT_DELEG" }, \
-		{ NFS4ERR_REP_TOO_BIG, "REP_TOO_BIG" }, \
-		{ NFS4ERR_REP_TOO_BIG_TO_CACHE, \
-			"REP_TOO_BIG_TO_CACHE" }, \
-		{ NFS4ERR_REQ_TOO_BIG, "REQ_TOO_BIG" }, \
-		{ NFS4ERR_RESOURCE, "RESOURCE" }, \
-		{ NFS4ERR_RESTOREFH, "RESTOREFH" }, \
-		{ NFS4ERR_RETRY_UNCACHED_REP, "RETRY_UNCACHED_REP" }, \
-		{ NFS4ERR_RETURNCONFLICT, "RETURNCONFLICT" }, \
-		{ NFS4ERR_ROFS, "ROFS" }, \
-		{ NFS4ERR_SAME, "SAME" }, \
-		{ NFS4ERR_SHARE_DENIED, "SHARE_DENIED" }, \
-		{ NFS4ERR_SEQUENCE_POS, "SEQUENCE_POS" }, \
-		{ NFS4ERR_SEQ_FALSE_RETRY, "SEQ_FALSE_RETRY" }, \
-		{ NFS4ERR_SEQ_MISORDERED, "SEQ_MISORDERED" }, \
-		{ NFS4ERR_SERVERFAULT, "SERVERFAULT" }, \
-		{ NFS4ERR_STALE, "STALE" }, \
-		{ NFS4ERR_STALE_CLIENTID, "STALE_CLIENTID" }, \
-		{ NFS4ERR_STALE_STATEID, "STALE_STATEID" }, \
-		{ NFS4ERR_SYMLINK, "SYMLINK" }, \
-		{ NFS4ERR_TOOSMALL, "TOOSMALL" }, \
-		{ NFS4ERR_TOO_MANY_OPS, "TOO_MANY_OPS" }, \
-		{ NFS4ERR_UNKNOWN_LAYOUTTYPE, "UNKNOWN_LAYOUTTYPE" }, \
-		{ NFS4ERR_UNSAFE_COMPOUND, "UNSAFE_COMPOUND" }, \
-		{ NFS4ERR_WRONGSEC, "WRONGSEC" }, \
-		{ NFS4ERR_WRONG_CRED, "WRONG_CRED" }, \
-		{ NFS4ERR_WRONG_TYPE, "WRONG_TYPE" }, \
-		{ NFS4ERR_XDEV, "XDEV" }, \
-		/* ***** Internal to Linux NFS client ***** */ \
-		{ NFS4ERR_RESET_TO_MDS, "RESET_TO_MDS" }, \
-		{ NFS4ERR_RESET_TO_PNFS, "RESET_TO_PNFS" })
+#include <trace/events/nfs.h>
 
 #define show_nfs_fattr_flags(valid) \
 	__print_flags((unsigned long)valid, "|", \
@@ -355,7 +53,7 @@ DECLARE_EVENT_CLASS(nfs4_clientid_event,
 		TP_printk(
 			"error=%ld (%s) dstaddr=%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			__get_str(dstaddr)
 		)
 );
@@ -379,29 +77,6 @@ DEFINE_NFS4_CLIENTID_EVENT(nfs4_bind_conn_to_session);
 DEFINE_NFS4_CLIENTID_EVENT(nfs4_sequence);
 DEFINE_NFS4_CLIENTID_EVENT(nfs4_reclaim_complete);
 
-#define show_nfs4_sequence_status_flags(status) \
-	__print_flags((unsigned long)status, "|", \
-		{ SEQ4_STATUS_CB_PATH_DOWN, "CB_PATH_DOWN" }, \
-		{ SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING, \
-			"CB_GSS_CONTEXTS_EXPIRING" }, \
-		{ SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED, \
-			"CB_GSS_CONTEXTS_EXPIRED" }, \
-		{ SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, \
-			"EXPIRED_ALL_STATE_REVOKED" }, \
-		{ SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, \
-			"EXPIRED_SOME_STATE_REVOKED" }, \
-		{ SEQ4_STATUS_ADMIN_STATE_REVOKED, \
-			"ADMIN_STATE_REVOKED" }, \
-		{ SEQ4_STATUS_RECALLABLE_STATE_REVOKED,	 \
-			"RECALLABLE_STATE_REVOKED" }, \
-		{ SEQ4_STATUS_LEASE_MOVED, "LEASE_MOVED" }, \
-		{ SEQ4_STATUS_RESTART_RECLAIM_NEEDED, \
-			"RESTART_RECLAIM_NEEDED" }, \
-		{ SEQ4_STATUS_CB_PATH_DOWN_SESSION, \
-			"CB_PATH_DOWN_SESSION" }, \
-		{ SEQ4_STATUS_BACKCHANNEL_FAULT, \
-			"BACKCHANNEL_FAULT" })
-
 TRACE_EVENT(nfs4_sequence_done,
 		TP_PROTO(
 			const struct nfs4_session *session,
@@ -415,7 +90,7 @@ TRACE_EVENT(nfs4_sequence_done,
 			__field(unsigned int, seq_nr)
 			__field(unsigned int, highest_slotid)
 			__field(unsigned int, target_highest_slotid)
-			__field(unsigned int, status_flags)
+			__field(unsigned long, status_flags)
 			__field(unsigned long, error)
 		),
 
@@ -434,16 +109,16 @@ TRACE_EVENT(nfs4_sequence_done,
 		TP_printk(
 			"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
 			"highest_slotid=%u target_highest_slotid=%u "
-			"status_flags=%u (%s)",
+			"status_flags=0x%lx (%s)",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			__entry->session,
 			__entry->slot_nr,
 			__entry->seq_nr,
 			__entry->highest_slotid,
 			__entry->target_highest_slotid,
 			__entry->status_flags,
-			show_nfs4_sequence_status_flags(__entry->status_flags)
+			show_nfs4_seq4_status(__entry->status_flags)
 		)
 );
 
@@ -480,7 +155,7 @@ TRACE_EVENT(nfs4_cb_sequence,
 			"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
 			"highest_slotid=%u",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			__entry->session,
 			__entry->slot_nr,
 			__entry->seq_nr,
@@ -517,7 +192,7 @@ TRACE_EVENT(nfs4_cb_seqid_err,
 			"error=%ld (%s) session=0x%08x slot_nr=%u seq_nr=%u "
 			"highest_slotid=%u",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			__entry->session,
 			__entry->slot_nr,
 			__entry->seq_nr,
@@ -651,7 +326,7 @@ TRACE_EVENT(nfs4_state_mgr_failed,
 			"hostname=%s clp state=%s error=%ld (%s) section=%s",
 			__get_str(hostname),
 			show_nfs4_clp_state(__entry->state), -__entry->error,
-			show_nfsv4_errors(__entry->error), __get_str(section)
+			show_nfs4_status(__entry->error), __get_str(section)
 
 		)
 )
@@ -722,7 +397,7 @@ DECLARE_EVENT_CLASS(nfs4_xdr_event,
 		TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
 			  " xid=0x%08x error=%ld (%s) operation=%u",
 			__entry->task_id, __entry->client_id, __entry->xid,
-			-__entry->error, show_nfsv4_errors(__entry->error),
+			-__entry->error, show_nfs4_status(__entry->error),
 			__entry->op
 		)
 );
@@ -837,7 +512,7 @@ DECLARE_EVENT_CLASS(nfs4_open_event,
 			"name=%02x:%02x:%llu/%s stateid=%d:0x%08x "
 			"openstateid=%d:0x%08x",
 			 -__entry->error,
-			 show_nfsv4_errors(__entry->error),
+			 show_nfs4_status(__entry->error),
 			 __entry->flags,
 			 show_fs_fcntl_open_flags(__entry->flags),
 			 show_fs_fmode_flags(__entry->fmode),
@@ -941,7 +616,7 @@ TRACE_EVENT(nfs4_close,
 			"error=%ld (%s) fmode=%s fileid=%02x:%02x:%llu "
 			"fhandle=0x%08x openstateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			__entry->fmode ?  show_fs_fmode_flags(__entry->fmode) :
 					  "closed",
 			MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -996,7 +671,7 @@ DECLARE_EVENT_CLASS(nfs4_lock_event,
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"stateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			show_fs_fcntl_cmd(__entry->cmd),
 			show_fs_fcntl_lock_type(__entry->type),
 			(long long)__entry->start,
@@ -1072,7 +747,7 @@ TRACE_EVENT(nfs4_set_lock,
 			"fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"stateid=%d:0x%08x lockstateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			show_fs_fcntl_cmd(__entry->cmd),
 			show_fs_fcntl_lock_type(__entry->type),
 			(long long)__entry->start,
@@ -1238,7 +913,7 @@ TRACE_EVENT(nfs4_delegreturn_exit,
 			"error=%ld (%s) dev=%02x:%02x fhandle=0x%08x "
 			"stateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			__entry->fhandle,
 			__entry->stateid_seq, __entry->stateid_hash
@@ -1281,7 +956,7 @@ DECLARE_EVENT_CLASS(nfs4_test_stateid_event,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"stateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1328,7 +1003,7 @@ DECLARE_EVENT_CLASS(nfs4_lookup_event,
 		TP_printk(
 			"error=%ld (%s) name=%02x:%02x:%llu/%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->dir,
 			__get_str(name)
@@ -1375,7 +1050,7 @@ TRACE_EVENT(nfs4_lookupp,
 		TP_printk(
 			"error=%ld (%s) inode=%02x:%02x:%llu",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->ino
 		)
@@ -1414,7 +1089,7 @@ TRACE_EVENT(nfs4_rename,
 			"error=%ld (%s) oldname=%02x:%02x:%llu/%s "
 			"newname=%02x:%02x:%llu/%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->olddir,
 			__get_str(oldname),
@@ -1449,7 +1124,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_event,
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle
@@ -1507,7 +1182,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_event,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"stateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1560,7 +1235,7 @@ DECLARE_EVENT_CLASS(nfs4_getattr_event,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"valid=%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1616,7 +1291,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_callback_event,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"dstaddr=%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1677,7 +1352,7 @@ DECLARE_EVENT_CLASS(nfs4_inode_stateid_callback_event,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"stateid=%d:0x%08x dstaddr=%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1726,7 +1401,7 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event,
 
 		TP_printk(
 			"error=%ld (%s) id=%u name=%s",
-			-__entry->error, show_nfsv4_errors(__entry->error),
+			-__entry->error, show_nfs4_status(__entry->error),
 			__entry->id,
 			__get_str(name)
 		)
@@ -1804,7 +1479,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
 			"offset=%lld count=%u res=%u stateid=%d:0x%08x "
 			"layoutstateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1878,7 +1553,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
 			"offset=%lld count=%u res=%u stateid=%d:0x%08x "
 			"layoutstateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1942,7 +1617,7 @@ DECLARE_EVENT_CLASS(nfs4_commit_event,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"offset=%lld count=%u layoutstateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -1962,16 +1637,6 @@ DEFINE_NFS4_COMMIT_EVENT(nfs4_commit);
 #ifdef CONFIG_NFS_V4_1
 DEFINE_NFS4_COMMIT_EVENT(nfs4_pnfs_commit_ds);
 
-TRACE_DEFINE_ENUM(IOMODE_READ);
-TRACE_DEFINE_ENUM(IOMODE_RW);
-TRACE_DEFINE_ENUM(IOMODE_ANY);
-
-#define show_pnfs_iomode(iomode) \
-	__print_symbolic(iomode, \
-		{ IOMODE_READ, "READ" }, \
-		{ IOMODE_RW, "RW" }, \
-		{ IOMODE_ANY, "ANY" })
-
 TRACE_EVENT(nfs4_layoutget,
 		TP_PROTO(
 			const struct nfs_open_context *ctx,
@@ -2027,11 +1692,11 @@ TRACE_EVENT(nfs4_layoutget,
 			"iomode=%s offset=%llu count=%llu stateid=%d:0x%08x "
 			"layoutstateid=%d:0x%08x",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			show_pnfs_iomode(__entry->iomode),
+			show_pnfs_layout_iomode(__entry->iomode),
 			(unsigned long long)__entry->offset,
 			(unsigned long long)__entry->count,
 			__entry->stateid_seq, __entry->stateid_hash,
@@ -2125,7 +1790,7 @@ TRACE_EVENT(pnfs_update_layout,
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			show_pnfs_iomode(__entry->iomode),
+			show_pnfs_layout_iomode(__entry->iomode),
 			(unsigned long long)__entry->pos,
 			(unsigned long long)__entry->count,
 			__entry->layoutstateid_seq, __entry->layoutstateid_hash,
@@ -2179,7 +1844,7 @@ DECLARE_EVENT_CLASS(pnfs_layout_event,
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
-			show_pnfs_iomode(__entry->iomode),
+			show_pnfs_layout_iomode(__entry->iomode),
 			(unsigned long long)__entry->pos,
 			(unsigned long long)__entry->count,
 			__entry->layoutstateid_seq, __entry->layoutstateid_hash,
@@ -2324,7 +1989,7 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -2380,7 +2045,7 @@ TRACE_EVENT(ff_layout_commit_error,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"offset=%llu count=%u dstaddr=%s",
 			-__entry->error,
-			show_nfsv4_errors(__entry->error),
+			show_nfs4_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 331bcc0c0a75..76f04aa3367c 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -12,6 +12,7 @@
 #include <linux/iversion.h>
 
 #include <trace/events/fs.h>
+#include <trace/events/nfs.h>
 #include <trace/events/sunrpc_base.h>
 
 #define nfs_show_cache_validity(v) \
@@ -115,7 +116,7 @@ DECLARE_EVENT_CLASS(nfs_inode_event_done,
 			"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
 			"type=%u (%s) version=%llu size=%lld "
 			"cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s)",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -206,7 +207,7 @@ TRACE_EVENT(nfs_access_exit,
 			"type=%u (%s) version=%llu size=%lld "
 			"cache_validity=0x%lx (%s) nfs_flags=0x%lx (%s) "
 			"mask=0x%x permitted=0x%x",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
@@ -342,7 +343,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
 
 		TP_printk(
 			"error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			__entry->flags,
 			show_fs_lookup_flags(__entry->flags),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -433,7 +434,7 @@ TRACE_EVENT(nfs_atomic_open_exit,
 		TP_printk(
 			"error=%ld (%s) flags=0x%lx (%s) fmode=%s "
 			"name=%02x:%02x:%llu/%s",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			__entry->flags,
 			show_fs_fcntl_open_flags(__entry->flags),
 			show_fs_fmode_flags(__entry->fmode),
@@ -504,7 +505,7 @@ TRACE_EVENT(nfs_create_exit,
 
 		TP_printk(
 			"error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			__entry->flags,
 			show_fs_fcntl_open_flags(__entry->flags),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -574,7 +575,7 @@ DECLARE_EVENT_CLASS(nfs_directory_event_done,
 
 		TP_printk(
 			"error=%ld (%s) name=%02x:%02x:%llu/%s",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->dir,
 			__get_str(name)
@@ -664,7 +665,7 @@ TRACE_EVENT(nfs_link_exit,
 
 		TP_printk(
 			"error=%ld (%s) fileid=%02x:%02x:%llu name=%02x:%02x:%llu/%s",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			__entry->fileid,
 			MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -751,7 +752,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
 		TP_printk(
 			"error=%ld (%s) old_name=%02x:%02x:%llu/%s "
 			"new_name=%02x:%02x:%llu/%s",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->old_dir,
 			__get_str(old_name),
@@ -805,7 +806,7 @@ TRACE_EVENT(nfs_sillyrename_unlink,
 
 		TP_printk(
 			"error=%ld (%s) name=%02x:%02x:%llu/%s",
-			-__entry->error, nfs_show_status(__entry->error),
+			-__entry->error, show_nfs_status(__entry->error),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->dir,
 			__get_str(name)
@@ -1134,16 +1135,6 @@ TRACE_EVENT(nfs_pgio_error,
 	)
 );
 
-TRACE_DEFINE_ENUM(NFS_UNSTABLE);
-TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
-TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
-
-#define nfs_show_stable(stable) \
-	__print_symbolic(stable, \
-			{ NFS_UNSTABLE, "UNSTABLE" }, \
-			{ NFS_DATA_SYNC, "DATA_SYNC" }, \
-			{ NFS_FILE_SYNC, "FILE_SYNC" })
-
 TRACE_EVENT(nfs_initiate_write,
 		TP_PROTO(
 			const struct nfs_pgio_header *hdr
@@ -1157,7 +1148,7 @@ TRACE_EVENT(nfs_initiate_write,
 			__field(u64, fileid)
 			__field(loff_t, offset)
 			__field(u32, count)
-			__field(enum nfs3_stable_how, stable)
+			__field(unsigned long, stable)
 		),
 
 		TP_fast_assign(
@@ -1181,7 +1172,7 @@ TRACE_EVENT(nfs_initiate_write,
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
 			(long long)__entry->offset, __entry->count,
-			nfs_show_stable(__entry->stable)
+			show_nfs_stable_how(__entry->stable)
 		)
 );
 
@@ -1201,7 +1192,7 @@ TRACE_EVENT(nfs_writeback_done,
 			__field(u32, arg_count)
 			__field(u32, res_count)
 			__field(int, status)
-			__field(enum nfs3_stable_how, stable)
+			__field(unsigned long, stable)
 			__array(char, verifier, NFS4_VERIFIER_SIZE)
 		),
 
@@ -1234,8 +1225,8 @@ TRACE_EVENT(nfs_writeback_done,
 			__entry->fhandle,
 			(long long)__entry->offset, __entry->arg_count,
 			__entry->res_count, __entry->status,
-			nfs_show_stable(__entry->stable),
-			__print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+			show_nfs_stable_how(__entry->stable),
+			show_nfs4_verifier(__entry->verifier)
 		)
 );
 
@@ -1336,7 +1327,7 @@ TRACE_EVENT(nfs_commit_done,
 			__field(u64, fileid)
 			__field(loff_t, offset)
 			__field(int, status)
-			__field(enum nfs3_stable_how, stable)
+			__field(unsigned long, stable)
 			__array(char, verifier, NFS4_VERIFIER_SIZE)
 		),
 
@@ -1365,8 +1356,8 @@ TRACE_EVENT(nfs_commit_done,
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle,
 			(long long)__entry->offset, __entry->status,
-			nfs_show_stable(__entry->stable),
-			__print_hex_str(__entry->verifier, NFS4_VERIFIER_SIZE)
+			show_nfs_stable_how(__entry->stable),
+			show_nfs4_verifier(__entry->verifier)
 		)
 );
 
@@ -1403,76 +1394,6 @@ TRACE_EVENT(nfs_fh_to_dentry,
 		)
 );
 
-TRACE_DEFINE_ENUM(NFS_OK);
-TRACE_DEFINE_ENUM(NFSERR_PERM);
-TRACE_DEFINE_ENUM(NFSERR_NOENT);
-TRACE_DEFINE_ENUM(NFSERR_IO);
-TRACE_DEFINE_ENUM(NFSERR_NXIO);
-TRACE_DEFINE_ENUM(ECHILD);
-TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
-TRACE_DEFINE_ENUM(NFSERR_ACCES);
-TRACE_DEFINE_ENUM(NFSERR_EXIST);
-TRACE_DEFINE_ENUM(NFSERR_XDEV);
-TRACE_DEFINE_ENUM(NFSERR_NODEV);
-TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
-TRACE_DEFINE_ENUM(NFSERR_ISDIR);
-TRACE_DEFINE_ENUM(NFSERR_INVAL);
-TRACE_DEFINE_ENUM(NFSERR_FBIG);
-TRACE_DEFINE_ENUM(NFSERR_NOSPC);
-TRACE_DEFINE_ENUM(NFSERR_ROFS);
-TRACE_DEFINE_ENUM(NFSERR_MLINK);
-TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
-TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
-TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
-TRACE_DEFINE_ENUM(NFSERR_DQUOT);
-TRACE_DEFINE_ENUM(NFSERR_STALE);
-TRACE_DEFINE_ENUM(NFSERR_REMOTE);
-TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
-TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
-TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
-TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
-TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
-TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
-TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
-TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
-TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
-
-#define nfs_show_status(x) \
-	__print_symbolic(x, \
-			{ NFS_OK, "OK" }, \
-			{ NFSERR_PERM, "PERM" }, \
-			{ NFSERR_NOENT, "NOENT" }, \
-			{ NFSERR_IO, "IO" }, \
-			{ NFSERR_NXIO, "NXIO" }, \
-			{ ECHILD, "CHILD" }, \
-			{ NFSERR_EAGAIN, "AGAIN" }, \
-			{ NFSERR_ACCES, "ACCES" }, \
-			{ NFSERR_EXIST, "EXIST" }, \
-			{ NFSERR_XDEV, "XDEV" }, \
-			{ NFSERR_NODEV, "NODEV" }, \
-			{ NFSERR_NOTDIR, "NOTDIR" }, \
-			{ NFSERR_ISDIR, "ISDIR" }, \
-			{ NFSERR_INVAL, "INVAL" }, \
-			{ NFSERR_FBIG, "FBIG" }, \
-			{ NFSERR_NOSPC, "NOSPC" }, \
-			{ NFSERR_ROFS, "ROFS" }, \
-			{ NFSERR_MLINK, "MLINK" }, \
-			{ NFSERR_OPNOTSUPP, "OPNOTSUPP" }, \
-			{ NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
-			{ NFSERR_NOTEMPTY, "NOTEMPTY" }, \
-			{ NFSERR_DQUOT, "DQUOT" }, \
-			{ NFSERR_STALE, "STALE" }, \
-			{ NFSERR_REMOTE, "REMOTE" }, \
-			{ NFSERR_WFLUSH, "WFLUSH" }, \
-			{ NFSERR_BADHANDLE, "BADHANDLE" }, \
-			{ NFSERR_NOT_SYNC, "NOTSYNC" }, \
-			{ NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
-			{ NFSERR_NOTSUPP, "NOTSUPP" }, \
-			{ NFSERR_TOOSMALL, "TOOSMALL" }, \
-			{ NFSERR_SERVERFAULT, "REMOTEIO" }, \
-			{ NFSERR_BADTYPE, "BADTYPE" }, \
-			{ NFSERR_JUKEBOX, "JUKEBOX" })
-
 DECLARE_EVENT_CLASS(nfs_xdr_event,
 		TP_PROTO(
 			const struct xdr_stream *xdr,
@@ -1512,7 +1433,7 @@ DECLARE_EVENT_CLASS(nfs_xdr_event,
 			__entry->task_id, __entry->client_id, __entry->xid,
 			__get_str(program), __entry->version,
 			__get_str(procedure), -__entry->error,
-			nfs_show_status(__entry->error)
+			show_nfs_status(__entry->error)
 		)
 );
 #define DEFINE_NFS_XDR_EVENT(name) \
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index a0f6ff094b3a..f4d7548d67b2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -82,10 +82,6 @@ enum pnfs_try_status {
 	PNFS_TRY_AGAIN     = 2,
 };
 
-/* error codes for internal use */
-#define NFS4ERR_RESET_TO_MDS   12001
-#define NFS4ERR_RESET_TO_PNFS  12002
-
 #ifdef CONFIG_NFS_V4_1
 
 #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 538520957a81..f1e0d3c51bc2 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,6 +9,7 @@
 #define _NFSD_TRACE_H
 
 #include <linux/tracepoint.h>
+
 #include "export.h"
 #include "nfsfh.h"
 
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 15004c469807..5662d8be04eb 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -292,6 +292,10 @@ enum nfsstat4 {
 	NFS4ERR_XATTR2BIG      = 10096,
 };
 
+/* error codes for internal client use */
+#define NFS4ERR_RESET_TO_MDS   12001
+#define NFS4ERR_RESET_TO_PNFS  12002
+
 static inline bool seqid_mutating_err(u32 err)
 {
 	/* See RFC 7530, section 9.1.7 */
diff --git a/include/trace/events/nfs.h b/include/trace/events/nfs.h
new file mode 100644
index 000000000000..09ffdbb04134
--- /dev/null
+++ b/include/trace/events/nfs.h
@@ -0,0 +1,375 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Display helpers for NFS protocol elements
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2020, Oracle and/or its affiliates.
+ */
+
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <uapi/linux/nfs.h>
+
+TRACE_DEFINE_ENUM(NFS_OK);
+TRACE_DEFINE_ENUM(NFSERR_PERM);
+TRACE_DEFINE_ENUM(NFSERR_NOENT);
+TRACE_DEFINE_ENUM(NFSERR_IO);
+TRACE_DEFINE_ENUM(NFSERR_NXIO);
+TRACE_DEFINE_ENUM(NFSERR_EAGAIN);
+TRACE_DEFINE_ENUM(NFSERR_ACCES);
+TRACE_DEFINE_ENUM(NFSERR_EXIST);
+TRACE_DEFINE_ENUM(NFSERR_XDEV);
+TRACE_DEFINE_ENUM(NFSERR_NODEV);
+TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
+TRACE_DEFINE_ENUM(NFSERR_ISDIR);
+TRACE_DEFINE_ENUM(NFSERR_INVAL);
+TRACE_DEFINE_ENUM(NFSERR_FBIG);
+TRACE_DEFINE_ENUM(NFSERR_NOSPC);
+TRACE_DEFINE_ENUM(NFSERR_ROFS);
+TRACE_DEFINE_ENUM(NFSERR_MLINK);
+TRACE_DEFINE_ENUM(NFSERR_OPNOTSUPP);
+TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
+TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
+TRACE_DEFINE_ENUM(NFSERR_DQUOT);
+TRACE_DEFINE_ENUM(NFSERR_STALE);
+TRACE_DEFINE_ENUM(NFSERR_REMOTE);
+TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
+TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
+TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
+TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
+TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
+TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
+TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
+TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
+TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
+
+#define show_nfs_status(x) \
+	__print_symbolic(x, \
+		{ NFS_OK,			"OK" }, \
+		{ NFSERR_PERM,			"PERM" }, \
+		{ NFSERR_NOENT,			"NOENT" }, \
+		{ NFSERR_IO,			"IO" }, \
+		{ NFSERR_NXIO,			"NXIO" }, \
+		{ ECHILD,			"CHILD" }, \
+		{ NFSERR_EAGAIN,		"AGAIN" }, \
+		{ NFSERR_ACCES,			"ACCES" }, \
+		{ NFSERR_EXIST,			"EXIST" }, \
+		{ NFSERR_XDEV,			"XDEV" }, \
+		{ NFSERR_NODEV,			"NODEV" }, \
+		{ NFSERR_NOTDIR,		"NOTDIR" }, \
+		{ NFSERR_ISDIR,			"ISDIR" }, \
+		{ NFSERR_INVAL,			"INVAL" }, \
+		{ NFSERR_FBIG,			"FBIG" }, \
+		{ NFSERR_NOSPC,			"NOSPC" }, \
+		{ NFSERR_ROFS,			"ROFS" }, \
+		{ NFSERR_MLINK,			"MLINK" }, \
+		{ NFSERR_OPNOTSUPP,		"OPNOTSUPP" }, \
+		{ NFSERR_NAMETOOLONG,		"NAMETOOLONG" }, \
+		{ NFSERR_NOTEMPTY,		"NOTEMPTY" }, \
+		{ NFSERR_DQUOT,			"DQUOT" }, \
+		{ NFSERR_STALE,			"STALE" }, \
+		{ NFSERR_REMOTE,		"REMOTE" }, \
+		{ NFSERR_WFLUSH,		"WFLUSH" }, \
+		{ NFSERR_BADHANDLE,		"BADHANDLE" }, \
+		{ NFSERR_NOT_SYNC,		"NOTSYNC" }, \
+		{ NFSERR_BAD_COOKIE,		"BADCOOKIE" }, \
+		{ NFSERR_NOTSUPP,		"NOTSUPP" }, \
+		{ NFSERR_TOOSMALL,		"TOOSMALL" }, \
+		{ NFSERR_SERVERFAULT,		"REMOTEIO" }, \
+		{ NFSERR_BADTYPE,		"BADTYPE" }, \
+		{ NFSERR_JUKEBOX,		"JUKEBOX" })
+
+TRACE_DEFINE_ENUM(NFS_UNSTABLE);
+TRACE_DEFINE_ENUM(NFS_DATA_SYNC);
+TRACE_DEFINE_ENUM(NFS_FILE_SYNC);
+
+#define show_nfs_stable_how(x) \
+	__print_symbolic(x, \
+		{ NFS_UNSTABLE,			"UNSTABLE" }, \
+		{ NFS_DATA_SYNC,		"DATA_SYNC" }, \
+		{ NFS_FILE_SYNC,		"FILE_SYNC" })
+
+TRACE_DEFINE_ENUM(NFS4_OK);
+TRACE_DEFINE_ENUM(NFS4ERR_ACCESS);
+TRACE_DEFINE_ENUM(NFS4ERR_ATTRNOTSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_ADMIN_REVOKED);
+TRACE_DEFINE_ENUM(NFS4ERR_BACK_CHAN_BUSY);
+TRACE_DEFINE_ENUM(NFS4ERR_BADCHAR);
+TRACE_DEFINE_ENUM(NFS4ERR_BADHANDLE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADIOMODE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADLAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_BADLABEL);
+TRACE_DEFINE_ENUM(NFS4ERR_BADNAME);
+TRACE_DEFINE_ENUM(NFS4ERR_BADOWNER);
+TRACE_DEFINE_ENUM(NFS4ERR_BADSESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_BADSLOT);
+TRACE_DEFINE_ENUM(NFS4ERR_BADTYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_BADXDR);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_COOKIE);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_HIGH_SLOT);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_RANGE);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_SEQID);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_SESSION_DIGEST);
+TRACE_DEFINE_ENUM(NFS4ERR_BAD_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_CB_PATH_DOWN);
+TRACE_DEFINE_ENUM(NFS4ERR_CLID_INUSE);
+TRACE_DEFINE_ENUM(NFS4ERR_CLIENTID_BUSY);
+TRACE_DEFINE_ENUM(NFS4ERR_COMPLETE_ALREADY);
+TRACE_DEFINE_ENUM(NFS4ERR_CONN_NOT_BOUND_TO_SESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_DEADLOCK);
+TRACE_DEFINE_ENUM(NFS4ERR_DEADSESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_DELAY);
+TRACE_DEFINE_ENUM(NFS4ERR_DELEG_ALREADY_WANTED);
+TRACE_DEFINE_ENUM(NFS4ERR_DELEG_REVOKED);
+TRACE_DEFINE_ENUM(NFS4ERR_DENIED);
+TRACE_DEFINE_ENUM(NFS4ERR_DIRDELEG_UNAVAIL);
+TRACE_DEFINE_ENUM(NFS4ERR_DQUOT);
+TRACE_DEFINE_ENUM(NFS4ERR_ENCR_ALG_UNSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_EXIST);
+TRACE_DEFINE_ENUM(NFS4ERR_EXPIRED);
+TRACE_DEFINE_ENUM(NFS4ERR_FBIG);
+TRACE_DEFINE_ENUM(NFS4ERR_FHEXPIRED);
+TRACE_DEFINE_ENUM(NFS4ERR_FILE_OPEN);
+TRACE_DEFINE_ENUM(NFS4ERR_GRACE);
+TRACE_DEFINE_ENUM(NFS4ERR_HASH_ALG_UNSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_INVAL);
+TRACE_DEFINE_ENUM(NFS4ERR_IO);
+TRACE_DEFINE_ENUM(NFS4ERR_ISDIR);
+TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTTRYLATER);
+TRACE_DEFINE_ENUM(NFS4ERR_LAYOUTUNAVAILABLE);
+TRACE_DEFINE_ENUM(NFS4ERR_LEASE_MOVED);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCKED);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCKS_HELD);
+TRACE_DEFINE_ENUM(NFS4ERR_LOCK_RANGE);
+TRACE_DEFINE_ENUM(NFS4ERR_MINOR_VERS_MISMATCH);
+TRACE_DEFINE_ENUM(NFS4ERR_MLINK);
+TRACE_DEFINE_ENUM(NFS4ERR_MOVED);
+TRACE_DEFINE_ENUM(NFS4ERR_NAMETOOLONG);
+TRACE_DEFINE_ENUM(NFS4ERR_NOENT);
+TRACE_DEFINE_ENUM(NFS4ERR_NOFILEHANDLE);
+TRACE_DEFINE_ENUM(NFS4ERR_NOMATCHING_LAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_NOSPC);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTDIR);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTEMPTY);
+TRACE_DEFINE_ENUM(NFS4ERR_NOTSUPP);
+TRACE_DEFINE_ENUM(NFS4ERR_NOT_ONLY_OP);
+TRACE_DEFINE_ENUM(NFS4ERR_NOT_SAME);
+TRACE_DEFINE_ENUM(NFS4ERR_NO_GRACE);
+TRACE_DEFINE_ENUM(NFS4ERR_NXIO);
+TRACE_DEFINE_ENUM(NFS4ERR_OLD_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_OPENMODE);
+TRACE_DEFINE_ENUM(NFS4ERR_OP_ILLEGAL);
+TRACE_DEFINE_ENUM(NFS4ERR_OP_NOT_IN_SESSION);
+TRACE_DEFINE_ENUM(NFS4ERR_PERM);
+TRACE_DEFINE_ENUM(NFS4ERR_PNFS_IO_HOLE);
+TRACE_DEFINE_ENUM(NFS4ERR_PNFS_NO_LAYOUT);
+TRACE_DEFINE_ENUM(NFS4ERR_RECALLCONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_BAD);
+TRACE_DEFINE_ENUM(NFS4ERR_RECLAIM_CONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_REJECT_DELEG);
+TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG);
+TRACE_DEFINE_ENUM(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+TRACE_DEFINE_ENUM(NFS4ERR_REQ_TOO_BIG);
+TRACE_DEFINE_ENUM(NFS4ERR_RESOURCE);
+TRACE_DEFINE_ENUM(NFS4ERR_RESTOREFH);
+TRACE_DEFINE_ENUM(NFS4ERR_RETRY_UNCACHED_REP);
+TRACE_DEFINE_ENUM(NFS4ERR_RETURNCONFLICT);
+TRACE_DEFINE_ENUM(NFS4ERR_ROFS);
+TRACE_DEFINE_ENUM(NFS4ERR_SAME);
+TRACE_DEFINE_ENUM(NFS4ERR_SHARE_DENIED);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQUENCE_POS);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQ_FALSE_RETRY);
+TRACE_DEFINE_ENUM(NFS4ERR_SEQ_MISORDERED);
+TRACE_DEFINE_ENUM(NFS4ERR_SERVERFAULT);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE_CLIENTID);
+TRACE_DEFINE_ENUM(NFS4ERR_STALE_STATEID);
+TRACE_DEFINE_ENUM(NFS4ERR_SYMLINK);
+TRACE_DEFINE_ENUM(NFS4ERR_TOOSMALL);
+TRACE_DEFINE_ENUM(NFS4ERR_TOO_MANY_OPS);
+TRACE_DEFINE_ENUM(NFS4ERR_UNKNOWN_LAYOUTTYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_UNSAFE_COMPOUND);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONGSEC);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONG_CRED);
+TRACE_DEFINE_ENUM(NFS4ERR_WRONG_TYPE);
+TRACE_DEFINE_ENUM(NFS4ERR_XDEV);
+
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_MDS);
+TRACE_DEFINE_ENUM(NFS4ERR_RESET_TO_PNFS);
+
+#define show_nfs4_status(x) \
+	__print_symbolic(x, \
+		{ NFS4_OK,			"OK" }, \
+		{ EPERM,			"EPERM" }, \
+		{ ENOENT,			"ENOENT" }, \
+		{ EIO,				"EIO" }, \
+		{ ENXIO,			"ENXIO" }, \
+		{ EACCES,			"EACCES" }, \
+		{ EEXIST,			"EEXIST" }, \
+		{ EXDEV,			"EXDEV" }, \
+		{ ENOTDIR,			"ENOTDIR" }, \
+		{ EISDIR,			"EISDIR" }, \
+		{ EFBIG,			"EFBIG" }, \
+		{ ENOSPC,			"ENOSPC" }, \
+		{ EROFS,			"EROFS" }, \
+		{ EMLINK,			"EMLINK" }, \
+		{ ENAMETOOLONG,			"ENAMETOOLONG" }, \
+		{ ENOTEMPTY,			"ENOTEMPTY" }, \
+		{ EDQUOT,			"EDQUOT" }, \
+		{ ESTALE,			"ESTALE" }, \
+		{ EBADHANDLE,			"EBADHANDLE" }, \
+		{ EBADCOOKIE,			"EBADCOOKIE" }, \
+		{ ENOTSUPP,			"ENOTSUPP" }, \
+		{ ETOOSMALL,			"ETOOSMALL" }, \
+		{ EREMOTEIO,			"EREMOTEIO" }, \
+		{ EBADTYPE,			"EBADTYPE" }, \
+		{ EAGAIN,			"EAGAIN" }, \
+		{ ELOOP,			"ELOOP" }, \
+		{ EOPNOTSUPP,			"EOPNOTSUPP" }, \
+		{ EDEADLK,			"EDEADLK" }, \
+		{ ENOMEM,			"ENOMEM" }, \
+		{ EKEYEXPIRED,			"EKEYEXPIRED" }, \
+		{ ETIMEDOUT,			"ETIMEDOUT" }, \
+		{ ERESTARTSYS,			"ERESTARTSYS" }, \
+		{ ECONNREFUSED,			"ECONNREFUSED" }, \
+		{ ECONNRESET,			"ECONNRESET" }, \
+		{ ENETUNREACH,			"ENETUNREACH" }, \
+		{ EHOSTUNREACH,			"EHOSTUNREACH" }, \
+		{ EHOSTDOWN,			"EHOSTDOWN" }, \
+		{ EPIPE,			"EPIPE" }, \
+		{ EPFNOSUPPORT,			"EPFNOSUPPORT" }, \
+		{ EPROTONOSUPPORT,		"EPROTONOSUPPORT" }, \
+		{ NFS4ERR_ACCESS,		"ACCESS" }, \
+		{ NFS4ERR_ATTRNOTSUPP,		"ATTRNOTSUPP" }, \
+		{ NFS4ERR_ADMIN_REVOKED,	"ADMIN_REVOKED" }, \
+		{ NFS4ERR_BACK_CHAN_BUSY,	"BACK_CHAN_BUSY" }, \
+		{ NFS4ERR_BADCHAR,		"BADCHAR" }, \
+		{ NFS4ERR_BADHANDLE,		"BADHANDLE" }, \
+		{ NFS4ERR_BADIOMODE,		"BADIOMODE" }, \
+		{ NFS4ERR_BADLAYOUT,		"BADLAYOUT" }, \
+		{ NFS4ERR_BADLABEL,		"BADLABEL" }, \
+		{ NFS4ERR_BADNAME,		"BADNAME" }, \
+		{ NFS4ERR_BADOWNER,		"BADOWNER" }, \
+		{ NFS4ERR_BADSESSION,		"BADSESSION" }, \
+		{ NFS4ERR_BADSLOT,		"BADSLOT" }, \
+		{ NFS4ERR_BADTYPE,		"BADTYPE" }, \
+		{ NFS4ERR_BADXDR,		"BADXDR" }, \
+		{ NFS4ERR_BAD_COOKIE,		"BAD_COOKIE" }, \
+		{ NFS4ERR_BAD_HIGH_SLOT,	"BAD_HIGH_SLOT" }, \
+		{ NFS4ERR_BAD_RANGE,		"BAD_RANGE" }, \
+		{ NFS4ERR_BAD_SEQID,		"BAD_SEQID" }, \
+		{ NFS4ERR_BAD_SESSION_DIGEST,	"BAD_SESSION_DIGEST" }, \
+		{ NFS4ERR_BAD_STATEID,		"BAD_STATEID" }, \
+		{ NFS4ERR_CB_PATH_DOWN,		"CB_PATH_DOWN" }, \
+		{ NFS4ERR_CLID_INUSE,		"CLID_INUSE" }, \
+		{ NFS4ERR_CLIENTID_BUSY,	"CLIENTID_BUSY" }, \
+		{ NFS4ERR_COMPLETE_ALREADY,	"COMPLETE_ALREADY" }, \
+		{ NFS4ERR_CONN_NOT_BOUND_TO_SESSION, "CONN_NOT_BOUND_TO_SESSION" }, \
+		{ NFS4ERR_DEADLOCK,		"DEADLOCK" }, \
+		{ NFS4ERR_DEADSESSION,		"DEAD_SESSION" }, \
+		{ NFS4ERR_DELAY,		"DELAY" }, \
+		{ NFS4ERR_DELEG_ALREADY_WANTED,	"DELEG_ALREADY_WANTED" }, \
+		{ NFS4ERR_DELEG_REVOKED,	"DELEG_REVOKED" }, \
+		{ NFS4ERR_DENIED,		"DENIED" }, \
+		{ NFS4ERR_DIRDELEG_UNAVAIL,	"DIRDELEG_UNAVAIL" }, \
+		{ NFS4ERR_DQUOT,		"DQUOT" }, \
+		{ NFS4ERR_ENCR_ALG_UNSUPP,	"ENCR_ALG_UNSUPP" }, \
+		{ NFS4ERR_EXIST,		"EXIST" }, \
+		{ NFS4ERR_EXPIRED,		"EXPIRED" }, \
+		{ NFS4ERR_FBIG,			"FBIG" }, \
+		{ NFS4ERR_FHEXPIRED,		"FHEXPIRED" }, \
+		{ NFS4ERR_FILE_OPEN,		"FILE_OPEN" }, \
+		{ NFS4ERR_GRACE,		"GRACE" }, \
+		{ NFS4ERR_HASH_ALG_UNSUPP,	"HASH_ALG_UNSUPP" }, \
+		{ NFS4ERR_INVAL,		"INVAL" }, \
+		{ NFS4ERR_IO,			"IO" }, \
+		{ NFS4ERR_ISDIR,		"ISDIR" }, \
+		{ NFS4ERR_LAYOUTTRYLATER,	"LAYOUTTRYLATER" }, \
+		{ NFS4ERR_LAYOUTUNAVAILABLE,	"LAYOUTUNAVAILABLE" }, \
+		{ NFS4ERR_LEASE_MOVED,		"LEASE_MOVED" }, \
+		{ NFS4ERR_LOCKED,		"LOCKED" }, \
+		{ NFS4ERR_LOCKS_HELD,		"LOCKS_HELD" }, \
+		{ NFS4ERR_LOCK_RANGE,		"LOCK_RANGE" }, \
+		{ NFS4ERR_MINOR_VERS_MISMATCH,	"MINOR_VERS_MISMATCH" }, \
+		{ NFS4ERR_MLINK,		"MLINK" }, \
+		{ NFS4ERR_MOVED,		"MOVED" }, \
+		{ NFS4ERR_NAMETOOLONG,		"NAMETOOLONG" }, \
+		{ NFS4ERR_NOENT,		"NOENT" }, \
+		{ NFS4ERR_NOFILEHANDLE,		"NOFILEHANDLE" }, \
+		{ NFS4ERR_NOMATCHING_LAYOUT,	"NOMATCHING_LAYOUT" }, \
+		{ NFS4ERR_NOSPC,		"NOSPC" }, \
+		{ NFS4ERR_NOTDIR,		"NOTDIR" }, \
+		{ NFS4ERR_NOTEMPTY,		"NOTEMPTY" }, \
+		{ NFS4ERR_NOTSUPP,		"NOTSUPP" }, \
+		{ NFS4ERR_NOT_ONLY_OP,		"NOT_ONLY_OP" }, \
+		{ NFS4ERR_NOT_SAME,		"NOT_SAME" }, \
+		{ NFS4ERR_NO_GRACE,		"NO_GRACE" }, \
+		{ NFS4ERR_NXIO,			"NXIO" }, \
+		{ NFS4ERR_OLD_STATEID,		"OLD_STATEID" }, \
+		{ NFS4ERR_OPENMODE,		"OPENMODE" }, \
+		{ NFS4ERR_OP_ILLEGAL,		"OP_ILLEGAL" }, \
+		{ NFS4ERR_OP_NOT_IN_SESSION,	"OP_NOT_IN_SESSION" }, \
+		{ NFS4ERR_PERM,			"PERM" }, \
+		{ NFS4ERR_PNFS_IO_HOLE,		"PNFS_IO_HOLE" }, \
+		{ NFS4ERR_PNFS_NO_LAYOUT,	"PNFS_NO_LAYOUT" }, \
+		{ NFS4ERR_RECALLCONFLICT,	"RECALLCONFLICT" }, \
+		{ NFS4ERR_RECLAIM_BAD,		"RECLAIM_BAD" }, \
+		{ NFS4ERR_RECLAIM_CONFLICT,	"RECLAIM_CONFLICT" }, \
+		{ NFS4ERR_REJECT_DELEG,		"REJECT_DELEG" }, \
+		{ NFS4ERR_REP_TOO_BIG,		"REP_TOO_BIG" }, \
+		{ NFS4ERR_REP_TOO_BIG_TO_CACHE,	"REP_TOO_BIG_TO_CACHE" }, \
+		{ NFS4ERR_REQ_TOO_BIG,		"REQ_TOO_BIG" }, \
+		{ NFS4ERR_RESOURCE,		"RESOURCE" }, \
+		{ NFS4ERR_RESTOREFH,		"RESTOREFH" }, \
+		{ NFS4ERR_RETRY_UNCACHED_REP,	"RETRY_UNCACHED_REP" }, \
+		{ NFS4ERR_RETURNCONFLICT,	"RETURNCONFLICT" }, \
+		{ NFS4ERR_ROFS,			"ROFS" }, \
+		{ NFS4ERR_SAME,			"SAME" }, \
+		{ NFS4ERR_SHARE_DENIED,		"SHARE_DENIED" }, \
+		{ NFS4ERR_SEQUENCE_POS,		"SEQUENCE_POS" }, \
+		{ NFS4ERR_SEQ_FALSE_RETRY,	"SEQ_FALSE_RETRY" }, \
+		{ NFS4ERR_SEQ_MISORDERED,	"SEQ_MISORDERED" }, \
+		{ NFS4ERR_SERVERFAULT,		"SERVERFAULT" }, \
+		{ NFS4ERR_STALE,		"STALE" }, \
+		{ NFS4ERR_STALE_CLIENTID,	"STALE_CLIENTID" }, \
+		{ NFS4ERR_STALE_STATEID,	"STALE_STATEID" }, \
+		{ NFS4ERR_SYMLINK,		"SYMLINK" }, \
+		{ NFS4ERR_TOOSMALL,		"TOOSMALL" }, \
+		{ NFS4ERR_TOO_MANY_OPS,		"TOO_MANY_OPS" }, \
+		{ NFS4ERR_UNKNOWN_LAYOUTTYPE,	"UNKNOWN_LAYOUTTYPE" }, \
+		{ NFS4ERR_UNSAFE_COMPOUND,	"UNSAFE_COMPOUND" }, \
+		{ NFS4ERR_WRONGSEC,		"WRONGSEC" }, \
+		{ NFS4ERR_WRONG_CRED,		"WRONG_CRED" }, \
+		{ NFS4ERR_WRONG_TYPE,		"WRONG_TYPE" }, \
+		{ NFS4ERR_XDEV,			"XDEV" }, \
+		/* ***** Internal to Linux NFS client ***** */ \
+		{ NFS4ERR_RESET_TO_MDS,		"RESET_TO_MDS" }, \
+		{ NFS4ERR_RESET_TO_PNFS,	"RESET_TO_PNFS" })
+
+#define show_nfs4_verifier(x) \
+	__print_hex_str(x, NFS4_VERIFIER_SIZE)
+
+TRACE_DEFINE_ENUM(IOMODE_READ);
+TRACE_DEFINE_ENUM(IOMODE_RW);
+TRACE_DEFINE_ENUM(IOMODE_ANY);
+
+#define show_pnfs_layout_iomode(x) \
+	__print_symbolic(x, \
+		{ IOMODE_READ,			"READ" }, \
+		{ IOMODE_RW,			"RW" }, \
+		{ IOMODE_ANY,			"ANY" })
+
+#define show_nfs4_seq4_status(x) \
+	__print_flags(x, "|", \
+		{ SEQ4_STATUS_CB_PATH_DOWN,		"CB_PATH_DOWN" }, \
+		{ SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING,	"CB_GSS_CONTEXTS_EXPIRING" }, \
+		{ SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRED,	"CB_GSS_CONTEXTS_EXPIRED" }, \
+		{ SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED, "EXPIRED_ALL_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED, "EXPIRED_SOME_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_ADMIN_STATE_REVOKED,	"ADMIN_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_RECALLABLE_STATE_REVOKED,	"RECALLABLE_STATE_REVOKED" }, \
+		{ SEQ4_STATUS_LEASE_MOVED,		"LEASE_MOVED" }, \
+		{ SEQ4_STATUS_RESTART_RECLAIM_NEEDED,	"RESTART_RECLAIM_NEEDED" }, \
+		{ SEQ4_STATUS_CB_PATH_DOWN_SESSION,	"CB_PATH_DOWN_SESSION" }, \
+		{ SEQ4_STATUS_BACKCHANNEL_FAULT,	"BACKCHANNEL_FAULT" })
-- 
cgit v1.2.3


From 5fe11512cdc24ccc66ac5da3c815ac9e59449abc Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 2 Nov 2021 16:33:13 -0700
Subject: Input: remove unused header <linux/input/cy8ctmg110_pdata.h>

Commit 83b41248ed04 ("Input: cy8ctmg110_ts - switch to using gpiod API")
remove the last use of <linux/input/cy8ctmg110_pdata.h> but left the header
file behind.  Nothing uses it now, delete it.

Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Link: https://lore.kernel.org/r/20211102220203.940290-6-corbet@lwn.net
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 include/linux/input/cy8ctmg110_pdata.h | 10 ----------
 1 file changed, 10 deletions(-)
 delete mode 100644 include/linux/input/cy8ctmg110_pdata.h

(limited to 'include/linux')

diff --git a/include/linux/input/cy8ctmg110_pdata.h b/include/linux/input/cy8ctmg110_pdata.h
deleted file mode 100644
index ee1d44545f30..000000000000
--- a/include/linux/input/cy8ctmg110_pdata.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_CY8CTMG110_PDATA_H
-#define _LINUX_CY8CTMG110_PDATA_H
-
-struct cy8ctmg110_pdata
-{
-	int reset_pin;		/* Reset pin is wired to this GPIO (optional) */
-};
-
-#endif
-- 
cgit v1.2.3


From c4777efa751d293e369aec464ce6875e957be255 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 1 Nov 2021 17:45:55 -0700
Subject: net: add and use skb_unclone_keeptruesize() helper

While commit 097b9146c0e2 ("net: fix up truesize of cloned
skb in skb_prepare_for_shift()") fixed immediate issues found
when KFENCE was enabled/tested, there are still similar issues,
when tcp_trim_head() hits KFENCE while the master skb
is cloned.

This happens under heavy networking TX workloads,
when the TX completion might be delayed after incoming ACK.

This patch fixes the WARNING in sk_stream_kill_queues
when sk->sk_mem_queued/sk->sk_forward_alloc are not zero.

Fixes: d3fb45f370d9 ("mm, kfence: insert KFENCE hooks for SLAB")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20211102004555.1359210-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/skbuff.h | 16 ++++++++++++++++
 net/core/skbuff.c      | 14 +-------------
 net/ipv4/tcp_output.c  |  6 +++---
 3 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0bd6520329f6..a63e13082397 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1675,6 +1675,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
 	return 0;
 }
 
+/* This variant of skb_unclone() makes sure skb->truesize is not changed */
+static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
+{
+	might_sleep_if(gfpflags_allow_blocking(pri));
+
+	if (skb_cloned(skb)) {
+		unsigned int save = skb->truesize;
+		int res;
+
+		res = pskb_expand_head(skb, 0, 0, pri);
+		skb->truesize = save;
+		return res;
+	}
+	return 0;
+}
+
 /**
  *	skb_header_cloned - is the header a clone
  *	@skb: buffer to check
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 67a9188d8a49..3ec42cdee16a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3449,19 +3449,7 @@ EXPORT_SYMBOL(skb_split);
  */
 static int skb_prepare_for_shift(struct sk_buff *skb)
 {
-	int ret = 0;
-
-	if (skb_cloned(skb)) {
-		/* Save and restore truesize: pskb_expand_head() may reallocate
-		 * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
-		 * cannot change truesize at this point.
-		 */
-		unsigned int save_truesize = skb->truesize;
-
-		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
-		skb->truesize = save_truesize;
-	}
-	return ret;
+	return skb_unclone_keeptruesize(skb, GFP_ATOMIC);
 }
 
 /**
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 6fbbf1558033..76cc1641beb4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1559,7 +1559,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
 		return -ENOMEM;
 	}
 
-	if (skb_unclone(skb, gfp))
+	if (skb_unclone_keeptruesize(skb, gfp))
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
@@ -1667,7 +1667,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
 	u32 delta_truesize;
 
-	if (skb_unclone(skb, GFP_ATOMIC))
+	if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
 		return -ENOMEM;
 
 	delta_truesize = __pskb_trim_head(skb, len);
@@ -3166,7 +3166,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
 				 cur_mss, GFP_ATOMIC))
 			return -ENOMEM; /* We'll try again later. */
 	} else {
-		if (skb_unclone(skb, GFP_ATOMIC))
+		if (skb_unclone_keeptruesize(skb, GFP_ATOMIC))
 			return -ENOMEM;
 
 		diff = tcp_skb_pcount(skb);
-- 
cgit v1.2.3


From 0dc54bd4d6e03be1f0b678c4297170b79f1a44ab Mon Sep 17 00:00:00 2001
From: Dominique Martinet <asmadeus@codewreck.org>
Date: Wed, 3 Nov 2021 17:34:05 +0900
Subject: fscache_cookie_enabled: check cookie is valid before accessing it

fscache_cookie_enabled() could be called on NULL cookies and cause a
null pointer dereference when accessing cookie flags: just make sure
the cookie is valid first

Suggested-by: David Howells <dhowells@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
---
 include/linux/fscache.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index a4dab5998613..3b2282c157f7 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -167,7 +167,7 @@ struct fscache_cookie {
 
 static inline bool fscache_cookie_enabled(struct fscache_cookie *cookie)
 {
-	return test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
+	return fscache_cookie_valid(cookie) && test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags);
 }
 
 /*
-- 
cgit v1.2.3


From c081d53f97a1a90a38e4296dd3d6fda5e38dca2c Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 2 Nov 2021 08:02:47 -0400
Subject: security: pass asoc to sctp_assoc_request and sctp_sk_clone

This patch is to move secid and peer_secid from endpoint to association,
and pass asoc to sctp_assoc_request and sctp_sk_clone instead of ep. As
ep is the local endpoint and asoc represents a connection, and in SCTP
one sk/ep could have multiple asoc/connection, saving secid/peer_secid
for new asoc will overwrite the old asoc's.

Note that since asoc can be passed as NULL, security_sctp_assoc_request()
is moved to the place right after the new_asoc is created in
sctp_sf_do_5_1B_init() and sctp_sf_do_unexpected_init().

v1->v2:
  - fix the description of selinux_netlbl_skbuff_setsid(), as Jakub noticed.
  - fix the annotation in selinux_sctp_assoc_request(), as Richard Noticed.

Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Reviewed-by: Richard Haines <richard_c_haines@btinternet.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/security/SCTP.rst     | 28 ++++++++++++++--------------
 include/linux/lsm_hook_defs.h       |  4 ++--
 include/linux/lsm_hooks.h           |  8 ++++----
 include/linux/security.h            | 10 +++++-----
 include/net/sctp/structs.h          | 20 ++++++++++----------
 net/sctp/sm_statefuns.c             | 26 +++++++++++++-------------
 net/sctp/socket.c                   |  5 ++---
 security/security.c                 |  8 ++++----
 security/selinux/hooks.c            | 22 +++++++++++-----------
 security/selinux/include/netlabel.h |  4 ++--
 security/selinux/netlabel.c         | 18 +++++++++---------
 11 files changed, 76 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index 0bcf6c1245ee..415b548d9ce0 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -26,11 +26,11 @@ described in the `SCTP SELinux Support`_ chapter.
 
 security_sctp_assoc_request()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Passes the ``@ep`` and ``@chunk->skb`` of the association INIT packet to the
+Passes the ``@asoc`` and ``@chunk->skb`` of the association INIT packet to the
 security module. Returns 0 on success, error on failure.
 ::
 
-    @ep - pointer to sctp endpoint structure.
+    @asoc - pointer to sctp association structure.
     @skb - pointer to skbuff of association packet.
 
 
@@ -117,9 +117,9 @@ Called whenever a new socket is created by **accept**\(2)
 calls **sctp_peeloff**\(3).
 ::
 
-    @ep - pointer to current sctp endpoint structure.
+    @asoc - pointer to current sctp association structure.
     @sk - pointer to current sock structure.
-    @sk - pointer to new sock structure.
+    @newsk - pointer to new sock structure.
 
 
 security_inet_conn_established()
@@ -200,22 +200,22 @@ hooks with the SELinux specifics expanded below::
 
 security_sctp_assoc_request()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Passes the ``@ep`` and ``@chunk->skb`` of the association INIT packet to the
+Passes the ``@asoc`` and ``@chunk->skb`` of the association INIT packet to the
 security module. Returns 0 on success, error on failure.
 ::
 
-    @ep - pointer to sctp endpoint structure.
+    @asoc - pointer to sctp association structure.
     @skb - pointer to skbuff of association packet.
 
 The security module performs the following operations:
-     IF this is the first association on ``@ep->base.sk``, then set the peer
+     IF this is the first association on ``@asoc->base.sk``, then set the peer
      sid to that in ``@skb``. This will ensure there is only one peer sid
-     assigned to ``@ep->base.sk`` that may support multiple associations.
+     assigned to ``@asoc->base.sk`` that may support multiple associations.
 
-     ELSE validate the ``@ep->base.sk peer_sid`` against the ``@skb peer sid``
+     ELSE validate the ``@asoc->base.sk peer_sid`` against the ``@skb peer sid``
      to determine whether the association should be allowed or denied.
 
-     Set the sctp ``@ep sid`` to socket's sid (from ``ep->base.sk``) with
+     Set the sctp ``@asoc sid`` to socket's sid (from ``asoc->base.sk``) with
      MLS portion taken from ``@skb peer sid``. This will be used by SCTP
      TCP style sockets and peeled off connections as they cause a new socket
      to be generated.
@@ -259,13 +259,13 @@ security_sctp_sk_clone()
 Called whenever a new socket is created by **accept**\(2) (i.e. a TCP style
 socket) or when a socket is 'peeled off' e.g userspace calls
 **sctp_peeloff**\(3). ``security_sctp_sk_clone()`` will set the new
-sockets sid and peer sid to that contained in the ``@ep sid`` and
-``@ep peer sid`` respectively.
+sockets sid and peer sid to that contained in the ``@asoc sid`` and
+``@asoc peer sid`` respectively.
 ::
 
-    @ep - pointer to current sctp endpoint structure.
+    @asoc - pointer to current sctp association structure.
     @sk - pointer to current sock structure.
-    @sk - pointer to new sock structure.
+    @newsk - pointer to new sock structure.
 
 
 security_inet_conn_established()
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index a9ac70ae01ab..df8de62f4710 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -329,11 +329,11 @@ LSM_HOOK(int, 0, tun_dev_create, void)
 LSM_HOOK(int, 0, tun_dev_attach_queue, void *security)
 LSM_HOOK(int, 0, tun_dev_attach, struct sock *sk, void *security)
 LSM_HOOK(int, 0, tun_dev_open, void *security)
-LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_endpoint *ep,
+LSM_HOOK(int, 0, sctp_assoc_request, struct sctp_association *asoc,
 	 struct sk_buff *skb)
 LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname,
 	 struct sockaddr *address, int addrlen)
-LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_endpoint *ep,
+LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
 #endif /* CONFIG_SECURITY_NETWORK */
 
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 0bada4df23fc..d45b6f6e27fd 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1027,9 +1027,9 @@
  * Security hooks for SCTP
  *
  * @sctp_assoc_request:
- *	Passes the @ep and @chunk->skb of the association INIT packet to
+ *	Passes the @asoc and @chunk->skb of the association INIT packet to
  *	the security module.
- *	@ep pointer to sctp endpoint structure.
+ *	@asoc pointer to sctp association structure.
  *	@skb pointer to skbuff of association packet.
  *	Return 0 on success, error on failure.
  * @sctp_bind_connect:
@@ -1047,9 +1047,9 @@
  *	Called whenever a new socket is created by accept(2) (i.e. a TCP
  *	style socket) or when a socket is 'peeled off' e.g userspace
  *	calls sctp_peeloff(3).
- *	@ep pointer to current sctp endpoint structure.
+ *	@asoc pointer to current sctp association structure.
  *	@sk pointer to current sock structure.
- *	@sk pointer to new sock structure.
+ *	@newsk pointer to new sock structure.
  *
  * Security hooks for Infiniband
  *
diff --git a/include/linux/security.h b/include/linux/security.h
index 7e0ba63b5dde..bbf44a466832 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -179,7 +179,7 @@ struct xfrm_policy;
 struct xfrm_state;
 struct xfrm_user_sec_ctx;
 struct seq_file;
-struct sctp_endpoint;
+struct sctp_association;
 
 #ifdef CONFIG_MMU
 extern unsigned long mmap_min_addr;
@@ -1425,10 +1425,10 @@ int security_tun_dev_create(void);
 int security_tun_dev_attach_queue(void *security);
 int security_tun_dev_attach(struct sock *sk, void *security);
 int security_tun_dev_open(void *security);
-int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb);
+int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb);
 int security_sctp_bind_connect(struct sock *sk, int optname,
 			       struct sockaddr *address, int addrlen);
-void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk,
+void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
 
 #else	/* CONFIG_SECURITY_NETWORK */
@@ -1631,7 +1631,7 @@ static inline int security_tun_dev_open(void *security)
 	return 0;
 }
 
-static inline int security_sctp_assoc_request(struct sctp_endpoint *ep,
+static inline int security_sctp_assoc_request(struct sctp_association *asoc,
 					      struct sk_buff *skb)
 {
 	return 0;
@@ -1644,7 +1644,7 @@ static inline int security_sctp_bind_connect(struct sock *sk, int optname,
 	return 0;
 }
 
-static inline void security_sctp_sk_clone(struct sctp_endpoint *ep,
+static inline void security_sctp_sk_clone(struct sctp_association *asoc,
 					  struct sock *sk,
 					  struct sock *newsk)
 {
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 651bba654d77..899c29c326ba 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1355,16 +1355,6 @@ struct sctp_endpoint {
 	      reconf_enable:1;
 
 	__u8  strreset_enable;
-
-	/* Security identifiers from incoming (INIT). These are set by
-	 * security_sctp_assoc_request(). These will only be used by
-	 * SCTP TCP type sockets and peeled off connections as they
-	 * cause a new socket to be generated. security_sctp_sk_clone()
-	 * will then plug these into the new socket.
-	 */
-
-	u32 secid;
-	u32 peer_secid;
 };
 
 /* Recover the outter endpoint structure. */
@@ -2104,6 +2094,16 @@ struct sctp_association {
 	__u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
 	__u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
 
+	/* Security identifiers from incoming (INIT). These are set by
+	 * security_sctp_assoc_request(). These will only be used by
+	 * SCTP TCP type sockets and peeled off connections as they
+	 * cause a new socket to be generated. security_sctp_sk_clone()
+	 * will then plug these into the new socket.
+	 */
+
+	u32 secid;
+	u32 peer_secid;
+
 	struct rcu_head rcu;
 };
 
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index fb3da4d8f4a3..3206374209bc 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -326,11 +326,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
 	struct sctp_packet *packet;
 	int len;
 
-	/* Update socket peer label if first association. */
-	if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
-					chunk->skb))
-		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
 	/* 6.10 Bundling
 	 * An endpoint MUST NOT bundle INIT, INIT ACK or
 	 * SHUTDOWN COMPLETE with any other chunks.
@@ -415,6 +410,12 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
 	if (!new_asoc)
 		goto nomem;
 
+	/* Update socket peer label if first association. */
+	if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+		sctp_association_free(new_asoc);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+	}
+
 	if (sctp_assoc_set_bind_addr_from_ep(new_asoc,
 					     sctp_scope(sctp_source(chunk)),
 					     GFP_ATOMIC) < 0)
@@ -780,7 +781,6 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
 		}
 	}
 
-
 	/* Delay state machine commands until later.
 	 *
 	 * Re-build the bind address for the association is done in
@@ -1517,11 +1517,6 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
 	struct sctp_packet *packet;
 	int len;
 
-	/* Update socket peer label if first association. */
-	if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
-					chunk->skb))
-		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
 	/* 6.10 Bundling
 	 * An endpoint MUST NOT bundle INIT, INIT ACK or
 	 * SHUTDOWN COMPLETE with any other chunks.
@@ -1594,6 +1589,12 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
 	if (!new_asoc)
 		goto nomem;
 
+	/* Update socket peer label if first association. */
+	if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
+		sctp_association_free(new_asoc);
+		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+	}
+
 	if (sctp_assoc_set_bind_addr_from_ep(new_asoc,
 				sctp_scope(sctp_source(chunk)), GFP_ATOMIC) < 0)
 		goto nomem;
@@ -2255,8 +2256,7 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
 	}
 
 	/* Update socket peer label if first association. */
-	if (security_sctp_assoc_request((struct sctp_endpoint *)ep,
-					chunk->skb)) {
+	if (security_sctp_assoc_request(new_asoc, chunk->skb)) {
 		sctp_association_free(new_asoc);
 		return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
 	}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6b937bfd4751..33391254fa82 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -9412,7 +9412,6 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 	struct inet_sock *inet = inet_sk(sk);
 	struct inet_sock *newinet;
 	struct sctp_sock *sp = sctp_sk(sk);
-	struct sctp_endpoint *ep = sp->ep;
 
 	newsk->sk_type = sk->sk_type;
 	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
@@ -9457,9 +9456,9 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
 		net_enable_timestamp();
 
 	/* Set newsk security attributes from original sk and connection
-	 * security attribute from ep.
+	 * security attribute from asoc.
 	 */
-	security_sctp_sk_clone(ep, sk, newsk);
+	security_sctp_sk_clone(asoc, sk, newsk);
 }
 
 static inline void sctp_copy_descendant(struct sock *sk_to,
diff --git a/security/security.c b/security/security.c
index 95e30fadba78..c88167a414b4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2367,9 +2367,9 @@ int security_tun_dev_open(void *security)
 }
 EXPORT_SYMBOL(security_tun_dev_open);
 
-int security_sctp_assoc_request(struct sctp_endpoint *ep, struct sk_buff *skb)
+int security_sctp_assoc_request(struct sctp_association *asoc, struct sk_buff *skb)
 {
-	return call_int_hook(sctp_assoc_request, 0, ep, skb);
+	return call_int_hook(sctp_assoc_request, 0, asoc, skb);
 }
 EXPORT_SYMBOL(security_sctp_assoc_request);
 
@@ -2381,10 +2381,10 @@ int security_sctp_bind_connect(struct sock *sk, int optname,
 }
 EXPORT_SYMBOL(security_sctp_bind_connect);
 
-void security_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk,
+void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk)
 {
-	call_void_hook(sctp_sk_clone, ep, sk, newsk);
+	call_void_hook(sctp_sk_clone, asoc, sk, newsk);
 }
 EXPORT_SYMBOL(security_sctp_sk_clone);
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ea7b2876a5ae..62d30c0a30c2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5339,10 +5339,10 @@ static void selinux_sock_graft(struct sock *sk, struct socket *parent)
  * connect(2), sctp_connectx(3) or sctp_sendmsg(3) (with no association
  * already present).
  */
-static int selinux_sctp_assoc_request(struct sctp_endpoint *ep,
+static int selinux_sctp_assoc_request(struct sctp_association *asoc,
 				      struct sk_buff *skb)
 {
-	struct sk_security_struct *sksec = ep->base.sk->sk_security;
+	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
 	struct common_audit_data ad;
 	struct lsm_network_audit net = {0,};
 	u8 peerlbl_active;
@@ -5359,7 +5359,7 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep,
 		/* This will return peer_sid = SECSID_NULL if there are
 		 * no peer labels, see security_net_peersid_resolve().
 		 */
-		err = selinux_skb_peerlbl_sid(skb, ep->base.sk->sk_family,
+		err = selinux_skb_peerlbl_sid(skb, asoc->base.sk->sk_family,
 					      &peer_sid);
 		if (err)
 			return err;
@@ -5383,7 +5383,7 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep,
 		 */
 		ad.type = LSM_AUDIT_DATA_NET;
 		ad.u.net = &net;
-		ad.u.net->sk = ep->base.sk;
+		ad.u.net->sk = asoc->base.sk;
 		err = avc_has_perm(&selinux_state,
 				   sksec->peer_sid, peer_sid, sksec->sclass,
 				   SCTP_SOCKET__ASSOCIATION, &ad);
@@ -5392,7 +5392,7 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep,
 	}
 
 	/* Compute the MLS component for the connection and store
-	 * the information in ep. This will be used by SCTP TCP type
+	 * the information in asoc. This will be used by SCTP TCP type
 	 * sockets and peeled off connections as they cause a new
 	 * socket to be generated. selinux_sctp_sk_clone() will then
 	 * plug this into the new socket.
@@ -5401,11 +5401,11 @@ static int selinux_sctp_assoc_request(struct sctp_endpoint *ep,
 	if (err)
 		return err;
 
-	ep->secid = conn_sid;
-	ep->peer_secid = peer_sid;
+	asoc->secid = conn_sid;
+	asoc->peer_secid = peer_sid;
 
 	/* Set any NetLabel labels including CIPSO/CALIPSO options. */
-	return selinux_netlbl_sctp_assoc_request(ep, skb);
+	return selinux_netlbl_sctp_assoc_request(asoc, skb);
 }
 
 /* Check if sctp IPv4/IPv6 addresses are valid for binding or connecting
@@ -5490,7 +5490,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname,
 }
 
 /* Called whenever a new socket is created by accept(2) or sctp_peeloff(3). */
-static void selinux_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk,
+static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 				  struct sock *newsk)
 {
 	struct sk_security_struct *sksec = sk->sk_security;
@@ -5502,8 +5502,8 @@ static void selinux_sctp_sk_clone(struct sctp_endpoint *ep, struct sock *sk,
 	if (!selinux_policycap_extsockclass())
 		return selinux_sk_clone_security(sk, newsk);
 
-	newsksec->sid = ep->secid;
-	newsksec->peer_sid = ep->peer_secid;
+	newsksec->sid = asoc->secid;
+	newsksec->peer_sid = asoc->peer_secid;
 	newsksec->sclass = sksec->sclass;
 	selinux_netlbl_sctp_sk_clone(sk, newsk);
 }
diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h
index 0c58f62dc6ab..4d0456d3d459 100644
--- a/security/selinux/include/netlabel.h
+++ b/security/selinux/include/netlabel.h
@@ -39,7 +39,7 @@ int selinux_netlbl_skbuff_getsid(struct sk_buff *skb,
 int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 				 u16 family,
 				 u32 sid);
-int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep,
+int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc,
 				     struct sk_buff *skb);
 int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family);
 void selinux_netlbl_inet_csk_clone(struct sock *sk, u16 family);
@@ -98,7 +98,7 @@ static inline int selinux_netlbl_skbuff_setsid(struct sk_buff *skb,
 	return 0;
 }
 
-static inline int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep,
+static inline int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc,
 						    struct sk_buff *skb)
 {
 	return 0;
diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c
index 29b88e81869b..1321f15799e2 100644
--- a/security/selinux/netlabel.c
+++ b/security/selinux/netlabel.c
@@ -261,30 +261,30 @@ skbuff_setsid_return:
 
 /**
  * selinux_netlbl_sctp_assoc_request - Label an incoming sctp association.
- * @ep: incoming association endpoint.
+ * @asoc: incoming association.
  * @skb: the packet.
  *
  * Description:
- * A new incoming connection is represented by @ep, ......
+ * A new incoming connection is represented by @asoc, ......
  * Returns zero on success, negative values on failure.
  *
  */
-int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep,
+int selinux_netlbl_sctp_assoc_request(struct sctp_association *asoc,
 				     struct sk_buff *skb)
 {
 	int rc;
 	struct netlbl_lsm_secattr secattr;
-	struct sk_security_struct *sksec = ep->base.sk->sk_security;
+	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
 	struct sockaddr_in addr4;
 	struct sockaddr_in6 addr6;
 
-	if (ep->base.sk->sk_family != PF_INET &&
-				ep->base.sk->sk_family != PF_INET6)
+	if (asoc->base.sk->sk_family != PF_INET &&
+	    asoc->base.sk->sk_family != PF_INET6)
 		return 0;
 
 	netlbl_secattr_init(&secattr);
 	rc = security_netlbl_sid_to_secattr(&selinux_state,
-					    ep->secid, &secattr);
+					    asoc->secid, &secattr);
 	if (rc != 0)
 		goto assoc_request_return;
 
@@ -294,11 +294,11 @@ int selinux_netlbl_sctp_assoc_request(struct sctp_endpoint *ep,
 	if (ip_hdr(skb)->version == 4) {
 		addr4.sin_family = AF_INET;
 		addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
-		rc = netlbl_conn_setattr(ep->base.sk, (void *)&addr4, &secattr);
+		rc = netlbl_conn_setattr(asoc->base.sk, (void *)&addr4, &secattr);
 	} else if (IS_ENABLED(CONFIG_IPV6) && ip_hdr(skb)->version == 6) {
 		addr6.sin6_family = AF_INET6;
 		addr6.sin6_addr = ipv6_hdr(skb)->saddr;
-		rc = netlbl_conn_setattr(ep->base.sk, (void *)&addr6, &secattr);
+		rc = netlbl_conn_setattr(asoc->base.sk, (void *)&addr6, &secattr);
 	} else {
 		rc = -EAFNOSUPPORT;
 	}
-- 
cgit v1.2.3


From 7c2ef0240e6abfd3cc59511339517358350a8910 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Tue, 2 Nov 2021 08:02:49 -0400
Subject: security: add sctp_assoc_established hook

security_sctp_assoc_established() is added to replace
security_inet_conn_established() called in
sctp_sf_do_5_1E_ca(), so that asoc can be accessed in security
subsystem and save the peer secid to asoc->peer_secid.

v1->v2:
  - fix the return value of security_sctp_assoc_established() in
    security.h, found by kernel test robot and Ondrej.

Fixes: 72e89f50084c ("security: Add support for SCTP security hooks")
Reported-by: Prashanth Prahlad <pprahlad@redhat.com>
Reviewed-by: Richard Haines <richard_c_haines@btinternet.com>
Tested-by: Richard Haines <richard_c_haines@btinternet.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/security/SCTP.rst | 22 ++++++++++------------
 include/linux/lsm_hook_defs.h   |  2 ++
 include/linux/lsm_hooks.h       |  5 +++++
 include/linux/security.h        |  7 +++++++
 net/sctp/sm_statefuns.c         |  2 +-
 security/security.c             |  7 +++++++
 6 files changed, 32 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index d5fd6ccc3dcb..406cc68b8808 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -15,10 +15,7 @@ For security module support, three SCTP specific hooks have been implemented::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-
-Also the following security hook has been utilised::
-
-    security_inet_conn_established()
+    security_sctp_assoc_established()
 
 The usage of these hooks are described below with the SELinux implementation
 described in the `SCTP SELinux Support`_ chapter.
@@ -122,11 +119,12 @@ calls **sctp_peeloff**\(3).
     @newsk - pointer to new sock structure.
 
 
-security_inet_conn_established()
+security_sctp_assoc_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Called when a COOKIE ACK is received::
+Called when a COOKIE ACK is received, and the peer secid will be
+saved into ``@asoc->peer_secid`` for client::
 
-    @sk  - pointer to sock structure.
+    @asoc - pointer to sctp association structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
@@ -134,7 +132,7 @@ Security Hooks used for Association Establishment
 -------------------------------------------------
 
 The following diagram shows the use of ``security_sctp_bind_connect()``,
-``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when
+``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when
 establishing an association.
 ::
 
@@ -172,7 +170,7 @@ establishing an association.
           <------------------------------------------- COOKIE ACK
           |                                               |
     sctp_sf_do_5_1E_ca                                    |
- Call security_inet_conn_established()                    |
+ Call security_sctp_assoc_established()                   |
  to set the peer label.                                   |
           |                                               |
           |                               If SCTP_SOCKET_TCP or peeled off
@@ -198,7 +196,7 @@ hooks with the SELinux specifics expanded below::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_inet_conn_established()
+    security_sctp_assoc_established()
 
 
 security_sctp_assoc_request()
@@ -271,12 +269,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and
     @newsk - pointer to new sock structure.
 
 
-security_inet_conn_established()
+security_sctp_assoc_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Called when a COOKIE ACK is received where it sets the connection's peer sid
 to that in ``@skb``::
 
-    @sk  - pointer to sock structure.
+    @asoc - pointer to sctp association structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index df8de62f4710..442a611fa0fb 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -335,6 +335,8 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname,
 	 struct sockaddr *address, int addrlen)
 LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
+LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc,
+	 struct sk_buff *skb)
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d45b6f6e27fd..d6823214d5c1 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1050,6 +1050,11 @@
  *	@asoc pointer to current sctp association structure.
  *	@sk pointer to current sock structure.
  *	@newsk pointer to new sock structure.
+ * @sctp_assoc_established:
+ *	Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet
+ *	to the security module.
+ *	@asoc pointer to sctp association structure.
+ *	@skb pointer to skbuff of association packet.
  *
  * Security hooks for Infiniband
  *
diff --git a/include/linux/security.h b/include/linux/security.h
index bbf44a466832..06eac4e61a13 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1430,6 +1430,8 @@ int security_sctp_bind_connect(struct sock *sk, int optname,
 			       struct sockaddr *address, int addrlen);
 void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
+void security_sctp_assoc_established(struct sctp_association *asoc,
+				     struct sk_buff *skb);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -1649,6 +1651,11 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc,
 					  struct sock *newsk)
 {
 }
+
+static inline void security_sctp_assoc_established(struct sctp_association *asoc,
+						   struct sk_buff *skb)
+{
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index b818532c3fc2..5fabaa54b77d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -946,7 +946,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
 
 	/* Set peer label for connection. */
-	security_inet_conn_established(ep->base.sk, chunk->skb);
+	security_sctp_assoc_established((struct sctp_association *)asoc, chunk->skb);
 
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
diff --git a/security/security.c b/security/security.c
index c88167a414b4..779a9edea0a0 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2388,6 +2388,13 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 }
 EXPORT_SYMBOL(security_sctp_sk_clone);
 
+void security_sctp_assoc_established(struct sctp_association *asoc,
+				     struct sk_buff *skb)
+{
+	call_void_hook(sctp_assoc_established, asoc, skb);
+}
+EXPORT_SYMBOL(security_sctp_assoc_established);
+
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
-- 
cgit v1.2.3


From 9b65b17db72313b7a4fe9bc9502928c88be57986 Mon Sep 17 00:00:00 2001
From: Talal Ahmad <talalahmad@google.com>
Date: Tue, 2 Nov 2021 22:58:44 -0400
Subject: net: avoid double accounting for pure zerocopy skbs

Track skbs containing only zerocopy data and avoid charging them to
kernel memory to correctly account the memory utilization for
msg_zerocopy. All of the data in such skbs is held in user pages which
are already accounted to user. Before this change, they are charged
again in kernel in __zerocopy_sg_from_iter. The charging in kernel is
excessive because data is not being copied into skb frags. This
excessive charging can lead to kernel going into memory pressure
state which impacts all sockets in the system adversely. Mark pure
zerocopy skbs with a SKBFL_PURE_ZEROCOPY flag and remove
charge/uncharge for data in such skbs.

Initially, an skb is marked pure zerocopy when it is empty and in
zerocopy path. skb can then change from a pure zerocopy skb to mixed
data skb (zerocopy and copy data) if it is at tail of write queue and
there is room available in it and non-zerocopy data is being sent in
the next sendmsg call. At this time sk_mem_charge is done for the pure
zerocopied data and the pure zerocopy flag is unmarked. We found that
this happens very rarely on workloads that pass MSG_ZEROCOPY.

A pure zerocopy skb can later be coalesced into normal skb if they are
next to each other in queue but this patch prevents coalescing from
happening. This avoids complexity of charging when skb downgrades from
pure zerocopy to mixed. This is also rare.

In sk_wmem_free_skb, if it is a pure zerocopy skb, an sk_mem_uncharge
for SKB_TRUESIZE(skb_end_offset(skb)) is done for sk_mem_charge in
tcp_skb_entail for an skb without data.

Testing with the msg_zerocopy.c benchmark between two hosts(100G nics)
with zerocopy showed that before this patch the 'sock' variable in
memory.stat for cgroup2 that tracks sum of sk_forward_alloc,
sk_rmem_alloc and sk_wmem_queued is around 1822720 and with this
change it is 0. This is due to no charge to sk_forward_alloc for
zerocopy data and shows memory utilization for kernel is lowered.

With this commit we don't see the warning we saw in previous commit
which resulted in commit 84882cf72cd774cf16fd338bdbf00f69ac9f9194.

Signed-off-by: Talal Ahmad <talalahmad@google.com>
Acked-by: Arjun Roy <arjunroy@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 19 ++++++++++++++++++-
 include/net/tcp.h      |  8 ++++++--
 net/core/datagram.c    |  3 ++-
 net/core/skbuff.c      |  3 ++-
 net/ipv4/tcp.c         | 22 ++++++++++++++++++++--
 net/ipv4/tcp_output.c  |  7 +++++--
 6 files changed, 53 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a63e13082397..686a666d073d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -454,9 +454,15 @@ enum {
 	 * all frags to avoid possible bad checksum
 	 */
 	SKBFL_SHARED_FRAG = BIT(1),
+
+	/* segment contains only zerocopy data and should not be
+	 * charged to the kernel memory.
+	 */
+	SKBFL_PURE_ZEROCOPY = BIT(2),
 };
 
 #define SKBFL_ZEROCOPY_FRAG	(SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
+#define SKBFL_ALL_ZEROCOPY	(SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY)
 
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
@@ -1464,6 +1470,17 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
 	return is_zcopy ? skb_uarg(skb) : NULL;
 }
 
+static inline bool skb_zcopy_pure(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
+}
+
+static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
+				       const struct sk_buff *skb2)
+{
+	return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2);
+}
+
 static inline void net_zcopy_get(struct ubuf_info *uarg)
 {
 	refcount_inc(&uarg->refcnt);
@@ -1528,7 +1545,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
 		if (!skb_zcopy_is_nouarg(skb))
 			uarg->callback(skb, uarg, zerocopy_success);
 
-		skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG;
+		skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY;
 	}
 }
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70972f3ac8fa..4da22b41bde6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -293,7 +293,10 @@ static inline bool tcp_out_of_memory(struct sock *sk)
 static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
 {
 	sk_wmem_queued_add(sk, -skb->truesize);
-	sk_mem_uncharge(sk, skb->truesize);
+	if (!skb_zcopy_pure(skb))
+		sk_mem_uncharge(sk, skb->truesize);
+	else
+		sk_mem_uncharge(sk, SKB_TRUESIZE(skb_end_offset(skb)));
 	__kfree_skb(skb);
 }
 
@@ -974,7 +977,8 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
 					const struct sk_buff *from)
 {
 	return likely(tcp_skb_can_collapse_to(to) &&
-		      mptcp_skb_can_collapse(to, from));
+		      mptcp_skb_can_collapse(to, from) &&
+		      skb_pure_zcopy_same(to, from));
 }
 
 /* Events passed to congestion control interface */
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 15ab9ffb27fe..ee290776c661 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -646,7 +646,8 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
 		skb->truesize += truesize;
 		if (sk && sk->sk_type == SOCK_STREAM) {
 			sk_wmem_queued_add(sk, truesize);
-			sk_mem_charge(sk, truesize);
+			if (!skb_zcopy_pure(skb))
+				sk_mem_charge(sk, truesize);
 		} else {
 			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
 		}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3ec42cdee16a..ba2f38246f07 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3433,8 +3433,9 @@ static inline void skb_split_no_header(struct sk_buff *skb,
 void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 {
 	int pos = skb_headlen(skb);
+	const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
 
-	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
+	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
 	skb_zerocopy_clone(skb1, skb, 0);
 	if (len < pos)	/* Split line is inside header. */
 		skb_split_inside_header(skb, skb1, len, pos);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bc7f419184aa..b461ae573afc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -863,6 +863,7 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
 	if (likely(skb)) {
 		bool mem_scheduled;
 
+		skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
 		if (force_schedule) {
 			mem_scheduled = true;
 			sk_forced_mem_schedule(sk, skb->truesize);
@@ -1319,6 +1320,15 @@ new_segment:
 
 			copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
+			/* skb changing from pure zc to mixed, must charge zc */
+			if (unlikely(skb_zcopy_pure(skb))) {
+				if (!sk_wmem_schedule(sk, skb->data_len))
+					goto wait_for_space;
+
+				sk_mem_charge(sk, skb->data_len);
+				skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY;
+			}
+
 			if (!sk_wmem_schedule(sk, copy))
 				goto wait_for_space;
 
@@ -1339,8 +1349,16 @@ new_segment:
 			}
 			pfrag->offset += copy;
 		} else {
-			if (!sk_wmem_schedule(sk, copy))
-				goto wait_for_space;
+			/* First append to a fragless skb builds initial
+			 * pure zerocopy skb
+			 */
+			if (!skb->len)
+				skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY;
+
+			if (!skb_zcopy_pure(skb)) {
+				if (!sk_wmem_schedule(sk, copy))
+					goto wait_for_space;
+			}
 
 			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
 			if (err == -EMSGSIZE || err == -EEXIST) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 76cc1641beb4..6f7860e283c6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1677,7 +1677,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	if (delta_truesize) {
 		skb->truesize	   -= delta_truesize;
 		sk_wmem_queued_add(sk, -delta_truesize);
-		sk_mem_uncharge(sk, delta_truesize);
+		if (!skb_zcopy_pure(skb))
+			sk_mem_uncharge(sk, delta_truesize);
 	}
 
 	/* Any change of skb->len requires recalculation of tso factor. */
@@ -2295,7 +2296,9 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
 		if (len <= skb->len)
 			break;
 
-		if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb))
+		if (unlikely(TCP_SKB_CB(skb)->eor) ||
+		    tcp_has_tx_tstamp(skb) ||
+		    !skb_pure_zcopy_same(skb, next))
 			return false;
 
 		len -= skb->len;
-- 
cgit v1.2.3


From 1aabe578dd86e9f2867c4db4fba9a15f4ba1825d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 2 Nov 2021 15:02:36 -0700
Subject: ethtool: fix ethtool msg len calculation for pause stats

ETHTOOL_A_PAUSE_STAT_MAX is the MAX attribute id,
so we need to subtract non-stats and add one to
get a count (IOW -2+1 == -1).

Otherwise we'll see:

  ethnl cmd 21: calculated reply length 40, but consumed 52

Fixes: 9a27a33027f2 ("ethtool: add standard pause stats")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool_netlink.h      | 3 +++
 include/uapi/linux/ethtool_netlink.h | 4 +++-
 net/ethtool/pause.c                  | 3 +--
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
index 1e7bf78cb382..aba348d58ff6 100644
--- a/include/linux/ethtool_netlink.h
+++ b/include/linux/ethtool_netlink.h
@@ -10,6 +10,9 @@
 #define __ETHTOOL_LINK_MODE_MASK_NWORDS \
 	DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
 
+#define ETHTOOL_PAUSE_STAT_CNT	(__ETHTOOL_A_PAUSE_STAT_CNT -		\
+				 ETHTOOL_A_PAUSE_STAT_TX_FRAMES)
+
 enum ethtool_multicast_groups {
 	ETHNL_MCGRP_MONITOR,
 };
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index ca5fbb59fa42..999777d32dcf 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -411,7 +411,9 @@ enum {
 	ETHTOOL_A_PAUSE_STAT_TX_FRAMES,
 	ETHTOOL_A_PAUSE_STAT_RX_FRAMES,
 
-	/* add new constants above here */
+	/* add new constants above here
+	 * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats!
+	 */
 	__ETHTOOL_A_PAUSE_STAT_CNT,
 	ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1)
 };
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index 9009f412151e..ee1e5806bc93 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -56,8 +56,7 @@ static int pause_reply_size(const struct ethnl_req_info *req_base,
 
 	if (req_base->flags & ETHTOOL_FLAG_STATS)
 		n += nla_total_size(0) +	/* _PAUSE_STATS */
-			nla_total_size_64bit(sizeof(u64)) *
-				(ETHTOOL_A_PAUSE_STAT_MAX - 2);
+		     nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT;
 	return n;
 }
 
-- 
cgit v1.2.3


From 4330fe35b8213e92ff51907b4cb6323be943a9ad Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 2 Nov 2021 16:01:56 -0600
Subject: nfs: remove unused header <linux/pnfs_osd_xdr.h>

Commit 19fcae3d4f2dd ("scsi: remove the SCSI OSD library") deleted the last
file that included <linux/pnfs_osd_xdr.h> but left that file behind.  It's
unused, get rid of it now.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: linux-nfs@vger.kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/pnfs_osd_xdr.h | 317 -------------------------------------------
 1 file changed, 317 deletions(-)
 delete mode 100644 include/linux/pnfs_osd_xdr.h

(limited to 'include/linux')

diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
deleted file mode 100644
index 17d7d0d20eca..000000000000
--- a/include/linux/pnfs_osd_xdr.h
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- *  pNFS-osd on-the-wire data structures
- *
- *  Copyright (C) 2007 Panasas Inc. [year of first publication]
- *  All rights reserved.
- *
- *  Benny Halevy <bhalevy@panasas.com>
- *  Boaz Harrosh <ooo@electrozaur.com>
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2
- *  See the file COPYING included with this distribution for more details.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the Panasas company nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __PNFS_OSD_XDR_H__
-#define __PNFS_OSD_XDR_H__
-
-#include <linux/nfs_fs.h>
-
-/*
- * draft-ietf-nfsv4-minorversion-22
- * draft-ietf-nfsv4-pnfs-obj-12
- */
-
-/* Layout Structure */
-
-enum pnfs_osd_raid_algorithm4 {
-	PNFS_OSD_RAID_0		= 1,
-	PNFS_OSD_RAID_4		= 2,
-	PNFS_OSD_RAID_5		= 3,
-	PNFS_OSD_RAID_PQ	= 4     /* Reed-Solomon P+Q */
-};
-
-/*   struct pnfs_osd_data_map4 {
- *       uint32_t                    odm_num_comps;
- *       length4                     odm_stripe_unit;
- *       uint32_t                    odm_group_width;
- *       uint32_t                    odm_group_depth;
- *       uint32_t                    odm_mirror_cnt;
- *       pnfs_osd_raid_algorithm4    odm_raid_algorithm;
- *   };
- */
-struct pnfs_osd_data_map {
-	u32	odm_num_comps;
-	u64	odm_stripe_unit;
-	u32	odm_group_width;
-	u32	odm_group_depth;
-	u32	odm_mirror_cnt;
-	u32	odm_raid_algorithm;
-};
-
-/*   struct pnfs_osd_objid4 {
- *       deviceid4       oid_device_id;
- *       uint64_t        oid_partition_id;
- *       uint64_t        oid_object_id;
- *   };
- */
-struct pnfs_osd_objid {
-	struct nfs4_deviceid	oid_device_id;
-	u64			oid_partition_id;
-	u64			oid_object_id;
-};
-
-/* For printout. I use:
- * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer));
- * BE style
- */
-#define _DEVID_LO(oid_device_id) \
-	(unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data)
-
-#define _DEVID_HI(oid_device_id) \
-	(unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
-
-enum pnfs_osd_version {
-	PNFS_OSD_MISSING              = 0,
-	PNFS_OSD_VERSION_1            = 1,
-	PNFS_OSD_VERSION_2            = 2
-};
-
-struct pnfs_osd_opaque_cred {
-	u32 cred_len;
-	void *cred;
-};
-
-enum pnfs_osd_cap_key_sec {
-	PNFS_OSD_CAP_KEY_SEC_NONE     = 0,
-	PNFS_OSD_CAP_KEY_SEC_SSV      = 1,
-};
-
-/*   struct pnfs_osd_object_cred4 {
- *       pnfs_osd_objid4         oc_object_id;
- *       pnfs_osd_version4       oc_osd_version;
- *       pnfs_osd_cap_key_sec4   oc_cap_key_sec;
- *       opaque                  oc_capability_key<>;
- *       opaque                  oc_capability<>;
- *   };
- */
-struct pnfs_osd_object_cred {
-	struct pnfs_osd_objid		oc_object_id;
-	u32				oc_osd_version;
-	u32				oc_cap_key_sec;
-	struct pnfs_osd_opaque_cred	oc_cap_key;
-	struct pnfs_osd_opaque_cred	oc_cap;
-};
-
-/*   struct pnfs_osd_layout4 {
- *       pnfs_osd_data_map4      olo_map;
- *       uint32_t                olo_comps_index;
- *       pnfs_osd_object_cred4   olo_components<>;
- *   };
- */
-struct pnfs_osd_layout {
-	struct pnfs_osd_data_map	olo_map;
-	u32				olo_comps_index;
-	u32				olo_num_comps;
-	struct pnfs_osd_object_cred	*olo_comps;
-};
-
-/* Device Address */
-enum pnfs_osd_targetid_type {
-	OBJ_TARGET_ANON = 1,
-	OBJ_TARGET_SCSI_NAME = 2,
-	OBJ_TARGET_SCSI_DEVICE_ID = 3,
-};
-
-/*   union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) {
- *       case OBJ_TARGET_SCSI_NAME:
- *           string              oti_scsi_name<>;
- *
- *       case OBJ_TARGET_SCSI_DEVICE_ID:
- *           opaque              oti_scsi_device_id<>;
- *
- *       default:
- *           void;
- *   };
- *
- *   union pnfs_osd_targetaddr4 switch (bool ota_available) {
- *       case TRUE:
- *           netaddr4            ota_netaddr;
- *       case FALSE:
- *           void;
- *   };
- *
- *   struct pnfs_osd_deviceaddr4 {
- *       pnfs_osd_targetid4      oda_targetid;
- *       pnfs_osd_targetaddr4    oda_targetaddr;
- *       uint64_t                oda_lun;
- *       opaque                  oda_systemid<>;
- *       pnfs_osd_object_cred4   oda_root_obj_cred;
- *       opaque                  oda_osdname<>;
- *   };
- */
-struct pnfs_osd_targetid {
-	u32				oti_type;
-	struct nfs4_string		oti_scsi_device_id;
-};
-
-/*   struct netaddr4 {
- *       // see struct rpcb in RFC1833
- *       string r_netid<>;    // network id
- *       string r_addr<>;     // universal address
- *   };
- */
-struct pnfs_osd_net_addr {
-	struct nfs4_string	r_netid;
-	struct nfs4_string	r_addr;
-};
-
-struct pnfs_osd_targetaddr {
-	u32				ota_available;
-	struct pnfs_osd_net_addr	ota_netaddr;
-};
-
-struct pnfs_osd_deviceaddr {
-	struct pnfs_osd_targetid	oda_targetid;
-	struct pnfs_osd_targetaddr	oda_targetaddr;
-	u8				oda_lun[8];
-	struct nfs4_string		oda_systemid;
-	struct pnfs_osd_object_cred	oda_root_obj_cred;
-	struct nfs4_string		oda_osdname;
-};
-
-/* LAYOUTCOMMIT: layoutupdate */
-
-/*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
- *       case TRUE:
- *           int64_t     dsu_delta;
- *       case FALSE:
- *           void;
- *   };
- *
- *   struct pnfs_osd_layoutupdate4 {
- *       pnfs_osd_deltaspaceused4    olu_delta_space_used;
- *       bool                        olu_ioerr_flag;
- *   };
- */
-struct pnfs_osd_layoutupdate {
-	u32	dsu_valid;
-	s64	dsu_delta;
-	u32	olu_ioerr_flag;
-};
-
-/* LAYOUTRETURN: I/O Rrror Report */
-
-enum pnfs_osd_errno {
-	PNFS_OSD_ERR_EIO		= 1,
-	PNFS_OSD_ERR_NOT_FOUND		= 2,
-	PNFS_OSD_ERR_NO_SPACE		= 3,
-	PNFS_OSD_ERR_BAD_CRED		= 4,
-	PNFS_OSD_ERR_NO_ACCESS		= 5,
-	PNFS_OSD_ERR_UNREACHABLE	= 6,
-	PNFS_OSD_ERR_RESOURCE		= 7
-};
-
-/*   struct pnfs_osd_ioerr4 {
- *       pnfs_osd_objid4     oer_component;
- *       length4             oer_comp_offset;
- *       length4             oer_comp_length;
- *       bool                oer_iswrite;
- *       pnfs_osd_errno4     oer_errno;
- *   };
- */
-struct pnfs_osd_ioerr {
-	struct pnfs_osd_objid	oer_component;
-	u64			oer_comp_offset;
-	u64			oer_comp_length;
-	u32			oer_iswrite;
-	u32			oer_errno;
-};
-
-/* OSD XDR Client API */
-/* Layout helpers */
-/* Layout decoding is done in two parts:
- * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
- *    of the layout. @iter members need not be initialized.
- *    Returned:
- *             @layout members are set. (@layout->olo_comps set to NULL).
- *
- *             Zero on success, or negative error if passed xdr is broken.
- *
- * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns
- *    false, to decode the next component.
- *    Returned:
- *       true if there is more to decode or false if we are done or error.
- *
- * Example:
- *	struct pnfs_osd_xdr_decode_layout_iter iter;
- *	struct pnfs_osd_layout layout;
- *	struct pnfs_osd_object_cred comp;
- *	int status;
- *
- *	status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
- *	if (unlikely(status))
- *		goto err;
- *	while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) {
- *		// All of @comp strings point to inside the xdr_buffer
- *		// or scrach buffer. Copy them out to user memory eg.
- *		copy_single_comp(dest_comp++, &comp);
- *	}
- *	if (unlikely(status))
- *		goto err;
- */
-
-struct pnfs_osd_xdr_decode_layout_iter {
-	unsigned total_comps;
-	unsigned decoded_comps;
-};
-
-extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
-	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr);
-
-extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
-	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
-	int *err);
-
-/* Device Info helpers */
-
-/* Note: All strings inside @deviceaddr point to space inside @p.
- * @p should stay valid while @deviceaddr is in use.
- */
-extern void pnfs_osd_xdr_decode_deviceaddr(
-	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p);
-
-/* layoutupdate (layout_commit) xdr helpers */
-extern int
-pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
-				 struct pnfs_osd_layoutupdate *lou);
-
-/* osd_ioerror encoding (layout_return) */
-extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
-extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
-
-#endif /* __PNFS_OSD_XDR_H__ */
-- 
cgit v1.2.3


From 92f62485b3715882cd397b0cbd80a96d179b86d6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 2 Nov 2021 21:31:22 +0200
Subject: net: dsa: felix: fix broken VLAN-tagged PTP under VLAN-aware bridge

Normally it is expected that the dsa_device_ops :: rcv() method finishes
parsing the DSA tag and consumes it, then never looks at it again.

But commit c0bcf537667c ("net: dsa: ocelot: add hardware timestamping
support for Felix") added support for RX timestamping in a very
unconventional way. On this switch, a partial timestamp is available in
the DSA header, but the driver got away with not parsing that timestamp
right away, but instead delayed that parsing for a little longer:

dsa_switch_rcv():
	nskb = cpu_dp->rcv(skb, dev); <------------- not here
	-> ocelot_rcv()
	...

	skb = nskb;
	skb_push(skb, ETH_HLEN);
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, skb->dev);

	...

	if (dsa_skb_defer_rx_timestamp(p, skb)) <--- but here
	-> felix_rxtstamp()
		return 0;

When in felix_rxtstamp(), this driver accounted for the fact that
eth_type_trans() happened in the meanwhile, so it got a hold of the
extraction header again by subtracting (ETH_HLEN + OCELOT_TAG_LEN) bytes
from the current skb->data.

This worked for quite some time but was quite fragile from the very
beginning. Not to mention that having DSA tag parsing split in two
different files, under different folders (net/dsa/tag_ocelot.c vs
drivers/net/dsa/ocelot/felix.c) made it quite non-obvious for patches to
come that they might break this.

Finally, the blamed commit does the following: at the end of
ocelot_rcv(), it checks whether the skb payload contains a VLAN header.
If it does, and this port is under a VLAN-aware bridge, that VLAN ID
might not be correct in the sense that the packet might have suffered
VLAN rewriting due to TCAM rules (VCAP IS1). So we consume the VLAN ID
from the skb payload using __skb_vlan_pop(), and take the classified
VLAN ID from the DSA tag, and construct a hwaccel VLAN tag with the
classified VLAN, and the skb payload is VLAN-untagged.

The big problem is that __skb_vlan_pop() does:

	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
	__skb_pull(skb, VLAN_HLEN);

aka it moves the Ethernet header 4 bytes to the right, and pulls 4 bytes
from the skb headroom (effectively also moving skb->data, by definition).
So for felix_rxtstamp()'s fragile logic, all bets are off now.
Instead of having the "extraction" pointer point to the DSA header,
it actually points to 4 bytes _inside_ the extraction header.
Corollary, the last 4 bytes of the "extraction" header are in fact 4
stale bytes of the destination MAC address from the Ethernet header,
from prior to the __skb_vlan_pop() movement.

So of course, RX timestamps are completely bogus when the system is
configured in this way.

The fix is actually very simple: just don't structure the code like that.
For better or worse, the DSA PTP timestamping API does not offer a
straightforward way for drivers to present their RX timestamps, but
other drivers (sja1105) have established a simple mechanism to carry
their RX timestamp from dsa_device_ops :: rcv() all the way to
dsa_switch_ops :: port_rxtstamp() and even later. That mechanism is to
simply save the partial timestamp to the skb->cb, and complete it later.

Question: why don't we simply populate the skb's struct
skb_shared_hwtstamps from ocelot_rcv(), and bother with this
complication of propagating the timestamp to felix_rxtstamp()?

Answer: dsa_switch_ops :: port_rxtstamp() answers the question whether
PTP packets need sleepable context to retrieve the full RX timestamp.
Currently felix_rxtstamp() answers "no, thanks" to that question, and
calls ocelot_ptp_gettime64() from softirq atomic context. This is
understandable, since Felix VSC9959 is a PCIe memory-mapped switch, so
hardware access does not require sleeping. But the felix driver is
preparing for the introduction of other switches where hardware access
is over a slow bus like SPI or MDIO:
https://lore.kernel.org/lkml/20210814025003.2449143-1-colin.foster@in-advantage.com/

So I would like to keep this code structure, so the rework needed when
that driver will need PTP support will be minimal (answer "yes, I need
deferred context for this skb's RX timestamp", then the partial
timestamp will still be found in the skb->cb.

Fixes: ea440cd2d9b2 ("net: dsa: tag_ocelot: use VLAN information from tagging header when available")
Reported-by: Po Liu <po.liu@nxp.com>
Cc: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c | 9 +++------
 include/linux/dsa/ocelot.h     | 1 +
 net/dsa/tag_ocelot.c           | 3 +++
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 83808e7dbdda..327cc4654806 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1370,12 +1370,12 @@ out:
 static bool felix_rxtstamp(struct dsa_switch *ds, int port,
 			   struct sk_buff *skb, unsigned int type)
 {
-	u8 *extraction = skb->data - ETH_HLEN - OCELOT_TAG_LEN;
+	u32 tstamp_lo = OCELOT_SKB_CB(skb)->tstamp_lo;
 	struct skb_shared_hwtstamps *shhwtstamps;
 	struct ocelot *ocelot = ds->priv;
-	u32 tstamp_lo, tstamp_hi;
 	struct timespec64 ts;
-	u64 tstamp, val;
+	u32 tstamp_hi;
+	u64 tstamp;
 
 	/* If the "no XTR IRQ" workaround is in use, tell DSA to defer this skb
 	 * for RX timestamping. Then free it, and poll for its copy through
@@ -1390,9 +1390,6 @@ static bool felix_rxtstamp(struct dsa_switch *ds, int port,
 	ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
 	tstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
 
-	ocelot_xfh_get_rew_val(extraction, &val);
-	tstamp_lo = (u32)val;
-
 	tstamp_hi = tstamp >> 32;
 	if ((tstamp & 0xffffffff) < tstamp_lo)
 		tstamp_hi--;
diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h
index d42010cf5468..7ee708ad7df2 100644
--- a/include/linux/dsa/ocelot.h
+++ b/include/linux/dsa/ocelot.h
@@ -12,6 +12,7 @@
 struct ocelot_skb_cb {
 	struct sk_buff *clone;
 	unsigned int ptp_class; /* valid only for clones */
+	u32 tstamp_lo;
 	u8 ptp_cmd;
 	u8 ts_id;
 };
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index cd60b94fc175..de1c849a0a70 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -101,6 +101,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	struct dsa_port *dp;
 	u8 *extraction;
 	u16 vlan_tpid;
+	u64 rew_val;
 
 	/* Revert skb->data by the amount consumed by the DSA master,
 	 * so it points to the beginning of the frame.
@@ -130,6 +131,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	ocelot_xfh_get_qos_class(extraction, &qos_class);
 	ocelot_xfh_get_tag_type(extraction, &tag_type);
 	ocelot_xfh_get_vlan_tci(extraction, &vlan_tci);
+	ocelot_xfh_get_rew_val(extraction, &rew_val);
 
 	skb->dev = dsa_master_find_slave(netdev, 0, src_port);
 	if (!skb->dev)
@@ -143,6 +145,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 
 	dsa_default_offload_fwd_mark(skb);
 	skb->priority = qos_class;
+	OCELOT_SKB_CB(skb)->tstamp_lo = rew_val;
 
 	/* Ocelot switches copy frames unmodified to the CPU. However, it is
 	 * possible for the user to request a VLAN modification through
-- 
cgit v1.2.3


From 6429e46304ac7820eebbea2bf5d73b90c18e0e06 Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Thu, 28 Oct 2021 10:47:21 +0100
Subject: libfs: Move shmem_exchange to simple_rename_exchange

Move shmem_exchange and make it available to other callers.

Suggested-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/bpf/20211028094724.59043-2-lmb@cloudflare.com
---
 fs/libfs.c         | 24 ++++++++++++++++++++++++
 include/linux/fs.h |  2 ++
 mm/shmem.c         | 24 +-----------------------
 3 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/libfs.c b/fs/libfs.c
index 51b4de3b3447..1cf144dc9ed2 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -448,6 +448,30 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL(simple_rmdir);
 
+int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
+			   struct inode *new_dir, struct dentry *new_dentry)
+{
+	bool old_is_dir = d_is_dir(old_dentry);
+	bool new_is_dir = d_is_dir(new_dentry);
+
+	if (old_dir != new_dir && old_is_dir != new_is_dir) {
+		if (old_is_dir) {
+			drop_nlink(old_dir);
+			inc_nlink(new_dir);
+		} else {
+			drop_nlink(new_dir);
+			inc_nlink(old_dir);
+		}
+	}
+	old_dir->i_ctime = old_dir->i_mtime =
+	new_dir->i_ctime = new_dir->i_mtime =
+	d_inode(old_dentry)->i_ctime =
+	d_inode(new_dentry)->i_ctime = current_time(old_dir);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(simple_rename_exchange);
+
 int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 		  struct dentry *old_dentry, struct inode *new_dir,
 		  struct dentry *new_dentry, unsigned int flags)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f3cfca5edc9a..bc4e97b82ddd 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3383,6 +3383,8 @@ extern int simple_open(struct inode *inode, struct file *file);
 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
+extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
+				  struct inode *new_dir, struct dentry *new_dentry);
 extern int simple_rename(struct user_namespace *, struct inode *,
 			 struct dentry *, struct inode *, struct dentry *,
 			 unsigned int);
diff --git a/mm/shmem.c b/mm/shmem.c
index 17e344e26e73..56616aabe0a1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2947,28 +2947,6 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
 	return shmem_unlink(dir, dentry);
 }
 
-static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
-{
-	bool old_is_dir = d_is_dir(old_dentry);
-	bool new_is_dir = d_is_dir(new_dentry);
-
-	if (old_dir != new_dir && old_is_dir != new_is_dir) {
-		if (old_is_dir) {
-			drop_nlink(old_dir);
-			inc_nlink(new_dir);
-		} else {
-			drop_nlink(new_dir);
-			inc_nlink(old_dir);
-		}
-	}
-	old_dir->i_ctime = old_dir->i_mtime =
-	new_dir->i_ctime = new_dir->i_mtime =
-	d_inode(old_dentry)->i_ctime =
-	d_inode(new_dentry)->i_ctime = current_time(old_dir);
-
-	return 0;
-}
-
 static int shmem_whiteout(struct user_namespace *mnt_userns,
 			  struct inode *old_dir, struct dentry *old_dentry)
 {
@@ -3014,7 +2992,7 @@ static int shmem_rename2(struct user_namespace *mnt_userns,
 		return -EINVAL;
 
 	if (flags & RENAME_EXCHANGE)
-		return shmem_exchange(old_dir, old_dentry, new_dir, new_dentry);
+		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
 
 	if (!simple_empty(new_dentry))
 		return -ENOTEMPTY;
-- 
cgit v1.2.3


From b18c1ad685d9a6af5189d4cc037a653270a07b4c Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 18 Oct 2021 15:17:25 +0300
Subject: i2c: Allow an ACPI driver to manage the device's power state during
 probe

Enable drivers to tell ACPI that there's no need to power on a device for
probe. Drivers should still perform this by themselves if there's a need
to. In some cases powering on the device during probe is undesirable, and
this change enables a driver to choose what fits best for it.

Add a field called "flags" into struct i2c_driver for driver flags, and a
flag I2C_DRV_ACPI_WAIVE_D0_PROBE to tell a driver supports probe in ACPI D
states other than 0.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Acked-by: Wolfram Sang <wsa@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/i2c/i2c-core-acpi.c | 10 ++++++++++
 drivers/i2c/i2c-core-base.c |  7 ++++---
 include/linux/i2c.h         | 18 ++++++++++++++++++
 3 files changed, 32 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 546cc935e035..c025919cdaae 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -526,6 +526,16 @@ struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
 }
 EXPORT_SYMBOL_GPL(i2c_acpi_new_device);
 
+bool i2c_acpi_waive_d0_probe(struct device *dev)
+{
+	struct i2c_driver *driver = to_i2c_driver(dev->driver);
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	return driver->flags & I2C_DRV_ACPI_WAIVE_D0_PROBE &&
+		adev && adev->power.state_for_enumeration >= adev->power.state;
+}
+EXPORT_SYMBOL_GPL(i2c_acpi_waive_d0_probe);
+
 #ifdef CONFIG_ACPI_I2C_OPREGION
 static int acpi_gsb_i2c_read_bytes(struct i2c_client *client,
 		u8 cmd, u8 *data, u8 data_len)
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 54964fbe3f03..f193f9058584 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -551,7 +551,8 @@ static int i2c_device_probe(struct device *dev)
 	if (status < 0)
 		goto err_clear_wakeup_irq;
 
-	status = dev_pm_domain_attach(&client->dev, true);
+	status = dev_pm_domain_attach(&client->dev,
+				      !i2c_acpi_waive_d0_probe(dev));
 	if (status)
 		goto err_clear_wakeup_irq;
 
@@ -590,7 +591,7 @@ static int i2c_device_probe(struct device *dev)
 err_release_driver_resources:
 	devres_release_group(&client->dev, client->devres_group_id);
 err_detach_pm_domain:
-	dev_pm_domain_detach(&client->dev, true);
+	dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev));
 err_clear_wakeup_irq:
 	dev_pm_clear_wake_irq(&client->dev);
 	device_init_wakeup(&client->dev, false);
@@ -621,7 +622,7 @@ static void i2c_device_remove(struct device *dev)
 
 	devres_release_group(&client->dev, client->devres_group_id);
 
-	dev_pm_domain_detach(&client->dev, true);
+	dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev));
 	if (!pm_runtime_status_suspended(&client->dev) && adap->bus_regulator)
 		regulator_disable(adap->bus_regulator);
 
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 2ce3efbe9198..16119ac1aa97 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -11,6 +11,7 @@
 #define _LINUX_I2C_H
 
 #include <linux/acpi.h>		/* for acpi_handle */
+#include <linux/bits.h>
 #include <linux/mod_devicetable.h>
 #include <linux/device.h>	/* for struct device */
 #include <linux/sched.h>	/* for completion */
@@ -222,6 +223,15 @@ enum i2c_alert_protocol {
 	I2C_PROTOCOL_SMBUS_HOST_NOTIFY,
 };
 
+/**
+ * enum i2c_driver_flags - Flags for an I2C device driver
+ *
+ * @I2C_DRV_ACPI_WAIVE_D0_PROBE: Don't put the device in D0 state for probe
+ */
+enum i2c_driver_flags {
+	I2C_DRV_ACPI_WAIVE_D0_PROBE = BIT(0),
+};
+
 /**
  * struct i2c_driver - represent an I2C device driver
  * @class: What kind of i2c device we instantiate (for detect)
@@ -236,6 +246,7 @@ enum i2c_alert_protocol {
  * @detect: Callback for device detection
  * @address_list: The I2C addresses to probe (for detect)
  * @clients: List of detected clients we created (for i2c-core use only)
+ * @flags: A bitmask of flags defined in &enum i2c_driver_flags
  *
  * The driver.owner field should be set to the module owner of this driver.
  * The driver.name field should be set to the name of this driver.
@@ -294,6 +305,8 @@ struct i2c_driver {
 	int (*detect)(struct i2c_client *client, struct i2c_board_info *info);
 	const unsigned short *address_list;
 	struct list_head clients;
+
+	u32 flags;
 };
 #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver)
 
@@ -1015,6 +1028,7 @@ u32 i2c_acpi_find_bus_speed(struct device *dev);
 struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
 				       struct i2c_board_info *info);
 struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle);
+bool i2c_acpi_waive_d0_probe(struct device *dev);
 #else
 static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
 					     struct acpi_resource_i2c_serialbus **i2c)
@@ -1038,6 +1052,10 @@ static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle ha
 {
 	return NULL;
 }
+static inline bool i2c_acpi_waive_d0_probe(struct device *dev)
+{
+	return false;
+}
 #endif /* CONFIG_ACPI */
 
 #endif /* _LINUX_I2C_H */
-- 
cgit v1.2.3


From b82a7df4a7f3841896aaec1ad81e654bc87b5989 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 18 Oct 2021 15:17:27 +0300
Subject: ACPI: Add a convenience function to tell a device is in D0 state

Add a convenience function to tell whether a device is in D0 state,
primarily for use in drivers' probe or remove functions on busses where
the custom is to power on the device for the duration of both.

Returns false on non-ACPI systems.

Suggested-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Tomasz Figa <tfiga@chromium.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/device_pm.c | 26 ++++++++++++++++++++++++++
 include/linux/acpi.h     |  5 +++++
 2 files changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 0028b6b51c87..19b33c028f35 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1400,4 +1400,30 @@ bool acpi_storage_d3(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(acpi_storage_d3);
 
+/**
+ * acpi_dev_state_d0 - Tell if the device is in D0 power state
+ * @dev: Physical device the ACPI power state of which to check
+ *
+ * On a system without ACPI, return true. On a system with ACPI, return true if
+ * the current ACPI power state of the device is D0, or false otherwise.
+ *
+ * Note that the power state of a device is not well-defined after it has been
+ * passed to acpi_device_set_power() and before that function returns, so it is
+ * not valid to ask for the ACPI power state of the device in that time frame.
+ *
+ * This function is intended to be used in a driver's probe or remove
+ * function. See Documentation/firmware-guide/acpi/low-power-probe.rst for
+ * more information.
+ */
+bool acpi_dev_state_d0(struct device *dev)
+{
+	struct acpi_device *adev = ACPI_COMPANION(dev);
+
+	if (!adev)
+		return true;
+
+	return adev->power.state == ACPI_STATE_D0;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_state_d0);
+
 #endif /* CONFIG_PM */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index fbc2146050a4..df70fc27afd7 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1016,6 +1016,7 @@ int acpi_subsys_runtime_suspend(struct device *dev);
 int acpi_subsys_runtime_resume(struct device *dev);
 int acpi_dev_pm_attach(struct device *dev, bool power_on);
 bool acpi_storage_d3(struct device *dev);
+bool acpi_dev_state_d0(struct device *dev);
 #else
 static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; }
 static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; }
@@ -1027,6 +1028,10 @@ static inline bool acpi_storage_d3(struct device *dev)
 {
 	return false;
 }
+static inline bool acpi_dev_state_d0(struct device *dev)
+{
+	return true;
+}
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP)
-- 
cgit v1.2.3


From 5c4e0a21fae877a7ef89be6dcc6263ec672372b8 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 2 Nov 2021 07:24:20 -0700
Subject: string: uninline memcpy_and_pad

When building m68k:allmodconfig, recent versions of gcc generate the
following error if the length of UTS_RELEASE is less than 8 bytes.

  In function 'memcpy_and_pad',
    inlined from 'nvmet_execute_disc_identify' at
      drivers/nvme/target/discovery.c:268:2: arch/m68k/include/asm/string.h:72:25: error:
	'__builtin_memcpy' reading 8 bytes from a region of size 7

Discussions around the problem suggest that this only happens if an
architecture does not provide strlen(), if -ffreestanding is provided as
compiler option, and if CONFIG_FORTIFY_SOURCE=n. All of this is the case
for m68k. The exact reasons are unknown, but seem to be related to the
ability of the compiler to evaluate the return value of strlen() and
the resulting execution flow in memcpy_and_pad(). It would be possible
to work around the problem by using sizeof(UTS_RELEASE) instead of
strlen(UTS_RELEASE), but that would only postpone the problem until the
function is called in a similar way. Uninline memcpy_and_pad() instead
to solve the problem for good.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h | 19 ++-----------------
 lib/string_helpers.c   | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 5a36608144a9..b6572aeca2f5 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -253,23 +253,8 @@ static inline const char *kbasename(const char *path)
 #include <linux/fortify-string.h>
 #endif
 
-/**
- * memcpy_and_pad - Copy one buffer to another with padding
- * @dest: Where to copy to
- * @dest_len: The destination buffer size
- * @src: Where to copy from
- * @count: The number of bytes to copy
- * @pad: Character to use for padding if space is left in destination.
- */
-static inline void memcpy_and_pad(void *dest, size_t dest_len,
-				  const void *src, size_t count, int pad)
-{
-	if (dest_len > count) {
-		memcpy(dest, src, count);
-		memset(dest + count, pad,  dest_len - count);
-	} else
-		memcpy(dest, src, dest_len);
-}
+void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
+		    int pad);
 
 /**
  * memset_after - Set a value after a struct member to the end of a struct
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index faa9d8e4e2c5..d5d008f5b1d9 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -883,6 +883,26 @@ char *strreplace(char *s, char old, char new)
 }
 EXPORT_SYMBOL(strreplace);
 
+/**
+ * memcpy_and_pad - Copy one buffer to another with padding
+ * @dest: Where to copy to
+ * @dest_len: The destination buffer size
+ * @src: Where to copy from
+ * @count: The number of bytes to copy
+ * @pad: Character to use for padding if space is left in destination.
+ */
+void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count,
+		    int pad)
+{
+	if (dest_len > count) {
+		memcpy(dest, src, count);
+		memset(dest + count, pad,  dest_len - count);
+	} else {
+		memcpy(dest, src, dest_len);
+	}
+}
+EXPORT_SYMBOL(memcpy_and_pad);
+
 #ifdef CONFIG_FORTIFY_SOURCE
 void fortify_panic(const char *name)
 {
-- 
cgit v1.2.3


From 00b06da29cf9dc633cdba87acd3f57f4df3fd5c7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 29 Oct 2021 09:14:19 -0500
Subject: signal: Add SA_IMMUTABLE to ensure forced siganls do not get changed

As Andy pointed out that there are races between
force_sig_info_to_task and sigaction[1] when force_sig_info_task.  As
Kees discovered[2] ptrace is also able to change these signals.

In the case of seeccomp killing a process with a signal it is a
security violation to allow the signal to be caught or manipulated.

Solve this problem by introducing a new flag SA_IMMUTABLE that
prevents sigaction and ptrace from modifying these forced signals.
This flag is carefully made kernel internal so that no new ABI is
introduced.

Longer term I think this can be solved by guaranteeing short circuit
delivery of signals in this case.  Unfortunately reliable and
guaranteed short circuit delivery of these signals is still a ways off
from being implemented, tested, and merged.  So I have implemented a much
simpler alternative for now.

[1] https://lkml.kernel.org/r/b5d52d25-7bde-4030-a7b1-7c6f8ab90660@www.fastmail.com
[2] https://lkml.kernel.org/r/202110281136.5CE65399A7@keescook
Cc: stable@vger.kernel.org
Fixes: 307d522f5eb8 ("signal/seccomp: Refactor seccomp signal and coredump generation")
Tested-by: Andrea Righi <andrea.righi@canonical.com>
Tested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal_types.h           | 3 +++
 include/uapi/asm-generic/signal-defs.h | 1 +
 kernel/signal.c                        | 8 +++++++-
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 34cb28b8f16c..a70b2bdbf4d9 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -70,6 +70,9 @@ struct ksignal {
 	int sig;
 };
 
+/* Used to kill the race between sigaction and forced signals */
+#define SA_IMMUTABLE		0x00800000
+
 #ifndef __ARCH_UAPI_SA_FLAGS
 #ifdef SA_RESTORER
 #define __ARCH_UAPI_SA_FLAGS	SA_RESTORER
diff --git a/include/uapi/asm-generic/signal-defs.h b/include/uapi/asm-generic/signal-defs.h
index fe929e7b77ca..7572f2f46ee8 100644
--- a/include/uapi/asm-generic/signal-defs.h
+++ b/include/uapi/asm-generic/signal-defs.h
@@ -45,6 +45,7 @@
 #define SA_UNSUPPORTED	0x00000400
 #define SA_EXPOSE_TAGBITS	0x00000800
 /* 0x00010000 used on mips */
+/* 0x00800000 used for internal SA_IMMUTABLE */
 /* 0x01000000 used on x86 */
 /* 0x02000000 used on x86 */
 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index 6a5e1802b9a2..056a107e3cbc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1336,6 +1336,7 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool
 	blocked = sigismember(&t->blocked, sig);
 	if (blocked || ignored || sigdfl) {
 		action->sa.sa_handler = SIG_DFL;
+		action->sa.sa_flags |= SA_IMMUTABLE;
 		if (blocked) {
 			sigdelset(&t->blocked, sig);
 			recalc_sigpending_and_wake(t);
@@ -2760,7 +2761,8 @@ relock:
 		if (!signr)
 			break; /* will return 0 */
 
-		if (unlikely(current->ptrace) && signr != SIGKILL) {
+		if (unlikely(current->ptrace) && (signr != SIGKILL) &&
+		    !(sighand->action[signr -1].sa.sa_flags & SA_IMMUTABLE)) {
 			signr = ptrace_signal(signr, &ksig->info);
 			if (!signr)
 				continue;
@@ -4110,6 +4112,10 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 	k = &p->sighand->action[sig-1];
 
 	spin_lock_irq(&p->sighand->siglock);
+	if (k->sa.sa_flags & SA_IMMUTABLE) {
+		spin_unlock_irq(&p->sighand->siglock);
+		return -EINVAL;
+	}
 	if (oact)
 		*oact = *k;
 
-- 
cgit v1.2.3


From 2116274af46b12df7cea1dc5698f3cf2f231f8a9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 4 Nov 2021 18:20:37 +0100
Subject: block: add a loff_t cast to bdev_nr_bytes

Not really needed as both loff_t and sector_t are always 64-bits wide,
but this documents the different types a bit better.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20211104172037.531803-1-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/genhd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 900325aa5d31..1dce769c4596 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -243,7 +243,7 @@ static inline sector_t bdev_nr_sectors(struct block_device *bdev)
 
 static inline loff_t bdev_nr_bytes(struct block_device *bdev)
 {
-	return bdev_nr_sectors(bdev) << SECTOR_SHIFT;
+	return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT;
 }
 
 static inline sector_t get_capacity(struct gendisk *disk)
-- 
cgit v1.2.3


From 0ab8d0f6ae3f1234e38eaedc3ff7c22bfdf7f78c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Wed, 29 Sep 2021 17:38:34 +0100
Subject: irqdomain: Make of_phandle_args_to_fwspec() generally available

of_phandle_args_to_fwspec() can be generally useful to code extracting a DT
of_phandle and using an irq_fwspec to use the hierarchical irqdomain API.

Make it visible to the rest of the kernel, including modules.

Link: https://lore.kernel.org/r/20210929163847.2807812-2-maz@kernel.org
Tested-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/irqdomain.h | 4 ++++
 kernel/irq/irqdomain.c    | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index 23e4ee523576..cfd442316f39 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -64,6 +64,10 @@ struct irq_fwspec {
 	u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS];
 };
 
+/* Conversion function from of_phandle_args fields to fwspec  */
+void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
+			       unsigned int count, struct irq_fwspec *fwspec);
+
 /*
  * Should several domains have the same device node, but serve
  * different purposes (for example one domain is for PCI/MSI, and the
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 19e83e9b723c..5a698c1f6cc6 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -744,9 +744,8 @@ static int irq_domain_translate(struct irq_domain *d,
 	return 0;
 }
 
-static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
-				      unsigned int count,
-				      struct irq_fwspec *fwspec)
+void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
+			       unsigned int count, struct irq_fwspec *fwspec)
 {
 	int i;
 
@@ -756,6 +755,7 @@ static void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args,
 	for (i = 0; i < count; i++)
 		fwspec->param[i] = args[i];
 }
+EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec);
 
 unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
 {
-- 
cgit v1.2.3


From 439b08c57c3fe1df85cfe9d00accdf9b62cb3275 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 3 Nov 2021 16:51:36 +0100
Subject: Revert "usb: core: hcd: Add support for deferring roothub
 registration"

This reverts commit 58877b0824da15698bd85a0a9dbfa8c354e6ecb7.

It has been reported to be causing problems in Arch and Fedora bug
reports.

Reported-by: Hans de Goede <hdegoede@redhat.com>
Link: https://bbs.archlinux.org/viewtopic.php?pid=2000956#p2000956
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2019542
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2019576
Link: https://lore.kernel.org/r/42bcbea6-5eb8-16c7-336a-2cb72e71bc36@redhat.com
Cc: Mathias Nyman <mathias.nyman@linux.intel.com>
Cc: Chris Chiu <chris.chiu@canonical.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c  | 29 ++++++-----------------------
 include/linux/usb/hcd.h |  2 --
 2 files changed, 6 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index a3311e937847..4d326ee12c36 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -2795,7 +2795,6 @@ int usb_add_hcd(struct usb_hcd *hcd,
 {
 	int retval;
 	struct usb_device *rhdev;
-	struct usb_hcd *shared_hcd;
 
 	if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) {
 		hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev);
@@ -2956,26 +2955,13 @@ int usb_add_hcd(struct usb_hcd *hcd,
 		goto err_hcd_driver_start;
 	}
 
-	/* starting here, usbcore will pay attention to the shared HCD roothub */
-	shared_hcd = hcd->shared_hcd;
-	if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) {
-		retval = register_root_hub(shared_hcd);
-		if (retval != 0)
-			goto err_register_root_hub;
-
-		if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd))
-			usb_hcd_poll_rh_status(shared_hcd);
-	}
-
 	/* starting here, usbcore will pay attention to this root hub */
-	if (!HCD_DEFER_RH_REGISTER(hcd)) {
-		retval = register_root_hub(hcd);
-		if (retval != 0)
-			goto err_register_root_hub;
+	retval = register_root_hub(hcd);
+	if (retval != 0)
+		goto err_register_root_hub;
 
-		if (hcd->uses_new_polling && HCD_POLL_RH(hcd))
-			usb_hcd_poll_rh_status(hcd);
-	}
+	if (hcd->uses_new_polling && HCD_POLL_RH(hcd))
+		usb_hcd_poll_rh_status(hcd);
 
 	return retval;
 
@@ -3013,7 +2999,6 @@ EXPORT_SYMBOL_GPL(usb_add_hcd);
 void usb_remove_hcd(struct usb_hcd *hcd)
 {
 	struct usb_device *rhdev = hcd->self.root_hub;
-	bool rh_registered;
 
 	dev_info(hcd->self.controller, "remove, state %x\n", hcd->state);
 
@@ -3024,7 +3009,6 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 
 	dev_dbg(hcd->self.controller, "roothub graceful disconnect\n");
 	spin_lock_irq (&hcd_root_hub_lock);
-	rh_registered = hcd->rh_registered;
 	hcd->rh_registered = 0;
 	spin_unlock_irq (&hcd_root_hub_lock);
 
@@ -3034,8 +3018,7 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 	cancel_work_sync(&hcd->died_work);
 
 	mutex_lock(&usb_bus_idr_lock);
-	if (rh_registered)
-		usb_disconnect(&rhdev);		/* Sets rhdev to NULL */
+	usb_disconnect(&rhdev);		/* Sets rhdev to NULL */
 	mutex_unlock(&usb_bus_idr_lock);
 
 	/*
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 2c1fc9212cf2..548a028f2dab 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -124,7 +124,6 @@ struct usb_hcd {
 #define HCD_FLAG_RH_RUNNING		5	/* root hub is running? */
 #define HCD_FLAG_DEAD			6	/* controller has died? */
 #define HCD_FLAG_INTF_AUTHORIZED	7	/* authorize interfaces? */
-#define HCD_FLAG_DEFER_RH_REGISTER	8	/* Defer roothub registration */
 
 	/* The flags can be tested using these macros; they are likely to
 	 * be slightly faster than test_bit().
@@ -135,7 +134,6 @@ struct usb_hcd {
 #define HCD_WAKEUP_PENDING(hcd)	((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING))
 #define HCD_RH_RUNNING(hcd)	((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING))
 #define HCD_DEAD(hcd)		((hcd)->flags & (1U << HCD_FLAG_DEAD))
-#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER))
 
 	/*
 	 * Specifies if interfaces are authorized by default
-- 
cgit v1.2.3


From 27d9a4d69433af1827a764fe235866d5d5501fdb Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 9 Sep 2021 11:48:48 +0200
Subject: pwm: Add might_sleep() annotations for !CONFIG_PWM API functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The normal implementations of these functions make use of mutexes. To
make it obvious that these functions might sleep also add annotations to
the dummy implementations in the !CONFIG_PWM case.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 include/linux/pwm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 725c9b784e60..515e33978e97 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -429,11 +429,13 @@ struct pwm_device *devm_fwnode_pwm_get(struct device *dev,
 #else
 static inline struct pwm_device *pwm_request(int pwm_id, const char *label)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
 static inline void pwm_free(struct pwm_device *pwm)
 {
+	might_sleep();
 }
 
 static inline int pwm_apply_state(struct pwm_device *pwm,
@@ -493,12 +495,14 @@ static inline struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
 						       unsigned int index,
 						       const char *label)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
 static inline struct pwm_device *pwm_get(struct device *dev,
 					 const char *consumer)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
@@ -506,16 +510,19 @@ static inline struct pwm_device *of_pwm_get(struct device *dev,
 					    struct device_node *np,
 					    const char *con_id)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
 static inline void pwm_put(struct pwm_device *pwm)
 {
+	might_sleep();
 }
 
 static inline struct pwm_device *devm_pwm_get(struct device *dev,
 					      const char *consumer)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
@@ -523,6 +530,7 @@ static inline struct pwm_device *devm_of_pwm_get(struct device *dev,
 						 struct device_node *np,
 						 const char *con_id)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 
@@ -530,6 +538,7 @@ static inline struct pwm_device *
 devm_fwnode_pwm_get(struct device *dev, struct fwnode_handle *fwnode,
 		    const char *con_id)
 {
+	might_sleep();
 	return ERR_PTR(-ENODEV);
 }
 #endif
-- 
cgit v1.2.3


From 4ad91a227817ae48f931595d1101fc7100073ce9 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Thu, 9 Sep 2021 11:48:49 +0200
Subject: pwm: Make it explicit that pwm_apply_state() might sleep
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

At least some implementations sleep. So mark pwm_apply_state() with a
might_sleep() to make callers aware. In the worst case this uncovers a
valid atomic user, then we revert this patch and at least gained some more
knowledge and then can work on a concept similar to
gpio_get_value/gpio_get_value_cansleep.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Thierry Reding <thierry.reding@gmail.com>
---
 drivers/pwm/core.c  | 9 +++++++++
 include/linux/pwm.h | 4 ++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index 4527f09a5c50..fb04a439462c 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -532,6 +532,15 @@ int pwm_apply_state(struct pwm_device *pwm, const struct pwm_state *state)
 	struct pwm_chip *chip;
 	int err;
 
+	/*
+	 * Some lowlevel driver's implementations of .apply() make use of
+	 * mutexes, also with some drivers only returning when the new
+	 * configuration is active calling pwm_apply_state() from atomic context
+	 * is a bad idea. So make it explicit that calling this function might
+	 * sleep.
+	 */
+	might_sleep();
+
 	if (!pwm || !state || !state->period ||
 	    state->duty_cycle > state->period)
 		return -EINVAL;
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index 515e33978e97..e6dac95e4960 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -441,6 +441,7 @@ static inline void pwm_free(struct pwm_device *pwm)
 static inline int pwm_apply_state(struct pwm_device *pwm,
 				  const struct pwm_state *state)
 {
+	might_sleep();
 	return -ENOTSUPP;
 }
 
@@ -452,6 +453,7 @@ static inline int pwm_adjust_config(struct pwm_device *pwm)
 static inline int pwm_config(struct pwm_device *pwm, int duty_ns,
 			     int period_ns)
 {
+	might_sleep();
 	return -EINVAL;
 }
 
@@ -464,11 +466,13 @@ static inline int pwm_capture(struct pwm_device *pwm,
 
 static inline int pwm_enable(struct pwm_device *pwm)
 {
+	might_sleep();
 	return -EINVAL;
 }
 
 static inline void pwm_disable(struct pwm_device *pwm)
 {
+	might_sleep();
 }
 
 static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
-- 
cgit v1.2.3


From c9a20383578abd8f7fb8ba88f4c6d25b47924c34 Mon Sep 17 00:00:00 2001
From: Carlos de Paula <me@carlosedp.com>
Date: Mon, 30 Aug 2021 16:53:45 -0300
Subject: mfd: da9063: Add support for latest EA silicon revision

This update adds new regmap to support the latest EA silicon
which will be selected based on the chip and variant
information read from the device.

Signed-off-by: Carlos de Paula <me@carlosedp.com>
Reviewed-by: Adam Thomson <Adam.Thomson.Opensource@diasemi.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/da9063-i2c.c        | 2 ++
 include/linux/mfd/da9063/core.h | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mfd/da9063-i2c.c b/drivers/mfd/da9063-i2c.c
index 4b7f707b7952..343ed6e96d87 100644
--- a/drivers/mfd/da9063-i2c.c
+++ b/drivers/mfd/da9063-i2c.c
@@ -391,6 +391,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 				&da9063_bb_da_volatile_table;
 			break;
 		case PMIC_DA9063_DA:
+		case PMIC_DA9063_EA:
 			da9063_regmap_config.rd_table =
 				&da9063_da_readable_table;
 			da9063_regmap_config.wr_table =
@@ -416,6 +417,7 @@ static int da9063_i2c_probe(struct i2c_client *i2c,
 				&da9063l_bb_da_volatile_table;
 			break;
 		case PMIC_DA9063_DA:
+		case PMIC_DA9063_EA:
 			da9063_regmap_config.rd_table =
 				&da9063l_da_readable_table;
 			da9063_regmap_config.wr_table =
diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h
index fa7a43f02f27..8db52324f416 100644
--- a/include/linux/mfd/da9063/core.h
+++ b/include/linux/mfd/da9063/core.h
@@ -36,6 +36,7 @@ enum da9063_variant_codes {
 	PMIC_DA9063_BB = 0x5,
 	PMIC_DA9063_CA = 0x6,
 	PMIC_DA9063_DA = 0x7,
+	PMIC_DA9063_EA = 0x8,
 };
 
 /* Interrupts */
-- 
cgit v1.2.3


From ec14d90dee8ec6960324ae9f1116103efcde8a52 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Date: Tue, 12 Oct 2021 17:39:35 +0200
Subject: mfd: tps65912: Make tps65912_device_exit() return void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Up to now tps65912_device_exit() returns zero unconditionally. Make it
return void instead which makes it easier to see in the callers that
there is no error to handle.

Also the return value of i2c and spi remove callbacks is ignored anyway.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211012153945.2651412-11-u.kleine-koenig@pengutronix.de
---
 drivers/mfd/tps65912-core.c  | 4 +---
 drivers/mfd/tps65912-i2c.c   | 4 +++-
 drivers/mfd/tps65912-spi.c   | 4 +++-
 include/linux/mfd/tps65912.h | 2 +-
 4 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/tps65912-core.c b/drivers/mfd/tps65912-core.c
index b55b1d5d6955..c282a05e7146 100644
--- a/drivers/mfd/tps65912-core.c
+++ b/drivers/mfd/tps65912-core.c
@@ -115,11 +115,9 @@ int tps65912_device_init(struct tps65912 *tps)
 }
 EXPORT_SYMBOL_GPL(tps65912_device_init);
 
-int tps65912_device_exit(struct tps65912 *tps)
+void tps65912_device_exit(struct tps65912 *tps)
 {
 	regmap_del_irq_chip(tps->irq, tps->irq_data);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(tps65912_device_exit);
 
diff --git a/drivers/mfd/tps65912-i2c.c b/drivers/mfd/tps65912-i2c.c
index f7c22ea7b36c..06eb2784d322 100644
--- a/drivers/mfd/tps65912-i2c.c
+++ b/drivers/mfd/tps65912-i2c.c
@@ -55,7 +55,9 @@ static int tps65912_i2c_remove(struct i2c_client *client)
 {
 	struct tps65912 *tps = i2c_get_clientdata(client);
 
-	return tps65912_device_exit(tps);
+	tps65912_device_exit(tps);
+
+	return 0;
 }
 
 static const struct i2c_device_id tps65912_i2c_id_table[] = {
diff --git a/drivers/mfd/tps65912-spi.c b/drivers/mfd/tps65912-spi.c
index 21a8d6ac5c4a..d701926aa46e 100644
--- a/drivers/mfd/tps65912-spi.c
+++ b/drivers/mfd/tps65912-spi.c
@@ -54,7 +54,9 @@ static int tps65912_spi_remove(struct spi_device *spi)
 {
 	struct tps65912 *tps = spi_get_drvdata(spi);
 
-	return tps65912_device_exit(tps);
+	tps65912_device_exit(tps);
+
+	return 0;
 }
 
 static const struct spi_device_id tps65912_spi_id_table[] = {
diff --git a/include/linux/mfd/tps65912.h b/include/linux/mfd/tps65912.h
index 7943e413deae..8a61386cb8c1 100644
--- a/include/linux/mfd/tps65912.h
+++ b/include/linux/mfd/tps65912.h
@@ -322,6 +322,6 @@ struct tps65912 {
 extern const struct regmap_config tps65912_regmap_config;
 
 int tps65912_device_init(struct tps65912 *tps);
-int tps65912_device_exit(struct tps65912 *tps);
+void tps65912_device_exit(struct tps65912 *tps);
 
 #endif /*  __LINUX_MFD_TPS65912_H */
-- 
cgit v1.2.3


From 0cee0416563d7cac807c8f092941f3e37ede05db Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca@lucaceresoli.net>
Date: Tue, 19 Oct 2021 16:59:11 +0200
Subject: mfd: max77686: Correct tab-based alignment of register addresses

Some lines have an extra tab, remove them for proper visual alignment as
present on the rest of this file.

Signed-off-by: Luca Ceresoli <luca@lucaceresoli.net>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@canonical.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211019145919.7327-2-luca@lucaceresoli.net
---
 include/linux/mfd/max77686-private.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h
index 833e578e051e..b1482b3cf353 100644
--- a/include/linux/mfd/max77686-private.h
+++ b/include/linux/mfd/max77686-private.h
@@ -133,35 +133,35 @@ enum max77686_pmic_reg {
 	/* Reserved: 0x7A-0x7D */
 
 	MAX77686_REG_BBAT_CHG		= 0x7E,
-	MAX77686_REG_32KHZ			= 0x7F,
+	MAX77686_REG_32KHZ		= 0x7F,
 
 	MAX77686_REG_PMIC_END		= 0x80,
 };
 
 enum max77686_rtc_reg {
-	MAX77686_RTC_INT			= 0x00,
-	MAX77686_RTC_INTM			= 0x01,
+	MAX77686_RTC_INT		= 0x00,
+	MAX77686_RTC_INTM		= 0x01,
 	MAX77686_RTC_CONTROLM		= 0x02,
 	MAX77686_RTC_CONTROL		= 0x03,
 	MAX77686_RTC_UPDATE0		= 0x04,
 	/* Reserved: 0x5 */
 	MAX77686_WTSR_SMPL_CNTL		= 0x06,
-	MAX77686_RTC_SEC			= 0x07,
-	MAX77686_RTC_MIN			= 0x08,
-	MAX77686_RTC_HOUR			= 0x09,
+	MAX77686_RTC_SEC		= 0x07,
+	MAX77686_RTC_MIN		= 0x08,
+	MAX77686_RTC_HOUR		= 0x09,
 	MAX77686_RTC_WEEKDAY		= 0x0A,
-	MAX77686_RTC_MONTH			= 0x0B,
-	MAX77686_RTC_YEAR			= 0x0C,
-	MAX77686_RTC_DATE			= 0x0D,
-	MAX77686_ALARM1_SEC			= 0x0E,
-	MAX77686_ALARM1_MIN			= 0x0F,
+	MAX77686_RTC_MONTH		= 0x0B,
+	MAX77686_RTC_YEAR		= 0x0C,
+	MAX77686_RTC_DATE		= 0x0D,
+	MAX77686_ALARM1_SEC		= 0x0E,
+	MAX77686_ALARM1_MIN		= 0x0F,
 	MAX77686_ALARM1_HOUR		= 0x10,
 	MAX77686_ALARM1_WEEKDAY		= 0x11,
 	MAX77686_ALARM1_MONTH		= 0x12,
 	MAX77686_ALARM1_YEAR		= 0x13,
 	MAX77686_ALARM1_DATE		= 0x14,
-	MAX77686_ALARM2_SEC			= 0x15,
-	MAX77686_ALARM2_MIN			= 0x16,
+	MAX77686_ALARM2_SEC		= 0x15,
+	MAX77686_ALARM2_MIN		= 0x16,
 	MAX77686_ALARM2_HOUR		= 0x17,
 	MAX77686_ALARM2_WEEKDAY		= 0x18,
 	MAX77686_ALARM2_MONTH		= 0x19,
-- 
cgit v1.2.3


From b20cd02f7fef68ae395d9df0a9fb9edcf414b5a2 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Thu, 21 Oct 2021 22:22:58 +0300
Subject: mfd: tps80031: Remove driver

Driver was upstreamed in 2013 and never got a user, remove it.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Link: https://lore.kernel.org/r/20211021192258.21968-4-digetx@gmail.com
---
 drivers/mfd/Kconfig          |  14 -
 drivers/mfd/Makefile         |   1 -
 drivers/mfd/tps80031.c       | 526 -----------------------------------
 include/linux/mfd/tps80031.h | 637 -------------------------------------------
 4 files changed, 1178 deletions(-)
 delete mode 100644 drivers/mfd/tps80031.c
 delete mode 100644 include/linux/mfd/tps80031.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f99334fa0ae6..bc75156dd96f 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1624,20 +1624,6 @@ config MFD_TPS65912_SPI
 	  If you say yes here you get support for the TPS65912 series of
 	  PM chips with SPI interface.
 
-config MFD_TPS80031
-	bool "TI TPS80031/TPS80032 Power Management chips"
-	depends on I2C=y
-	select MFD_CORE
-	select REGMAP_I2C
-	select REGMAP_IRQ
-	help
-	  If you say yes here you get support for the Texas Instruments
-	  TPS80031/ TPS80032 Fully Integrated Power Management with Power
-	  Path and Battery Charger. The device provides five configurable
-	  step-down converters, 11 general purpose LDOs, USB OTG Module,
-	  ADC, RTC, 2 PWM, System Voltage Regulator/Battery Charger with
-	  Power Path from USB, 32K clock generator.
-
 config TWL4030_CORE
 	bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 Support"
 	depends on I2C=y
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 2ba6646e874c..0b1b629aef3e 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -105,7 +105,6 @@ obj-$(CONFIG_MFD_TPS65910)	+= tps65910.o
 obj-$(CONFIG_MFD_TPS65912)	+= tps65912-core.o
 obj-$(CONFIG_MFD_TPS65912_I2C)	+= tps65912-i2c.o
 obj-$(CONFIG_MFD_TPS65912_SPI)  += tps65912-spi.o
-obj-$(CONFIG_MFD_TPS80031)	+= tps80031.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
 
 obj-$(CONFIG_TWL4030_CORE)	+= twl-core.o twl4030-irq.o twl6030-irq.o
diff --git a/drivers/mfd/tps80031.c b/drivers/mfd/tps80031.c
deleted file mode 100644
index 3c4e62c3406a..000000000000
--- a/drivers/mfd/tps80031.c
+++ /dev/null
@@ -1,526 +0,0 @@
-/*
- * tps80031.c -- TI TPS80031/TPS80032 mfd core driver.
- *
- * MFD core driver for TI TPS80031/TPS80032 Fully Integrated
- * Power Management with Power Path and Battery Charger
- *
- * Copyright (c) 2012, NVIDIA Corporation.
- *
- * Author: Laxman Dewangan <ldewangan@nvidia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
- * whether express or implied; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- * 02111-1307, USA
- */
-
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/tps80031.h>
-#include <linux/pm.h>
-#include <linux/regmap.h>
-#include <linux/slab.h>
-
-static const struct resource tps80031_rtc_resources[] = {
-	DEFINE_RES_IRQ(TPS80031_INT_RTC_ALARM),
-};
-
-/* TPS80031 sub mfd devices */
-static const struct mfd_cell tps80031_cell[] = {
-	{
-		.name = "tps80031-pmic",
-	},
-	{
-		.name = "tps80031-clock",
-	},
-	{
-		.name = "tps80031-rtc",
-		.num_resources = ARRAY_SIZE(tps80031_rtc_resources),
-		.resources = tps80031_rtc_resources,
-	},
-	{
-		.name = "tps80031-gpadc",
-	},
-	{
-		.name = "tps80031-fuel-gauge",
-	},
-	{
-		.name = "tps80031-charger",
-	},
-};
-
-static int tps80031_slave_address[TPS80031_NUM_SLAVES] = {
-	TPS80031_I2C_ID0_ADDR,
-	TPS80031_I2C_ID1_ADDR,
-	TPS80031_I2C_ID2_ADDR,
-	TPS80031_I2C_ID3_ADDR,
-};
-
-struct tps80031_pupd_data {
-	u8	reg;
-	u8	pullup_bit;
-	u8	pulldown_bit;
-};
-
-#define TPS80031_IRQ(_reg, _mask)			\
-	{							\
-		.reg_offset = (TPS80031_INT_MSK_LINE_##_reg) -	\
-				TPS80031_INT_MSK_LINE_A,	\
-		.mask = BIT(_mask),				\
-	}
-
-static const struct regmap_irq tps80031_main_irqs[] = {
-	[TPS80031_INT_PWRON]		= TPS80031_IRQ(A, 0),
-	[TPS80031_INT_RPWRON]		= TPS80031_IRQ(A, 1),
-	[TPS80031_INT_SYS_VLOW]		= TPS80031_IRQ(A, 2),
-	[TPS80031_INT_RTC_ALARM]	= TPS80031_IRQ(A, 3),
-	[TPS80031_INT_RTC_PERIOD]	= TPS80031_IRQ(A, 4),
-	[TPS80031_INT_HOT_DIE]		= TPS80031_IRQ(A, 5),
-	[TPS80031_INT_VXX_SHORT]	= TPS80031_IRQ(A, 6),
-	[TPS80031_INT_SPDURATION]	= TPS80031_IRQ(A, 7),
-	[TPS80031_INT_WATCHDOG]		= TPS80031_IRQ(B, 0),
-	[TPS80031_INT_BAT]		= TPS80031_IRQ(B, 1),
-	[TPS80031_INT_SIM]		= TPS80031_IRQ(B, 2),
-	[TPS80031_INT_MMC]		= TPS80031_IRQ(B, 3),
-	[TPS80031_INT_RES]		= TPS80031_IRQ(B, 4),
-	[TPS80031_INT_GPADC_RT]		= TPS80031_IRQ(B, 5),
-	[TPS80031_INT_GPADC_SW2_EOC]	= TPS80031_IRQ(B, 6),
-	[TPS80031_INT_CC_AUTOCAL]	= TPS80031_IRQ(B, 7),
-	[TPS80031_INT_ID_WKUP]		= TPS80031_IRQ(C, 0),
-	[TPS80031_INT_VBUSS_WKUP]	= TPS80031_IRQ(C, 1),
-	[TPS80031_INT_ID]		= TPS80031_IRQ(C, 2),
-	[TPS80031_INT_VBUS]		= TPS80031_IRQ(C, 3),
-	[TPS80031_INT_CHRG_CTRL]	= TPS80031_IRQ(C, 4),
-	[TPS80031_INT_EXT_CHRG]		= TPS80031_IRQ(C, 5),
-	[TPS80031_INT_INT_CHRG]		= TPS80031_IRQ(C, 6),
-	[TPS80031_INT_RES2]		= TPS80031_IRQ(C, 7),
-};
-
-static struct regmap_irq_chip tps80031_irq_chip = {
-	.name = "tps80031",
-	.irqs = tps80031_main_irqs,
-	.num_irqs = ARRAY_SIZE(tps80031_main_irqs),
-	.num_regs = 3,
-	.status_base = TPS80031_INT_STS_A,
-	.mask_base = TPS80031_INT_MSK_LINE_A,
-};
-
-#define PUPD_DATA(_reg, _pulldown_bit, _pullup_bit)	\
-	{						\
-		.reg = TPS80031_CFG_INPUT_PUPD##_reg,	\
-		.pulldown_bit = _pulldown_bit,		\
-		.pullup_bit = _pullup_bit,		\
-	}
-
-static const struct tps80031_pupd_data tps80031_pupds[] = {
-	[TPS80031_PREQ1]		= PUPD_DATA(1, BIT(0),	BIT(1)),
-	[TPS80031_PREQ2A]		= PUPD_DATA(1, BIT(2),	BIT(3)),
-	[TPS80031_PREQ2B]		= PUPD_DATA(1, BIT(4),	BIT(5)),
-	[TPS80031_PREQ2C]		= PUPD_DATA(1, BIT(6),	BIT(7)),
-	[TPS80031_PREQ3]		= PUPD_DATA(2, BIT(0),	BIT(1)),
-	[TPS80031_NRES_WARM]		= PUPD_DATA(2, 0,	BIT(2)),
-	[TPS80031_PWM_FORCE]		= PUPD_DATA(2, BIT(5),	0),
-	[TPS80031_CHRG_EXT_CHRG_STATZ]	= PUPD_DATA(2, 0,	BIT(6)),
-	[TPS80031_SIM]			= PUPD_DATA(3, BIT(0),	BIT(1)),
-	[TPS80031_MMC]			= PUPD_DATA(3, BIT(2),	BIT(3)),
-	[TPS80031_GPADC_START]		= PUPD_DATA(3, BIT(4),	0),
-	[TPS80031_DVSI2C_SCL]		= PUPD_DATA(4, 0,	BIT(0)),
-	[TPS80031_DVSI2C_SDA]		= PUPD_DATA(4, 0,	BIT(1)),
-	[TPS80031_CTLI2C_SCL]		= PUPD_DATA(4, 0,	BIT(2)),
-	[TPS80031_CTLI2C_SDA]		= PUPD_DATA(4, 0,	BIT(3)),
-};
-static struct tps80031 *tps80031_power_off_dev;
-
-int tps80031_ext_power_req_config(struct device *dev,
-		unsigned long ext_ctrl_flag, int preq_bit,
-		int state_reg_add, int trans_reg_add)
-{
-	u8 res_ass_reg = 0;
-	int preq_mask_bit = 0;
-	int ret;
-
-	if (!(ext_ctrl_flag & TPS80031_EXT_PWR_REQ))
-		return 0;
-
-	if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ1) {
-		res_ass_reg = TPS80031_PREQ1_RES_ASS_A + (preq_bit >> 3);
-		preq_mask_bit = 5;
-	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ2) {
-		res_ass_reg = TPS80031_PREQ2_RES_ASS_A + (preq_bit >> 3);
-		preq_mask_bit = 6;
-	} else if (ext_ctrl_flag & TPS80031_PWR_REQ_INPUT_PREQ3) {
-		res_ass_reg = TPS80031_PREQ3_RES_ASS_A + (preq_bit >> 3);
-		preq_mask_bit = 7;
-	}
-
-	/* Configure REQ_ASS registers */
-	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1, res_ass_reg,
-					BIT(preq_bit & 0x7));
-	if (ret < 0) {
-		dev_err(dev, "reg 0x%02x setbit failed, err = %d\n",
-				res_ass_reg, ret);
-		return ret;
-	}
-
-	/* Unmask the PREQ */
-	ret = tps80031_clr_bits(dev, TPS80031_SLAVE_ID1,
-			TPS80031_PHOENIX_MSK_TRANSITION, BIT(preq_mask_bit));
-	if (ret < 0) {
-		dev_err(dev, "reg 0x%02x clrbit failed, err = %d\n",
-			TPS80031_PHOENIX_MSK_TRANSITION, ret);
-		return ret;
-	}
-
-	/* Switch regulator control to resource now */
-	if (ext_ctrl_flag & (TPS80031_PWR_REQ_INPUT_PREQ2 |
-					TPS80031_PWR_REQ_INPUT_PREQ3)) {
-		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, state_reg_add,
-						0x0, TPS80031_STATE_MASK);
-		if (ret < 0)
-			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
-				state_reg_add, ret);
-	} else {
-		ret = tps80031_update(dev, TPS80031_SLAVE_ID1, trans_reg_add,
-				TPS80031_TRANS_SLEEP_OFF,
-				TPS80031_TRANS_SLEEP_MASK);
-		if (ret < 0)
-			dev_err(dev, "reg 0x%02x update failed, err = %d\n",
-				trans_reg_add, ret);
-	}
-	return ret;
-}
-EXPORT_SYMBOL_GPL(tps80031_ext_power_req_config);
-
-static void tps80031_power_off(void)
-{
-	dev_info(tps80031_power_off_dev->dev, "switching off PMU\n");
-	tps80031_write(tps80031_power_off_dev->dev, TPS80031_SLAVE_ID1,
-				TPS80031_PHOENIX_DEV_ON, TPS80031_DEVOFF);
-}
-
-static void tps80031_pupd_init(struct tps80031 *tps80031,
-			       struct tps80031_platform_data *pdata)
-{
-	struct tps80031_pupd_init_data *pupd_init_data = pdata->pupd_init_data;
-	int data_size = pdata->pupd_init_data_size;
-	int i;
-
-	for (i = 0; i < data_size; ++i) {
-		struct tps80031_pupd_init_data *pupd_init = &pupd_init_data[i];
-		const struct tps80031_pupd_data *pupd =
-			&tps80031_pupds[pupd_init->input_pin];
-		u8 update_value = 0;
-		u8 update_mask = pupd->pulldown_bit | pupd->pullup_bit;
-
-		if (pupd_init->setting == TPS80031_PUPD_PULLDOWN)
-			update_value = pupd->pulldown_bit;
-		else if (pupd_init->setting == TPS80031_PUPD_PULLUP)
-			update_value = pupd->pullup_bit;
-
-		tps80031_update(tps80031->dev, TPS80031_SLAVE_ID1, pupd->reg,
-				update_value, update_mask);
-	}
-}
-
-static int tps80031_init_ext_control(struct tps80031 *tps80031,
-			struct tps80031_platform_data *pdata)
-{
-	struct device *dev = tps80031->dev;
-	int ret;
-	int i;
-
-	/* Clear all external control for this rail */
-	for (i = 0; i < 9; ++i) {
-		ret = tps80031_write(dev, TPS80031_SLAVE_ID1,
-				TPS80031_PREQ1_RES_ASS_A + i, 0);
-		if (ret < 0) {
-			dev_err(dev, "reg 0x%02x write failed, err = %d\n",
-				TPS80031_PREQ1_RES_ASS_A + i, ret);
-			return ret;
-		}
-	}
-
-	/* Mask the PREQ */
-	ret = tps80031_set_bits(dev, TPS80031_SLAVE_ID1,
-			TPS80031_PHOENIX_MSK_TRANSITION, 0x7 << 5);
-	if (ret < 0) {
-		dev_err(dev, "reg 0x%02x set_bits failed, err = %d\n",
-			TPS80031_PHOENIX_MSK_TRANSITION, ret);
-		return ret;
-	}
-	return ret;
-}
-
-static int tps80031_irq_init(struct tps80031 *tps80031, int irq, int irq_base)
-{
-	struct device *dev = tps80031->dev;
-	int i, ret;
-
-	/*
-	 * The MASK register used for updating status register when
-	 * interrupt occurs and LINE register used to pass the status
-	 * to actual interrupt line.  As per datasheet:
-	 * When INT_MSK_LINE [i] is set to 1, the associated interrupt
-	 * number i is INT line masked, which means that no interrupt is
-	 * generated on the INT line.
-	 * When INT_MSK_LINE [i] is set to 0, the associated interrupt
-	 * number i is  line enabled: An interrupt is generated on the
-	 * INT line.
-	 * In any case, the INT_STS [i] status bit may or may not be updated,
-	 * only linked to the INT_MSK_STS [i] configuration register bit.
-	 *
-	 * When INT_MSK_STS [i] is set to 1, the associated interrupt number
-	 * i is status masked, which means that no interrupt is stored in
-	 * the INT_STS[i] status bit. Note that no interrupt number i is
-	 * generated on the INT line, even if the INT_MSK_LINE [i] register
-	 * bit is set to 0.
-	 * When INT_MSK_STS [i] is set to 0, the associated interrupt number i
-	 * is status enabled: An interrupt status is updated in the INT_STS [i]
-	 * register. The interrupt may or may not be generated on the INT line,
-	 * depending on the INT_MSK_LINE [i] configuration register bit.
-	 */
-	for (i = 0; i < 3; i++)
-		tps80031_write(dev, TPS80031_SLAVE_ID2,
-				TPS80031_INT_MSK_STS_A + i, 0x00);
-
-	ret = regmap_add_irq_chip(tps80031->regmap[TPS80031_SLAVE_ID2], irq,
-			IRQF_ONESHOT, irq_base,
-			&tps80031_irq_chip, &tps80031->irq_data);
-	if (ret < 0) {
-		dev_err(dev, "add irq failed, err = %d\n", ret);
-		return ret;
-	}
-	return ret;
-}
-
-static bool rd_wr_reg_id0(struct device *dev, unsigned int reg)
-{
-	switch (reg) {
-	case TPS80031_SMPS1_CFG_FORCE ... TPS80031_SMPS2_CFG_VOLTAGE:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static bool rd_wr_reg_id1(struct device *dev, unsigned int reg)
-{
-	switch (reg) {
-	case TPS80031_SECONDS_REG ... TPS80031_RTC_RESET_STATUS_REG:
-	case TPS80031_VALIDITY0 ... TPS80031_VALIDITY7:
-	case TPS80031_PHOENIX_START_CONDITION ... TPS80031_KEY_PRESS_DUR_CFG:
-	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
-	case TPS80031_BROADCAST_ADDR_ALL ... TPS80031_BROADCAST_ADDR_CLK_RST:
-	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
-	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
-	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
-	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
-	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
-	case TPS80031_BACKUP_REG:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static bool is_volatile_reg_id1(struct device *dev, unsigned int reg)
-{
-	switch (reg) {
-	case TPS80031_SMPS4_CFG_TRANS ... TPS80031_SMPS3_CFG_VOLTAGE:
-	case TPS80031_VANA_CFG_TRANS ... TPS80031_LDO7_CFG_VOLTAGE:
-	case TPS80031_REGEN1_CFG_TRANS ... TPS80031_TMP_CFG_STATE:
-	case TPS80031_PREQ1_RES_ASS_A ... TPS80031_PREQ3_RES_ASS_C:
-	case TPS80031_SMPS_OFFSET ... TPS80031_BATDEBOUNCING:
-	case TPS80031_CFG_INPUT_PUPD1 ... TPS80031_CFG_SMPS_PD:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static bool rd_wr_reg_id2(struct device *dev, unsigned int reg)
-{
-	switch (reg) {
-	case TPS80031_USB_VENDOR_ID_LSB ... TPS80031_USB_OTG_REVISION:
-	case TPS80031_GPADC_CTRL ... TPS80031_CTRL_P1:
-	case TPS80031_RTCH0_LSB ... TPS80031_GPCH0_MSB:
-	case TPS80031_TOGGLE1 ... TPS80031_VIBMODE:
-	case TPS80031_PWM1ON ... TPS80031_PWM2OFF:
-	case TPS80031_FG_REG_00 ... TPS80031_FG_REG_11:
-	case TPS80031_INT_STS_A ... TPS80031_INT_MSK_STS_C:
-	case TPS80031_CONTROLLER_CTRL2 ... TPS80031_LED_PWM_CTRL2:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static bool rd_wr_reg_id3(struct device *dev, unsigned int reg)
-{
-	switch (reg) {
-	case TPS80031_GPADC_TRIM0 ... TPS80031_GPADC_TRIM18:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static const struct regmap_config tps80031_regmap_configs[] = {
-	{
-		.reg_bits = 8,
-		.val_bits = 8,
-		.writeable_reg = rd_wr_reg_id0,
-		.readable_reg = rd_wr_reg_id0,
-		.max_register = TPS80031_MAX_REGISTER,
-	},
-	{
-		.reg_bits = 8,
-		.val_bits = 8,
-		.writeable_reg = rd_wr_reg_id1,
-		.readable_reg = rd_wr_reg_id1,
-		.volatile_reg = is_volatile_reg_id1,
-		.max_register = TPS80031_MAX_REGISTER,
-	},
-	{
-		.reg_bits = 8,
-		.val_bits = 8,
-		.writeable_reg = rd_wr_reg_id2,
-		.readable_reg = rd_wr_reg_id2,
-		.max_register = TPS80031_MAX_REGISTER,
-	},
-	{
-		.reg_bits = 8,
-		.val_bits = 8,
-		.writeable_reg = rd_wr_reg_id3,
-		.readable_reg = rd_wr_reg_id3,
-		.max_register = TPS80031_MAX_REGISTER,
-	},
-};
-
-static int tps80031_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
-{
-	struct tps80031_platform_data *pdata = dev_get_platdata(&client->dev);
-	struct tps80031 *tps80031;
-	int ret;
-	uint8_t es_version;
-	uint8_t ep_ver;
-	int i;
-
-	if (!pdata) {
-		dev_err(&client->dev, "tps80031 requires platform data\n");
-		return -EINVAL;
-	}
-
-	tps80031 = devm_kzalloc(&client->dev, sizeof(*tps80031), GFP_KERNEL);
-	if (!tps80031)
-		return -ENOMEM;
-
-	for (i = 0; i < TPS80031_NUM_SLAVES; i++) {
-		if (tps80031_slave_address[i] == client->addr)
-			tps80031->clients[i] = client;
-		else
-			tps80031->clients[i] = devm_i2c_new_dummy_device(&client->dev,
-						client->adapter, tps80031_slave_address[i]);
-		if (IS_ERR(tps80031->clients[i])) {
-			dev_err(&client->dev, "can't attach client %d\n", i);
-			return PTR_ERR(tps80031->clients[i]);
-		}
-
-		i2c_set_clientdata(tps80031->clients[i], tps80031);
-		tps80031->regmap[i] = devm_regmap_init_i2c(tps80031->clients[i],
-					&tps80031_regmap_configs[i]);
-		if (IS_ERR(tps80031->regmap[i])) {
-			ret = PTR_ERR(tps80031->regmap[i]);
-			dev_err(&client->dev,
-				"regmap %d init failed, err %d\n", i, ret);
-			return ret;
-		}
-	}
-
-	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
-			TPS80031_JTAGVERNUM, &es_version);
-	if (ret < 0) {
-		dev_err(&client->dev,
-			"Silicon version number read failed: %d\n", ret);
-		return ret;
-	}
-
-	ret = tps80031_read(&client->dev, TPS80031_SLAVE_ID3,
-			TPS80031_EPROM_REV, &ep_ver);
-	if (ret < 0) {
-		dev_err(&client->dev,
-			"Silicon eeprom version read failed: %d\n", ret);
-		return ret;
-	}
-
-	dev_info(&client->dev, "ES version 0x%02x and EPROM version 0x%02x\n",
-					es_version, ep_ver);
-	tps80031->es_version = es_version;
-	tps80031->dev = &client->dev;
-	i2c_set_clientdata(client, tps80031);
-	tps80031->chip_info = id->driver_data;
-
-	ret = tps80031_irq_init(tps80031, client->irq, pdata->irq_base);
-	if (ret) {
-		dev_err(&client->dev, "IRQ init failed: %d\n", ret);
-		return ret;
-	}
-
-	tps80031_pupd_init(tps80031, pdata);
-
-	tps80031_init_ext_control(tps80031, pdata);
-
-	ret = mfd_add_devices(tps80031->dev, -1,
-			tps80031_cell, ARRAY_SIZE(tps80031_cell),
-			NULL, 0,
-			regmap_irq_get_domain(tps80031->irq_data));
-	if (ret < 0) {
-		dev_err(&client->dev, "mfd_add_devices failed: %d\n", ret);
-		goto fail_mfd_add;
-	}
-
-	if (pdata->use_power_off && !pm_power_off) {
-		tps80031_power_off_dev = tps80031;
-		pm_power_off = tps80031_power_off;
-	}
-	return 0;
-
-fail_mfd_add:
-	regmap_del_irq_chip(client->irq, tps80031->irq_data);
-	return ret;
-}
-
-static const struct i2c_device_id tps80031_id_table[] = {
-	{ "tps80031", TPS80031 },
-	{ "tps80032", TPS80032 },
-	{ }
-};
-
-static struct i2c_driver tps80031_driver = {
-	.driver	= {
-		.name			= "tps80031",
-		.suppress_bind_attrs	= true,
-	},
-	.probe		= tps80031_probe,
-	.id_table	= tps80031_id_table,
-};
-
-static int __init tps80031_init(void)
-{
-	return i2c_add_driver(&tps80031_driver);
-}
-subsys_initcall(tps80031_init);
diff --git a/include/linux/mfd/tps80031.h b/include/linux/mfd/tps80031.h
deleted file mode 100644
index 2c75c9c9318f..000000000000
--- a/include/linux/mfd/tps80031.h
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * tps80031.h -- TI TPS80031 and TI TPS80032 PMIC driver.
- *
- * Copyright (c) 2012, NVIDIA Corporation.
- *
- * Author: Laxman Dewangan <ldewangan@nvidia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation version 2.
- *
- * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind,
- * whether express or implied; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- * 02111-1307, USA
- */
-
-#ifndef __LINUX_MFD_TPS80031_H
-#define __LINUX_MFD_TPS80031_H
-
-#include <linux/device.h>
-#include <linux/regmap.h>
-
-/* Pull-ups/Pull-downs */
-#define TPS80031_CFG_INPUT_PUPD1			0xF0
-#define TPS80031_CFG_INPUT_PUPD2			0xF1
-#define TPS80031_CFG_INPUT_PUPD3			0xF2
-#define TPS80031_CFG_INPUT_PUPD4			0xF3
-#define TPS80031_CFG_LDO_PD1				0xF4
-#define TPS80031_CFG_LDO_PD2				0xF5
-#define TPS80031_CFG_SMPS_PD				0xF6
-
-/* Real Time Clock */
-#define TPS80031_SECONDS_REG				0x00
-#define TPS80031_MINUTES_REG				0x01
-#define TPS80031_HOURS_REG				0x02
-#define TPS80031_DAYS_REG				0x03
-#define TPS80031_MONTHS_REG				0x04
-#define TPS80031_YEARS_REG				0x05
-#define TPS80031_WEEKS_REG				0x06
-#define TPS80031_ALARM_SECONDS_REG			0x08
-#define TPS80031_ALARM_MINUTES_REG			0x09
-#define TPS80031_ALARM_HOURS_REG			0x0A
-#define TPS80031_ALARM_DAYS_REG				0x0B
-#define TPS80031_ALARM_MONTHS_REG			0x0C
-#define TPS80031_ALARM_YEARS_REG			0x0D
-#define TPS80031_RTC_CTRL_REG				0x10
-#define TPS80031_RTC_STATUS_REG				0x11
-#define TPS80031_RTC_INTERRUPTS_REG			0x12
-#define TPS80031_RTC_COMP_LSB_REG			0x13
-#define TPS80031_RTC_COMP_MSB_REG			0x14
-#define TPS80031_RTC_RESET_STATUS_REG			0x16
-
-/*PMC Master Module */
-#define TPS80031_PHOENIX_START_CONDITION		0x1F
-#define TPS80031_PHOENIX_MSK_TRANSITION			0x20
-#define TPS80031_STS_HW_CONDITIONS			0x21
-#define TPS80031_PHOENIX_LAST_TURNOFF_STS		0x22
-#define TPS80031_VSYSMIN_LO_THRESHOLD			0x23
-#define TPS80031_VSYSMIN_HI_THRESHOLD			0x24
-#define TPS80031_PHOENIX_DEV_ON				0x25
-#define TPS80031_STS_PWR_GRP_STATE			0x27
-#define TPS80031_PH_CFG_VSYSLOW				0x28
-#define TPS80031_PH_STS_BOOT				0x29
-#define TPS80031_PHOENIX_SENS_TRANSITION		0x2A
-#define TPS80031_PHOENIX_SEQ_CFG			0x2B
-#define TPS80031_PRIMARY_WATCHDOG_CFG			0X2C
-#define TPS80031_KEY_PRESS_DUR_CFG			0X2D
-#define TPS80031_SMPS_LDO_SHORT_STS			0x2E
-
-/* PMC Slave Module - Broadcast */
-#define TPS80031_BROADCAST_ADDR_ALL			0x31
-#define TPS80031_BROADCAST_ADDR_REF			0x32
-#define TPS80031_BROADCAST_ADDR_PROV			0x33
-#define TPS80031_BROADCAST_ADDR_CLK_RST			0x34
-
-/* PMC Slave Module  SMPS Regulators */
-#define TPS80031_SMPS4_CFG_TRANS			0x41
-#define TPS80031_SMPS4_CFG_STATE			0x42
-#define TPS80031_SMPS4_CFG_VOLTAGE			0x44
-#define TPS80031_VIO_CFG_TRANS				0x47
-#define TPS80031_VIO_CFG_STATE				0x48
-#define TPS80031_VIO_CFG_FORCE				0x49
-#define TPS80031_VIO_CFG_VOLTAGE			0x4A
-#define TPS80031_VIO_CFG_STEP				0x48
-#define TPS80031_SMPS1_CFG_TRANS			0x53
-#define TPS80031_SMPS1_CFG_STATE			0x54
-#define TPS80031_SMPS1_CFG_FORCE			0x55
-#define TPS80031_SMPS1_CFG_VOLTAGE			0x56
-#define TPS80031_SMPS1_CFG_STEP				0x57
-#define TPS80031_SMPS2_CFG_TRANS			0x59
-#define TPS80031_SMPS2_CFG_STATE			0x5A
-#define TPS80031_SMPS2_CFG_FORCE			0x5B
-#define TPS80031_SMPS2_CFG_VOLTAGE			0x5C
-#define TPS80031_SMPS2_CFG_STEP				0x5D
-#define TPS80031_SMPS3_CFG_TRANS			0x65
-#define TPS80031_SMPS3_CFG_STATE			0x66
-#define TPS80031_SMPS3_CFG_VOLTAGE			0x68
-
-/* PMC Slave Module  LDO Regulators */
-#define TPS80031_VANA_CFG_TRANS				0x81
-#define TPS80031_VANA_CFG_STATE				0x82
-#define TPS80031_VANA_CFG_VOLTAGE			0x83
-#define TPS80031_LDO2_CFG_TRANS				0x85
-#define TPS80031_LDO2_CFG_STATE				0x86
-#define TPS80031_LDO2_CFG_VOLTAGE			0x87
-#define TPS80031_LDO4_CFG_TRANS				0x89
-#define TPS80031_LDO4_CFG_STATE				0x8A
-#define TPS80031_LDO4_CFG_VOLTAGE			0x8B
-#define TPS80031_LDO3_CFG_TRANS				0x8D
-#define TPS80031_LDO3_CFG_STATE				0x8E
-#define TPS80031_LDO3_CFG_VOLTAGE			0x8F
-#define TPS80031_LDO6_CFG_TRANS				0x91
-#define TPS80031_LDO6_CFG_STATE				0x92
-#define TPS80031_LDO6_CFG_VOLTAGE			0x93
-#define TPS80031_LDOLN_CFG_TRANS			0x95
-#define TPS80031_LDOLN_CFG_STATE			0x96
-#define TPS80031_LDOLN_CFG_VOLTAGE			0x97
-#define TPS80031_LDO5_CFG_TRANS				0x99
-#define TPS80031_LDO5_CFG_STATE				0x9A
-#define TPS80031_LDO5_CFG_VOLTAGE			0x9B
-#define TPS80031_LDO1_CFG_TRANS				0x9D
-#define TPS80031_LDO1_CFG_STATE				0x9E
-#define TPS80031_LDO1_CFG_VOLTAGE			0x9F
-#define TPS80031_LDOUSB_CFG_TRANS			0xA1
-#define TPS80031_LDOUSB_CFG_STATE			0xA2
-#define TPS80031_LDOUSB_CFG_VOLTAGE			0xA3
-#define TPS80031_LDO7_CFG_TRANS				0xA5
-#define TPS80031_LDO7_CFG_STATE				0xA6
-#define TPS80031_LDO7_CFG_VOLTAGE			0xA7
-
-/* PMC Slave Module  External Control */
-#define TPS80031_REGEN1_CFG_TRANS			0xAE
-#define TPS80031_REGEN1_CFG_STATE			0xAF
-#define TPS80031_REGEN2_CFG_TRANS			0xB1
-#define TPS80031_REGEN2_CFG_STATE			0xB2
-#define TPS80031_SYSEN_CFG_TRANS			0xB4
-#define TPS80031_SYSEN_CFG_STATE			0xB5
-
-/* PMC Slave Module  Internal Control */
-#define TPS80031_NRESPWRON_CFG_TRANS			0xB7
-#define TPS80031_NRESPWRON_CFG_STATE			0xB8
-#define TPS80031_CLK32KAO_CFG_TRANS			0xBA
-#define TPS80031_CLK32KAO_CFG_STATE			0xBB
-#define TPS80031_CLK32KG_CFG_TRANS			0xBD
-#define TPS80031_CLK32KG_CFG_STATE			0xBE
-#define TPS80031_CLK32KAUDIO_CFG_TRANS			0xC0
-#define TPS80031_CLK32KAUDIO_CFG_STATE			0xC1
-#define TPS80031_VRTC_CFG_TRANS				0xC3
-#define TPS80031_VRTC_CFG_STATE				0xC4
-#define TPS80031_BIAS_CFG_TRANS				0xC6
-#define TPS80031_BIAS_CFG_STATE				0xC7
-#define TPS80031_VSYSMIN_HI_CFG_TRANS			0xC9
-#define TPS80031_VSYSMIN_HI_CFG_STATE			0xCA
-#define TPS80031_RC6MHZ_CFG_TRANS			0xCC
-#define TPS80031_RC6MHZ_CFG_STATE			0xCD
-#define TPS80031_TMP_CFG_TRANS				0xCF
-#define TPS80031_TMP_CFG_STATE				0xD0
-
-/* PMC Slave Module  resources assignment */
-#define TPS80031_PREQ1_RES_ASS_A			0xD7
-#define TPS80031_PREQ1_RES_ASS_B			0xD8
-#define TPS80031_PREQ1_RES_ASS_C			0xD9
-#define TPS80031_PREQ2_RES_ASS_A			0xDA
-#define TPS80031_PREQ2_RES_ASS_B			0xDB
-#define TPS80031_PREQ2_RES_ASS_C			0xDC
-#define TPS80031_PREQ3_RES_ASS_A			0xDD
-#define TPS80031_PREQ3_RES_ASS_B			0xDE
-#define TPS80031_PREQ3_RES_ASS_C			0xDF
-
-/* PMC Slave Module  Miscellaneous */
-#define TPS80031_SMPS_OFFSET				0xE0
-#define TPS80031_SMPS_MULT				0xE3
-#define TPS80031_MISC1					0xE4
-#define TPS80031_MISC2					0xE5
-#define TPS80031_BBSPOR_CFG				0xE6
-#define TPS80031_TMP_CFG				0xE7
-
-/* Battery Charging Controller and Indicator LED */
-#define TPS80031_CONTROLLER_CTRL2			0xDA
-#define TPS80031_CONTROLLER_VSEL_COMP			0xDB
-#define TPS80031_CHARGERUSB_VSYSREG			0xDC
-#define TPS80031_CHARGERUSB_VICHRG_PC			0xDD
-#define TPS80031_LINEAR_CHRG_STS			0xDE
-#define TPS80031_CONTROLLER_INT_MASK			0xE0
-#define TPS80031_CONTROLLER_CTRL1			0xE1
-#define TPS80031_CONTROLLER_WDG				0xE2
-#define TPS80031_CONTROLLER_STAT1			0xE3
-#define TPS80031_CHARGERUSB_INT_STATUS			0xE4
-#define TPS80031_CHARGERUSB_INT_MASK			0xE5
-#define TPS80031_CHARGERUSB_STATUS_INT1			0xE6
-#define TPS80031_CHARGERUSB_STATUS_INT2			0xE7
-#define TPS80031_CHARGERUSB_CTRL1			0xE8
-#define TPS80031_CHARGERUSB_CTRL2			0xE9
-#define TPS80031_CHARGERUSB_CTRL3			0xEA
-#define TPS80031_CHARGERUSB_STAT1			0xEB
-#define TPS80031_CHARGERUSB_VOREG			0xEC
-#define TPS80031_CHARGERUSB_VICHRG			0xED
-#define TPS80031_CHARGERUSB_CINLIMIT			0xEE
-#define TPS80031_CHARGERUSB_CTRLLIMIT1			0xEF
-#define TPS80031_CHARGERUSB_CTRLLIMIT2			0xF0
-#define TPS80031_LED_PWM_CTRL1				0xF4
-#define TPS80031_LED_PWM_CTRL2				0xF5
-
-/* USB On-The-Go  */
-#define TPS80031_BACKUP_REG				0xFA
-#define TPS80031_USB_VENDOR_ID_LSB			0x00
-#define TPS80031_USB_VENDOR_ID_MSB			0x01
-#define TPS80031_USB_PRODUCT_ID_LSB			0x02
-#define TPS80031_USB_PRODUCT_ID_MSB			0x03
-#define TPS80031_USB_VBUS_CTRL_SET			0x04
-#define TPS80031_USB_VBUS_CTRL_CLR			0x05
-#define TPS80031_USB_ID_CTRL_SET			0x06
-#define TPS80031_USB_ID_CTRL_CLR			0x07
-#define TPS80031_USB_VBUS_INT_SRC			0x08
-#define TPS80031_USB_VBUS_INT_LATCH_SET			0x09
-#define TPS80031_USB_VBUS_INT_LATCH_CLR			0x0A
-#define TPS80031_USB_VBUS_INT_EN_LO_SET			0x0B
-#define TPS80031_USB_VBUS_INT_EN_LO_CLR			0x0C
-#define TPS80031_USB_VBUS_INT_EN_HI_SET			0x0D
-#define TPS80031_USB_VBUS_INT_EN_HI_CLR			0x0E
-#define TPS80031_USB_ID_INT_SRC				0x0F
-#define TPS80031_USB_ID_INT_LATCH_SET			0x10
-#define TPS80031_USB_ID_INT_LATCH_CLR			0x11
-#define TPS80031_USB_ID_INT_EN_LO_SET			0x12
-#define TPS80031_USB_ID_INT_EN_LO_CLR			0x13
-#define TPS80031_USB_ID_INT_EN_HI_SET			0x14
-#define TPS80031_USB_ID_INT_EN_HI_CLR			0x15
-#define TPS80031_USB_OTG_ADP_CTRL			0x16
-#define TPS80031_USB_OTG_ADP_HIGH			0x17
-#define TPS80031_USB_OTG_ADP_LOW			0x18
-#define TPS80031_USB_OTG_ADP_RISE			0x19
-#define TPS80031_USB_OTG_REVISION			0x1A
-
-/* Gas Gauge */
-#define TPS80031_FG_REG_00				0xC0
-#define TPS80031_FG_REG_01				0xC1
-#define TPS80031_FG_REG_02				0xC2
-#define TPS80031_FG_REG_03				0xC3
-#define TPS80031_FG_REG_04				0xC4
-#define TPS80031_FG_REG_05				0xC5
-#define TPS80031_FG_REG_06				0xC6
-#define TPS80031_FG_REG_07				0xC7
-#define TPS80031_FG_REG_08				0xC8
-#define TPS80031_FG_REG_09				0xC9
-#define TPS80031_FG_REG_10				0xCA
-#define TPS80031_FG_REG_11				0xCB
-
-/* General Purpose ADC */
-#define TPS80031_GPADC_CTRL				0x2E
-#define TPS80031_GPADC_CTRL2				0x2F
-#define TPS80031_RTSELECT_LSB				0x32
-#define TPS80031_RTSELECT_ISB				0x33
-#define TPS80031_RTSELECT_MSB				0x34
-#define TPS80031_GPSELECT_ISB				0x35
-#define TPS80031_CTRL_P1				0x36
-#define TPS80031_RTCH0_LSB				0x37
-#define TPS80031_RTCH0_MSB				0x38
-#define TPS80031_RTCH1_LSB				0x39
-#define TPS80031_RTCH1_MSB				0x3A
-#define TPS80031_GPCH0_LSB				0x3B
-#define TPS80031_GPCH0_MSB				0x3C
-
-/* SIM, MMC and Battery Detection */
-#define TPS80031_SIMDEBOUNCING				0xEB
-#define TPS80031_SIMCTRL				0xEC
-#define TPS80031_MMCDEBOUNCING				0xED
-#define TPS80031_MMCCTRL				0xEE
-#define TPS80031_BATDEBOUNCING				0xEF
-
-/* Vibrator Driver and PWMs */
-#define TPS80031_VIBCTRL				0x9B
-#define TPS80031_VIBMODE				0x9C
-#define TPS80031_PWM1ON					0xBA
-#define TPS80031_PWM1OFF				0xBB
-#define TPS80031_PWM2ON					0xBD
-#define TPS80031_PWM2OFF				0xBE
-
-/* Control Interface */
-#define TPS80031_INT_STS_A				0xD0
-#define TPS80031_INT_STS_B				0xD1
-#define TPS80031_INT_STS_C				0xD2
-#define TPS80031_INT_MSK_LINE_A				0xD3
-#define TPS80031_INT_MSK_LINE_B				0xD4
-#define TPS80031_INT_MSK_LINE_C				0xD5
-#define TPS80031_INT_MSK_STS_A				0xD6
-#define TPS80031_INT_MSK_STS_B				0xD7
-#define TPS80031_INT_MSK_STS_C				0xD8
-#define TPS80031_TOGGLE1				0x90
-#define TPS80031_TOGGLE2				0x91
-#define TPS80031_TOGGLE3				0x92
-#define TPS80031_PWDNSTATUS1				0x93
-#define TPS80031_PWDNSTATUS2				0x94
-#define TPS80031_VALIDITY0				0x17
-#define TPS80031_VALIDITY1				0x18
-#define TPS80031_VALIDITY2				0x19
-#define TPS80031_VALIDITY3				0x1A
-#define TPS80031_VALIDITY4				0x1B
-#define TPS80031_VALIDITY5				0x1C
-#define TPS80031_VALIDITY6				0x1D
-#define TPS80031_VALIDITY7				0x1E
-
-/* Version number related register */
-#define TPS80031_JTAGVERNUM				0x87
-#define TPS80031_EPROM_REV				0xDF
-
-/* GPADC Trimming Bits. */
-#define TPS80031_GPADC_TRIM0				0xCC
-#define TPS80031_GPADC_TRIM1				0xCD
-#define TPS80031_GPADC_TRIM2				0xCE
-#define TPS80031_GPADC_TRIM3				0xCF
-#define TPS80031_GPADC_TRIM4				0xD0
-#define TPS80031_GPADC_TRIM5				0xD1
-#define TPS80031_GPADC_TRIM6				0xD2
-#define TPS80031_GPADC_TRIM7				0xD3
-#define TPS80031_GPADC_TRIM8				0xD4
-#define TPS80031_GPADC_TRIM9				0xD5
-#define TPS80031_GPADC_TRIM10				0xD6
-#define TPS80031_GPADC_TRIM11				0xD7
-#define TPS80031_GPADC_TRIM12				0xD8
-#define TPS80031_GPADC_TRIM13				0xD9
-#define TPS80031_GPADC_TRIM14				0xDA
-#define TPS80031_GPADC_TRIM15				0xDB
-#define TPS80031_GPADC_TRIM16				0xDC
-#define TPS80031_GPADC_TRIM17				0xDD
-#define TPS80031_GPADC_TRIM18				0xDE
-
-/* TPS80031_CONTROLLER_STAT1 bit fields */
-#define TPS80031_CONTROLLER_STAT1_BAT_TEMP		0
-#define TPS80031_CONTROLLER_STAT1_BAT_REMOVED		1
-#define TPS80031_CONTROLLER_STAT1_VBUS_DET		2
-#define TPS80031_CONTROLLER_STAT1_VAC_DET		3
-#define TPS80031_CONTROLLER_STAT1_FAULT_WDG		4
-#define TPS80031_CONTROLLER_STAT1_LINCH_GATED		6
-/* TPS80031_CONTROLLER_INT_MASK bit filed */
-#define TPS80031_CONTROLLER_INT_MASK_MVAC_DET		0
-#define TPS80031_CONTROLLER_INT_MASK_MVBUS_DET		1
-#define TPS80031_CONTROLLER_INT_MASK_MBAT_TEMP		2
-#define TPS80031_CONTROLLER_INT_MASK_MFAULT_WDG		3
-#define TPS80031_CONTROLLER_INT_MASK_MBAT_REMOVED	4
-#define TPS80031_CONTROLLER_INT_MASK_MLINCH_GATED	5
-
-#define TPS80031_CHARGE_CONTROL_SUB_INT_MASK		0x3F
-
-/* TPS80031_PHOENIX_DEV_ON bit field */
-#define TPS80031_DEVOFF					0x1
-
-#define TPS80031_EXT_CONTROL_CFG_TRANS			0
-#define TPS80031_EXT_CONTROL_CFG_STATE			1
-
-/* State register field */
-#define TPS80031_STATE_OFF				0x00
-#define TPS80031_STATE_ON				0x01
-#define TPS80031_STATE_MASK				0x03
-
-/* Trans register field */
-#define TPS80031_TRANS_ACTIVE_OFF			0x00
-#define TPS80031_TRANS_ACTIVE_ON			0x01
-#define TPS80031_TRANS_ACTIVE_MASK			0x03
-#define TPS80031_TRANS_SLEEP_OFF			0x00
-#define TPS80031_TRANS_SLEEP_ON				0x04
-#define TPS80031_TRANS_SLEEP_MASK			0x0C
-#define TPS80031_TRANS_OFF_OFF				0x00
-#define TPS80031_TRANS_OFF_ACTIVE			0x10
-#define TPS80031_TRANS_OFF_MASK				0x30
-
-#define TPS80031_EXT_PWR_REQ		(TPS80031_PWR_REQ_INPUT_PREQ1 | \
-					TPS80031_PWR_REQ_INPUT_PREQ2 | \
-					TPS80031_PWR_REQ_INPUT_PREQ3)
-
-/* TPS80031_BBSPOR_CFG bit field */
-#define TPS80031_BBSPOR_CHG_EN				0x8
-#define TPS80031_MAX_REGISTER				0xFF
-
-struct i2c_client;
-
-/* Supported chips */
-enum chips {
-	TPS80031 = 0x00000001,
-	TPS80032 = 0x00000002,
-};
-
-enum {
-	TPS80031_INT_PWRON,
-	TPS80031_INT_RPWRON,
-	TPS80031_INT_SYS_VLOW,
-	TPS80031_INT_RTC_ALARM,
-	TPS80031_INT_RTC_PERIOD,
-	TPS80031_INT_HOT_DIE,
-	TPS80031_INT_VXX_SHORT,
-	TPS80031_INT_SPDURATION,
-	TPS80031_INT_WATCHDOG,
-	TPS80031_INT_BAT,
-	TPS80031_INT_SIM,
-	TPS80031_INT_MMC,
-	TPS80031_INT_RES,
-	TPS80031_INT_GPADC_RT,
-	TPS80031_INT_GPADC_SW2_EOC,
-	TPS80031_INT_CC_AUTOCAL,
-	TPS80031_INT_ID_WKUP,
-	TPS80031_INT_VBUSS_WKUP,
-	TPS80031_INT_ID,
-	TPS80031_INT_VBUS,
-	TPS80031_INT_CHRG_CTRL,
-	TPS80031_INT_EXT_CHRG,
-	TPS80031_INT_INT_CHRG,
-	TPS80031_INT_RES2,
-	TPS80031_INT_BAT_TEMP_OVRANGE,
-	TPS80031_INT_BAT_REMOVED,
-	TPS80031_INT_VBUS_DET,
-	TPS80031_INT_VAC_DET,
-	TPS80031_INT_FAULT_WDG,
-	TPS80031_INT_LINCH_GATED,
-
-	/* Last interrupt id to get the end number */
-	TPS80031_INT_NR,
-};
-
-/* TPS80031 Slave IDs */
-#define TPS80031_NUM_SLAVES				4
-#define TPS80031_SLAVE_ID0				0
-#define TPS80031_SLAVE_ID1				1
-#define TPS80031_SLAVE_ID2				2
-#define TPS80031_SLAVE_ID3				3
-
-/* TPS80031 I2C addresses */
-#define TPS80031_I2C_ID0_ADDR				0x12
-#define TPS80031_I2C_ID1_ADDR				0x48
-#define TPS80031_I2C_ID2_ADDR				0x49
-#define TPS80031_I2C_ID3_ADDR				0x4A
-
-enum {
-	TPS80031_REGULATOR_VIO,
-	TPS80031_REGULATOR_SMPS1,
-	TPS80031_REGULATOR_SMPS2,
-	TPS80031_REGULATOR_SMPS3,
-	TPS80031_REGULATOR_SMPS4,
-	TPS80031_REGULATOR_VANA,
-	TPS80031_REGULATOR_LDO1,
-	TPS80031_REGULATOR_LDO2,
-	TPS80031_REGULATOR_LDO3,
-	TPS80031_REGULATOR_LDO4,
-	TPS80031_REGULATOR_LDO5,
-	TPS80031_REGULATOR_LDO6,
-	TPS80031_REGULATOR_LDO7,
-	TPS80031_REGULATOR_LDOLN,
-	TPS80031_REGULATOR_LDOUSB,
-	TPS80031_REGULATOR_VBUS,
-	TPS80031_REGULATOR_REGEN1,
-	TPS80031_REGULATOR_REGEN2,
-	TPS80031_REGULATOR_SYSEN,
-	TPS80031_REGULATOR_MAX,
-};
-
-/* Different configurations for the rails */
-enum {
-	/* USBLDO input selection */
-	TPS80031_USBLDO_INPUT_VSYS		= 0x00000001,
-	TPS80031_USBLDO_INPUT_PMID		= 0x00000002,
-
-	/* LDO3 output mode */
-	TPS80031_LDO3_OUTPUT_VIB		= 0x00000004,
-
-	/* VBUS configuration */
-	TPS80031_VBUS_DISCHRG_EN_PDN		= 0x00000004,
-	TPS80031_VBUS_SW_ONLY			= 0x00000008,
-	TPS80031_VBUS_SW_N_ID			= 0x00000010,
-};
-
-/* External controls requests */
-enum tps80031_ext_control {
-	TPS80031_PWR_REQ_INPUT_NONE		= 0x00000000,
-	TPS80031_PWR_REQ_INPUT_PREQ1		= 0x00000001,
-	TPS80031_PWR_REQ_INPUT_PREQ2		= 0x00000002,
-	TPS80031_PWR_REQ_INPUT_PREQ3		= 0x00000004,
-	TPS80031_PWR_OFF_ON_SLEEP		= 0x00000008,
-	TPS80031_PWR_ON_ON_SLEEP		= 0x00000010,
-};
-
-enum tps80031_pupd_pins {
-	TPS80031_PREQ1 = 0,
-	TPS80031_PREQ2A,
-	TPS80031_PREQ2B,
-	TPS80031_PREQ2C,
-	TPS80031_PREQ3,
-	TPS80031_NRES_WARM,
-	TPS80031_PWM_FORCE,
-	TPS80031_CHRG_EXT_CHRG_STATZ,
-	TPS80031_SIM,
-	TPS80031_MMC,
-	TPS80031_GPADC_START,
-	TPS80031_DVSI2C_SCL,
-	TPS80031_DVSI2C_SDA,
-	TPS80031_CTLI2C_SCL,
-	TPS80031_CTLI2C_SDA,
-};
-
-enum tps80031_pupd_settings {
-	TPS80031_PUPD_NORMAL,
-	TPS80031_PUPD_PULLDOWN,
-	TPS80031_PUPD_PULLUP,
-};
-
-struct tps80031 {
-	struct device		*dev;
-	unsigned long		chip_info;
-	int			es_version;
-	struct i2c_client	*clients[TPS80031_NUM_SLAVES];
-	struct regmap		*regmap[TPS80031_NUM_SLAVES];
-	struct regmap_irq_chip_data *irq_data;
-};
-
-struct tps80031_pupd_init_data {
-	int input_pin;
-	int setting;
-};
-
-/*
- * struct tps80031_regulator_platform_data - tps80031 regulator platform data.
- *
- * @reg_init_data: The regulator init data.
- * @ext_ctrl_flag: External control flag for sleep/power request control.
- * @config_flags: Configuration flag to configure the rails.
- *		  It should be ORed of config enums.
- */
-
-struct tps80031_regulator_platform_data {
-	struct regulator_init_data *reg_init_data;
-	unsigned int ext_ctrl_flag;
-	unsigned int config_flags;
-};
-
-struct tps80031_platform_data {
-	int irq_base;
-	bool use_power_off;
-	struct tps80031_pupd_init_data *pupd_init_data;
-	int pupd_init_data_size;
-	struct tps80031_regulator_platform_data
-			*regulator_pdata[TPS80031_REGULATOR_MAX];
-};
-
-static inline int tps80031_write(struct device *dev, int sid,
-		int reg, uint8_t val)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return regmap_write(tps80031->regmap[sid], reg, val);
-}
-
-static inline int tps80031_writes(struct device *dev, int sid, int reg,
-		int len, uint8_t *val)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return regmap_bulk_write(tps80031->regmap[sid], reg, val, len);
-}
-
-static inline int tps80031_read(struct device *dev, int sid,
-		int reg, uint8_t *val)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-	unsigned int ival;
-	int ret;
-
-	ret = regmap_read(tps80031->regmap[sid], reg, &ival);
-	if (ret < 0) {
-		dev_err(dev, "failed reading from reg 0x%02x\n", reg);
-		return ret;
-	}
-
-	*val = ival;
-	return ret;
-}
-
-static inline int tps80031_reads(struct device *dev, int sid,
-		int reg, int len, uint8_t *val)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return regmap_bulk_read(tps80031->regmap[sid], reg, val, len);
-}
-
-static inline int tps80031_set_bits(struct device *dev, int sid,
-		int reg, uint8_t bit_mask)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return regmap_update_bits(tps80031->regmap[sid], reg,
-				bit_mask, bit_mask);
-}
-
-static inline int tps80031_clr_bits(struct device *dev, int sid,
-		int reg, uint8_t bit_mask)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return regmap_update_bits(tps80031->regmap[sid], reg, bit_mask, 0);
-}
-
-static inline int tps80031_update(struct device *dev, int sid,
-		int reg, uint8_t val, uint8_t mask)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return regmap_update_bits(tps80031->regmap[sid], reg, mask, val);
-}
-
-static inline unsigned long tps80031_get_chip_info(struct device *dev)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return tps80031->chip_info;
-}
-
-static inline int tps80031_get_pmu_version(struct device *dev)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return tps80031->es_version;
-}
-
-static inline int tps80031_irq_get_virq(struct device *dev, int irq)
-{
-	struct tps80031 *tps80031 = dev_get_drvdata(dev);
-
-	return regmap_irq_get_virq(tps80031->irq_data, irq);
-}
-
-extern int tps80031_ext_power_req_config(struct device *dev,
-		unsigned long ext_ctrl_flag, int preq_bit,
-		int state_reg_add, int trans_reg_add);
-#endif /*__LINUX_MFD_TPS80031_H */
-- 
cgit v1.2.3


From d755ad8dc752d44545613ea04d660aed674e540d Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:00 -0400
Subject: NFS: Create a new nfs_alloc_fattr_with_label() function

For creating fattrs with the label field already allocated for us. I
also update nfs_free_fattr() to free the label in the end.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/getroot.c       | 17 ++++++-----------
 fs/nfs/inode.c         | 17 +++++++++++++++++
 fs/nfs/internal.h      |  9 ---------
 include/linux/nfs_fs.h | 13 +++++++++++++
 4 files changed, 36 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 59355c106ece..7604cb6a0ac2 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -80,18 +80,15 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
 		goto out;
 
 	/* get the actual root for this mount */
-	fsinfo.fattr = nfs_alloc_fattr();
+	fsinfo.fattr = nfs_alloc_fattr_with_label(server);
 	if (fsinfo.fattr == NULL)
 		goto out_name;
 
-	fsinfo.fattr->label = nfs4_label_alloc(server, GFP_KERNEL);
-	if (IS_ERR(fsinfo.fattr->label))
-		goto out_fattr;
 	error = server->nfs_client->rpc_ops->getroot(server, ctx->mntfh, &fsinfo);
 	if (error < 0) {
 		dprintk("nfs_get_root: getattr error = %d\n", -error);
 		nfs_errorf(fc, "NFS: Couldn't getattr on root");
-		goto out_label;
+		goto out_fattr;
 	}
 
 	inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL);
@@ -99,12 +96,12 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
 		dprintk("nfs_get_root: get root inode failed\n");
 		error = PTR_ERR(inode);
 		nfs_errorf(fc, "NFS: Couldn't get root inode");
-		goto out_label;
+		goto out_fattr;
 	}
 
 	error = nfs_superblock_set_dummy_root(s, inode);
 	if (error != 0)
-		goto out_label;
+		goto out_fattr;
 
 	/* root dentries normally start off anonymous and get spliced in later
 	 * if the dentry tree reaches them; however if the dentry already
@@ -115,7 +112,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
 		dprintk("nfs_get_root: get root dentry failed\n");
 		error = PTR_ERR(root);
 		nfs_errorf(fc, "NFS: Couldn't get root dentry");
-		goto out_label;
+		goto out_fattr;
 	}
 
 	security_d_instantiate(root, inode);
@@ -154,8 +151,6 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
 	nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label);
 	error = 0;
 
-out_label:
-	nfs4_label_free(fsinfo.fattr->label);
 out_fattr:
 	nfs_free_fattr(fsinfo.fattr);
 out_name:
@@ -165,5 +160,5 @@ out:
 error_splat_root:
 	dput(fc->root);
 	fc->root = NULL;
-	goto out_label;
+	goto out_fattr;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 0b5b1e44b2c4..84c7efa2ea87 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1606,6 +1606,23 @@ struct nfs_fattr *nfs_alloc_fattr(void)
 }
 EXPORT_SYMBOL_GPL(nfs_alloc_fattr);
 
+struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server)
+{
+	struct nfs_fattr *fattr = nfs_alloc_fattr();
+
+	if (!fattr)
+		return NULL;
+
+	fattr->label = nfs4_label_alloc(server, GFP_NOFS);
+	if (IS_ERR(fattr->label)) {
+		kfree(fattr);
+		return NULL;
+	}
+
+	return fattr;
+}
+EXPORT_SYMBOL_GPL(nfs_alloc_fattr_with_label);
+
 struct nfs_fh *nfs_alloc_fhandle(void)
 {
 	struct nfs_fh *fh;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 123078c76495..12f6acb483bb 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -342,14 +342,6 @@ nfs4_label_copy(struct nfs4_label *dst, struct nfs4_label *src)
 
 	return dst;
 }
-static inline void nfs4_label_free(struct nfs4_label *label)
-{
-	if (label) {
-		kfree(label->label);
-		kfree(label);
-	}
-	return;
-}
 
 static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
 {
@@ -358,7 +350,6 @@ static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
 }
 #else
 static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; }
-static inline void nfs4_label_free(void *label) {}
 static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi)
 {
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 739ca1ef934f..88c3aed8ad39 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -426,9 +426,22 @@ extern void nfs_fattr_set_barrier(struct nfs_fattr *fattr);
 extern unsigned long nfs_inc_attr_generation_counter(void);
 
 extern struct nfs_fattr *nfs_alloc_fattr(void);
+extern struct nfs_fattr *nfs_alloc_fattr_with_label(struct nfs_server *server);
+
+static inline void nfs4_label_free(struct nfs4_label *label)
+{
+#ifdef CONFIG_NFS_V4_SECURITY_LABEL
+	if (label) {
+		kfree(label->label);
+		kfree(label);
+	}
+#endif
+}
 
 static inline void nfs_free_fattr(const struct nfs_fattr *fattr)
 {
+	if (fattr)
+		nfs4_label_free(fattr->label);
 	kfree(fattr);
 }
 
-- 
cgit v1.2.3


From b1db9a401d464d526d5941f0544e7c9ea37fa731 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:01 -0400
Subject: NFS: Remove the nfs4_label from the nfs_entry struct

And instead allocate the fattr using nfs_alloc_fattr_with_label()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c            | 21 +++++++--------------
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  1 -
 3 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 36cb1012c7e1..92530c3c1694 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -682,7 +682,8 @@ again:
 			nfs_set_verifier(dentry, dir_verifier);
 			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
 			if (!status)
-				nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
+				nfs_setsecurity(d_inode(dentry), entry->fattr,
+						entry->fattr->label);
 			goto out;
 		} else {
 			d_invalidate(dentry);
@@ -696,7 +697,7 @@ again:
 		goto out;
 	}
 
-	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label);
+	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->fattr->label);
 	alias = d_splice_alias(inode, dentry);
 	d_lookup_done(dentry);
 	if (alias) {
@@ -732,8 +733,8 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
 	xdr_set_scratch_page(&stream, scratch);
 
 	do {
-		if (entry->label)
-			entry->label->len = NFS4_MAXLABELLEN;
+		if (entry->fattr->label)
+			entry->fattr->label->len = NFS4_MAXLABELLEN;
 
 		status = xdr_decode(desc, entry, &stream);
 		if (status != 0)
@@ -838,21 +839,15 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 		return -ENOMEM;
 	entry->cookie = nfs_readdir_page_last_cookie(page);
 	entry->fh = nfs_alloc_fhandle();
-	entry->fattr = nfs_alloc_fattr();
+	entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
 	entry->server = NFS_SERVER(inode);
 	if (entry->fh == NULL || entry->fattr == NULL)
 		goto out;
 
-	entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
-	if (IS_ERR(entry->label)) {
-		status = PTR_ERR(entry->label);
-		goto out;
-	}
-
 	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	pages = nfs_readdir_alloc_pages(array_size);
 	if (!pages)
-		goto out_release_label;
+		goto out;
 
 	do {
 		unsigned int pglen;
@@ -875,8 +870,6 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 	} while (!status && nfs_readdir_page_needs_filling(page));
 
 	nfs_readdir_free_pages(pages, array_size);
-out_release_label:
-	nfs4_label_free(entry->label);
 out:
 	nfs_free_fattr(entry->fattr);
 	nfs_free_fhandle(entry->fh);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 63a12181e6c7..fba89f82e7b7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7467,7 +7467,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 		return -EAGAIN;
 
 	if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
-			NULL, entry->label, entry->server) < 0)
+			NULL, entry->fattr->label, entry->server) < 0)
 		return -EAGAIN;
 	if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
 		entry->ino = entry->fattr->mounted_on_fileid;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index e9698b6278a5..9960f6628066 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -753,7 +753,6 @@ struct nfs_entry {
 	int			eof;
 	struct nfs_fh *		fh;
 	struct nfs_fattr *	fattr;
-	struct nfs4_label  *label;
 	unsigned char		d_type;
 	struct nfs_server *	server;
 };
-- 
cgit v1.2.3


From 68be1742c22983558f0f148a4467eb9127d56b86 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:02 -0400
Subject: NFS: Remove the nfs4_label from the nfs4_create_res struct

Instead, use the label embedded in the attached fattr.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c       | 12 +++++-------
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  1 -
 3 files changed, 6 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 127388fabda8..2e07550dd0d4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4850,7 +4850,6 @@ struct nfs4_createdata {
 	struct nfs4_create_res res;
 	struct nfs_fh fh;
 	struct nfs_fattr fattr;
-	struct nfs4_label *label;
 };
 
 static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
@@ -4862,8 +4861,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
 	if (data != NULL) {
 		struct nfs_server *server = NFS_SERVER(dir);
 
-		data->label = nfs4_label_alloc(server, GFP_KERNEL);
-		if (IS_ERR(data->label))
+		data->fattr.label = nfs4_label_alloc(server, GFP_KERNEL);
+		if (IS_ERR(data->fattr.label))
 			goto out_free;
 
 		data->msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE];
@@ -4874,12 +4873,11 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
 		data->arg.name = name;
 		data->arg.attrs = sattr;
 		data->arg.ftype = ftype;
-		data->arg.bitmask = nfs4_bitmask(server, data->label);
+		data->arg.bitmask = nfs4_bitmask(server, data->fattr.label);
 		data->arg.umask = current_umask();
 		data->res.server = server;
 		data->res.fh = &data->fh;
 		data->res.fattr = &data->fattr;
-		data->res.label = data->label;
 		nfs_fattr_init(data->res.fattr);
 	}
 	return data;
@@ -4901,14 +4899,14 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
 					      data->res.fattr->time_start,
 					      NFS_INO_INVALID_DATA);
 		spin_unlock(&dir->i_lock);
-		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
+		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.fattr->label);
 	}
 	return status;
 }
 
 static void nfs4_free_createdata(struct nfs4_createdata *data)
 {
-	nfs4_label_free(data->label);
+	nfs4_label_free(data->fattr.label);
 	kfree(data);
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index fba89f82e7b7..38c74833f263 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6353,7 +6353,7 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+	decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9960f6628066..5aba81b74c98 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1040,7 +1040,6 @@ struct nfs4_create_res {
 	const struct nfs_server *	server;
 	struct nfs_fh *			fh;
 	struct nfs_fattr *		fattr;
-	struct nfs4_label		*label;
 	struct nfs4_change_info		dir_cinfo;
 };
 
-- 
cgit v1.2.3


From aa7ca3b2de190675543d84adaa1ff74e7867c76f Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:03 -0400
Subject: NFS: Remove the nfs4_label from the nfs4_link_res struct

Again, use the fattr's label field instead.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4proc.c       | 16 +++-------------
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  1 -
 3 files changed, 4 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 2e07550dd0d4..bde5b5723046 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4790,7 +4790,6 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
 	};
 	struct nfs4_link_res res = {
 		.server = server,
-		.label = NULL,
 	};
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
@@ -4799,18 +4798,12 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
 	};
 	int status = -ENOMEM;
 
-	res.fattr = nfs_alloc_fattr();
+	res.fattr = nfs_alloc_fattr_with_label(server);
 	if (res.fattr == NULL)
 		goto out;
 
-	res.label = nfs4_label_alloc(server, GFP_KERNEL);
-	if (IS_ERR(res.label)) {
-		status = PTR_ERR(res.label);
-		goto out;
-	}
-
 	nfs4_inode_make_writeable(inode);
-	nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode,
+	nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.fattr->label), inode,
 				NFS_INO_INVALID_CHANGE);
 	status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
 	if (!status) {
@@ -4819,12 +4812,9 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
 		nfs4_inc_nlink(inode);
 		status = nfs_post_op_update_inode(inode, res.fattr);
 		if (!status)
-			nfs_setsecurity(inode, res.fattr, res.label);
+			nfs_setsecurity(inode, res.fattr, res.fattr->label);
 	}
 
-
-	nfs4_label_free(res.label);
-
 out:
 	nfs_free_fattr(res.fattr);
 	return status;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 38c74833f263..4c9d66fac3fd 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6323,7 +6323,7 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_restorefh(xdr);
 	if (status)
 		goto out;
-	decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+	decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5aba81b74c98..d55bf3fd5167 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1079,7 +1079,6 @@ struct nfs4_link_res {
 	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
-	struct nfs4_label		*label;
 	struct nfs4_change_info		cinfo;
 	struct nfs_fattr *		dir_attr;
 };
-- 
cgit v1.2.3


From 9558a007dbc383d48e7f5a123d0b5ff656c71068 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:04 -0400
Subject: NFS: Remove the label from the nfs4_lookup_res struct

And usethe fattr's label field instead. I also adjust function calls to
remove labels along the way.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c            | 34 ++++++++++++----------------------
 fs/nfs/namespace.c      |  3 +--
 fs/nfs/nfs3proc.c       |  3 +--
 fs/nfs/nfs4proc.c       | 16 +++++++---------
 fs/nfs/nfs4xdr.c        |  4 ++--
 fs/nfs/proc.c           |  3 +--
 include/linux/nfs_xdr.h |  4 +---
 7 files changed, 25 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 92530c3c1694..8a327971d485 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1494,19 +1494,17 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 {
 	struct nfs_fh *fhandle;
 	struct nfs_fattr *fattr;
-	struct nfs4_label *label;
 	unsigned long dir_verifier;
 	int ret;
 
 	ret = -ENOMEM;
 	fhandle = nfs_alloc_fhandle();
-	fattr = nfs_alloc_fattr();
-	label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
-	if (fhandle == NULL || fattr == NULL || IS_ERR(label))
+	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
+	if (fhandle == NULL || fattr == NULL)
 		goto out;
 
 	dir_verifier = nfs_save_change_attribute(dir);
-	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
+	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
 	if (ret < 0) {
 		switch (ret) {
 		case -ESTALE:
@@ -1525,7 +1523,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 	if (nfs_refresh_inode(inode, fattr) < 0)
 		goto out;
 
-	nfs_setsecurity(inode, fattr, label);
+	nfs_setsecurity(inode, fattr, fattr->label);
 	nfs_set_verifier(dentry, dir_verifier);
 
 	/* set a readdirplus hint that we had a cache miss */
@@ -1534,7 +1532,6 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 out:
 	nfs_free_fattr(fattr);
 	nfs_free_fhandle(fhandle);
-	nfs4_label_free(label);
 
 	/*
 	 * If the lookup failed despite the dentry change attribute being
@@ -1754,7 +1751,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	struct inode *inode = NULL;
 	struct nfs_fh *fhandle = NULL;
 	struct nfs_fattr *fattr = NULL;
-	struct nfs4_label *label = NULL;
 	unsigned long dir_verifier;
 	int error;
 
@@ -1773,27 +1769,23 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 
 	res = ERR_PTR(-ENOMEM);
 	fhandle = nfs_alloc_fhandle();
-	fattr = nfs_alloc_fattr();
+	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir));
 	if (fhandle == NULL || fattr == NULL)
 		goto out;
 
-	label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT);
-	if (IS_ERR(label))
-		goto out;
-
 	dir_verifier = nfs_save_change_attribute(dir);
 	trace_nfs_lookup_enter(dir, dentry, flags);
-	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, label);
+	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
 	if (error == -ENOENT)
 		goto no_entry;
 	if (error < 0) {
 		res = ERR_PTR(error);
-		goto out_label;
+		goto out;
 	}
-	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
+	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label);
 	res = ERR_CAST(inode);
 	if (IS_ERR(res))
-		goto out_label;
+		goto out;
 
 	/* Notify readdir to use READDIRPLUS */
 	nfs_force_use_readdirplus(dir);
@@ -1802,14 +1794,12 @@ no_entry:
 	res = d_splice_alias(inode, dentry);
 	if (res != NULL) {
 		if (IS_ERR(res))
-			goto out_label;
+			goto out;
 		dentry = res;
 	}
 	nfs_set_verifier(dentry, dir_verifier);
-out_label:
-	trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res));
-	nfs4_label_free(label);
 out:
+	trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res));
 	nfs_free_fattr(fattr);
 	nfs_free_fhandle(fhandle);
 	return res;
@@ -2058,7 +2048,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
 	d_drop(dentry);
 
 	if (fhandle->size == 0) {
-		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr, NULL);
+		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
 		if (error)
 			goto out_error;
 	}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index bc0c698f3350..3295af4110f1 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -308,8 +308,7 @@ int nfs_submount(struct fs_context *fc, struct nfs_server *server)
 
 	/* Look it up again to get its attributes */
 	err = server->nfs_client->rpc_ops->lookup(d_inode(parent), dentry,
-						  ctx->mntfh, ctx->clone_data.fattr,
-						  NULL);
+						  ctx->mntfh, ctx->clone_data.fattr);
 	dput(parent);
 	if (err != 0)
 		return err;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index f7524310ddf4..717eb651f0fd 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -193,8 +193,7 @@ __nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
 
 static int
 nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
-		 struct nfs_fh *fhandle, struct nfs_fattr *fattr,
-		 struct nfs4_label *label)
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	unsigned short task_flags = 0;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index bde5b5723046..7af73fd34b22 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4294,7 +4294,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
 		struct dentry *dentry, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr, struct nfs4_label *label)
+		struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 	int		       status;
@@ -4306,7 +4306,6 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
 	struct nfs4_lookup_res res = {
 		.server = server,
 		.fattr = fattr,
-		.label = label,
 		.fh = fhandle,
 	};
 	struct rpc_message msg = {
@@ -4323,7 +4322,7 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
 	if (nfs_lookup_is_soft_revalidate(dentry))
 		task_flags |= RPC_TASK_TIMEOUT;
 
-	args.bitmask = nfs4_bitmask(server, label);
+	args.bitmask = nfs4_bitmask(server, fattr->label);
 
 	nfs_fattr_init(fattr);
 
@@ -4345,7 +4344,7 @@ static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr)
 
 static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
 				   struct dentry *dentry, struct nfs_fh *fhandle,
-				   struct nfs_fattr *fattr, struct nfs4_label *label)
+				   struct nfs_fattr *fattr)
 {
 	struct nfs4_exception exception = {
 		.interruptible = true,
@@ -4354,7 +4353,7 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir,
 	const struct qstr *name = &dentry->d_name;
 	int err;
 	do {
-		err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr, label);
+		err = _nfs4_proc_lookup(client, dir, dentry, fhandle, fattr);
 		trace_nfs4_lookup(dir, name, err);
 		switch (err) {
 		case -NFS4ERR_BADNAME:
@@ -4390,13 +4389,12 @@ out:
 }
 
 static int nfs4_proc_lookup(struct inode *dir, struct dentry *dentry,
-			    struct nfs_fh *fhandle, struct nfs_fattr *fattr,
-			    struct nfs4_label *label)
+			    struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	int status;
 	struct rpc_clnt *client = NFS_CLIENT(dir);
 
-	status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, label);
+	status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
 	if (client != NFS_CLIENT(dir)) {
 		rpc_shutdown_client(client);
 		nfs_fixup_secinfo_attributes(fattr);
@@ -4411,7 +4409,7 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
 	struct rpc_clnt *client = NFS_CLIENT(dir);
 	int status;
 
-	status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr, NULL);
+	status = nfs4_proc_lookup_common(&client, dir, dentry, fhandle, fattr);
 	if (status < 0)
 		return ERR_PTR(status);
 	return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4c9d66fac3fd..960e2b2a7a58 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6171,7 +6171,7 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+	status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server);
 out:
 	return status;
 }
@@ -6229,7 +6229,7 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp,
 	status = decode_getfh(xdr, res->fh);
 	if (status == 0)
 		status = decode_getfattr_label(xdr, res->fattr,
-						res->label, res->server);
+						res->fattr->label, res->server);
 out:
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index ecc4e717808c..98a8901ede2e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -154,8 +154,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 static int
 nfs_proc_lookup(struct inode *dir, struct dentry *dentry,
-		struct nfs_fh *fhandle, struct nfs_fattr *fattr,
-		struct nfs4_label *label)
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct nfs_diropargs	arg = {
 		.fh		= NFS_FH(dir),
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d55bf3fd5167..95219d5a8668 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1095,7 +1095,6 @@ struct nfs4_lookup_res {
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
 	struct nfs_fh *			fh;
-	struct nfs4_label		*label;
 };
 
 struct nfs4_lookupp_arg {
@@ -1740,8 +1739,7 @@ struct nfs_rpc_ops {
 	int	(*setattr) (struct dentry *, struct nfs_fattr *,
 			    struct iattr *);
 	int	(*lookup)  (struct inode *, struct dentry *,
-			    struct nfs_fh *, struct nfs_fattr *,
-			    struct nfs4_label *);
+			    struct nfs_fh *, struct nfs_fattr *);
 	int	(*lookupp) (struct inode *, struct nfs_fh *,
 			    struct nfs_fattr *, struct nfs4_label *);
 	int	(*access)  (struct inode *, struct nfs_access_entry *);
-- 
cgit v1.2.3


From ba4bc8dc4d937df2b407393435a302550be0ad82 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:05 -0400
Subject: NFS: Remove the nfs4_label from the nfs4_lookupp_res struct

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/export.c         | 26 +++++++-------------------
 fs/nfs/nfs3proc.c       |  2 +-
 fs/nfs/nfs4proc.c       | 10 ++++------
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  3 +--
 5 files changed, 14 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index d772c20bbfd1..895b404888dd 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -131,7 +131,6 @@ nfs_get_parent(struct dentry *dentry)
 	struct super_block *sb = inode->i_sb;
 	struct nfs_server *server = NFS_SB(sb);
 	struct nfs_fattr *fattr = NULL;
-	struct nfs4_label *label = NULL;
 	struct dentry *parent;
 	struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops;
 	struct nfs_fh fh;
@@ -139,31 +138,20 @@ nfs_get_parent(struct dentry *dentry)
 	if (!ops->lookupp)
 		return ERR_PTR(-EACCES);
 
-	fattr = nfs_alloc_fattr();
-	if (fattr == NULL) {
-		parent = ERR_PTR(-ENOMEM);
-		goto out;
-	}
-
-	label = nfs4_label_alloc(server, GFP_KERNEL);
-	if (IS_ERR(label)) {
-		parent = ERR_CAST(label);
-		goto out_free_fattr;
-	}
+	fattr = nfs_alloc_fattr_with_label(server);
+	if (fattr == NULL)
+		return ERR_PTR(-ENOMEM);
 
-	ret = ops->lookupp(inode, &fh, fattr, label);
+	ret = ops->lookupp(inode, &fh, fattr);
 	if (ret) {
 		parent = ERR_PTR(ret);
-		goto out_free_label;
+		goto out;
 	}
 
-	pinode = nfs_fhget(sb, &fh, fattr, label);
+	pinode = nfs_fhget(sb, &fh, fattr, fattr->label);
 	parent = d_obtain_alias(pinode);
-out_free_label:
-	nfs4_label_free(label);
-out_free_fattr:
-	nfs_free_fattr(fattr);
 out:
+	nfs_free_fattr(fattr);
 	return parent;
 }
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 717eb651f0fd..516f3340b226 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -208,7 +208,7 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
-			     struct nfs_fattr *fattr, struct nfs4_label *label)
+			     struct nfs_fattr *fattr)
 {
 	const char dotdot[] = "..";
 	const size_t len = strlen(dotdot);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7af73fd34b22..caa5a1467f94 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4416,8 +4416,7 @@ nfs4_proc_lookup_mountpoint(struct inode *dir, struct dentry *dentry,
 }
 
 static int _nfs4_proc_lookupp(struct inode *inode,
-		struct nfs_fh *fhandle, struct nfs_fattr *fattr,
-		struct nfs4_label *label)
+		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -4429,7 +4428,6 @@ static int _nfs4_proc_lookupp(struct inode *inode,
 	struct nfs4_lookupp_res res = {
 		.server = server,
 		.fattr = fattr,
-		.label = label,
 		.fh = fhandle,
 	};
 	struct rpc_message msg = {
@@ -4442,7 +4440,7 @@ static int _nfs4_proc_lookupp(struct inode *inode,
 	if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
 		task_flags |= RPC_TASK_TIMEOUT;
 
-	args.bitmask = nfs4_bitmask(server, label);
+	args.bitmask = nfs4_bitmask(server, fattr->label);
 
 	nfs_fattr_init(fattr);
 
@@ -4454,14 +4452,14 @@ static int _nfs4_proc_lookupp(struct inode *inode,
 }
 
 static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
-			     struct nfs_fattr *fattr, struct nfs4_label *label)
+			     struct nfs_fattr *fattr)
 {
 	struct nfs4_exception exception = {
 		.interruptible = true,
 	};
 	int err;
 	do {
-		err = _nfs4_proc_lookupp(inode, fhandle, fattr, label);
+		err = _nfs4_proc_lookupp(inode, fhandle, fattr);
 		trace_nfs4_lookupp(inode, err);
 		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 960e2b2a7a58..0044747f9314 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6201,7 +6201,7 @@ static int nfs4_xdr_dec_lookupp(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_getfh(xdr, res->fh);
 	if (status)
 		goto out;
-	status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+	status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 95219d5a8668..f0a685d9b8bd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1108,7 +1108,6 @@ struct nfs4_lookupp_res {
 	const struct nfs_server		*server;
 	struct nfs_fattr		*fattr;
 	struct nfs_fh			*fh;
-	struct nfs4_label		*label;
 };
 
 struct nfs4_lookup_root_arg {
@@ -1741,7 +1740,7 @@ struct nfs_rpc_ops {
 	int	(*lookup)  (struct inode *, struct dentry *,
 			    struct nfs_fh *, struct nfs_fattr *);
 	int	(*lookupp) (struct inode *, struct nfs_fh *,
-			    struct nfs_fattr *, struct nfs4_label *);
+			    struct nfs_fattr *);
 	int	(*access)  (struct inode *, struct nfs_access_entry *);
 	int	(*readlink)(struct inode *, struct page *, unsigned int,
 			    unsigned int);
-- 
cgit v1.2.3


From 76baa2b29c7161bc65a3051d311297b7d7fc827a Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:06 -0400
Subject: NFS: Remove the f_label from the nfs4_opendata and nfs_openres

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs4_fs.h        |  1 -
 fs/nfs/nfs4proc.c       | 35 +++++++++++------------------------
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  1 -
 4 files changed, 12 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba78df4b13d9..b621e29e6187 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -234,7 +234,6 @@ struct nfs4_opendata {
 	struct nfs4_string group_name;
 	struct nfs4_label *a_label;
 	struct nfs_fattr f_attr;
-	struct nfs4_label *f_label;
 	struct dentry *dir;
 	struct dentry *dentry;
 	struct nfs4_state_owner *owner;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index caa5a1467f94..cb0613e0ef8f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1329,7 +1329,6 @@ nfs4_map_atomic_open_claim(struct nfs_server *server,
 static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 {
 	p->o_res.f_attr = &p->f_attr;
-	p->o_res.f_label = p->f_label;
 	p->o_res.seqid = p->o_arg.seqid;
 	p->c_res.seqid = p->c_arg.seqid;
 	p->o_res.server = p->o_arg.server;
@@ -1355,8 +1354,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	if (p == NULL)
 		goto err;
 
-	p->f_label = nfs4_label_alloc(server, gfp_mask);
-	if (IS_ERR(p->f_label))
+	p->f_attr.label = nfs4_label_alloc(server, gfp_mask);
+	if (IS_ERR(p->f_attr.label))
 		goto err_free_p;
 
 	p->a_label = nfs4_label_alloc(server, gfp_mask);
@@ -1434,7 +1433,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 err_free_label:
 	nfs4_label_free(p->a_label);
 err_free_f:
-	nfs4_label_free(p->f_label);
+	nfs4_label_free(p->f_attr.label);
 err_free_p:
 	kfree(p);
 err:
@@ -1456,7 +1455,7 @@ static void nfs4_opendata_free(struct kref *kref)
 	nfs4_put_state_owner(p->owner);
 
 	nfs4_label_free(p->a_label);
-	nfs4_label_free(p->f_label);
+	nfs4_label_free(p->f_attr.label);
 
 	dput(p->dir);
 	dput(p->dentry);
@@ -2009,7 +2008,7 @@ nfs4_opendata_get_inode(struct nfs4_opendata *data)
 		if (!(data->f_attr.valid & NFS_ATTR_FATTR))
 			return ERR_PTR(-EAGAIN);
 		inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh,
-				&data->f_attr, data->f_label);
+				&data->f_attr, data->f_attr.label);
 		break;
 	default:
 		inode = d_inode(data->dentry);
@@ -2709,7 +2708,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data,
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
 		nfs4_sequence_free_slot(&o_res->seq_res);
 		nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr,
-				o_res->f_label, NULL);
+				o_res->f_attr->label, NULL);
 	}
 	return 0;
 }
@@ -3125,7 +3124,6 @@ static int _nfs4_do_open(struct inode *dir,
 	enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
 	struct iattr *sattr = c->sattr;
 	struct nfs4_label *label = c->label;
-	struct nfs4_label *olabel = NULL;
 	int status;
 
 	/* Protect against reboot recovery conflicts */
@@ -3148,19 +3146,11 @@ static int _nfs4_do_open(struct inode *dir,
 	if (opendata == NULL)
 		goto err_put_state_owner;
 
-	if (label) {
-		olabel = nfs4_label_alloc(server, GFP_KERNEL);
-		if (IS_ERR(olabel)) {
-			status = PTR_ERR(olabel);
-			goto err_opendata_put;
-		}
-	}
-
 	if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) {
 		if (!opendata->f_attr.mdsthreshold) {
 			opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
 			if (!opendata->f_attr.mdsthreshold)
-				goto err_free_label;
+				goto err_opendata_put;
 		}
 		opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
 	}
@@ -3169,7 +3159,7 @@ static int _nfs4_do_open(struct inode *dir,
 
 	status = _nfs4_open_and_get_state(opendata, flags, ctx);
 	if (status != 0)
-		goto err_free_label;
+		goto err_opendata_put;
 	state = ctx->state;
 
 	if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) &&
@@ -3186,11 +3176,12 @@ static int _nfs4_do_open(struct inode *dir,
 			nfs_fattr_init(opendata->o_res.f_attr);
 			status = nfs4_do_setattr(state->inode, cred,
 					opendata->o_res.f_attr, sattr,
-					ctx, label, olabel);
+					ctx, label, opendata->o_res.f_attr->label);
 			if (status == 0) {
 				nfs_setattr_update_inode(state->inode, sattr,
 						opendata->o_res.f_attr);
-				nfs_setsecurity(state->inode, opendata->o_res.f_attr, olabel);
+				nfs_setsecurity(state->inode, opendata->o_res.f_attr,
+						opendata->o_res.f_attr->label);
 			}
 			sattr->ia_valid = ia_old;
 		}
@@ -3203,13 +3194,9 @@ static int _nfs4_do_open(struct inode *dir,
 		opendata->f_attr.mdsthreshold = NULL;
 	}
 
-	nfs4_label_free(olabel);
-
 	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	return 0;
-err_free_label:
-	nfs4_label_free(olabel);
 err_opendata_put:
 	nfs4_opendata_put(opendata);
 err_put_state_owner:
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0044747f9314..09bd1d121318 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6524,7 +6524,7 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 		goto out;
 	if (res->access_request)
 		decode_access(xdr, &res->access_supported, &res->access_result);
-	decode_getfattr_label(xdr, res->f_attr, res->f_label, res->server);
+	decode_getfattr_label(xdr, res->f_attr, res->f_attr->label, res->server);
 	if (res->lg_res)
 		decode_layoutget(xdr, rqstp, res->lg_res);
 out:
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f0a685d9b8bd..cb28e01ea41e 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -488,7 +488,6 @@ struct nfs_openres {
 	struct nfs4_change_info	cinfo;
 	__u32                   rflags;
 	struct nfs_fattr *      f_attr;
-	struct nfs4_label	*f_label;
 	struct nfs_seqid *	seqid;
 	const struct nfs_server *server;
 	fmode_t			delegation_type;
-- 
cgit v1.2.3


From 2ef61e0eaa333e4e9c348c41a4b7abfb34b8736d Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:07 -0400
Subject: NFS: Remove the nfs4_label from the nfs4_getattr_res

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/client.c         |  2 +-
 fs/nfs/dir.c            |  2 +-
 fs/nfs/export.c         | 18 ++++--------------
 fs/nfs/inode.c          | 20 +++++---------------
 fs/nfs/nfs3proc.c       |  3 +--
 fs/nfs/nfs4_fs.h        |  3 +--
 fs/nfs/nfs4file.c       |  2 +-
 fs/nfs/nfs4proc.c       | 25 +++++++++++--------------
 fs/nfs/nfs4xdr.c        |  2 +-
 fs/nfs/proc.c           |  3 +--
 include/linux/nfs_xdr.h |  4 +---
 11 files changed, 28 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 960b9d87648e..1e4dc1ab9312 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1048,7 +1048,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
 
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		error = ctx->nfs_mod->rpc_ops->getattr(server, ctx->mntfh,
-						       fattr, NULL, NULL);
+						       fattr, NULL);
 		if (error < 0) {
 			dprintk("nfs_create_server: getattr error = %d\n", -error);
 			goto error;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8a327971d485..aa95a898ad0f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2056,7 +2056,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
 	if (!(fattr->valid & NFS_ATTR_FATTR)) {
 		struct nfs_server *server = NFS_SB(dentry->d_sb);
 		error = server->nfs_client->rpc_ops->getattr(server, fhandle,
-				fattr, NULL, NULL);
+				fattr, NULL);
 		if (error < 0)
 			goto out_error;
 	}
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 895b404888dd..a0462f7e7e35 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -64,7 +64,6 @@ static struct dentry *
 nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 		 int fh_len, int fh_type)
 {
-	struct nfs4_label *label = NULL;
 	struct nfs_fattr *fattr = NULL;
 	struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
 	size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
@@ -79,7 +78,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 	if (fh_len < len || fh_type != len)
 		return NULL;
 
-	fattr = nfs_alloc_fattr();
+	fattr = nfs_alloc_fattr_with_label(NFS_SB(sb));
 	if (fattr == NULL) {
 		dentry = ERR_PTR(-ENOMEM);
 		goto out;
@@ -95,28 +94,19 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 	if (inode)
 		goto out_found;
 
-	label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL);
-	if (IS_ERR(label)) {
-		dentry = ERR_CAST(label);
-		goto out_free_fattr;
-	}
-
 	rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops;
-	ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label, NULL);
+	ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, NULL);
 	if (ret) {
 		dprintk("%s: getattr failed %d\n", __func__, ret);
 		trace_nfs_fh_to_dentry(sb, server_fh, fattr->fileid, ret);
 		dentry = ERR_PTR(ret);
-		goto out_free_label;
+		goto out_free_fattr;
 	}
 
-	inode = nfs_fhget(sb, server_fh, fattr, label);
+	inode = nfs_fhget(sb, server_fh, fattr, fattr->label);
 
 out_found:
 	dentry = d_obtain_alias(inode);
-
-out_free_label:
-	nfs4_label_free(label);
 out_free_fattr:
 	nfs_free_fattr(fattr);
 out:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 84c7efa2ea87..7d9dca781956 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1194,7 +1194,6 @@ int
 __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
 	int		 status = -ESTALE;
-	struct nfs4_label *label = NULL;
 	struct nfs_fattr *fattr = NULL;
 	struct nfs_inode *nfsi = NFS_I(inode);
 
@@ -1216,20 +1215,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	}
 
 	status = -ENOMEM;
-	fattr = nfs_alloc_fattr();
+	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
 	if (fattr == NULL)
 		goto out;
 
 	nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 
-	label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
-	if (IS_ERR(label)) {
-		status = PTR_ERR(label);
-		goto out;
-	}
-
-	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr,
-			label, inode);
+	status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), fattr, inode);
 	if (status != 0) {
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) getattr failed, error=%d\n",
 			 inode->i_sb->s_id,
@@ -1246,7 +1238,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 			else
 				nfs_zap_caches(inode);
 		}
-		goto err_out;
+		goto out;
 	}
 
 	status = nfs_refresh_inode(inode, fattr);
@@ -1254,20 +1246,18 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Lu) refresh failed, error=%d\n",
 			 inode->i_sb->s_id,
 			 (unsigned long long)NFS_FILEID(inode), status);
-		goto err_out;
+		goto out;
 	}
 
 	if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
 		nfs_zap_acl_cache(inode);
 
-	nfs_setsecurity(inode, fattr, label);
+	nfs_setsecurity(inode, fattr, fattr->label);
 
 	dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n",
 		inode->i_sb->s_id,
 		(unsigned long long)NFS_FILEID(inode));
 
-err_out:
-	nfs4_label_free(label);
 out:
 	nfs_free_fattr(fattr);
 	trace_nfs_revalidate_inode_exit(inode, status);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 516f3340b226..7bae21a2ba05 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -100,8 +100,7 @@ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
  */
 static int
 nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr, struct nfs4_label *label,
-		struct inode *inode)
+		struct nfs_fattr *fattr, struct inode *inode)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_GETATTR],
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b621e29e6187..ed5eaca6801e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -316,8 +316,7 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
 		const struct nfs_lock_context *l_ctx,
 		fmode_t fmode);
 extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
-			     struct nfs_fattr *fattr, struct nfs4_label *label,
-			     struct inode *inode);
+			     struct nfs_fattr *fattr, struct inode *inode);
 extern int update_open_stateid(struct nfs4_state *state,
 				const nfs4_stateid *open_stateid,
 				const nfs4_stateid *deleg_stateid,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 92a1b992a141..e2451f66024c 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -331,7 +331,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
 	if (!fattr)
 		return ERR_PTR(-ENOMEM);
 
-	status = nfs4_proc_getattr(server, src_fh, fattr, NULL, NULL);
+	status = nfs4_proc_getattr(server, src_fh, fattr, NULL);
 	if (status < 0) {
 		res = ERR_PTR(status);
 		goto out;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cb0613e0ef8f..f0262397faec 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -93,7 +93,8 @@ struct nfs4_opendata;
 static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
-static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label, struct inode *inode);
+static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
+			      struct nfs_fattr *fattr, struct inode *inode);
 static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
 			    struct nfs_fattr *fattr, struct iattr *sattr,
 			    struct nfs_open_context *ctx, struct nfs4_label *ilabel,
@@ -2707,8 +2708,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data,
 	}
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
 		nfs4_sequence_free_slot(&o_res->seq_res);
-		nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr,
-				o_res->f_attr->label, NULL);
+		nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, NULL);
 	}
 	return 0;
 }
@@ -4090,7 +4090,6 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
 {
 	int error;
 	struct nfs_fattr *fattr = info->fattr;
-	struct nfs4_label *label = fattr->label;
 
 	error = nfs4_server_capabilities(server, mntfh);
 	if (error < 0) {
@@ -4098,7 +4097,7 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *mntfh,
 		return error;
 	}
 
-	error = nfs4_proc_getattr(server, mntfh, fattr, label, NULL);
+	error = nfs4_proc_getattr(server, mntfh, fattr, NULL);
 	if (error < 0) {
 		dprintk("nfs4_get_root: getattr error = %d\n", -error);
 		goto out;
@@ -4161,8 +4160,7 @@ out:
 }
 
 static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
-				struct nfs_fattr *fattr, struct nfs4_label *label,
-				struct inode *inode)
+				struct nfs_fattr *fattr, struct inode *inode)
 {
 	__u32 bitmask[NFS4_BITMASK_SZ];
 	struct nfs4_getattr_arg args = {
@@ -4171,7 +4169,6 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	};
 	struct nfs4_getattr_res res = {
 		.fattr = fattr,
-		.label = label,
 		.server = server,
 	};
 	struct rpc_message msg = {
@@ -4188,7 +4185,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 	if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
 		task_flags |= RPC_TASK_TIMEOUT;
 
-	nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0);
+	nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), inode, 0);
 	nfs_fattr_init(fattr);
 	nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
 	return nfs4_do_call_sync(server->client, server, &msg,
@@ -4196,15 +4193,14 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 }
 
 int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
-				struct nfs_fattr *fattr, struct nfs4_label *label,
-				struct inode *inode)
+				struct nfs_fattr *fattr, struct inode *inode)
 {
 	struct nfs4_exception exception = {
 		.interruptible = true,
 	};
 	int err;
 	do {
-		err = _nfs4_proc_getattr(server, fhandle, fattr, label, inode);
+		err = _nfs4_proc_getattr(server, fhandle, fattr, inode);
 		trace_nfs4_getattr(server, fhandle, fattr, err);
 		err = nfs4_handle_exception(server, err,
 				&exception);
@@ -5972,17 +5968,18 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
 					size_t buflen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_fattr fattr;
 	struct nfs4_label label = {0, 0, buflen, buf};
 
 	u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL };
+	struct nfs_fattr fattr = {
+		.label = &label,
+	};
 	struct nfs4_getattr_arg arg = {
 		.fh		= NFS_FH(inode),
 		.bitmask	= bitmask,
 	};
 	struct nfs4_getattr_res res = {
 		.fattr		= &fattr,
-		.label		= &label,
 		.server		= server,
 	};
 	struct rpc_message msg = {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 09bd1d121318..d1b61b76bc82 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6386,7 +6386,7 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
 	status = decode_putfh(xdr);
 	if (status)
 		goto out;
-	status = decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+	status = decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server);
 out:
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 98a8901ede2e..baee21c2c091 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -100,8 +100,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
  */
 static int
 nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
-		struct nfs_fattr *fattr, struct nfs4_label *label,
-		struct inode *inode)
+		struct nfs_fattr *fattr, struct inode *inode)
 {
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_GETATTR],
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index cb28e01ea41e..817f1bf5f187 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1063,7 +1063,6 @@ struct nfs4_getattr_res {
 	struct nfs4_sequence_res	seq_res;
 	const struct nfs_server *	server;
 	struct nfs_fattr *		fattr;
-	struct nfs4_label		*label;
 };
 
 struct nfs4_link_arg {
@@ -1732,8 +1731,7 @@ struct nfs_rpc_ops {
 	int	(*submount) (struct fs_context *, struct nfs_server *);
 	int	(*try_get_tree) (struct fs_context *);
 	int	(*getattr) (struct nfs_server *, struct nfs_fh *,
-			    struct nfs_fattr *, struct nfs4_label *,
-			    struct inode *);
+			    struct nfs_fattr *, struct inode *);
 	int	(*setattr) (struct dentry *, struct nfs_fattr *,
 			    struct iattr *);
 	int	(*lookup)  (struct inode *, struct dentry *,
-- 
cgit v1.2.3


From 1b00ad657997c8984a9e627a3bd37ea14f20beb2 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:08 -0400
Subject: NFS: Remove the nfs4_label from the nfs_setattrres

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/inode.c          |  2 +-
 fs/nfs/nfs4proc.c       | 56 ++++++++++++++-----------------------------------
 fs/nfs/nfs4xdr.c        |  2 +-
 include/linux/nfs_xdr.h |  1 -
 4 files changed, 18 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d9dca781956..df109287f2e0 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -650,7 +650,7 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 	if (S_ISREG(inode->i_mode))
 		nfs_sync_inode(inode);
 
-	fattr = nfs_alloc_fattr();
+	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
 	if (fattr == NULL) {
 		error = -ENOMEM;
 		goto out;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0262397faec..a0762ecc0c73 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -97,8 +97,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 			      struct nfs_fattr *fattr, struct inode *inode);
 static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
 			    struct nfs_fattr *fattr, struct iattr *sattr,
-			    struct nfs_open_context *ctx, struct nfs4_label *ilabel,
-			    struct nfs4_label *olabel);
+			    struct nfs_open_context *ctx, struct nfs4_label *ilabel);
 #ifdef CONFIG_NFS_V4_1
 static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp,
 		const struct cred *cred,
@@ -3176,7 +3175,7 @@ static int _nfs4_do_open(struct inode *dir,
 			nfs_fattr_init(opendata->o_res.f_attr);
 			status = nfs4_do_setattr(state->inode, cred,
 					opendata->o_res.f_attr, sattr,
-					ctx, label, opendata->o_res.f_attr->label);
+					ctx, label);
 			if (status == 0) {
 				nfs_setattr_update_inode(state->inode, sattr,
 						opendata->o_res.f_attr);
@@ -3341,8 +3340,7 @@ zero_stateid:
 
 static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
 			   struct nfs_fattr *fattr, struct iattr *sattr,
-			   struct nfs_open_context *ctx, struct nfs4_label *ilabel,
-			   struct nfs4_label *olabel)
+			   struct nfs_open_context *ctx, struct nfs4_label *ilabel)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	__u32 bitmask[NFS4_BITMASK_SZ];
@@ -3356,7 +3354,6 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
 	};
 	struct nfs_setattrres  res = {
 		.fattr		= fattr,
-		.label		= olabel,
 		.server		= server,
 	};
 	struct nfs4_exception exception = {
@@ -3373,7 +3370,7 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
 		adjust_flags |= NFS_INO_INVALID_OTHER;
 
 	do {
-		nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel),
+		nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label),
 					inode, adjust_flags);
 
 		err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
@@ -4232,7 +4229,6 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	struct inode *inode = d_inode(dentry);
 	const struct cred *cred = NULL;
 	struct nfs_open_context *ctx = NULL;
-	struct nfs4_label *label = NULL;
 	int status;
 
 	if (pnfs_ld_layoutret_on_setattr(inode) &&
@@ -4258,20 +4254,15 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 			cred = ctx->cred;
 	}
 
-	label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
-	if (IS_ERR(label))
-		return PTR_ERR(label);
-
 	/* Return any delegations if we're going to change ACLs */
 	if ((sattr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
 		nfs4_inode_make_writeable(inode);
 
-	status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL, label);
+	status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL);
 	if (status == 0) {
 		nfs_setattr_update_inode(inode, sattr, fattr);
-		nfs_setsecurity(inode, fattr, label);
+		nfs_setsecurity(inode, fattr, fattr->label);
 	}
-	nfs4_label_free(label);
 	return status;
 }
 
@@ -6021,8 +6012,7 @@ static int nfs4_get_security_label(struct inode *inode, void *buf,
 
 static int _nfs4_do_set_security_label(struct inode *inode,
 		struct nfs4_label *ilabel,
-		struct nfs_fattr *fattr,
-		struct nfs4_label *olabel)
+		struct nfs_fattr *fattr)
 {
 
 	struct iattr sattr = {0};
@@ -6037,7 +6027,6 @@ static int _nfs4_do_set_security_label(struct inode *inode,
 	};
 	struct nfs_setattrres res = {
 		.fattr		= fattr,
-		.label		= olabel,
 		.server		= server,
 	};
 	struct rpc_message msg = {
@@ -6058,15 +6047,13 @@ static int _nfs4_do_set_security_label(struct inode *inode,
 
 static int nfs4_do_set_security_label(struct inode *inode,
 		struct nfs4_label *ilabel,
-		struct nfs_fattr *fattr,
-		struct nfs4_label *olabel)
+		struct nfs_fattr *fattr)
 {
 	struct nfs4_exception exception = { };
 	int err;
 
 	do {
-		err = _nfs4_do_set_security_label(inode, ilabel,
-				fattr, olabel);
+		err = _nfs4_do_set_security_label(inode, ilabel, fattr);
 		trace_nfs4_set_security_label(inode, err);
 		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
@@ -6077,32 +6064,21 @@ static int nfs4_do_set_security_label(struct inode *inode,
 static int
 nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
 {
-	struct nfs4_label ilabel, *olabel = NULL;
-	struct nfs_fattr fattr;
+	struct nfs4_label ilabel = {0, 0, buflen, (char *)buf };
+	struct nfs_fattr *fattr;
 	int status;
 
 	if (!nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL))
 		return -EOPNOTSUPP;
 
-	nfs_fattr_init(&fattr);
-
-	ilabel.pi = 0;
-	ilabel.lfs = 0;
-	ilabel.label = (char *)buf;
-	ilabel.len = buflen;
-
-	olabel = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
-	if (IS_ERR(olabel)) {
-		status = -PTR_ERR(olabel);
-		goto out;
-	}
+	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
+	if (fattr == NULL)
+		return -ENOMEM;
 
-	status = nfs4_do_set_security_label(inode, &ilabel, &fattr, olabel);
+	status = nfs4_do_set_security_label(inode, &ilabel, fattr);
 	if (status == 0)
-		nfs_setsecurity(inode, &fattr, olabel);
+		nfs_setsecurity(inode, fattr, fattr->label);
 
-	nfs4_label_free(olabel);
-out:
 	return status;
 }
 #endif	/* CONFIG_NFS_V4_SECURITY_LABEL */
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d1b61b76bc82..e3df7ada5988 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6608,7 +6608,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp,
 	status = decode_setattr(xdr);
 	if (status)
 		goto out;
-	decode_getfattr_label(xdr, res->fattr, res->label, res->server);
+	decode_getfattr_label(xdr, res->fattr, res->fattr->label, res->server);
 out:
 	return status;
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 817f1bf5f187..967a0098f0a9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -832,7 +832,6 @@ struct nfs_getaclres {
 struct nfs_setattrres {
 	struct nfs4_sequence_res	seq_res;
 	struct nfs_fattr *              fattr;
-	struct nfs4_label		*label;
 	const struct nfs_server *	server;
 };
 
-- 
cgit v1.2.3


From d91bfc46426d3d772fc0d9d165e3435fd0f0a79e Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:09 -0400
Subject: NFS: Remove the nfs4_label argument from nfs_instantiate()

Pull the label from the fattr instead.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 5 ++---
 fs/nfs/nfs4proc.c      | 2 +-
 fs/nfs/proc.c          | 8 ++++----
 include/linux/nfs_fs.h | 2 +-
 4 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index aa95a898ad0f..48ea69af9446 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2075,12 +2075,11 @@ EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
  * Code common to create, mkdir, and mknod.
  */
 int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
-				struct nfs_fattr *fattr,
-				struct nfs4_label *label)
+				struct nfs_fattr *fattr)
 {
 	struct dentry *d;
 
-	d = nfs_add_or_obtain(dentry, fhandle, fattr, label);
+	d = nfs_add_or_obtain(dentry, fhandle, fattr, fattr->label);
 	if (IS_ERR(d))
 		return PTR_ERR(d);
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a0762ecc0c73..4d09c81502cf 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -4859,7 +4859,7 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
 					      data->res.fattr->time_start,
 					      NFS_INO_INVALID_DATA);
 		spin_unlock(&dir->i_lock);
-		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.fattr->label);
+		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
 	}
 	return status;
 }
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index baee21c2c091..73dcaa99fa9b 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -255,7 +255,7 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	if (status == 0)
-		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
+		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
 	nfs_free_createdata(data);
 out:
 	dprintk("NFS reply create: %d\n", status);
@@ -302,7 +302,7 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	}
 	if (status == 0)
-		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
+		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
 	nfs_free_createdata(data);
 out:
 	dprintk("NFS reply mknod: %d\n", status);
@@ -434,7 +434,7 @@ nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
 	 * should fill in the data with a LOOKUP call on the wire.
 	 */
 	if (status == 0)
-		status = nfs_instantiate(dentry, fh, fattr, NULL);
+		status = nfs_instantiate(dentry, fh, fattr);
 
 out_free:
 	nfs_free_fattr(fattr);
@@ -463,7 +463,7 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 	nfs_mark_for_revalidate(dir);
 	if (status == 0)
-		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, NULL);
+		status = nfs_instantiate(dentry, data->res.fh, data->res.fattr);
 	nfs_free_createdata(data);
 out:
 	dprintk("NFS reply mkdir: %d\n", status);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 88c3aed8ad39..a8a9b71aeea6 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -532,7 +532,7 @@ extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
 			struct nfs_fh *fh, struct nfs_fattr *fattr,
 			struct nfs4_label *label);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
-			struct nfs_fattr *fattr, struct nfs4_label *label);
+			struct nfs_fattr *fattr);
 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
 extern void nfs_access_zap_cache(struct inode *inode);
 extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res,
-- 
cgit v1.2.3


From cc6f32989c3202349b90edde0c4702b098410fe8 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:10 -0400
Subject: NFS: Remove the nfs4_label argument from nfs_add_or_obtain()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 7 +++----
 fs/nfs/nfs3proc.c      | 2 +-
 include/linux/nfs_fs.h | 3 +--
 3 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 48ea69af9446..1c74f9d2f3a1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2036,8 +2036,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 
 struct dentry *
 nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
-				struct nfs_fattr *fattr,
-				struct nfs4_label *label)
+				struct nfs_fattr *fattr)
 {
 	struct dentry *parent = dget_parent(dentry);
 	struct inode *dir = d_inode(parent);
@@ -2060,7 +2059,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
 		if (error < 0)
 			goto out_error;
 	}
-	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
+	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label);
 	d = d_splice_alias(inode, dentry);
 out:
 	dput(parent);
@@ -2079,7 +2078,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
 {
 	struct dentry *d;
 
-	d = nfs_add_or_obtain(dentry, fhandle, fattr, fattr->label);
+	d = nfs_add_or_obtain(dentry, fhandle, fattr);
 	if (IS_ERR(d))
 		return PTR_ERR(d);
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7bae21a2ba05..7100514d306b 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -321,7 +321,7 @@ nfs3_do_create(struct inode *dir, struct dentry *dentry, struct nfs3_createdata
 	if (status != 0)
 		return ERR_PTR(status);
 
-	return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr, NULL);
+	return nfs_add_or_obtain(dentry, data->res.fh, data->res.fattr);
 }
 
 static void nfs3_free_createdata(struct nfs3_createdata *data)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a8a9b71aeea6..6eda001b306b 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -529,8 +529,7 @@ extern void nfs_set_verifier(struct dentry * dentry, unsigned long verf);
 extern void nfs_clear_verifier_delegated(struct inode *inode);
 #endif /* IS_ENABLED(CONFIG_NFS_V4) */
 extern struct dentry *nfs_add_or_obtain(struct dentry *dentry,
-			struct nfs_fh *fh, struct nfs_fattr *fattr,
-			struct nfs4_label *label);
+			struct nfs_fh *fh, struct nfs_fattr *fattr);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh,
 			struct nfs_fattr *fattr);
 extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags);
-- 
cgit v1.2.3


From cf7ab00aabbf9c8f1ec72edff15849ddc23aa6a7 Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:11 -0400
Subject: NFS: Remove the nfs4_label argument from nfs_fhget()

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           | 6 +++---
 fs/nfs/export.c        | 4 ++--
 fs/nfs/getroot.c       | 2 +-
 fs/nfs/inode.c         | 4 ++--
 fs/nfs/nfs4file.c      | 3 +--
 fs/nfs/nfs4proc.c      | 2 +-
 include/linux/nfs_fs.h | 2 +-
 7 files changed, 11 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 1c74f9d2f3a1..bd89f39e8ba9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -697,7 +697,7 @@ again:
 		goto out;
 	}
 
-	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->fattr->label);
+	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
 	alias = d_splice_alias(inode, dentry);
 	d_lookup_done(dentry);
 	if (alias) {
@@ -1782,7 +1782,7 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 		res = ERR_PTR(error);
 		goto out;
 	}
-	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label);
+	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	res = ERR_CAST(inode);
 	if (IS_ERR(res))
 		goto out;
@@ -2059,7 +2059,7 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
 		if (error < 0)
 			goto out_error;
 	}
-	inode = nfs_fhget(dentry->d_sb, fhandle, fattr, fattr->label);
+	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
 	d = d_splice_alias(inode, dentry);
 out:
 	dput(parent);
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index a0462f7e7e35..171c424cb6d5 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -103,7 +103,7 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 		goto out_free_fattr;
 	}
 
-	inode = nfs_fhget(sb, server_fh, fattr, fattr->label);
+	inode = nfs_fhget(sb, server_fh, fattr);
 
 out_found:
 	dentry = d_obtain_alias(inode);
@@ -138,7 +138,7 @@ nfs_get_parent(struct dentry *dentry)
 		goto out;
 	}
 
-	pinode = nfs_fhget(sb, &fh, fattr, fattr->label);
+	pinode = nfs_fhget(sb, &fh, fattr);
 	parent = d_obtain_alias(pinode);
 out:
 	nfs_free_fattr(fattr);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 7604cb6a0ac2..0aedee201166 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -91,7 +91,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
 		goto out_fattr;
 	}
 
-	inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr, NULL);
+	inode = nfs_fhget(s, ctx->mntfh, fsinfo.fattr);
 	if (IS_ERR(inode)) {
 		dprintk("nfs_get_root: get root inode failed\n");
 		error = PTR_ERR(inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index df109287f2e0..be28f0251dee 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -448,7 +448,7 @@ static void nfs_inode_init_dir(struct nfs_inode *nfsi)
  * instead of inode number.
  */
 struct inode *
-nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label)
+nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 {
 	struct nfs_find_desc desc = {
 		.fh	= fh,
@@ -581,7 +581,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 			   fattr->size != 0)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
 
-		nfs_setsecurity(inode, fattr, label);
+		nfs_setsecurity(inode, fattr, fattr->label);
 
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = now;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index e2451f66024c..e79ae4cbc395 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -344,8 +344,7 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt,
 		goto out;
 	snprintf(read_name, len, SSC_READ_NAME_BODY, read_name_gen++);
 
-	r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr,
-			NULL);
+	r_ino = nfs_fhget(ss_mnt->mnt_root->d_inode->i_sb, src_fh, fattr);
 	if (IS_ERR(r_ino)) {
 		res = ERR_CAST(r_ino);
 		goto out_free_name;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4d09c81502cf..b25adb2250ef 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2008,7 +2008,7 @@ nfs4_opendata_get_inode(struct nfs4_opendata *data)
 		if (!(data->f_attr.valid & NFS_ATTR_FATTR))
 			return ERR_PTR(-EAGAIN);
 		inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh,
-				&data->f_attr, data->f_attr.label);
+				&data->f_attr);
 		break;
 	default:
 		inode = d_inode(data->dentry);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 6eda001b306b..c36c6a559fc9 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -388,7 +388,7 @@ extern void nfs_zap_caches(struct inode *);
 extern void nfs_set_inode_stale(struct inode *inode);
 extern void nfs_invalidate_atime(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
-				struct nfs_fattr *, struct nfs4_label *);
+				struct nfs_fattr *);
 struct inode *nfs_ilookup(struct super_block *sb, struct nfs_fattr *, struct nfs_fh *);
 extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
-- 
cgit v1.2.3


From dd225cb3b02b827271a2284f89102fc81efcbf6f Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@Netapp.com>
Date: Fri, 22 Oct 2021 13:11:12 -0400
Subject: NFS: Remove the nfs4_label argument from nfs_setsecurity

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c           |  5 ++---
 fs/nfs/getroot.c       |  2 +-
 fs/nfs/inode.c         | 20 +++++++++-----------
 fs/nfs/nfs4proc.c      |  9 ++++-----
 include/linux/nfs_fs.h |  3 +--
 5 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bd89f39e8ba9..731d31015b6a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -682,8 +682,7 @@ again:
 			nfs_set_verifier(dentry, dir_verifier);
 			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
 			if (!status)
-				nfs_setsecurity(d_inode(dentry), entry->fattr,
-						entry->fattr->label);
+				nfs_setsecurity(d_inode(dentry), entry->fattr);
 			goto out;
 		} else {
 			d_invalidate(dentry);
@@ -1523,7 +1522,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
 	if (nfs_refresh_inode(inode, fattr) < 0)
 		goto out;
 
-	nfs_setsecurity(inode, fattr, fattr->label);
+	nfs_setsecurity(inode, fattr);
 	nfs_set_verifier(dentry, dir_verifier);
 
 	/* set a readdirplus hint that we had a cache miss */
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 0aedee201166..11ff2b2e060f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -148,7 +148,7 @@ int nfs_get_root(struct super_block *s, struct fs_context *fc)
 		!(kflags_out & SECURITY_LSM_NATIVE_LABELS))
 		server->caps &= ~NFS_CAP_SECURITY_LABEL;
 
-	nfs_setsecurity(inode, fsinfo.fattr, fsinfo.fattr->label);
+	nfs_setsecurity(inode, fsinfo.fattr);
 	error = 0;
 
 out_fattr:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index be28f0251dee..dd53704c3f40 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -355,23 +355,22 @@ static void nfs_clear_label_invalid(struct inode *inode)
 	spin_unlock(&inode->i_lock);
 }
 
-void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
-					struct nfs4_label *label)
+void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr)
 {
 	int error;
 
-	if (label == NULL)
+	if (fattr->label == NULL)
 		return;
 
 	if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) {
-		error = security_inode_notifysecctx(inode, label->label,
-				label->len);
+		error = security_inode_notifysecctx(inode, fattr->label->label,
+				fattr->label->len);
 		if (error)
 			printk(KERN_ERR "%s() %s %d "
 					"security_inode_notifysecctx() %d\n",
 					__func__,
-					(char *)label->label,
-					label->len, error);
+					(char *)fattr->label->label,
+					fattr->label->len, error);
 		nfs_clear_label_invalid(inode);
 	}
 }
@@ -398,8 +397,7 @@ struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags)
 }
 EXPORT_SYMBOL_GPL(nfs4_label_alloc);
 #else
-void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
-					struct nfs4_label *label)
+void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr)
 {
 }
 #endif
@@ -581,7 +579,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			   fattr->size != 0)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
 
-		nfs_setsecurity(inode, fattr, fattr->label);
+		nfs_setsecurity(inode, fattr);
 
 		nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 		nfsi->attrtimeo_timestamp = now;
@@ -1252,7 +1250,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 	if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
 		nfs_zap_acl_cache(inode);
 
-	nfs_setsecurity(inode, fattr, fattr->label);
+	nfs_setsecurity(inode, fattr);
 
 	dfprintk(PAGECACHE, "NFS: (%s/%Lu) revalidation complete\n",
 		inode->i_sb->s_id,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b25adb2250ef..535436dbdc9a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3179,8 +3179,7 @@ static int _nfs4_do_open(struct inode *dir,
 			if (status == 0) {
 				nfs_setattr_update_inode(state->inode, sattr,
 						opendata->o_res.f_attr);
-				nfs_setsecurity(state->inode, opendata->o_res.f_attr,
-						opendata->o_res.f_attr->label);
+				nfs_setsecurity(state->inode, opendata->o_res.f_attr);
 			}
 			sattr->ia_valid = ia_old;
 		}
@@ -4261,7 +4260,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 	status = nfs4_do_setattr(inode, cred, fattr, sattr, ctx, NULL);
 	if (status == 0) {
 		nfs_setattr_update_inode(inode, sattr, fattr);
-		nfs_setsecurity(inode, fattr, fattr->label);
+		nfs_setsecurity(inode, fattr);
 	}
 	return status;
 }
@@ -4782,7 +4781,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
 		nfs4_inc_nlink(inode);
 		status = nfs_post_op_update_inode(inode, res.fattr);
 		if (!status)
-			nfs_setsecurity(inode, res.fattr, res.fattr->label);
+			nfs_setsecurity(inode, res.fattr);
 	}
 
 out:
@@ -6077,7 +6076,7 @@ nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen)
 
 	status = nfs4_do_set_security_label(inode, &ilabel, fattr);
 	if (status == 0)
-		nfs_setsecurity(inode, fattr, fattr->label);
+		nfs_setsecurity(inode, fattr);
 
 	return status;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index c36c6a559fc9..05f249f20f55 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -409,8 +409,7 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map
 extern int nfs_revalidate_mapping_rcu(struct inode *inode);
 extern int nfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
-extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
-				struct nfs4_label *label);
+extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, const struct cred *cred, fmode_t mode);
-- 
cgit v1.2.3


From 3990ed4c426652fcd469f8c9dc08156294b36c28 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 5 Nov 2021 18:40:14 -0700
Subject: bpf: Stop caching subprog index in the bpf_pseudo_func insn

This patch is to fix an out-of-bound access issue when jit-ing the
bpf_pseudo_func insn (i.e. ld_imm64 with src_reg == BPF_PSEUDO_FUNC)

In jit_subprog(), it currently reuses the subprog index cached in
insn[1].imm.  This subprog index is an index into a few array related
to subprogs.  For example, in jit_subprog(), it is an index to the newly
allocated 'struct bpf_prog **func' array.

The subprog index was cached in insn[1].imm after add_subprog().  However,
this could become outdated (and too big in this case) if some subprogs
are completely removed during dead code elimination (in
adjust_subprog_starts_after_remove).  The cached index in insn[1].imm
is not updated accordingly and causing out-of-bound issue in the later
jit_subprog().

Unlike bpf_pseudo_'func' insn, the current bpf_pseudo_'call' insn
is handling the DCE properly by calling find_subprog(insn->imm) to
figure out the index instead of caching the subprog index.
The existing bpf_adj_branches() will adjust the insn->imm
whenever insn is added or removed.

Instead of having two ways handling subprog index,
this patch is to make bpf_pseudo_func works more like
bpf_pseudo_call.

First change is to stop caching the subprog index result
in insn[1].imm after add_subprog().  The verification
process will use find_subprog(insn->imm) to figure
out the subprog index.

Second change is in bpf_adj_branches() and have it to
adjust the insn->imm for the bpf_pseudo_func insn also
whenever insn is added or removed.

Third change is in jit_subprog().  Like the bpf_pseudo_call handling,
bpf_pseudo_func temporarily stores the find_subprog() result
in insn->off.  It is fine because the prog's insn has been finalized
at this point.  insn->off will be reset back to 0 later to avoid
confusing the userspace prog dump tool.

Fixes: 69c087ba6225 ("bpf: Add bpf_for_each_map_elem() helper")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211106014014.651018-1-kafai@fb.com
---
 include/linux/bpf.h   |  6 ++++++
 kernel/bpf/core.c     |  7 +++++++
 kernel/bpf/verifier.c | 37 ++++++++++++++-----------------------
 3 files changed, 27 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2be6dfd68df9..f715e8863f4d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -484,6 +484,12 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
 	aux->ctx_field_size = size;
 }
 
+static inline bool bpf_pseudo_func(const struct bpf_insn *insn)
+{
+	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+	       insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
 struct bpf_prog_ops {
 	int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
 			union bpf_attr __user *uattr);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 327e3996eadb..2405e39d800f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -390,6 +390,13 @@ static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old,
 			i = end_new;
 			insn = prog->insnsi + end_old;
 		}
+		if (bpf_pseudo_func(insn)) {
+			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
+						   end_new, i, probe_pass);
+			if (ret)
+				return ret;
+			continue;
+		}
 		code = insn->code;
 		if ((BPF_CLASS(code) != BPF_JMP &&
 		     BPF_CLASS(code) != BPF_JMP32) ||
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5f8d9128860a..890b3ec375a3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -240,12 +240,6 @@ static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 }
 
-static bool bpf_pseudo_func(const struct bpf_insn *insn)
-{
-	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
-	       insn->src_reg == BPF_PSEUDO_FUNC;
-}
-
 struct bpf_call_arg_meta {
 	struct bpf_map *map_ptr;
 	bool raw_mode;
@@ -1960,16 +1954,10 @@ static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 			return -EPERM;
 		}
 
-		if (bpf_pseudo_func(insn)) {
+		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
 			ret = add_subprog(env, i + insn->imm + 1);
-			if (ret >= 0)
-				/* remember subprog */
-				insn[1].imm = ret;
-		} else if (bpf_pseudo_call(insn)) {
-			ret = add_subprog(env, i + insn->imm + 1);
-		} else {
+		else
 			ret = add_kfunc_call(env, insn->imm, insn->off);
-		}
 
 		if (ret < 0)
 			return ret;
@@ -9387,7 +9375,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 
 	if (insn->src_reg == BPF_PSEUDO_FUNC) {
 		struct bpf_prog_aux *aux = env->prog->aux;
-		u32 subprogno = insn[1].imm;
+		u32 subprogno = find_subprog(env,
+					     env->insn_idx + insn->imm + 1);
 
 		if (!aux->func_info) {
 			verbose(env, "missing btf func_info\n");
@@ -12557,14 +12546,9 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		return 0;
 
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
-		if (bpf_pseudo_func(insn)) {
-			env->insn_aux_data[i].call_imm = insn->imm;
-			/* subprog is encoded in insn[1].imm */
+		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
 			continue;
-		}
 
-		if (!bpf_pseudo_call(insn))
-			continue;
 		/* Upon error here we cannot fall back to interpreter but
 		 * need a hard reject of the program. Thus -EFAULT is
 		 * propagated in any case.
@@ -12585,6 +12569,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		env->insn_aux_data[i].call_imm = insn->imm;
 		/* point imm to __bpf_call_base+1 from JITs point of view */
 		insn->imm = 1;
+		if (bpf_pseudo_func(insn))
+			/* jit (e.g. x86_64) may emit fewer instructions
+			 * if it learns a u32 imm is the same as a u64 imm.
+			 * Force a non zero here.
+			 */
+			insn[1].imm = 1;
 	}
 
 	err = bpf_prog_alloc_jited_linfo(prog);
@@ -12669,7 +12659,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		insn = func[i]->insnsi;
 		for (j = 0; j < func[i]->len; j++, insn++) {
 			if (bpf_pseudo_func(insn)) {
-				subprog = insn[1].imm;
+				subprog = insn->off;
 				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
 				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
 				continue;
@@ -12720,7 +12710,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 		if (bpf_pseudo_func(insn)) {
 			insn[0].imm = env->insn_aux_data[i].call_imm;
-			insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
+			insn[1].imm = insn->off;
+			insn->off = 0;
 			continue;
 		}
 		if (!bpf_pseudo_call(insn))
-- 
cgit v1.2.3


From 8587ca6f34152ea650bad4b2db68456601159024 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 5 Nov 2021 13:35:07 -0700
Subject: mm: move kvmalloc-related functions to slab.h

Not all files in the kernel should include mm.h.  Migrating callers from
kmalloc to kvmalloc is easier if the kvmalloc functions are in slab.h.

[akpm@linux-foundation.org: move the new kvrealloc() also]
[akpm@linux-foundation.org: drivers/hwmon/occ/p9_sbe.c needs slab.h]

Link: https://lkml.kernel.org/r/20210622215757.3525604-1-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/hwmon/occ/p9_sbe.c |  1 +
 drivers/of/kexec.c         |  1 +
 include/linux/mm.h         | 34 ----------------------------------
 include/linux/slab.h       | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 36 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/occ/p9_sbe.c b/drivers/hwmon/occ/p9_sbe.c
index f6387cc0b754..6c540b24b32f 100644
--- a/drivers/hwmon/occ/p9_sbe.c
+++ b/drivers/hwmon/occ/p9_sbe.c
@@ -3,6 +3,7 @@
 
 #include <linux/device.h>
 #include <linux/errno.h>
+#include <linux/slab.h>
 #include <linux/fsi-occ.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index 761fd870d1db..053e241f593c 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -16,6 +16,7 @@
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/random.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 
 #define RNG_SEED_SIZE		128
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..32b2ecc47408 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -799,40 +799,6 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 }
 #endif
 
-extern void *kvmalloc_node(size_t size, gfp_t flags, int node);
-static inline void *kvmalloc(size_t size, gfp_t flags)
-{
-	return kvmalloc_node(size, flags, NUMA_NO_NODE);
-}
-static inline void *kvzalloc_node(size_t size, gfp_t flags, int node)
-{
-	return kvmalloc_node(size, flags | __GFP_ZERO, node);
-}
-static inline void *kvzalloc(size_t size, gfp_t flags)
-{
-	return kvmalloc(size, flags | __GFP_ZERO);
-}
-
-static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
-{
-	size_t bytes;
-
-	if (unlikely(check_mul_overflow(n, size, &bytes)))
-		return NULL;
-
-	return kvmalloc(bytes, flags);
-}
-
-static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
-{
-	return kvmalloc_array(n, size, flags | __GFP_ZERO);
-}
-
-extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
-		gfp_t flags);
-extern void kvfree(const void *addr);
-extern void kvfree_sensitive(const void *addr, size_t len);
-
 static inline int head_compound_mapcount(struct page *head)
 {
 	return atomic_read(compound_mapcount_ptr(head)) + 1;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 083f3ce550bc..c0d46b6fa12a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -732,6 +732,40 @@ static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
+extern void *kvmalloc_node(size_t size, gfp_t flags, int node);
+static inline void *kvmalloc(size_t size, gfp_t flags)
+{
+	return kvmalloc_node(size, flags, NUMA_NO_NODE);
+}
+static inline void *kvzalloc_node(size_t size, gfp_t flags, int node)
+{
+	return kvmalloc_node(size, flags | __GFP_ZERO, node);
+}
+static inline void *kvzalloc(size_t size, gfp_t flags)
+{
+	return kvmalloc(size, flags | __GFP_ZERO);
+}
+
+static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+	size_t bytes;
+
+	if (unlikely(check_mul_overflow(n, size, &bytes)))
+		return NULL;
+
+	return kvmalloc(bytes, flags);
+}
+
+static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
+{
+	return kvmalloc_array(n, size, flags | __GFP_ZERO);
+}
+
+extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
+		gfp_t flags);
+extern void kvfree(const void *addr);
+extern void kvfree_sensitive(const void *addr, size_t len);
+
 unsigned int kmem_cache_size(struct kmem_cache *s);
 void __init kmem_cache_init_late(void);
 
-- 
cgit v1.2.3


From b47291ef02b0bee85ffb7efd6c336060ad1fe1a4 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 5 Nov 2021 13:35:17 -0700
Subject: mm, slub: change percpu partial accounting from objects to pages

With CONFIG_SLUB_CPU_PARTIAL enabled, SLUB keeps a percpu list of
partial slabs that can be promoted to cpu slab when the previous one is
depleted, without accessing the shared partial list.  A slab can be
added to this list by 1) refill of an empty list from get_partial_node()
- once we really have to access the shared partial list, we acquire
multiple slabs to amortize the cost of locking, and 2) first free to a
previously full slab - instead of putting the slab on a shared partial
list, we can more cheaply freeze it and put it on the per-cpu list.

To control how large a percpu partial list can grow for a kmem cache,
set_cpu_partial() calculates a target number of free objects on each
cpu's percpu partial list, and this can be also set by the sysfs file
cpu_partial.

However, the tracking of actual number of objects is imprecise, in order
to limit overhead from cpu X freeing an objects to a slab on percpu
partial list of cpu Y.  Basically, the percpu partial slabs form a
single linked list, and when we add a new slab to the list with current
head "oldpage", we set in the struct page of the slab we're adding:

    page->pages = oldpage->pages + 1; // this is precise
    page->pobjects = oldpage->pobjects + (page->objects - page->inuse);
    page->next = oldpage;

Thus the real number of free objects in the slab (objects - inuse) is
only determined at the moment of adding the slab to the percpu partial
list, and further freeing doesn't update the pobjects counter nor
propagate it to the current list head.  As Jann reports [1], this can
easily lead to large inaccuracies, where the target number of objects
(up to 30 by default) can translate to the same number of (empty) slab
pages on the list.  In case 2) above, we put a slab with 1 free object
on the list, thus only increase page->pobjects by 1, even if there are
subsequent frees on the same slab.  Jann has noticed this in practice
and so did we [2] when investigating significant increase of kmemcg
usage after switching from SLAB to SLUB.

While this is no longer a problem in kmemcg context thanks to the
accounting rewrite in 5.9, the memory waste is still not ideal and it's
questionable whether it makes sense to perform free object count based
control when object counts can easily become so much inaccurate.  So
this patch converts the accounting to be based on number of pages only
(which is precise) and removes the page->pobjects field completely.
This is also ultimately simpler.

To retain the existing set_cpu_partial() heuristic, first calculate the
target number of objects as previously, but then convert it to target
number of pages by assuming the pages will be half-filled on average.
This assumption might obviously also be inaccurate in practice, but
cannot degrade to actual number of pages being equal to the target
number of objects.

We could also skip the intermediate step with target number of objects
and rewrite the heuristic in terms of pages.  However we still have the
sysfs file cpu_partial which uses number of objects and could break
existing users if it suddenly becomes number of pages, so this patch
doesn't do that.

In practice, after this patch the heuristics limit the size of percpu
partial list up to 2 pages.  In case of a reported regression (which
would mean some workload has benefited from the previous imprecise
object based counting), we can tune the heuristics to get a better
compromise within the new scheme, while still avoid the unexpectedly
long percpu partial lists.

[1] https://lore.kernel.org/linux-mm/CAG48ez2Qx5K1Cab-m8BdSibp6wLTip6ro4=-umR7BLsEgjEYzA@mail.gmail.com/
[2] https://lore.kernel.org/all/2f0f46e8-2535-410a-1859-e9cfa4e57c18@suse.cz/

==========
Evaluation
==========

Mel was kind enough to run v1 through mmtests machinery for netperf
(localhost) and hackbench and, for most significant results see below.
So there are some apparent regressions, especially with hackbench, which
I think ultimately boils down to having shorter percpu partial lists on
average and some benchmarks benefiting from longer ones.  Monitoring
slab usage also indicated less memory usage by slab.  Based on that, the
following patch will bump the defaults to allow longer percpu partial
lists than after this patch.

However the goal is certainly not such that we would limit the percpu
partial lists to 30 pages just because previously a specific alloc/free
pattern could lead to the limit of 30 objects translate to a limit to 30
pages - that would make little sense.  This is a correctness patch, and
if a workload benefits from larger lists, the sysfs tuning knobs are
still there to allow that.

Netperf

  2-socket Intel(R) Xeon(R) Gold 5218R CPU @ 2.10GHz (20 cores, 40 threads per socket), 384GB RAM
  TCP-RR:
    hmean before 127045.79 after 121092.94 (-4.69%, worse)
    stddev before  2634.37 after   1254.08
  UDP-RR:
    hmean before 166985.45 after 160668.94 ( -3.78%, worse)
    stddev before 4059.69 after 1943.63

  2-socket Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz (20 cores, 40 threads per socket), 512GB RAM
  TCP-RR:
    hmean before 84173.25 after 76914.72 ( -8.62%, worse)
  UDP-RR:
    hmean before 93571.12 after 96428.69 ( 3.05%, better)
    stddev before 23118.54 after 16828.14

  2-socket Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz (12 cores, 24 threads per socket), 64GB RAM
  TCP-RR:
    hmean before 49984.92 after 48922.27 ( -2.13%, worse)
    stddev before 6248.15 after 4740.51
  UDP-RR:
    hmean before 61854.31 after 68761.81 ( 11.17%, better)
    stddev before 4093.54 after 5898.91

  other machines - within 2%

Hackbench

  (results before and after the patch, negative % means worse)

  2-socket AMD EPYC 7713 (64 cores, 128 threads per core), 256GB RAM
  hackbench-process-sockets
  Amean 	1 	0.5380	0.5583	( -3.78%)
  Amean 	4 	0.7510	0.8150	( -8.52%)
  Amean 	7 	0.7930	0.9533	( -20.22%)
  Amean 	12 	0.7853	1.1313	( -44.06%)
  Amean 	21 	1.1520	1.4993	( -30.15%)
  Amean 	30 	1.6223	1.9237	( -18.57%)
  Amean 	48 	2.6767	2.9903	( -11.72%)
  Amean 	79 	4.0257	5.1150	( -27.06%)
  Amean 	110	5.5193	7.4720	( -35.38%)
  Amean 	141	7.2207	9.9840	( -38.27%)
  Amean 	172	8.4770	12.1963	( -43.88%)
  Amean 	203	9.6473	14.3137	( -48.37%)
  Amean 	234	11.3960	18.7917	( -64.90%)
  Amean 	265	13.9627	22.4607	( -60.86%)
  Amean 	296	14.9163	26.0483	( -74.63%)

  hackbench-thread-sockets
  Amean 	1 	0.5597	0.5877	( -5.00%)
  Amean 	4 	0.7913	0.8960	( -13.23%)
  Amean 	7 	0.8190	1.0017	( -22.30%)
  Amean 	12 	0.9560	1.1727	( -22.66%)
  Amean 	21 	1.7587	1.5660	( 10.96%)
  Amean 	30 	2.4477	1.9807	( 19.08%)
  Amean 	48 	3.4573	3.0630	( 11.41%)
  Amean 	79 	4.7903	5.1733	( -8.00%)
  Amean 	110	6.1370	7.4220	( -20.94%)
  Amean 	141	7.5777	9.2617	( -22.22%)
  Amean 	172	9.2280	11.0907	( -20.18%)
  Amean 	203	10.2793	13.3470	( -29.84%)
  Amean 	234	11.2410	17.1070	( -52.18%)
  Amean 	265	12.5970	23.3323	( -85.22%)
  Amean 	296	17.1540	24.2857	( -41.57%)

  2-socket Intel(R) Xeon(R) Gold 5218R CPU @ 2.10GHz (20 cores, 40 threads
  per socket), 384GB RAM
  hackbench-process-sockets
  Amean 	1 	0.5760	0.4793	( 16.78%)
  Amean 	4 	0.9430	0.9707	( -2.93%)
  Amean 	7 	1.5517	1.8843	( -21.44%)
  Amean 	12 	2.4903	2.7267	( -9.49%)
  Amean 	21 	3.9560	4.2877	( -8.38%)
  Amean 	30 	5.4613	5.8343	( -6.83%)
  Amean 	48 	8.5337	9.2937	( -8.91%)
  Amean 	79 	14.0670	15.2630	( -8.50%)
  Amean 	110	19.2253	21.2467	( -10.51%)
  Amean 	141	23.7557	25.8550	( -8.84%)
  Amean 	172	28.4407	29.7603	( -4.64%)
  Amean 	203	33.3407	33.9927	( -1.96%)
  Amean 	234	38.3633	39.1150	( -1.96%)
  Amean 	265	43.4420	43.8470	( -0.93%)
  Amean 	296	48.3680	48.9300	( -1.16%)

  hackbench-thread-sockets
  Amean 	1 	0.6080	0.6493	( -6.80%)
  Amean 	4 	1.0000	1.0513	( -5.13%)
  Amean 	7 	1.6607	2.0260	( -22.00%)
  Amean 	12 	2.7637	2.9273	( -5.92%)
  Amean 	21 	5.0613	4.5153	( 10.79%)
  Amean 	30 	6.3340	6.1140	( 3.47%)
  Amean 	48 	9.0567	9.5577	( -5.53%)
  Amean 	79 	14.5657	15.7983	( -8.46%)
  Amean 	110	19.6213	21.6333	( -10.25%)
  Amean 	141	24.1563	26.2697	( -8.75%)
  Amean 	172	28.9687	30.2187	( -4.32%)
  Amean 	203	33.9763	34.6970	( -2.12%)
  Amean 	234	38.8647	39.3207	( -1.17%)
  Amean 	265	44.0813	44.1507	( -0.16%)
  Amean 	296	49.2040	49.4330	( -0.47%)

  2-socket Intel(R) Xeon(R) CPU E5-2698 v4 @ 2.20GHz (20 cores, 40 threads
  per socket), 512GB RAM
  hackbench-process-sockets
  Amean 	1 	0.5027	0.5017	( 0.20%)
  Amean 	4 	1.1053	1.2033	( -8.87%)
  Amean 	7 	1.8760	2.1820	( -16.31%)
  Amean 	12 	2.9053	3.1810	( -9.49%)
  Amean 	21 	4.6777	4.9920	( -6.72%)
  Amean 	30 	6.5180	6.7827	( -4.06%)
  Amean 	48 	10.0710	10.5227	( -4.48%)
  Amean 	79 	16.4250	17.5053	( -6.58%)
  Amean 	110	22.6203	24.4617	( -8.14%)
  Amean 	141	28.0967	31.0363	( -10.46%)
  Amean 	172	34.4030	36.9233	( -7.33%)
  Amean 	203	40.5933	43.0850	( -6.14%)
  Amean 	234	46.6477	48.7220	( -4.45%)
  Amean 	265	53.0530	53.9597	( -1.71%)
  Amean 	296	59.2760	59.9213	( -1.09%)

  hackbench-thread-sockets
  Amean 	1 	0.5363	0.5330	( 0.62%)
  Amean 	4 	1.1647	1.2157	( -4.38%)
  Amean 	7 	1.9237	2.2833	( -18.70%)
  Amean 	12 	2.9943	3.3110	( -10.58%)
  Amean 	21 	4.9987	5.1880	( -3.79%)
  Amean 	30 	6.7583	7.0043	( -3.64%)
  Amean 	48 	10.4547	10.8353	( -3.64%)
  Amean 	79 	16.6707	17.6790	( -6.05%)
  Amean 	110	22.8207	24.4403	( -7.10%)
  Amean 	141	28.7090	31.0533	( -8.17%)
  Amean 	172	34.9387	36.8260	( -5.40%)
  Amean 	203	41.1567	43.0450	( -4.59%)
  Amean 	234	47.3790	48.5307	( -2.43%)
  Amean 	265	53.9543	54.6987	( -1.38%)
  Amean 	296	60.0820	60.2163	( -0.22%)

  1-socket Intel(R) Xeon(R) CPU E3-1240 v5 @ 3.50GHz (4 cores, 8 threads),
  32 GB RAM
  hackbench-process-sockets
  Amean 	1 	1.4760	1.5773	( -6.87%)
  Amean 	3 	3.9370	4.0910	( -3.91%)
  Amean 	5 	6.6797	6.9357	( -3.83%)
  Amean 	7 	9.3367	9.7150	( -4.05%)
  Amean 	12	15.7627	16.1400	( -2.39%)
  Amean 	18	23.5360	23.6890	( -0.65%)
  Amean 	24	31.0663	31.3137	( -0.80%)
  Amean 	30	38.7283	39.0037	( -0.71%)
  Amean 	32	41.3417	41.6097	( -0.65%)

  hackbench-thread-sockets
  Amean 	1 	1.5250	1.6043	( -5.20%)
  Amean 	3 	4.0897	4.2603	( -4.17%)
  Amean 	5 	6.7760	7.0933	( -4.68%)
  Amean 	7 	9.4817	9.9157	( -4.58%)
  Amean 	12	15.9610	16.3937	( -2.71%)
  Amean 	18	23.9543	24.3417	( -1.62%)
  Amean 	24	31.4400	31.7217	( -0.90%)
  Amean 	30	39.2457	39.5467	( -0.77%)
  Amean 	32	41.8267	42.1230	( -0.71%)

  2-socket Intel(R) Xeon(R) CPU E5-2670 v3 @ 2.30GHz (12 cores, 24 threads
  per socket), 64GB RAM
  hackbench-process-sockets
  Amean 	1 	1.0347	1.0880	( -5.15%)
  Amean 	4 	1.7267	1.8527	( -7.30%)
  Amean 	7 	2.6707	2.8110	( -5.25%)
  Amean 	12 	4.1617	4.3383	( -4.25%)
  Amean 	21 	7.0070	7.2600	( -3.61%)
  Amean 	30 	9.9187	10.2397	( -3.24%)
  Amean 	48 	15.6710	16.3923	( -4.60%)
  Amean 	79 	24.7743	26.1247	( -5.45%)
  Amean 	110	34.3000	35.9307	( -4.75%)
  Amean 	141	44.2043	44.8010	( -1.35%)
  Amean 	172	54.2430	54.7260	( -0.89%)
  Amean 	192	60.6557	60.9777	( -0.53%)

  hackbench-thread-sockets
  Amean 	1 	1.0610	1.1353	( -7.01%)
  Amean 	4 	1.7543	1.9140	( -9.10%)
  Amean 	7 	2.7840	2.9573	( -6.23%)
  Amean 	12 	4.3813	4.4937	( -2.56%)
  Amean 	21 	7.3460	7.5350	( -2.57%)
  Amean 	30 	10.2313	10.5190	( -2.81%)
  Amean 	48 	15.9700	16.5940	( -3.91%)
  Amean 	79 	25.3973	26.6637	( -4.99%)
  Amean 	110	35.1087	36.4797	( -3.91%)
  Amean 	141	45.8220	46.3053	( -1.05%)
  Amean 	172	55.4917	55.7320	( -0.43%)
  Amean 	192	62.7490	62.5410	( 0.33%)

Link: https://lkml.kernel.org/r/20211012134651.11258-1-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Jann Horn <jannh@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h |  2 --
 include/linux/slub_def.h | 13 ++-----
 mm/slub.c                | 89 ++++++++++++++++++++++++++++++++----------------
 3 files changed, 61 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..68ffa064b7a8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -124,10 +124,8 @@ struct page {
 					struct page *next;
 #ifdef CONFIG_64BIT
 					int pages;	/* Nr of pages left */
-					int pobjects;	/* Approximate count */
 #else
 					short int pages;
-					short int pobjects;
 #endif
 				};
 			};
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 85499f0586b0..0fa751b946fa 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -99,6 +99,8 @@ struct kmem_cache {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 	/* Number of per cpu partial objects to keep around */
 	unsigned int cpu_partial;
+	/* Number of per cpu partial pages to keep around */
+	unsigned int cpu_partial_pages;
 #endif
 	struct kmem_cache_order_objects oo;
 
@@ -141,17 +143,6 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-#define slub_cpu_partial(s)		((s)->cpu_partial)
-#define slub_set_cpu_partial(s, n)		\
-({						\
-	slub_cpu_partial(s) = (n);		\
-})
-#else
-#define slub_cpu_partial(s)		(0)
-#define slub_set_cpu_partial(s, n)
-#endif /* CONFIG_SLUB_CPU_PARTIAL */
-
 #ifdef CONFIG_SYSFS
 #define SLAB_SUPPORTS_SYSFS
 void sysfs_slab_unlink(struct kmem_cache *);
diff --git a/mm/slub.c b/mm/slub.c
index b6a1790812f7..0df81ea24b91 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -414,6 +414,29 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
 	return x.x & OO_MASK;
 }
 
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
+{
+	unsigned int nr_pages;
+
+	s->cpu_partial = nr_objects;
+
+	/*
+	 * We take the number of objects but actually limit the number of
+	 * pages on the per cpu partial list, in order to limit excessive
+	 * growth of the list. For simplicity we assume that the pages will
+	 * be half-full.
+	 */
+	nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
+	s->cpu_partial_pages = nr_pages;
+}
+#else
+static inline void
+slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
+{
+}
+#endif /* CONFIG_SLUB_CPU_PARTIAL */
+
 /*
  * Per slab locking using the pagelock
  */
@@ -2052,7 +2075,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
  */
 static inline void *acquire_slab(struct kmem_cache *s,
 		struct kmem_cache_node *n, struct page *page,
-		int mode, int *objects)
+		int mode)
 {
 	void *freelist;
 	unsigned long counters;
@@ -2068,7 +2091,6 @@ static inline void *acquire_slab(struct kmem_cache *s,
 	freelist = page->freelist;
 	counters = page->counters;
 	new.counters = counters;
-	*objects = new.objects - new.inuse;
 	if (mode) {
 		new.inuse = page->objects;
 		new.freelist = NULL;
@@ -2106,9 +2128,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 {
 	struct page *page, *page2;
 	void *object = NULL;
-	unsigned int available = 0;
 	unsigned long flags;
-	int objects;
+	unsigned int partial_pages = 0;
 
 	/*
 	 * Racy check. If we mistakenly see no partial slabs then we
@@ -2126,11 +2147,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 		if (!pfmemalloc_match(page, gfpflags))
 			continue;
 
-		t = acquire_slab(s, n, page, object == NULL, &objects);
+		t = acquire_slab(s, n, page, object == NULL);
 		if (!t)
 			break;
 
-		available += objects;
 		if (!object) {
 			*ret_page = page;
 			stat(s, ALLOC_FROM_PARTIAL);
@@ -2138,10 +2158,15 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 		} else {
 			put_cpu_partial(s, page, 0);
 			stat(s, CPU_PARTIAL_NODE);
+			partial_pages++;
 		}
+#ifdef CONFIG_SLUB_CPU_PARTIAL
 		if (!kmem_cache_has_cpu_partial(s)
-			|| available > slub_cpu_partial(s) / 2)
+			|| partial_pages > s->cpu_partial_pages / 2)
 			break;
+#else
+		break;
+#endif
 
 	}
 	spin_unlock_irqrestore(&n->list_lock, flags);
@@ -2546,14 +2571,13 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 	struct page *page_to_unfreeze = NULL;
 	unsigned long flags;
 	int pages = 0;
-	int pobjects = 0;
 
 	local_lock_irqsave(&s->cpu_slab->lock, flags);
 
 	oldpage = this_cpu_read(s->cpu_slab->partial);
 
 	if (oldpage) {
-		if (drain && oldpage->pobjects > slub_cpu_partial(s)) {
+		if (drain && oldpage->pages >= s->cpu_partial_pages) {
 			/*
 			 * Partial array is full. Move the existing set to the
 			 * per node partial list. Postpone the actual unfreezing
@@ -2562,16 +2586,13 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 			page_to_unfreeze = oldpage;
 			oldpage = NULL;
 		} else {
-			pobjects = oldpage->pobjects;
 			pages = oldpage->pages;
 		}
 	}
 
 	pages++;
-	pobjects += page->objects - page->inuse;
 
 	page->pages = pages;
-	page->pobjects = pobjects;
 	page->next = oldpage;
 
 	this_cpu_write(s->cpu_slab->partial, page);
@@ -3991,6 +4012,8 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
 static void set_cpu_partial(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_CPU_PARTIAL
+	unsigned int nr_objects;
+
 	/*
 	 * cpu_partial determined the maximum number of objects kept in the
 	 * per cpu partial lists of a processor.
@@ -4000,24 +4023,22 @@ static void set_cpu_partial(struct kmem_cache *s)
 	 * filled up again with minimal effort. The slab will never hit the
 	 * per node partial lists and therefore no locking will be required.
 	 *
-	 * This setting also determines
-	 *
-	 * A) The number of objects from per cpu partial slabs dumped to the
-	 *    per node list when we reach the limit.
-	 * B) The number of objects in cpu partial slabs to extract from the
-	 *    per node list when we run out of per cpu objects. We only fetch
-	 *    50% to keep some capacity around for frees.
+	 * For backwards compatibility reasons, this is determined as number
+	 * of objects, even though we now limit maximum number of pages, see
+	 * slub_set_cpu_partial()
 	 */
 	if (!kmem_cache_has_cpu_partial(s))
-		slub_set_cpu_partial(s, 0);
+		nr_objects = 0;
 	else if (s->size >= PAGE_SIZE)
-		slub_set_cpu_partial(s, 2);
+		nr_objects = 2;
 	else if (s->size >= 1024)
-		slub_set_cpu_partial(s, 6);
+		nr_objects = 6;
 	else if (s->size >= 256)
-		slub_set_cpu_partial(s, 13);
+		nr_objects = 13;
 	else
-		slub_set_cpu_partial(s, 30);
+		nr_objects = 30;
+
+	slub_set_cpu_partial(s, nr_objects);
 #endif
 }
 
@@ -5392,7 +5413,12 @@ SLAB_ATTR(min_partial);
 
 static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
 {
-	return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
+	unsigned int nr_partial = 0;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+	nr_partial = s->cpu_partial;
+#endif
+
+	return sysfs_emit(buf, "%u\n", nr_partial);
 }
 
 static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
@@ -5463,12 +5489,12 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
 
 		page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
 
-		if (page) {
+		if (page)
 			pages += page->pages;
-			objects += page->pobjects;
-		}
 	}
 
+	/* Approximate half-full pages , see slub_set_cpu_partial() */
+	objects = (pages * oo_objects(s->oo)) / 2;
 	len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
 
 #ifdef CONFIG_SMP
@@ -5476,9 +5502,12 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
 		struct page *page;
 
 		page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
-		if (page)
+		if (page) {
+			pages = READ_ONCE(page->pages);
+			objects = (pages * oo_objects(s->oo)) / 2;
 			len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
-					     cpu, page->pobjects, page->pages);
+					     cpu, objects, pages);
+		}
 	}
 #endif
 	len += sysfs_emit_at(buf, len, "\n");
-- 
cgit v1.2.3


From 96c84dde362a3e4ff67a12eaac2ea6e88e963c07 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 5 Nov 2021 13:35:30 -0700
Subject: mm: don't include <linux/dax.h> in <linux/mempolicy.h>

Not required at all, and having this causes a huge kernel rebuild as
soon as something in dax.h changes.

Link: https://lkml.kernel.org/r/20210921082253.1859794-1-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h | 1 -
 mm/memory-failure.c       | 1 +
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 4091692bed8c..097bbbb37493 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -8,7 +8,6 @@
 
 #include <linux/sched.h>
 #include <linux/mmzone.h>
-#include <linux/dax.h>
 #include <linux/slab.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index bdbbb32211a5..8376cfe0efce 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -39,6 +39,7 @@
 #include <linux/kernel-page-flags.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/task.h>
+#include <linux/dax.h>
 #include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/export.h>
-- 
cgit v1.2.3


From 7857ccdf94e94f1dd56496b90084fdcaa5e655a4 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 5 Nov 2021 13:35:33 -0700
Subject: lib/stackdepot: include gfp.h

Patch series "stackdepot, kasan, workqueue: Avoid expanding stackdepot
slabs when holding raw_spin_lock", v2.

Shuah Khan reported [1]:

 | When CONFIG_PROVE_RAW_LOCK_NESTING=y and CONFIG_KASAN are enabled,
 | kasan_record_aux_stack() runs into "BUG: Invalid wait context" when
 | it tries to allocate memory attempting to acquire spinlock in page
 | allocation code while holding workqueue pool raw_spinlock.
 |
 | There are several instances of this problem when block layer tries
 | to __queue_work(). Call trace from one of these instances is below:
 |
 |     kblockd_mod_delayed_work_on()
 |       mod_delayed_work_on()
 |         __queue_delayed_work()
 |           __queue_work() (rcu_read_lock, raw_spin_lock pool->lock held)
 |             insert_work()
 |               kasan_record_aux_stack()
 |                 kasan_save_stack()
 |                   stack_depot_save()
 |                     alloc_pages()
 |                       __alloc_pages()
 |                         get_page_from_freelist()
 |                           rm_queue()
 |                             rm_queue_pcplist()
 |                               local_lock_irqsave(&pagesets.lock, flags);
 |                               [ BUG: Invalid wait context triggered ]

PROVE_RAW_LOCK_NESTING is pointing out that (on RT kernels) the locking
rules are being violated.  More generally, memory is being allocated
from a non-preemptive context (raw_spin_lock'd c-s) where it is not
allowed.

To properly fix this, we must prevent stackdepot from replenishing its
"stack slab" pool if memory allocations cannot be done in the current
context: it's a bug to use either GFP_ATOMIC nor GFP_NOWAIT in certain
non-preemptive contexts, including raw_spin_locks (see gfp.h and commit
ab00db216c9c7).

The only downside is that saving a stack trace may fail if: stackdepot
runs out of space AND the same stack trace has not been recorded before.
I expect this to be unlikely, and a simple experiment (boot the kernel)
didn't result in any failure to record stack trace from insert_work().

The series includes a few minor fixes to stackdepot that I noticed in
preparing the series.  It then introduces __stack_depot_save(), which
exposes the option to force stackdepot to not allocate any memory.
Finally, KASAN is changed to use the new stackdepot interface and
provide kasan_record_aux_stack_noalloc(), which is then used by
workqueue code.

[1] https://lkml.kernel.org/r/20210902200134.25603-1-skhan@linuxfoundation.org

This patch (of 6):

<linux/stackdepot.h> refers to gfp_t, but doesn't include gfp.h.

Fix it by including <linux/gfp.h>.

Link: https://lkml.kernel.org/r/20210913112609.2651084-1-elver@google.com
Link: https://lkml.kernel.org/r/20210913112609.2651084-2-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Tested-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Walter Wu <walter-zh.wu@mediatek.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: Taras Madan <tarasmadan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stackdepot.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 6bb4bc1a5f54..97b36dc53301 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -11,6 +11,8 @@
 #ifndef _LINUX_STACKDEPOT_H
 #define _LINUX_STACKDEPOT_H
 
+#include <linux/gfp.h>
+
 typedef u32 depot_stack_handle_t;
 
 depot_stack_handle_t stack_depot_save(unsigned long *entries,
-- 
cgit v1.2.3


From 11ac25c62cd2f3bb8da9e1df2e71afdebe76f093 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 5 Nov 2021 13:35:39 -0700
Subject: lib/stackdepot: introduce __stack_depot_save()

Add __stack_depot_save(), which provides more fine-grained control over
stackdepot's memory allocation behaviour, in case stackdepot runs out of
"stack slabs".

Normally stackdepot uses alloc_pages() in case it runs out of space;
passing can_alloc==false to __stack_depot_save() prohibits this, at the
cost of more likely failure to record a stack trace.

Link: https://lkml.kernel.org/r/20210913112609.2651084-4-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Tested-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Taras Madan <tarasmadan@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: Walter Wu <walter-zh.wu@mediatek.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stackdepot.h |  4 ++++
 lib/stackdepot.c           | 43 +++++++++++++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 97b36dc53301..b2f7e7c6ba54 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -15,6 +15,10 @@
 
 typedef u32 depot_stack_handle_t;
 
+depot_stack_handle_t __stack_depot_save(unsigned long *entries,
+					unsigned int nr_entries,
+					gfp_t gfp_flags, bool can_alloc);
+
 depot_stack_handle_t stack_depot_save(unsigned long *entries,
 				      unsigned int nr_entries, gfp_t gfp_flags);
 
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index c80a9f734253..bda58597e375 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -248,17 +248,28 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 EXPORT_SYMBOL_GPL(stack_depot_fetch);
 
 /**
- * stack_depot_save - Save a stack trace from an array
+ * __stack_depot_save - Save a stack trace from an array
  *
  * @entries:		Pointer to storage array
  * @nr_entries:		Size of the storage array
  * @alloc_flags:	Allocation gfp flags
+ * @can_alloc:		Allocate stack slabs (increased chance of failure if false)
+ *
+ * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is
+ * %true, is allowed to replenish the stack slab pool in case no space is left
+ * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids
+ * any allocations and will fail if no space is left to store the stack trace.
+ *
+ * Context: Any context, but setting @can_alloc to %false is required if
+ *          alloc_pages() cannot be used from the current context. Currently
+ *          this is the case from contexts where neither %GFP_ATOMIC nor
+ *          %GFP_NOWAIT can be used (NMI, raw_spin_lock).
  *
- * Return: The handle of the stack struct stored in depot
+ * Return: The handle of the stack struct stored in depot, 0 on failure.
  */
-depot_stack_handle_t stack_depot_save(unsigned long *entries,
-				      unsigned int nr_entries,
-				      gfp_t alloc_flags)
+depot_stack_handle_t __stack_depot_save(unsigned long *entries,
+					unsigned int nr_entries,
+					gfp_t alloc_flags, bool can_alloc)
 {
 	struct stack_record *found = NULL, **bucket;
 	depot_stack_handle_t retval = 0;
@@ -291,7 +302,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 	 * The smp_load_acquire() here pairs with smp_store_release() to
 	 * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
 	 */
-	if (unlikely(!smp_load_acquire(&next_slab_inited))) {
+	if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) {
 		/*
 		 * Zero out zone modifiers, as we don't have specific zone
 		 * requirements. Keep the flags related to allocation in atomic
@@ -339,6 +350,26 @@ exit:
 fast_exit:
 	return retval;
 }
+EXPORT_SYMBOL_GPL(__stack_depot_save);
+
+/**
+ * stack_depot_save - Save a stack trace from an array
+ *
+ * @entries:		Pointer to storage array
+ * @nr_entries:		Size of the storage array
+ * @alloc_flags:	Allocation gfp flags
+ *
+ * Context: Contexts where allocations via alloc_pages() are allowed.
+ *          See __stack_depot_save() for more details.
+ *
+ * Return: The handle of the stack struct stored in depot, 0 on failure.
+ */
+depot_stack_handle_t stack_depot_save(unsigned long *entries,
+				      unsigned int nr_entries,
+				      gfp_t alloc_flags)
+{
+	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
+}
 EXPORT_SYMBOL_GPL(stack_depot_save);
 
 static inline int in_irqentry_text(unsigned long ptr)
-- 
cgit v1.2.3


From 7cb3007ce2da27ec02a1a3211941e7fe6875b642 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 5 Nov 2021 13:35:46 -0700
Subject: kasan: generic: introduce kasan_record_aux_stack_noalloc()

Introduce a variant of kasan_record_aux_stack() that does not do any
memory allocation through stackdepot.  This will permit using it in
contexts that cannot allocate any memory.

Link: https://lkml.kernel.org/r/20210913112609.2651084-6-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Tested-by: Shuah Khan <skhan@linuxfoundation.org>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Taras Madan <tarasmadan@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: Walter Wu <walter-zh.wu@mediatek.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kasan.h |  2 ++
 mm/kasan/generic.c    | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index dd874a1ee862..736d7b458996 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -370,12 +370,14 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {}
 void kasan_cache_shrink(struct kmem_cache *cache);
 void kasan_cache_shutdown(struct kmem_cache *cache);
 void kasan_record_aux_stack(void *ptr);
+void kasan_record_aux_stack_noalloc(void *ptr);
 
 #else /* CONFIG_KASAN_GENERIC */
 
 static inline void kasan_cache_shrink(struct kmem_cache *cache) {}
 static inline void kasan_cache_shutdown(struct kmem_cache *cache) {}
 static inline void kasan_record_aux_stack(void *ptr) {}
+static inline void kasan_record_aux_stack_noalloc(void *ptr) {}
 
 #endif /* CONFIG_KASAN_GENERIC */
 
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 2a8e59e6326d..84a038b07c6f 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -328,7 +328,7 @@ DEFINE_ASAN_SET_SHADOW(f3);
 DEFINE_ASAN_SET_SHADOW(f5);
 DEFINE_ASAN_SET_SHADOW(f8);
 
-void kasan_record_aux_stack(void *addr)
+static void __kasan_record_aux_stack(void *addr, bool can_alloc)
 {
 	struct page *page = kasan_addr_to_page(addr);
 	struct kmem_cache *cache;
@@ -345,7 +345,17 @@ void kasan_record_aux_stack(void *addr)
 		return;
 
 	alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
-	alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, true);
+	alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, can_alloc);
+}
+
+void kasan_record_aux_stack(void *addr)
+{
+	return __kasan_record_aux_stack(addr, true);
+}
+
+void kasan_record_aux_stack_noalloc(void *addr)
+{
+	return __kasan_record_aux_stack(addr, false);
 }
 
 void kasan_set_free_info(struct kmem_cache *cache,
-- 
cgit v1.2.3


From 86cffecdeaa278444870c8745ab166a65865dbf0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 Nov 2021 13:36:19 -0700
Subject: Compiler Attributes: add __alloc_size() for better bounds checking

GCC and Clang can use the "alloc_size" attribute to better inform the
results of __builtin_object_size() (for compile-time constant values).
Clang can additionally use alloc_size to inform the results of
__builtin_dynamic_object_size() (for run-time values).

Because GCC sees the frequent use of struct_size() as an allocator size
argument, and notices it can return SIZE_MAX (the overflow indication),
it complains about these call sites overflowing (since SIZE_MAX is
greater than the default -Walloc-size-larger-than=PTRDIFF_MAX).  This
isn't helpful since we already know a SIZE_MAX will be caught at
run-time (this was an intentional design).  To deal with this, we must
disable this check as it is both a false positive and redundant.  (Clang
does not have this warning option.)

Unfortunately, just checking the -Wno-alloc-size-larger-than is not
sufficient to make the __alloc_size attribute behave correctly under
older GCC versions.  The attribute itself must be disabled in those
situations too, as there appears to be no way to reliably silence the
SIZE_MAX constant expression cases for GCC versions less than 9.1:

   In file included from ./include/linux/resource_ext.h:11,
                    from ./include/linux/pci.h:40,
                    from drivers/net/ethernet/intel/ixgbe/ixgbe.h:9,
                    from drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c:4:
   In function 'kmalloc_node',
       inlined from 'ixgbe_alloc_q_vector' at ./include/linux/slab.h:743:9:
   ./include/linux/slab.h:618:9: error: argument 1 value '18446744073709551615' exceeds maximum object size 9223372036854775807 [-Werror=alloc-size-larger-than=]
     return __kmalloc_node(size, flags, node);
            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   ./include/linux/slab.h: In function 'ixgbe_alloc_q_vector':
   ./include/linux/slab.h:455:7: note: in a call to allocation function '__kmalloc_node' declared here
    void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_slab_alignment __malloc;
          ^~~~~~~~~~~~~~

Specifically:
 '-Wno-alloc-size-larger-than' is not correctly handled by GCC < 9.1
    https://godbolt.org/z/hqsfG7q84 (doesn't disable)
    https://godbolt.org/z/P9jdrPTYh (doesn't admit to not knowing about option)
    https://godbolt.org/z/465TPMWKb (only warns when other warnings appear)

 '-Walloc-size-larger-than=18446744073709551615' is not handled by GCC < 8.2
    https://godbolt.org/z/73hh1EPxz (ignores numeric value)

Since anything marked with __alloc_size would also qualify for marking
with __malloc, just include __malloc along with it to avoid redundant
markings.  (Suggested by Linus Torvalds.)

Finally, make sure checkpatch.pl doesn't get confused about finding the
__alloc_size attribute on functions.  (Thanks to Joe Perches.)

Link: https://lkml.kernel.org/r/20210930222704.2631604-3-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jing Xiangfeng <jingxiangfeng@huawei.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Makefile                            | 15 +++++++++++++++
 include/linux/compiler-gcc.h        |  8 ++++++++
 include/linux/compiler_attributes.h | 10 ++++++++++
 include/linux/compiler_types.h      | 12 ++++++++++++
 scripts/checkpatch.pl               |  3 ++-
 5 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Makefile b/Makefile
index ed6e7ec60eff..d09ac84a170b 100644
--- a/Makefile
+++ b/Makefile
@@ -1008,6 +1008,21 @@ ifdef CONFIG_CC_IS_GCC
 KBUILD_CFLAGS += -Wno-maybe-uninitialized
 endif
 
+ifdef CONFIG_CC_IS_GCC
+# The allocators already balk at large sizes, so silence the compiler
+# warnings for bounds checks involving those possible values. While
+# -Wno-alloc-size-larger-than would normally be used here, earlier versions
+# of gcc (<9.1) weirdly don't handle the option correctly when _other_
+# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
+# doesn't work (as it is documented to), silently resolving to "0" prior to
+# version 9.1 (and producing an error more recently). Numeric values larger
+# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
+# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
+# choice, we must perform a versioned check to disable this warning.
+# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
+KBUILD_CFLAGS += $(call cc-ifversion, -ge, 0901, -Wno-alloc-size-larger-than)
+endif
+
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= -fno-strict-overflow
 
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index bd2b881c6b63..b9d5f9c373a0 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -144,3 +144,11 @@
 #else
 #define __diag_GCC_8(s)
 #endif
+
+/*
+ * Prior to 9.1, -Wno-alloc-size-larger-than (and therefore the "alloc_size"
+ * attribute) do not work, and must be disabled.
+ */
+#if GCC_VERSION < 90100
+#undef __alloc_size__
+#endif
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index e6ec63403965..3de06a8fae73 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -33,6 +33,15 @@
 #define __aligned(x)                    __attribute__((__aligned__(x)))
 #define __aligned_largest               __attribute__((__aligned__))
 
+/*
+ * Note: do not use this directly. Instead, use __alloc_size() since it is conditionally
+ * available and includes other attributes.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alloc_005fsize-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#alloc-size
+ */
+#define __alloc_size__(x, ...)		__attribute__((__alloc_size__(x, ## __VA_ARGS__)))
+
 /*
  * Note: users of __always_inline currently do not write "inline" themselves,
  * which seems to be required by gcc to apply the attribute according
@@ -153,6 +162,7 @@
 
 /*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#malloc
  */
 #define __malloc                        __attribute__((__malloc__))
 
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index b6ff83a714ca..4f2203c4a257 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -250,6 +250,18 @@ struct ftrace_likely_data {
 # define __cficanonical
 #endif
 
+/*
+ * Any place that could be marked with the "alloc_size" attribute is also
+ * a place to be marked with the "malloc" attribute. Do this as part of the
+ * __alloc_size macro to avoid redundant attributes and to avoid missing a
+ * __malloc marking.
+ */
+#ifdef __alloc_size__
+# define __alloc_size(x, ...)	__alloc_size__(x, ## __VA_ARGS__) __malloc
+#else
+# define __alloc_size(x, ...)	__malloc
+#endif
+
 #ifndef asm_volatile_goto
 #define asm_volatile_goto(x...) asm goto(x)
 #endif
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index c27d2312cfc3..88cb294dc447 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -489,7 +489,8 @@ our $Attribute	= qr{
 			____cacheline_aligned|
 			____cacheline_aligned_in_smp|
 			____cacheline_internodealigned_in_smp|
-			__weak
+			__weak|
+			__alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\)
 		  }x;
 our $Modifier;
 our $Inline	= qr{inline|__always_inline|noinline|__inline|__inline__};
-- 
cgit v1.2.3


From 72d67229f522e3331d1eabd9f58d36ae080eb228 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 Nov 2021 13:36:23 -0700
Subject: slab: clean up function prototypes

Based on feedback from Joe Perches and Linus Torvalds, regularize the
slab function prototypes before making attribute changes.

Link: https://lkml.kernel.org/r/20210930222704.2631604-4-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Daniel Micay <danielmicay@gmail.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jing Xiangfeng <jingxiangfeng@huawei.com>
Cc: Joe Perches <joe@perches.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 68 ++++++++++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index c0d46b6fa12a..d05de03bdcdd 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -152,8 +152,8 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name,
 			slab_flags_t flags,
 			unsigned int useroffset, unsigned int usersize,
 			void (*ctor)(void *));
-void kmem_cache_destroy(struct kmem_cache *);
-int kmem_cache_shrink(struct kmem_cache *);
+void kmem_cache_destroy(struct kmem_cache *s);
+int kmem_cache_shrink(struct kmem_cache *s);
 
 /*
  * Please use this macro to create slab caches. Simply specify the
@@ -181,11 +181,11 @@ int kmem_cache_shrink(struct kmem_cache *);
 /*
  * Common kmalloc functions provided by all allocators
  */
-void * __must_check krealloc(const void *, size_t, gfp_t);
-void kfree(const void *);
-void kfree_sensitive(const void *);
-size_t __ksize(const void *);
-size_t ksize(const void *);
+void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags);
+void kfree(const void *objp);
+void kfree_sensitive(const void *objp);
+size_t __ksize(const void *objp);
+size_t ksize(const void *objp);
 #ifdef CONFIG_PRINTK
 bool kmem_valid_obj(void *object);
 void kmem_dump_obj(void *object);
@@ -426,8 +426,8 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
 #endif /* !CONFIG_SLOB */
 
 void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment __malloc;
-void kmem_cache_free(struct kmem_cache *, void *);
+void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
+void kmem_cache_free(struct kmem_cache *s, void *objp);
 
 /*
  * Bulk allocation and freeing operations. These are accelerated in an
@@ -436,8 +436,8 @@ void kmem_cache_free(struct kmem_cache *, void *);
  *
  * Note that interrupts must be enabled when calling these functions.
  */
-void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **);
-int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **);
+void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p);
+int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p);
 
 /*
  * Caller must not use kfree_bulk() on memory not originally allocated
@@ -450,7 +450,8 @@ static __always_inline void kfree_bulk(size_t size, void **p)
 
 #ifdef CONFIG_NUMA
 void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc;
-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment __malloc;
+void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
+									 __malloc;
 #else
 static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
@@ -464,25 +465,24 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f
 #endif
 
 #ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment __malloc;
+extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
+				   __assume_slab_alignment __malloc;
 
 #ifdef CONFIG_NUMA
-extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
-					   gfp_t gfpflags,
-					   int node, size_t size) __assume_slab_alignment __malloc;
+extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
+					 int node, size_t size) __assume_slab_alignment __malloc;
 #else
-static __always_inline void *
-kmem_cache_alloc_node_trace(struct kmem_cache *s,
-			      gfp_t gfpflags,
-			      int node, size_t size)
+static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
+							 gfp_t gfpflags, int node,
+							 size_t size)
 {
 	return kmem_cache_alloc_trace(s, gfpflags, size);
 }
 #endif /* CONFIG_NUMA */
 
 #else /* CONFIG_TRACING */
-static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s,
-		gfp_t flags, size_t size)
+static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags,
+						    size_t size)
 {
 	void *ret = kmem_cache_alloc(s, flags);
 
@@ -490,10 +490,8 @@ static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s,
 	return ret;
 }
 
-static __always_inline void *
-kmem_cache_alloc_node_trace(struct kmem_cache *s,
-			      gfp_t gfpflags,
-			      int node, size_t size)
+static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
+							 int node, size_t size)
 {
 	void *ret = kmem_cache_alloc_node(s, gfpflags, node);
 
@@ -502,13 +500,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
 }
 #endif /* CONFIG_TRACING */
 
-extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc;
+extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment
+									 __malloc;
 
 #ifdef CONFIG_TRACING
-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment __malloc;
+extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
+				__assume_page_alignment __malloc;
 #else
-static __always_inline void *
-kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
+static __always_inline void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
 {
 	return kmalloc_order(size, flags, order);
 }
@@ -638,8 +637,8 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
  * @new_size: new size of a single member of the array
  * @flags: the type of memory to allocate (see kmalloc)
  */
-static __must_check inline void *
-krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags)
+static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size,
+						 gfp_t flags)
 {
 	size_t bytes;
 
@@ -668,7 +667,7 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  * allocator where we care about the real place the memory allocation
  * request comes from.
  */
-extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
+extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller);
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc_track_caller(size, flags, _RET_IP_)
 
@@ -691,7 +690,8 @@ static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
 
 
 #ifdef CONFIG_NUMA
-extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
+extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
+					 unsigned long caller);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
 			_RET_IP_)
-- 
cgit v1.2.3


From c37495d6254c237578db3121dcf79857e033f8ff Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 Nov 2021 13:36:27 -0700
Subject: slab: add __alloc_size attributes for better bounds checking

As already done in GrapheneOS, add the __alloc_size attribute for
regular kmalloc interfaces, to provide additional hinting for better
bounds checking, assisting CONFIG_FORTIFY_SOURCE and other compiler
optimizations.

Link: https://lkml.kernel.org/r/20210930222704.2631604-5-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Co-developed-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jing Xiangfeng <jingxiangfeng@huawei.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 61 ++++++++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index d05de03bdcdd..b5bf0537975b 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -181,7 +181,7 @@ int kmem_cache_shrink(struct kmem_cache *s);
 /*
  * Common kmalloc functions provided by all allocators
  */
-void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags);
+void * __must_check krealloc(const void *objp, size_t new_size, gfp_t flags) __alloc_size(2);
 void kfree(const void *objp);
 void kfree_sensitive(const void *objp);
 size_t __ksize(const void *objp);
@@ -425,7 +425,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
 #define kmalloc_index(s) __kmalloc_index(s, true)
 #endif /* !CONFIG_SLOB */
 
-void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
+void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1);
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t flags) __assume_slab_alignment __malloc;
 void kmem_cache_free(struct kmem_cache *s, void *objp);
 
@@ -449,11 +449,12 @@ static __always_inline void kfree_bulk(size_t size, void **p)
 }
 
 #ifdef CONFIG_NUMA
-void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment __malloc;
+void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment
+							 __alloc_size(1);
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment
 									 __malloc;
 #else
-static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline __alloc_size(1) void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return __kmalloc(size, flags);
 }
@@ -466,23 +467,23 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f
 
 #ifdef CONFIG_TRACING
 extern void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size)
-				   __assume_slab_alignment __malloc;
+				   __assume_slab_alignment __alloc_size(3);
 
 #ifdef CONFIG_NUMA
 extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags,
-					 int node, size_t size) __assume_slab_alignment __malloc;
+					 int node, size_t size) __assume_slab_alignment
+								__alloc_size(4);
 #else
-static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
-							 gfp_t gfpflags, int node,
-							 size_t size)
+static __always_inline __alloc_size(4) void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
+						 gfp_t gfpflags, int node, size_t size)
 {
 	return kmem_cache_alloc_trace(s, gfpflags, size);
 }
 #endif /* CONFIG_NUMA */
 
 #else /* CONFIG_TRACING */
-static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags,
-						    size_t size)
+static __always_inline __alloc_size(3) void *kmem_cache_alloc_trace(struct kmem_cache *s,
+								    gfp_t flags, size_t size)
 {
 	void *ret = kmem_cache_alloc(s, flags);
 
@@ -501,19 +502,20 @@ static __always_inline void *kmem_cache_alloc_node_trace(struct kmem_cache *s, g
 #endif /* CONFIG_TRACING */
 
 extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment
-									 __malloc;
+									 __alloc_size(1);
 
 #ifdef CONFIG_TRACING
 extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
-				__assume_page_alignment __malloc;
+				__assume_page_alignment __alloc_size(1);
 #else
-static __always_inline void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
+static __always_inline __alloc_size(1) void *kmalloc_order_trace(size_t size, gfp_t flags,
+								 unsigned int order)
 {
 	return kmalloc_order(size, flags, order);
 }
 #endif
 
-static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
+static __always_inline __alloc_size(1) void *kmalloc_large(size_t size, gfp_t flags)
 {
 	unsigned int order = get_order(size);
 	return kmalloc_order_trace(size, flags, order);
@@ -573,7 +575,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
  *	Try really hard to succeed the allocation but fail
  *	eventually.
  */
-static __always_inline void *kmalloc(size_t size, gfp_t flags)
+static __always_inline __alloc_size(1) void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
 #ifndef CONFIG_SLOB
@@ -595,7 +597,7 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
 	return __kmalloc(size, flags);
 }
 
-static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline __alloc_size(1) void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 #ifndef CONFIG_SLOB
 	if (__builtin_constant_p(size) &&
@@ -619,7 +621,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+static inline __alloc_size(1, 2) void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
 	size_t bytes;
 
@@ -637,8 +639,10 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
  * @new_size: new size of a single member of the array
  * @flags: the type of memory to allocate (see kmalloc)
  */
-static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t new_size,
-						 gfp_t flags)
+static inline __alloc_size(2, 3) void * __must_check krealloc_array(void *p,
+								    size_t new_n,
+								    size_t new_size,
+								    gfp_t flags)
 {
 	size_t bytes;
 
@@ -654,7 +658,7 @@ static inline void * __must_check krealloc_array(void *p, size_t new_n, size_t n
  * @size: element size.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+static inline __alloc_size(1, 2) void *kcalloc(size_t n, size_t size, gfp_t flags)
 {
 	return kmalloc_array(n, size, flags | __GFP_ZERO);
 }
@@ -667,12 +671,13 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
  * allocator where we care about the real place the memory allocation
  * request comes from.
  */
-extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller);
+extern void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
+				   __alloc_size(1);
 #define kmalloc_track_caller(size, flags) \
 	__kmalloc_track_caller(size, flags, _RET_IP_)
 
-static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
-				       int node)
+static inline __alloc_size(1, 2) void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
+							  int node)
 {
 	size_t bytes;
 
@@ -683,7 +688,7 @@ static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags,
 	return __kmalloc_node(bytes, flags, node);
 }
 
-static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
+static inline __alloc_size(1, 2) void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
 {
 	return kmalloc_array_node(n, size, flags | __GFP_ZERO, node);
 }
@@ -691,7 +696,7 @@ static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node)
 
 #ifdef CONFIG_NUMA
 extern void *__kmalloc_node_track_caller(size_t size, gfp_t flags, int node,
-					 unsigned long caller);
+					 unsigned long caller) __alloc_size(1);
 #define kmalloc_node_track_caller(size, flags, node) \
 	__kmalloc_node_track_caller(size, flags, node, \
 			_RET_IP_)
@@ -716,7 +721,7 @@ static inline void *kmem_cache_zalloc(struct kmem_cache *k, gfp_t flags)
  * @size: how many bytes of memory are required.
  * @flags: the type of memory to allocate (see kmalloc).
  */
-static inline void *kzalloc(size_t size, gfp_t flags)
+static inline __alloc_size(1) void *kzalloc(size_t size, gfp_t flags)
 {
 	return kmalloc(size, flags | __GFP_ZERO);
 }
@@ -727,7 +732,7 @@ static inline void *kzalloc(size_t size, gfp_t flags)
  * @flags: the type of memory to allocate (see kmalloc).
  * @node: memory node from which to allocate
  */
-static inline void *kzalloc_node(size_t size, gfp_t flags, int node)
+static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int node)
 {
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
-- 
cgit v1.2.3


From 56bcf40f91c7d7f53b77abe161a7d18ef9f981ba Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 Nov 2021 13:36:31 -0700
Subject: mm/kvmalloc: add __alloc_size attributes for better bounds checking

As already done in GrapheneOS, add the __alloc_size attribute for
regular kvmalloc interfaces, to provide additional hinting for better
bounds checking, assisting CONFIG_FORTIFY_SOURCE and other compiler
optimizations.

Link: https://lkml.kernel.org/r/20210930222704.2631604-6-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Co-developed-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jing Xiangfeng <jingxiangfeng@huawei.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index b5bf0537975b..837cb16232ef 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -737,21 +737,21 @@ static inline __alloc_size(1) void *kzalloc_node(size_t size, gfp_t flags, int n
 	return kmalloc_node(size, flags | __GFP_ZERO, node);
 }
 
-extern void *kvmalloc_node(size_t size, gfp_t flags, int node);
-static inline void *kvmalloc(size_t size, gfp_t flags)
+extern void *kvmalloc_node(size_t size, gfp_t flags, int node) __alloc_size(1);
+static inline __alloc_size(1) void *kvmalloc(size_t size, gfp_t flags)
 {
 	return kvmalloc_node(size, flags, NUMA_NO_NODE);
 }
-static inline void *kvzalloc_node(size_t size, gfp_t flags, int node)
+static inline __alloc_size(1) void *kvzalloc_node(size_t size, gfp_t flags, int node)
 {
 	return kvmalloc_node(size, flags | __GFP_ZERO, node);
 }
-static inline void *kvzalloc(size_t size, gfp_t flags)
+static inline __alloc_size(1) void *kvzalloc(size_t size, gfp_t flags)
 {
 	return kvmalloc(size, flags | __GFP_ZERO);
 }
 
-static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
+static inline __alloc_size(1, 2) void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
 {
 	size_t bytes;
 
@@ -761,13 +761,13 @@ static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags)
 	return kvmalloc(bytes, flags);
 }
 
-static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
+static inline __alloc_size(1, 2) void *kvcalloc(size_t n, size_t size, gfp_t flags)
 {
 	return kvmalloc_array(n, size, flags | __GFP_ZERO);
 }
 
-extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
-		gfp_t flags);
+extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize, gfp_t flags)
+		      __alloc_size(3);
 extern void kvfree(const void *addr);
 extern void kvfree_sensitive(const void *addr, size_t len);
 
-- 
cgit v1.2.3


From 894f24bb569afd4fe4a874c636f82d47f1c9beed Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 Nov 2021 13:36:34 -0700
Subject: mm/vmalloc: add __alloc_size attributes for better bounds checking

As already done in GrapheneOS, add the __alloc_size attribute for
appropriate vmalloc allocator interfaces, to provide additional hinting
for better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other
compiler optimizations.

Link: https://lkml.kernel.org/r/20210930222704.2631604-7-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Co-developed-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jing Xiangfeng <jingxiangfeng@huawei.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 671d402c3778..0ed56fc10c11 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -136,21 +136,21 @@ static inline void vmalloc_init(void)
 static inline unsigned long vmalloc_nr_pages(void) { return 0; }
 #endif
 
-extern void *vmalloc(unsigned long size);
-extern void *vzalloc(unsigned long size);
-extern void *vmalloc_user(unsigned long size);
-extern void *vmalloc_node(unsigned long size, int node);
-extern void *vzalloc_node(unsigned long size, int node);
-extern void *vmalloc_32(unsigned long size);
-extern void *vmalloc_32_user(unsigned long size);
-extern void *__vmalloc(unsigned long size, gfp_t gfp_mask);
+extern void *vmalloc(unsigned long size) __alloc_size(1);
+extern void *vzalloc(unsigned long size) __alloc_size(1);
+extern void *vmalloc_user(unsigned long size) __alloc_size(1);
+extern void *vmalloc_node(unsigned long size, int node) __alloc_size(1);
+extern void *vzalloc_node(unsigned long size, int node) __alloc_size(1);
+extern void *vmalloc_32(unsigned long size) __alloc_size(1);
+extern void *vmalloc_32_user(unsigned long size) __alloc_size(1);
+extern void *__vmalloc(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
 extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			unsigned long start, unsigned long end, gfp_t gfp_mask,
 			pgprot_t prot, unsigned long vm_flags, int node,
-			const void *caller);
+			const void *caller) __alloc_size(1);
 void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
-		int node, const void *caller);
-void *vmalloc_no_huge(unsigned long size);
+		int node, const void *caller) __alloc_size(1);
+void *vmalloc_no_huge(unsigned long size) __alloc_size(1);
 
 extern void vfree(const void *addr);
 extern void vfree_atomic(const void *addr);
-- 
cgit v1.2.3


From abd58f38dfb4771ef06e25d71a84aa0932c52f1a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 Nov 2021 13:36:38 -0700
Subject: mm/page_alloc: add __alloc_size attributes for better bounds checking

As already done in GrapheneOS, add the __alloc_size attribute for
appropriate page allocator interfaces, to provide additional hinting for
better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other
compiler optimizations.

Link: https://lkml.kernel.org/r/20210930222704.2631604-8-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Co-developed-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jing Xiangfeng <jingxiangfeng@huawei.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 55b2ec1f965a..fbd4abc33f24 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -608,9 +608,9 @@ static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
 
-void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
+void *alloc_pages_exact(size_t size, gfp_t gfp_mask) __alloc_size(1);
 void free_pages_exact(void *virt, size_t size);
-void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
+__meminit void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) __alloc_size(1);
 
 #define __get_free_page(gfp_mask) \
 		__get_free_pages((gfp_mask), 0)
-- 
cgit v1.2.3


From 17197dd460469c6fca66e274f5cd51ee43bb6ddd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 5 Nov 2021 13:36:42 -0700
Subject: percpu: add __alloc_size attributes for better bounds checking

As already done in GrapheneOS, add the __alloc_size attribute for
appropriate percpu allocator interfaces, to provide additional hinting
for better bounds checking, assisting CONFIG_FORTIFY_SOURCE and other
compiler optimizations.

Note that due to the implementation of the percpu API, this is unlikely
to ever actually provide compile-time checking beyond very simple
non-SMP builds.  But, since they are technically allocators, mark them
as such.

Link: https://lkml.kernel.org/r/20210930222704.2631604-9-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Co-developed-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dwaipayan Ray <dwaipayanray1@gmail.com>
Cc: Joe Perches <joe@perches.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexandre Bounine <alex.bou9@gmail.com>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jing Xiangfeng <jingxiangfeng@huawei.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/percpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 5e76af742c80..98a9371133f8 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -123,7 +123,7 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size,
 				pcpu_fc_populate_pte_fn_t populate_pte_fn);
 #endif
 
-extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align);
+extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align) __alloc_size(1);
 extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr);
 extern bool is_kernel_percpu_address(unsigned long addr);
 
@@ -131,8 +131,8 @@ extern bool is_kernel_percpu_address(unsigned long addr);
 extern void __init setup_per_cpu_areas(void);
 #endif
 
-extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
-extern void __percpu *__alloc_percpu(size_t size, size_t align);
+extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1);
+extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1);
 extern void free_percpu(void __percpu *__pdata);
 extern phys_addr_t per_cpu_ptr_to_phys(void *addr);
 
-- 
cgit v1.2.3


From 0b3ea0926afb8dde70cfab00316ae0a70b93a7cc Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 5 Nov 2021 13:36:58 -0700
Subject: fs: explicitly unregister per-superblock BDIs

Add a new SB_I_ flag to mark superblocks that have an ephemeral bdi
associated with them, and unregister it when the superblock is shut
down.

Link: https://lkml.kernel.org/r/20211021124441.668816-4-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/super.c         | 3 +++
 include/linux/fs.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index bcef3a6f4c4b..3bfc0f8fbd5b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -476,6 +476,8 @@ void generic_shutdown_super(struct super_block *sb)
 	spin_unlock(&sb_lock);
 	up_write(&sb->s_umount);
 	if (sb->s_bdi != &noop_backing_dev_info) {
+		if (sb->s_iflags & SB_I_PERSB_BDI)
+			bdi_unregister(sb->s_bdi);
 		bdi_put(sb->s_bdi);
 		sb->s_bdi = &noop_backing_dev_info;
 	}
@@ -1562,6 +1564,7 @@ int super_setup_bdi_name(struct super_block *sb, char *fmt, ...)
 	}
 	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
 	sb->s_bdi = bdi;
+	sb->s_iflags |= SB_I_PERSB_BDI;
 
 	return 0;
 }
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e7a633353fd2..226de651f52e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1443,6 +1443,7 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_I_UNTRUSTED_MOUNTER		0x00000040
 
 #define SB_I_SKIP_SYNC	0x00000100	/* Skip superblock at global sync */
+#define SB_I_PERSB_BDI	0x00000200	/* has a per-sb bdi */
 
 /* Possible states of 'frozen' field */
 enum {
-- 
cgit v1.2.3


From efee17134ca464639a2f5b4d036ce40caf1b247a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 5 Nov 2021 13:37:04 -0700
Subject: mm: simplify bdi refcounting

Move grabbing and releasing the bdi refcount out of the common
wb_init/wb_exit helpers into code that is only used for the non-default
memcg driven bdi_writeback structures.

[hch@lst.de: add comment]
  Link: https://lkml.kernel.org/r/20211027074207.GA12793@lst.de
[akpm@linux-foundation.org: fix typo]

Link: https://lkml.kernel.org/r/20211021124441.668816-6-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev-defs.h |  3 +++
 mm/backing-dev.c                 | 13 +++++--------
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 33207004cfde..993c5628a726 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -103,6 +103,9 @@ struct wb_completion {
  * change as blkcg is disabled and enabled higher up in the hierarchy, a wb
  * is tested for blkcg after lookup and removed from index on mismatch so
  * that a new wb for the combination can be created.
+ *
+ * Each bdi_writeback that is not embedded into the backing_dev_info must hold
+ * a reference to the parent backing_dev_info.  See cgwb_create() for details.
  */
 struct bdi_writeback {
 	struct backing_dev_info *bdi;	/* our parent bdi */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 768e9ae489f6..5ccb25089808 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -291,8 +291,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 
 	memset(wb, 0, sizeof(*wb));
 
-	if (wb != &bdi->wb)
-		bdi_get(bdi);
 	wb->bdi = bdi;
 	wb->last_old_flush = jiffies;
 	INIT_LIST_HEAD(&wb->b_dirty);
@@ -316,7 +314,7 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
 
 	err = fprop_local_init_percpu(&wb->completions, gfp);
 	if (err)
-		goto out_put_bdi;
+		return err;
 
 	for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
 		err = percpu_counter_init(&wb->stat[i], 0, gfp);
@@ -330,9 +328,6 @@ out_destroy_stat:
 	while (i--)
 		percpu_counter_destroy(&wb->stat[i]);
 	fprop_local_destroy_percpu(&wb->completions);
-out_put_bdi:
-	if (wb != &bdi->wb)
-		bdi_put(bdi);
 	return err;
 }
 
@@ -373,8 +368,6 @@ static void wb_exit(struct bdi_writeback *wb)
 		percpu_counter_destroy(&wb->stat[i]);
 
 	fprop_local_destroy_percpu(&wb->completions);
-	if (wb != &wb->bdi->wb)
-		bdi_put(wb->bdi);
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -397,6 +390,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
 						release_work);
 	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
+	struct backing_dev_info *bdi = wb->bdi;
 
 	mutex_lock(&wb->bdi->cgwb_release_mutex);
 	wb_shutdown(wb);
@@ -416,6 +410,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
+	bdi_put(bdi);
 	WARN_ON_ONCE(!list_empty(&wb->b_attached));
 	kfree_rcu(wb, rcu);
 }
@@ -497,6 +492,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 	INIT_LIST_HEAD(&wb->b_attached);
 	INIT_WORK(&wb->release_work, cgwb_release_workfn);
 	set_bit(WB_registered, &wb->state);
+	bdi_get(bdi);
 
 	/*
 	 * The root wb determines the registered state of the whole bdi and
@@ -528,6 +524,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 	goto out_put;
 
 err_fprop_exit:
+	bdi_put(bdi);
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 err_ref_exit:
 	percpu_ref_exit(&wb->refcnt);
-- 
cgit v1.2.3


From e80216d9f1f5c90b3cab834cd4fb492d731f70aa Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Fri, 5 Nov 2021 13:37:56 -0700
Subject: mm: memcontrol: remove the kmem states

Now the kmem states is only used to indicate whether the kmem is
offline.  However, we can set ->kmemcg_id to -1 to indicate whether the
kmem is offline.  Finally, we can remove the kmem states to simplify the
code.

Link: https://lkml.kernel.org/r/20211025125259.56624-1-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 7 -------
 mm/memcontrol.c            | 7 ++-----
 2 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3096c9a0ee01..f207f98bdb76 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -180,12 +180,6 @@ struct mem_cgroup_thresholds {
 	struct mem_cgroup_threshold_ary *spare;
 };
 
-enum memcg_kmem_state {
-	KMEM_NONE,
-	KMEM_ALLOCATED,
-	KMEM_ONLINE,
-};
-
 #if defined(CONFIG_SMP)
 struct memcg_padding {
 	char x[0];
@@ -318,7 +312,6 @@ struct mem_cgroup {
 
 #ifdef CONFIG_MEMCG_KMEM
 	int kmemcg_id;
-	enum memcg_kmem_state kmem_state;
 	struct obj_cgroup __rcu *objcg;
 	struct list_head objcg_list; /* list of inherited objcgs */
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1e2d3f378edd..b4a17b7a10d3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3626,7 +3626,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
 		return 0;
 
 	BUG_ON(memcg->kmemcg_id >= 0);
-	BUG_ON(memcg->kmem_state);
 
 	memcg_id = memcg_alloc_cache_id();
 	if (memcg_id < 0)
@@ -3643,7 +3642,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
 	static_branch_enable(&memcg_kmem_enabled_key);
 
 	memcg->kmemcg_id = memcg_id;
-	memcg->kmem_state = KMEM_ONLINE;
 
 	return 0;
 }
@@ -3653,11 +3651,9 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	struct mem_cgroup *parent;
 	int kmemcg_id;
 
-	if (memcg->kmem_state != KMEM_ONLINE)
+	if (memcg->kmemcg_id == -1)
 		return;
 
-	memcg->kmem_state = KMEM_ALLOCATED;
-
 	parent = parent_mem_cgroup(memcg);
 	if (!parent)
 		parent = root_mem_cgroup;
@@ -3676,6 +3672,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	memcg_drain_all_list_lrus(kmemcg_id, parent);
 
 	memcg_free_cache_id(kmemcg_id);
+	memcg->kmemcg_id = -1;
 }
 #else
 static int memcg_online_kmem(struct mem_cgroup *memcg)
-- 
cgit v1.2.3


From f1dc0db296bd25960273649fc6ef2ecbf5aaa0e0 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Fri, 5 Nov 2021 13:38:15 -0700
Subject: mm: use __pfn_to_section() instead of open coding it

It is defined in the same file just a few lines above.

Link: https://lkml.kernel.org/r/4598487.Rc0NezkW7i@mobilepool36.emlix.com
Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 6a1d79d84675..832aa49d0d8e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1481,7 +1481,7 @@ static inline int pfn_valid(unsigned long pfn)
 
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
-	ms = __nr_to_section(pfn_to_section_nr(pfn));
+	ms = __pfn_to_section(pfn);
 	if (!valid_section(ms))
 		return 0;
 	/*
@@ -1496,7 +1496,7 @@ static inline int pfn_in_present_section(unsigned long pfn)
 {
 	if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
 		return 0;
-	return present_section(__nr_to_section(pfn_to_section_nr(pfn)));
+	return present_section(__pfn_to_section(pfn));
 }
 
 static inline unsigned long next_present_section_nr(unsigned long section_nr)
-- 
cgit v1.2.3


From 232a6a1c0619d1b4d9cd8d21949b2f13821be0af Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 5 Nov 2021 13:38:31 -0700
Subject: mm: drop first_index/last_index in zap_details

The first_index/last_index parameters in zap_details are actually only
used in unmap_mapping_range_tree().  At the meantime, this function is
only called by unmap_mapping_pages() once.

Instead of passing these two variables through the whole stack of page
zapping code, remove them from zap_details and let them simply be
parameters of unmap_mapping_range_tree(), which is inlined.

Link: https://lkml.kernel.org/r/20210915181535.11238-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Liam Howlett <liam.howlett@oracle.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 --
 mm/memory.c        | 31 ++++++++++++++++++-------------
 2 files changed, 18 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 32b2ecc47408..c5b600d7f85b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1688,8 +1688,6 @@ extern void user_shm_unlock(size_t, struct ucounts *);
  */
 struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
-	pgoff_t	first_index;			/* Lowest page->index to unmap */
-	pgoff_t last_index;			/* Highest page->index to unmap */
 	struct page *single_page;		/* Locked page to be unmapped */
 };
 
diff --git a/mm/memory.c b/mm/memory.c
index cf7b059b57a7..d4ed701e3e5d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3321,20 +3321,20 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 }
 
 static inline void unmap_mapping_range_tree(struct rb_root_cached *root,
+					    pgoff_t first_index,
+					    pgoff_t last_index,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma;
 	pgoff_t vba, vea, zba, zea;
 
-	vma_interval_tree_foreach(vma, root,
-			details->first_index, details->last_index) {
-
+	vma_interval_tree_foreach(vma, root, first_index, last_index) {
 		vba = vma->vm_pgoff;
 		vea = vba + vma_pages(vma) - 1;
-		zba = details->first_index;
+		zba = first_index;
 		if (zba < vba)
 			zba = vba;
-		zea = details->last_index;
+		zea = last_index;
 		if (zea > vea)
 			zea = vea;
 
@@ -3360,18 +3360,22 @@ void unmap_mapping_page(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct zap_details details = { };
+	pgoff_t	first_index;
+	pgoff_t	last_index;
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageTail(page));
 
+	first_index = page->index;
+	last_index = page->index + thp_nr_pages(page) - 1;
+
 	details.check_mapping = mapping;
-	details.first_index = page->index;
-	details.last_index = page->index + thp_nr_pages(page) - 1;
 	details.single_page = page;
 
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-		unmap_mapping_range_tree(&mapping->i_mmap, &details);
+		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+					 last_index, &details);
 	i_mmap_unlock_write(mapping);
 }
 
@@ -3391,16 +3395,17 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 		pgoff_t nr, bool even_cows)
 {
 	struct zap_details details = { };
+	pgoff_t	first_index = start;
+	pgoff_t	last_index = start + nr - 1;
 
 	details.check_mapping = even_cows ? NULL : mapping;
-	details.first_index = start;
-	details.last_index = start + nr - 1;
-	if (details.last_index < details.first_index)
-		details.last_index = ULONG_MAX;
+	if (last_index < first_index)
+		last_index = ULONG_MAX;
 
 	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)))
-		unmap_mapping_range_tree(&mapping->i_mmap, &details);
+		unmap_mapping_range_tree(&mapping->i_mmap, first_index,
+					 last_index, &details);
 	i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL_GPL(unmap_mapping_pages);
-- 
cgit v1.2.3


From 91b61ef333cf43f96b3522a086c9ac925763d6e5 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 5 Nov 2021 13:38:34 -0700
Subject: mm: add zap_skip_check_mapping() helper

Use the helper for the checks.  Rename "check_mapping" into
"zap_mapping" because "check_mapping" looks like a bool but in fact it
stores the mapping itself.  When it's set, we check the mapping (it must
be non-NULL).  When it's cleared we skip the check, which works like the
old way.

Move the duplicated comments to the helper too.

Link: https://lkml.kernel.org/r/20210915181538.11288-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Alistair Popple <apopple@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 16 +++++++++++++++-
 mm/memory.c        | 29 ++++++-----------------------
 2 files changed, 21 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c5b600d7f85b..6e29deb1e0d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1687,10 +1687,24 @@ extern void user_shm_unlock(size_t, struct ucounts *);
  * Parameter block passed down to zap_pte_range in exceptional cases.
  */
 struct zap_details {
-	struct address_space *check_mapping;	/* Check page->mapping if set */
+	struct address_space *zap_mapping;	/* Check page->mapping if set */
 	struct page *single_page;		/* Locked page to be unmapped */
 };
 
+/*
+ * We set details->zap_mappings when we want to unmap shared but keep private
+ * pages. Return true if skip zapping this page, false otherwise.
+ */
+static inline bool
+zap_skip_check_mapping(struct zap_details *details, struct page *page)
+{
+	if (!details || !page)
+		return false;
+
+	return details->zap_mapping &&
+	    (details->zap_mapping != page_rmapping(page));
+}
+
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			     pte_t pte);
 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index d4ed701e3e5d..48b2e048d267 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1333,16 +1333,8 @@ again:
 			struct page *page;
 
 			page = vm_normal_page(vma, addr, ptent);
-			if (unlikely(details) && page) {
-				/*
-				 * unmap_shared_mapping_pages() wants to
-				 * invalidate cache without truncating:
-				 * unmap shared but keep private pages.
-				 */
-				if (details->check_mapping &&
-				    details->check_mapping != page_rmapping(page))
-					continue;
-			}
+			if (unlikely(zap_skip_check_mapping(details, page)))
+				continue;
 			ptent = ptep_get_and_clear_full(mm, addr, pte,
 							tlb->fullmm);
 			tlb_remove_tlb_entry(tlb, pte, addr);
@@ -1375,17 +1367,8 @@ again:
 		    is_device_exclusive_entry(entry)) {
 			struct page *page = pfn_swap_entry_to_page(entry);
 
-			if (unlikely(details && details->check_mapping)) {
-				/*
-				 * unmap_shared_mapping_pages() wants to
-				 * invalidate cache without truncating:
-				 * unmap shared but keep private pages.
-				 */
-				if (details->check_mapping !=
-				    page_rmapping(page))
-					continue;
-			}
-
+			if (unlikely(zap_skip_check_mapping(details, page)))
+				continue;
 			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 			rss[mm_counter(page)]--;
 
@@ -3369,7 +3352,7 @@ void unmap_mapping_page(struct page *page)
 	first_index = page->index;
 	last_index = page->index + thp_nr_pages(page) - 1;
 
-	details.check_mapping = mapping;
+	details.zap_mapping = mapping;
 	details.single_page = page;
 
 	i_mmap_lock_write(mapping);
@@ -3398,7 +3381,7 @@ void unmap_mapping_pages(struct address_space *mapping, pgoff_t start,
 	pgoff_t	first_index = start;
 	pgoff_t	last_index = start + nr - 1;
 
-	details.check_mapping = even_cows ? NULL : mapping;
+	details.zap_mapping = even_cows ? NULL : mapping;
 	if (last_index < first_index)
 		last_index = ULONG_MAX;
 
-- 
cgit v1.2.3


From e26e0cc30b48cad5f2704f6a22fa5cac9fdaaece Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Fri, 5 Nov 2021 13:39:00 -0700
Subject: memory: remove unused CONFIG_MEM_BLOCK_SIZE

Commit 3947be1969a9 ("[PATCH] memory hotplug: sysfs and add/remove
functions") defines CONFIG_MEM_BLOCK_SIZE, but this has never been
utilized anywhere.

It is a good practice to keep the CONFIG_* defines exclusively for the
Kbuild system.  So, drop this unused definition.

This issue was noticed due to running ./scripts/checkkconfigsymbols.py.

Link: https://lkml.kernel.org/r/20211006120354.7468-1-lukas.bulwahn@gmail.com
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memory.h b/include/linux/memory.h
index 182c606adb06..053a530c7bdd 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -140,7 +140,6 @@ typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
 extern int walk_memory_blocks(unsigned long start, unsigned long size,
 			      void *arg, walk_memory_blocks_func_t func);
 extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
-#define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 
 extern int memory_group_register_static(int nid, unsigned long max_pages);
 extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
-- 
cgit v1.2.3


From 2e86f78b117a019881a420705a9d0ef126253083 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Fri, 5 Nov 2021 13:39:10 -0700
Subject: include/linux/io-mapping.h: remove fallback for writecombine

The fallback was introduced in commit 80c33624e472 ("io-mapping: Fixup
for different names of writecombine") to fix the build on microblaze.

5 years later, it seems all archs now provide a pgprot_writecombine(),
so just remove the other possible fallbacks.  For microblaze,
pgprot_writecombine() is available since commit 97ccedd793ac
("microblaze: Provide pgprot_device/writecombine macros for nommu").

This is build-tested on microblaze with a hack to always build
mm/io-mapping.o and without DIYing on an x86-only macro
(_PAGE_CACHE_MASK)

Link: https://lkml.kernel.org/r/20211020204838.1142908-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/io-mapping.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index e9743cfd8585..66a774d2710e 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -132,13 +132,7 @@ io_mapping_init_wc(struct io_mapping *iomap,
 
 	iomap->base = base;
 	iomap->size = size;
-#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */
-	iomap->prot = pgprot_noncached_wc(PAGE_KERNEL);
-#elif defined(pgprot_writecombine)
 	iomap->prot = pgprot_writecombine(PAGE_KERNEL);
-#else
-	iomap->prot = pgprot_noncached(PAGE_KERNEL);
-#endif
 
 	return iomap;
 }
-- 
cgit v1.2.3


From bd1a8fb2d43f7c293383f76691d7a55f7f89d9da Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 5 Nov 2021 13:39:22 -0700
Subject: mm/vmalloc: don't allow VM_NO_GUARD on vmap()

The vmalloc guard pages are added on top of each allocation, thereby
isolating any two allocations from one another.  The top guard of the
lower allocation is the bottom guard guard of the higher allocation etc.

Therefore VM_NO_GUARD is dangerous; it breaks the basic premise of
isolating separate allocations.

There are only two in-tree users of this flag, neither of which use it
through the exported interface.  Ensure it stays this way.

Link: https://lkml.kernel.org/r/YUMfdA36fuyZ+/xt@hirez.programming.kicks-ass.net
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Hildenbrand <david@redhat.com>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Uladzislau Rezki <urezki@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmalloc.h | 2 +-
 mm/vmalloc.c            | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 0ed56fc10c11..6e022cc712e6 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -22,7 +22,7 @@ struct notifier_block;		/* in notifier.h */
 #define VM_USERMAP		0x00000008	/* suitable for remap_vmalloc_range */
 #define VM_DMA_COHERENT		0x00000010	/* dma_alloc_coherent */
 #define VM_UNINITIALIZED	0x00000020	/* vm_struct is not fully initialized */
-#define VM_NO_GUARD		0x00000040      /* don't add guard page */
+#define VM_NO_GUARD		0x00000040      /* ***DANGEROUS*** don't add guard page */
 #define VM_KASAN		0x00000080      /* has allocated kasan shadow memory */
 #define VM_FLUSH_RESET_PERMS	0x00000100	/* reset direct map and flush TLB on unmap, can't be freed in atomic context */
 #define VM_MAP_PUT_PAGES	0x00000200	/* put pages and free array in vfree */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f43c88fa08cf..4a11abd9e70f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2743,6 +2743,13 @@ void *vmap(struct page **pages, unsigned int count,
 
 	might_sleep();
 
+	/*
+	 * Your top guard is someone else's bottom guard. Not having a top
+	 * guard compromises someone else's mappings too.
+	 */
+	if (WARN_ON_ONCE(flags & VM_NO_GUARD))
+		flags &= ~VM_NO_GUARD;
+
 	if (count > totalram_pages())
 		return NULL;
 
-- 
cgit v1.2.3


From 3252b1d8309ea42bc6329d9341072ecf1c9505c0 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 5 Nov 2021 13:39:47 -0700
Subject: kasan: arm64: fix pcpu_page_first_chunk crash with KASAN_VMALLOC

With KASAN_VMALLOC and NEED_PER_CPU_PAGE_FIRST_CHUNK the kernel crashes:

  Unable to handle kernel paging request at virtual address ffff7000028f2000
  ...
  swapper pgtable: 64k pages, 48-bit VAs, pgdp=0000000042440000
  [ffff7000028f2000] pgd=000000063e7c0003, p4d=000000063e7c0003, pud=000000063e7c0003, pmd=000000063e7b0003, pte=0000000000000000
  Internal error: Oops: 96000007 [#1] PREEMPT SMP
  Modules linked in:
  CPU: 0 PID: 0 Comm: swapper Not tainted 5.13.0-rc4-00003-gc6e6e28f3f30-dirty #62
  Hardware name: linux,dummy-virt (DT)
  pstate: 200000c5 (nzCv daIF -PAN -UAO -TCO BTYPE=--)
  pc : kasan_check_range+0x90/0x1a0
  lr : memcpy+0x88/0xf4
  sp : ffff80001378fe20
  ...
  Call trace:
   kasan_check_range+0x90/0x1a0
   pcpu_page_first_chunk+0x3f0/0x568
   setup_per_cpu_areas+0xb8/0x184
   start_kernel+0x8c/0x328

The vm area used in vm_area_register_early() has no kasan shadow memory,
Let's add a new kasan_populate_early_vm_area_shadow() function to
populate the vm area shadow memory to fix the issue.

[wangkefeng.wang@huawei.com: fix redefinition of 'kasan_populate_early_vm_area_shadow']
  Link: https://lkml.kernel.org/r/20211011123211.3936196-1-wangkefeng.wang@huawei.com

Link: https://lkml.kernel.org/r/20210910053354.26721-4-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Marco Elver <elver@google.com>		[KASAN]
Acked-by: Andrey Konovalov <andreyknvl@gmail.com>	[KASAN]
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/mm/kasan_init.c | 16 ++++++++++++++++
 include/linux/kasan.h      |  6 ++++++
 mm/kasan/shadow.c          |  5 +++++
 mm/vmalloc.c               |  1 +
 4 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 61b52a92b8b6..5b996ca4d996 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -287,6 +287,22 @@ static void __init kasan_init_depth(void)
 	init_task.kasan_depth = 0;
 }
 
+#ifdef CONFIG_KASAN_VMALLOC
+void __init kasan_populate_early_vm_area_shadow(void *start, unsigned long size)
+{
+	unsigned long shadow_start, shadow_end;
+
+	if (!is_vmalloc_or_module_addr(start))
+		return;
+
+	shadow_start = (unsigned long)kasan_mem_to_shadow(start);
+	shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
+	shadow_end = (unsigned long)kasan_mem_to_shadow(start + size);
+	shadow_end = ALIGN(shadow_end, PAGE_SIZE);
+	kasan_map_populate(shadow_start, shadow_end, NUMA_NO_NODE);
+}
+#endif
+
 void __init kasan_init(void)
 {
 	kasan_init_shadow();
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 736d7b458996..65487a3b2a71 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -436,6 +436,8 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
 			   unsigned long free_region_start,
 			   unsigned long free_region_end);
 
+void kasan_populate_early_vm_area_shadow(void *start, unsigned long size);
+
 #else /* CONFIG_KASAN_VMALLOC */
 
 static inline int kasan_populate_vmalloc(unsigned long start,
@@ -453,6 +455,10 @@ static inline void kasan_release_vmalloc(unsigned long start,
 					 unsigned long free_region_start,
 					 unsigned long free_region_end) {}
 
+static inline void kasan_populate_early_vm_area_shadow(void *start,
+						       unsigned long size)
+{ }
+
 #endif /* CONFIG_KASAN_VMALLOC */
 
 #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 8d95ee52d019..4a4929b29a23 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -254,6 +254,11 @@ core_initcall(kasan_memhotplug_init);
 
 #ifdef CONFIG_KASAN_VMALLOC
 
+void __init __weak kasan_populate_early_vm_area_shadow(void *start,
+						       unsigned long size)
+{
+}
+
 static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
 				      void *unused)
 {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 74da45a8beec..75fa100f5424 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2291,6 +2291,7 @@ void __init vm_area_register_early(struct vm_struct *vm, size_t align)
 	vm->addr = (void *)addr;
 	vm->next = *p;
 	*p = vm;
+	kasan_populate_early_vm_area_shadow(vm->addr, vm->size);
 }
 
 static void vmap_init_free_space(void)
-- 
cgit v1.2.3


From c00b6b9610991c042ff4c3153daaa3ea8522c210 Mon Sep 17 00:00:00 2001
From: Chen Wandun <chenwandun@huawei.com>
Date: Fri, 5 Nov 2021 13:39:53 -0700
Subject: mm/vmalloc: introduce alloc_pages_bulk_array_mempolicy to accelerate
 memory allocation

Commit ffb29b1c255a ("mm/vmalloc: fix numa spreading for large hash
tables") can cause significant performance regressions in some
situations as Andrew mentioned in [1].  The main situation is vmalloc,
vmalloc will allocate pages with NUMA_NO_NODE by default, that will
result in alloc page one by one;

In order to solve this, __alloc_pages_bulk and mempolicy should be
considered at the same time.

1) If node is specified in memory allocation request, it will alloc all
   pages by __alloc_pages_bulk.

2) If interleaving allocate memory, it will cauculate how many pages
   should be allocated in each node, and use __alloc_pages_bulk to alloc
   pages in each node.

[1]: https://lore.kernel.org/lkml/CALvZod4G3SzP3kWxQYn0fj+VgG-G3yWXz=gz17+3N57ru1iajw@mail.gmail.com/t/#m750c8e3231206134293b089feaa090590afa0f60

[akpm@linux-foundation.org: coding style fixes]
[akpm@linux-foundation.org: make two functions static]
[akpm@linux-foundation.org: fix CONFIG_NUMA=n build]

Link: https://lkml.kernel.org/r/20211021080744.874701-3-chenwandun@huawei.com
Signed-off-by: Chen Wandun <chenwandun@huawei.com>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h |  4 +++
 mm/mempolicy.c      | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/vmalloc.c        | 20 ++++++++++---
 3 files changed, 102 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fbd4abc33f24..c1b262725dca 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -535,6 +535,10 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 				struct list_head *page_list,
 				struct page **page_array);
 
+unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+				unsigned long nr_pages,
+				struct page **page_array);
+
 /* Bulk allocate order-0 pages */
 static inline unsigned long
 alloc_pages_bulk_list(gfp_t gfp, unsigned long nr_pages, struct list_head *list)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d12e0608fced..ce722333d9f6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2196,6 +2196,88 @@ struct page *alloc_pages(gfp_t gfp, unsigned order)
 }
 EXPORT_SYMBOL(alloc_pages);
 
+static unsigned long alloc_pages_bulk_array_interleave(gfp_t gfp,
+		struct mempolicy *pol, unsigned long nr_pages,
+		struct page **page_array)
+{
+	int nodes;
+	unsigned long nr_pages_per_node;
+	int delta;
+	int i;
+	unsigned long nr_allocated;
+	unsigned long total_allocated = 0;
+
+	nodes = nodes_weight(pol->nodes);
+	nr_pages_per_node = nr_pages / nodes;
+	delta = nr_pages - nodes * nr_pages_per_node;
+
+	for (i = 0; i < nodes; i++) {
+		if (delta) {
+			nr_allocated = __alloc_pages_bulk(gfp,
+					interleave_nodes(pol), NULL,
+					nr_pages_per_node + 1, NULL,
+					page_array);
+			delta--;
+		} else {
+			nr_allocated = __alloc_pages_bulk(gfp,
+					interleave_nodes(pol), NULL,
+					nr_pages_per_node, NULL, page_array);
+		}
+
+		page_array += nr_allocated;
+		total_allocated += nr_allocated;
+	}
+
+	return total_allocated;
+}
+
+static unsigned long alloc_pages_bulk_array_preferred_many(gfp_t gfp, int nid,
+		struct mempolicy *pol, unsigned long nr_pages,
+		struct page **page_array)
+{
+	gfp_t preferred_gfp;
+	unsigned long nr_allocated = 0;
+
+	preferred_gfp = gfp | __GFP_NOWARN;
+	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+
+	nr_allocated  = __alloc_pages_bulk(preferred_gfp, nid, &pol->nodes,
+					   nr_pages, NULL, page_array);
+
+	if (nr_allocated < nr_pages)
+		nr_allocated += __alloc_pages_bulk(gfp, numa_node_id(), NULL,
+				nr_pages - nr_allocated, NULL,
+				page_array + nr_allocated);
+	return nr_allocated;
+}
+
+/* alloc pages bulk and mempolicy should be considered at the
+ * same time in some situation such as vmalloc.
+ *
+ * It can accelerate memory allocation especially interleaving
+ * allocate memory.
+ */
+unsigned long alloc_pages_bulk_array_mempolicy(gfp_t gfp,
+		unsigned long nr_pages, struct page **page_array)
+{
+	struct mempolicy *pol = &default_policy;
+
+	if (!in_interrupt() && !(gfp & __GFP_THISNODE))
+		pol = get_task_policy(current);
+
+	if (pol->mode == MPOL_INTERLEAVE)
+		return alloc_pages_bulk_array_interleave(gfp, pol,
+							 nr_pages, page_array);
+
+	if (pol->mode == MPOL_PREFERRED_MANY)
+		return alloc_pages_bulk_array_preferred_many(gfp,
+				numa_node_id(), pol, nr_pages, page_array);
+
+	return __alloc_pages_bulk(gfp, policy_node(gfp, pol, numa_node_id()),
+				  policy_nodemask(gfp, pol), nr_pages, NULL,
+				  page_array);
+}
+
 int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
 {
 	struct mempolicy *pol = mpol_dup(vma_policy(src));
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c56720136c45..d2a00ad4e1dd 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2843,7 +2843,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 	 * to fails, fallback to a single page allocator that is
 	 * more permissive.
 	 */
-	if (!order && nid != NUMA_NO_NODE) {
+	if (!order) {
 		while (nr_allocated < nr_pages) {
 			unsigned int nr, nr_pages_request;
 
@@ -2855,8 +2855,20 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			 */
 			nr_pages_request = min(100U, nr_pages - nr_allocated);
 
-			nr = alloc_pages_bulk_array_node(gfp, nid,
-				nr_pages_request, pages + nr_allocated);
+			/* memory allocation should consider mempolicy, we can't
+			 * wrongly use nearest node when nid == NUMA_NO_NODE,
+			 * otherwise memory may be allocated in only one node,
+			 * but mempolcy want to alloc memory by interleaving.
+			 */
+			if (IS_ENABLED(CONFIG_NUMA) && nid == NUMA_NO_NODE)
+				nr = alloc_pages_bulk_array_mempolicy(gfp,
+							nr_pages_request,
+							pages + nr_allocated);
+
+			else
+				nr = alloc_pages_bulk_array_node(gfp, nid,
+							nr_pages_request,
+							pages + nr_allocated);
 
 			nr_allocated += nr;
 			cond_resched();
@@ -2868,7 +2880,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
 			if (nr != nr_pages_request)
 				break;
 		}
-	} else if (order)
+	} else
 		/*
 		 * Compound pages required for remap_vmalloc_page if
 		 * high-order pages.
-- 
cgit v1.2.3


From 8ca1b5a49885f0c0c486544da46a9e0ac790831d Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Fri, 5 Nov 2021 13:40:34 -0700
Subject: mm/page_alloc: detect allocation forbidden by cpuset and bail out
 early

There was a report that starting an Ubuntu in docker while using cpuset
to bind it to movable nodes (a node only has movable zone, like a node
for hotplug or a Persistent Memory node in normal usage) will fail due
to memory allocation failure, and then OOM is involved and many other
innocent processes got killed.

It can be reproduced with command:

    $ docker run -it --rm --cpuset-mems 4 ubuntu:latest bash -c "grep Mems_allowed /proc/self/status"

(where node 4 is a movable node)

  runc:[2:INIT] invoked oom-killer: gfp_mask=0x500cc2(GFP_HIGHUSER|__GFP_ACCOUNT), order=0, oom_score_adj=0
  CPU: 8 PID: 8291 Comm: runc:[2:INIT] Tainted: G        W I E     5.8.2-0.g71b519a-default #1 openSUSE Tumbleweed (unreleased)
  Hardware name: Dell Inc. PowerEdge R640/0PHYDR, BIOS 2.6.4 04/09/2020
  Call Trace:
   dump_stack+0x6b/0x88
   dump_header+0x4a/0x1e2
   oom_kill_process.cold+0xb/0x10
   out_of_memory.part.0+0xaf/0x230
   out_of_memory+0x3d/0x80
   __alloc_pages_slowpath.constprop.0+0x954/0xa20
   __alloc_pages_nodemask+0x2d3/0x300
   pipe_write+0x322/0x590
   new_sync_write+0x196/0x1b0
   vfs_write+0x1c3/0x1f0
   ksys_write+0xa7/0xe0
   do_syscall_64+0x52/0xd0
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

  Mem-Info:
  active_anon:392832 inactive_anon:182 isolated_anon:0
   active_file:68130 inactive_file:151527 isolated_file:0
   unevictable:2701 dirty:0 writeback:7
   slab_reclaimable:51418 slab_unreclaimable:116300
   mapped:45825 shmem:735 pagetables:2540 bounce:0
   free:159849484 free_pcp:73 free_cma:0
  Node 4 active_anon:1448kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB isolated(anon):0kB isolated(file):0kB mapped:0kB dirty:0kB writeback:0kB shmem:0kB shmem_thp: 0kB shmem_pmdmapped: 0kB anon_thp: 0kB writeback_tmp:0kB all_unreclaimable? no
  Node 4 Movable free:130021408kB min:9140kB low:139160kB high:269180kB reserved_highatomic:0KB active_anon:1448kB inactive_anon:0kB active_file:0kB inactive_file:0kB unevictable:0kB writepending:0kB present:130023424kB managed:130023424kB mlocked:0kB kernel_stack:0kB pagetables:0kB bounce:0kB free_pcp:292kB local_pcp:84kB free_cma:0kB
  lowmem_reserve[]: 0 0 0 0 0
  Node 4 Movable: 1*4kB (M) 0*8kB 0*16kB 1*32kB (M) 0*64kB 0*128kB 1*256kB (M) 1*512kB (M) 1*1024kB (M) 0*2048kB 31743*4096kB (M) = 130021156kB

  oom-kill:constraint=CONSTRAINT_CPUSET,nodemask=(null),cpuset=docker-9976a269caec812c134fa317f27487ee36e1129beba7278a463dd53e5fb9997b.scope,mems_allowed=4,global_oom,task_memcg=/system.slice/containerd.service,task=containerd,pid=4100,uid=0
  Out of memory: Killed process 4100 (containerd) total-vm:4077036kB, anon-rss:51184kB, file-rss:26016kB, shmem-rss:0kB, UID:0 pgtables:676kB oom_score_adj:0
  oom_reaper: reaped process 8248 (docker), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
  oom_reaper: reaped process 2054 (node_exporter), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
  oom_reaper: reaped process 1452 (systemd-journal), now anon-rss:0kB, file-rss:8564kB, shmem-rss:4kB
  oom_reaper: reaped process 2146 (munin-node), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB
  oom_reaper: reaped process 8291 (runc:[2:INIT]), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB

The reason is that in this case, the target cpuset nodes only have
movable zone, while the creation of an OS in docker sometimes needs to
allocate memory in non-movable zones (dma/dma32/normal) like
GFP_HIGHUSER, and the cpuset limit forbids the allocation, then
out-of-memory killing is involved even when normal nodes and movable
nodes both have many free memory.

The OOM killer cannot help to resolve the situation as there is no
usable memory for the request in the cpuset scope.  The only reasonable
measure to take is to fail the allocation right away and have the caller
to deal with it.

So add a check for cases like this in the slowpath of allocation, and
bail out early returning NULL for the allocation.

As page allocation is one of the hottest path in kernel, this check will
hurt all users with sane cpuset configuration, add a static branch check
and detect the abnormal config in cpuset memory binding setup so that
the extra check cost in page allocation is not paid by everyone.

[thanks to Micho Hocko and David Rientjes for suggesting not handling
 it inside OOM code, adding cpuset check, refining comments]

Link: https://lkml.kernel.org/r/1632481657-68112-1-git-send-email-feng.tang@intel.com
Signed-off-by: Feng Tang <feng.tang@intel.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan.x@bytedance.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpuset.h | 17 +++++++++++++++++
 include/linux/mmzone.h | 22 ++++++++++++++++++++++
 kernel/cgroup/cpuset.c | 23 +++++++++++++++++++++++
 mm/page_alloc.c        | 13 +++++++++++++
 4 files changed, 75 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index d2b9c41c8edf..d58e0476ee8e 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -34,6 +34,8 @@
  */
 extern struct static_key_false cpusets_pre_enable_key;
 extern struct static_key_false cpusets_enabled_key;
+extern struct static_key_false cpusets_insane_config_key;
+
 static inline bool cpusets_enabled(void)
 {
 	return static_branch_unlikely(&cpusets_enabled_key);
@@ -51,6 +53,19 @@ static inline void cpuset_dec(void)
 	static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
 }
 
+/*
+ * This will get enabled whenever a cpuset configuration is considered
+ * unsupportable in general. E.g. movable only node which cannot satisfy
+ * any non movable allocations (see update_nodemask). Page allocator
+ * needs to make additional checks for those configurations and this
+ * check is meant to guard those checks without any overhead for sane
+ * configurations.
+ */
+static inline bool cpusets_insane_config(void)
+{
+	return static_branch_unlikely(&cpusets_insane_config_key);
+}
+
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
@@ -167,6 +182,8 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 
 static inline bool cpusets_enabled(void) { return false; }
 
+static inline bool cpusets_insane_config(void) { return false; }
+
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 832aa49d0d8e..fb36a29e3aae 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1220,6 +1220,28 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
 #define for_each_zone_zonelist(zone, z, zlist, highidx) \
 	for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, NULL)
 
+/* Whether the 'nodes' are all movable nodes */
+static inline bool movable_only_nodes(nodemask_t *nodes)
+{
+	struct zonelist *zonelist;
+	struct zoneref *z;
+	int nid;
+
+	if (nodes_empty(*nodes))
+		return false;
+
+	/*
+	 * We can chose arbitrary node from the nodemask to get a
+	 * zonelist as they are interlinked. We just need to find
+	 * at least one zone that can satisfy kernel allocations.
+	 */
+	nid = first_node(*nodes);
+	zonelist = &NODE_DATA(nid)->node_zonelists[ZONELIST_FALLBACK];
+	z = first_zones_zonelist(zonelist, ZONE_NORMAL,	nodes);
+	return (!z->zone) ? true : false;
+}
+
+
 #ifdef CONFIG_SPARSEMEM
 #include <asm/sparsemem.h>
 #endif
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 2a9695ccb65f..d0e163a02099 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -69,6 +69,13 @@
 DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
 
+/*
+ * There could be abnormal cpuset configurations for cpu or memory
+ * node binding, add this key to provide a quick low-cost judgement
+ * of the situation.
+ */
+DEFINE_STATIC_KEY_FALSE(cpusets_insane_config_key);
+
 /* See "Frequency meter" comments, below. */
 
 struct fmeter {
@@ -372,6 +379,17 @@ static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
 
 static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);
 
+static inline void check_insane_mems_config(nodemask_t *nodes)
+{
+	if (!cpusets_insane_config() &&
+		movable_only_nodes(nodes)) {
+		static_branch_enable(&cpusets_insane_config_key);
+		pr_info("Unsupported (movable nodes only) cpuset configuration detected (nmask=%*pbl)!\n"
+			"Cpuset allocations might fail even with a lot of memory available.\n",
+			nodemask_pr_args(nodes));
+	}
+}
+
 /*
  * Cgroup v2 behavior is used on the "cpus" and "mems" control files when
  * on default hierarchy or when the cpuset_v2_mode flag is set by mounting
@@ -1870,6 +1888,8 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 	if (retval < 0)
 		goto done;
 
+	check_insane_mems_config(&trialcs->mems_allowed);
+
 	spin_lock_irq(&callback_lock);
 	cs->mems_allowed = trialcs->mems_allowed;
 	spin_unlock_irq(&callback_lock);
@@ -3173,6 +3193,9 @@ update_tasks:
 	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
 	mems_updated = !nodes_equal(new_mems, cs->effective_mems);
 
+	if (mems_updated)
+		check_insane_mems_config(&new_mems);
+
 	if (is_in_v2_mode())
 		hotplug_update_tasks(cs, &new_cpus, &new_mems,
 				     cpus_updated, mems_updated);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e891560e0a80..e493d7da2614 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4910,6 +4910,19 @@ retry_cpuset:
 	if (!ac->preferred_zoneref->zone)
 		goto nopage;
 
+	/*
+	 * Check for insane configurations where the cpuset doesn't contain
+	 * any suitable zone to satisfy the request - e.g. non-movable
+	 * GFP_HIGHUSER allocations from MOVABLE nodes only.
+	 */
+	if (cpusets_insane_config() && (gfp_mask & __GFP_HARDWALL)) {
+		struct zoneref *z = first_zones_zonelist(ac->zonelist,
+					ac->highest_zoneidx,
+					&cpuset_current_mems_allowed);
+		if (!z->zone)
+			goto nopage;
+	}
+
 	if (alloc_flags & ALLOC_KSWAPD)
 		wake_all_kswapds(order, gfp_mask, ac);
 
-- 
cgit v1.2.3


From d2635f2012a44e3d469ab9a4022162dbe0e53f21 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 5 Nov 2021 13:40:40 -0700
Subject: mm: create a new system state and fix core_kernel_text()

core_kernel_text() considers that until system_state in at least
SYSTEM_RUNNING, init memory is valid.

But init memory is freed a few lines before setting SYSTEM_RUNNING, so
we have a small period of time when core_kernel_text() is wrong.

Create an intermediate system state called SYSTEM_FREEING_INIT that is
set before starting freeing init memory, and use it in
core_kernel_text() to report init memory invalid earlier.

Link: https://lkml.kernel.org/r/9ecfdee7dd4d741d172cb93ff1d87f1c58127c9a.1633001016.git.christophe.leroy@csgroup.eu
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kernel.h | 1 +
 init/main.c            | 2 ++
 kernel/extable.c       | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2776423a587e..471bc0593679 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -248,6 +248,7 @@ extern bool early_boot_irqs_disabled;
 extern enum system_states {
 	SYSTEM_BOOTING,
 	SYSTEM_SCHEDULING,
+	SYSTEM_FREEING_INITMEM,
 	SYSTEM_RUNNING,
 	SYSTEM_HALT,
 	SYSTEM_POWER_OFF,
diff --git a/init/main.c b/init/main.c
index 3c4054a95545..767ee2672176 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1506,6 +1506,8 @@ static int __ref kernel_init(void *unused)
 	kernel_init_freeable();
 	/* need to finish all async __init code before freeing the memory */
 	async_synchronize_full();
+
+	system_state = SYSTEM_FREEING_INITMEM;
 	kprobe_free_init_mem();
 	ftrace_free_init_mem();
 	kgdb_free_init_mem();
diff --git a/kernel/extable.c b/kernel/extable.c
index b0ea5eb0c3b4..290661f68e6b 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -76,7 +76,7 @@ int notrace core_kernel_text(unsigned long addr)
 	    addr < (unsigned long)_etext)
 		return 1;
 
-	if (system_state < SYSTEM_RUNNING &&
+	if (system_state < SYSTEM_FREEING_INITMEM &&
 	    init_kernel_text(addr))
 		return 1;
 	return 0;
-- 
cgit v1.2.3


From 477d01fce8dacb41cae9363136ea56812e8baa59 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 5 Nov 2021 13:40:58 -0700
Subject: mm: fix data race in PagePoisoned()

PagePoisoned() accesses page->flags which can be updated concurrently:

  | BUG: KCSAN: data-race in next_uptodate_page / unlock_page
  |
  | write (marked) to 0xffffea00050f37c0 of 8 bytes by task 1872 on cpu 1:
  |  instrument_atomic_write           include/linux/instrumented.h:87 [inline]
  |  clear_bit_unlock_is_negative_byte include/asm-generic/bitops/instrumented-lock.h:74 [inline]
  |  unlock_page+0x102/0x1b0           mm/filemap.c:1465
  |  filemap_map_pages+0x6c6/0x890     mm/filemap.c:3057
  |  ...
  | read to 0xffffea00050f37c0 of 8 bytes by task 1873 on cpu 0:
  |  PagePoisoned                   include/linux/page-flags.h:204 [inline]
  |  PageReadahead                  include/linux/page-flags.h:382 [inline]
  |  next_uptodate_page+0x456/0x830 mm/filemap.c:2975
  |  ...
  | CPU: 0 PID: 1873 Comm: systemd-udevd Not tainted 5.11.0-rc4-00001-gf9ce0be71d1f #1

To avoid the compiler tearing or otherwise optimizing the access, use
READ_ONCE() to access flags.

Link: https://lore.kernel.org/all/20210826144157.GA26950@xsang-OptiPlex-9020/
Link: https://lkml.kernel.org/r/20210913113542.2658064-1-elver@google.com
Reported-by: kernel test robot <oliver.sang@intel.com>
Signed-off-by: Marco Elver <elver@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Marco Elver <elver@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index fbfd3fad48f2..dd80cf0bb579 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -215,7 +215,7 @@ static __always_inline int PageCompound(struct page *page)
 #define	PAGE_POISON_PATTERN	-1l
 static inline int PagePoisoned(const struct page *page)
 {
-	return page->flags == PAGE_POISON_PATTERN;
+	return READ_ONCE(page->flags) == PAGE_POISON_PATTERN;
 }
 
 #ifdef CONFIG_DEBUG_VM
-- 
cgit v1.2.3


From 73c54763482b841d9c14bad87eec98f80f700e0b Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 5 Nov 2021 13:41:17 -0700
Subject: mm/hugetlb: drop __unmap_hugepage_range definition from hugetlb.h

Remove __unmap_hugepage_range() from the header file, because it is only
used in hugetlb.c.

Link: https://lkml.kernel.org/r/20210917165108.9341-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 10 ----------
 mm/hugetlb.c            |  6 +++---
 2 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1faebe1cd0ed..3cbf60464398 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -143,9 +143,6 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			  struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end,
 			  struct page *ref_page);
-void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
-				unsigned long start, unsigned long end,
-				struct page *ref_page);
 void hugetlb_report_meminfo(struct seq_file *);
 int hugetlb_report_node_meminfo(char *buf, int len, int nid);
 void hugetlb_show_meminfo(void);
@@ -385,13 +382,6 @@ static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 	BUG();
 }
 
-static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
-			struct vm_area_struct *vma, unsigned long start,
-			unsigned long end, struct page *ref_page)
-{
-	BUG();
-}
-
 static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
 			struct vm_area_struct *vma, unsigned long address,
 			unsigned int flags)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 95dc7b83381f..d394d9545c4e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4426,9 +4426,9 @@ again:
 	return ret;
 }
 
-void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
-			    unsigned long start, unsigned long end,
-			    struct page *ref_page)
+static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+				   unsigned long start, unsigned long end,
+				   struct page *ref_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
-- 
cgit v1.2.3


From 79dfc695525f60c8410015362b8aa4eb5c57210c Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 5 Nov 2021 13:41:20 -0700
Subject: hugetlb: add demote hugetlb page sysfs interfaces

Patch series "hugetlb: add demote/split page functionality", v4.

The concurrent use of multiple hugetlb page sizes on a single system is
becoming more common.  One of the reasons is better TLB support for
gigantic page sizes on x86 hardware.  In addition, hugetlb pages are
being used to back VMs in hosting environments.

When using hugetlb pages to back VMs, it is often desirable to
preallocate hugetlb pools.  This avoids the delay and uncertainty of
allocating hugetlb pages at VM startup.  In addition, preallocating huge
pages minimizes the issue of memory fragmentation that increases the
longer the system is up and running.

In such environments, a combination of larger and smaller hugetlb pages
are preallocated in anticipation of backing VMs of various sizes.  Over
time, the preallocated pool of smaller hugetlb pages may become depleted
while larger hugetlb pages still remain.  In such situations, it is
desirable to convert larger hugetlb pages to smaller hugetlb pages.

Converting larger to smaller hugetlb pages can be accomplished today by
first freeing the larger page to the buddy allocator and then allocating
the smaller pages.  For example, to convert 50 GB pages on x86:

  gb_pages=`cat .../hugepages-1048576kB/nr_hugepages`
  m2_pages=`cat .../hugepages-2048kB/nr_hugepages`
  echo $(($gb_pages - 50)) > .../hugepages-1048576kB/nr_hugepages
  echo $(($m2_pages + 25600)) > .../hugepages-2048kB/nr_hugepages

On an idle system this operation is fairly reliable and results are as
expected.  The number of 2MB pages is increased as expected and the time
of the operation is a second or two.

However, when there is activity on the system the following issues
arise:

1) This process can take quite some time, especially if allocation of
   the smaller pages is not immediate and requires migration/compaction.

2) There is no guarantee that the total size of smaller pages allocated
   will match the size of the larger page which was freed. This is
   because the area freed by the larger page could quickly be
   fragmented.

In a test environment with a load that continually fills the page cache
with clean pages, results such as the following can be observed:

  Unexpected number of 2MB pages allocated: Expected 25600, have 19944
  real    0m42.092s
  user    0m0.008s
  sys     0m41.467s

To address these issues, introduce the concept of hugetlb page demotion.
Demotion provides a means of 'in place' splitting of a hugetlb page to
pages of a smaller size.  This avoids freeing pages to buddy and then
trying to allocate from buddy.

Page demotion is controlled via sysfs files that reside in the per-hugetlb
page size and per node directories.

 - demote_size
        Target page size for demotion, a smaller huge page size. File
        can be written to chose a smaller huge page size if multiple are
        available.

 - demote
        Writable number of hugetlb pages to be demoted

To demote 50 GB huge pages, one would:

  cat .../hugepages-1048576kB/free_hugepages   /* optional, verify free pages */
  cat .../hugepages-1048576kB/demote_size      /* optional, verify target size */
  echo 50 > .../hugepages-1048576kB/demote

Only hugetlb pages which are free at the time of the request can be
demoted.  Demotion does not add to the complexity of surplus pages and
honors reserved huge pages.  Therefore, when a value is written to the
sysfs demote file, that value is only the maximum number of pages which
will be demoted.  It is possible fewer will actually be demoted.  The
recently introduced per-hstate mutex is used to synchronize demote
operations with other operations that modify hugetlb pools.

Real world use cases
--------------------
The above scenario describes a real world use case where hugetlb pages
are used to back VMs on x86.  Both issues of long allocation times and
not necessarily getting the expected number of smaller huge pages after
a free and allocate cycle have been experienced.  The occurrence of
these issues is dependent on other activity within the host and can not
be predicted.

This patch (of 5):

Two new sysfs files are added to demote hugtlb pages.  These files are
both per-hugetlb page size and per node.  Files are:

  demote_size - The size in Kb that pages are demoted to. (read-write)
  demote - The number of huge pages to demote. (write-only)

By default, demote_size is the next smallest huge page size.  Valid huge
page sizes less than huge page size may be written to this file.  When
huge pages are demoted, they are demoted to this size.

Writing a value to demote will result in an attempt to demote that
number of hugetlb pages to an appropriate number of demote_size pages.

NOTE: Demote interfaces are only provided for huge page sizes if there
is a smaller target demote huge page size.  For example, on x86 1GB huge
pages will have demote interfaces.  2MB huge pages will not have demote
interfaces.

This patch does not provide full demote functionality.  It only provides
the sysfs interfaces.

It also provides documentation for the new interfaces.

[mike.kravetz@oracle.com: n_mask initialization does not need to be protected by the mutex]
  Link: https://lkml.kernel.org/r/0530e4ef-2492-5186-f919-5db68edea654@oracle.com

Link: https://lkml.kernel.org/r/20211007181918.136982-2-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: David Hildenbrand <david@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: David Rientjes <rientjes@google.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Nghia Le <nghialm78@gmail.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/mm/hugetlbpage.rst |  30 +++++-
 include/linux/hugetlb.h                      |   1 +
 mm/hugetlb.c                                 | 155 ++++++++++++++++++++++++++-
 3 files changed, 183 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst
index 8abaeb144e44..bb90de3885d1 100644
--- a/Documentation/admin-guide/mm/hugetlbpage.rst
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -234,8 +234,12 @@ will exist, of the form::
 
 	hugepages-${size}kB
 
-Inside each of these directories, the same set of files will exist::
+Inside each of these directories, the set of files contained in ``/proc``
+will exist.  In addition, two additional interfaces for demoting huge
+pages may exist::
 
+        demote
+        demote_size
 	nr_hugepages
 	nr_hugepages_mempolicy
 	nr_overcommit_hugepages
@@ -243,7 +247,29 @@ Inside each of these directories, the same set of files will exist::
 	resv_hugepages
 	surplus_hugepages
 
-which function as described above for the default huge page-sized case.
+The demote interfaces provide the ability to split a huge page into
+smaller huge pages.  For example, the x86 architecture supports both
+1GB and 2MB huge pages sizes.  A 1GB huge page can be split into 512
+2MB huge pages.  Demote interfaces are not available for the smallest
+huge page size.  The demote interfaces are:
+
+demote_size
+        is the size of demoted pages.  When a page is demoted a corresponding
+        number of huge pages of demote_size will be created.  By default,
+        demote_size is set to the next smaller huge page size.  If there are
+        multiple smaller huge page sizes, demote_size can be set to any of
+        these smaller sizes.  Only huge page sizes less than the current huge
+        pages size are allowed.
+
+demote
+        is used to demote a number of huge pages.  A user with root privileges
+        can write to this file.  It may not be possible to demote the
+        requested number of huge pages.  To determine how many pages were
+        actually demoted, compare the value of nr_hugepages before and after
+        writing to the demote interface.  demote is a write only interface.
+
+The interfaces which are the same as in ``/proc`` (all except demote and
+demote_size) function as described above for the default huge page-sized case.
 
 .. _mem_policy_and_hp_alloc:
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3cbf60464398..2bddd6c38204 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -586,6 +586,7 @@ struct hstate {
 	int next_nid_to_alloc;
 	int next_nid_to_free;
 	unsigned int order;
+	unsigned int demote_order;
 	unsigned long mask;
 	unsigned long max_huge_pages;
 	unsigned long nr_huge_pages;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d394d9545c4e..1a18ff2f0001 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2986,7 +2986,7 @@ free:
 
 static void __init hugetlb_init_hstates(void)
 {
-	struct hstate *h;
+	struct hstate *h, *h2;
 
 	for_each_hstate(h) {
 		if (minimum_order > huge_page_order(h))
@@ -2995,6 +2995,22 @@ static void __init hugetlb_init_hstates(void)
 		/* oversize hugepages were init'ed in early boot */
 		if (!hstate_is_gigantic(h))
 			hugetlb_hstate_alloc_pages(h);
+
+		/*
+		 * Set demote order for each hstate.  Note that
+		 * h->demote_order is initially 0.
+		 * - We can not demote gigantic pages if runtime freeing
+		 *   is not supported, so skip this.
+		 */
+		if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
+			continue;
+		for_each_hstate(h2) {
+			if (h2 == h)
+				continue;
+			if (h2->order < h->order &&
+			    h2->order > h->demote_order)
+				h->demote_order = h2->order;
+		}
 	}
 	VM_BUG_ON(minimum_order == UINT_MAX);
 }
@@ -3235,9 +3251,31 @@ out:
 	return 0;
 }
 
+static int demote_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+	__must_hold(&hugetlb_lock)
+{
+	int rc = 0;
+
+	lockdep_assert_held(&hugetlb_lock);
+
+	/* We should never get here if no demote order */
+	if (!h->demote_order) {
+		pr_warn("HugeTLB: NULL demote order passed to demote_pool_huge_page.\n");
+		return -EINVAL;		/* internal error */
+	}
+
+	/*
+	 * TODO - demote fucntionality will be added in subsequent patch
+	 */
+	return rc;
+}
+
 #define HSTATE_ATTR_RO(_name) \
 	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 
+#define HSTATE_ATTR_WO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_WO(_name)
+
 #define HSTATE_ATTR(_name) \
 	static struct kobj_attribute _name##_attr = \
 		__ATTR(_name, 0644, _name##_show, _name##_store)
@@ -3433,6 +3471,105 @@ static ssize_t surplus_hugepages_show(struct kobject *kobj,
 }
 HSTATE_ATTR_RO(surplus_hugepages);
 
+static ssize_t demote_store(struct kobject *kobj,
+	       struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long nr_demote;
+	unsigned long nr_available;
+	nodemask_t nodes_allowed, *n_mask;
+	struct hstate *h;
+	int err = 0;
+	int nid;
+
+	err = kstrtoul(buf, 10, &nr_demote);
+	if (err)
+		return err;
+	h = kobj_to_hstate(kobj, &nid);
+
+	if (nid != NUMA_NO_NODE) {
+		init_nodemask_of_node(&nodes_allowed, nid);
+		n_mask = &nodes_allowed;
+	} else {
+		n_mask = &node_states[N_MEMORY];
+	}
+
+	/* Synchronize with other sysfs operations modifying huge pages */
+	mutex_lock(&h->resize_lock);
+	spin_lock_irq(&hugetlb_lock);
+
+	while (nr_demote) {
+		/*
+		 * Check for available pages to demote each time thorough the
+		 * loop as demote_pool_huge_page will drop hugetlb_lock.
+		 *
+		 * NOTE: demote_pool_huge_page does not yet drop hugetlb_lock
+		 * but will when full demote functionality is added in a later
+		 * patch.
+		 */
+		if (nid != NUMA_NO_NODE)
+			nr_available = h->free_huge_pages_node[nid];
+		else
+			nr_available = h->free_huge_pages;
+		nr_available -= h->resv_huge_pages;
+		if (!nr_available)
+			break;
+
+		err = demote_pool_huge_page(h, n_mask);
+		if (err)
+			break;
+
+		nr_demote--;
+	}
+
+	spin_unlock_irq(&hugetlb_lock);
+	mutex_unlock(&h->resize_lock);
+
+	if (err)
+		return err;
+	return len;
+}
+HSTATE_ATTR_WO(demote);
+
+static ssize_t demote_size_show(struct kobject *kobj,
+					struct kobj_attribute *attr, char *buf)
+{
+	int nid;
+	struct hstate *h = kobj_to_hstate(kobj, &nid);
+	unsigned long demote_size = (PAGE_SIZE << h->demote_order) / SZ_1K;
+
+	return sysfs_emit(buf, "%lukB\n", demote_size);
+}
+
+static ssize_t demote_size_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct hstate *h, *demote_hstate;
+	unsigned long demote_size;
+	unsigned int demote_order;
+	int nid;
+
+	demote_size = (unsigned long)memparse(buf, NULL);
+
+	demote_hstate = size_to_hstate(demote_size);
+	if (!demote_hstate)
+		return -EINVAL;
+	demote_order = demote_hstate->order;
+
+	/* demote order must be smaller than hstate order */
+	h = kobj_to_hstate(kobj, &nid);
+	if (demote_order >= h->order)
+		return -EINVAL;
+
+	/* resize_lock synchronizes access to demote size and writes */
+	mutex_lock(&h->resize_lock);
+	h->demote_order = demote_order;
+	mutex_unlock(&h->resize_lock);
+
+	return count;
+}
+HSTATE_ATTR(demote_size);
+
 static struct attribute *hstate_attrs[] = {
 	&nr_hugepages_attr.attr,
 	&nr_overcommit_hugepages_attr.attr,
@@ -3449,6 +3586,16 @@ static const struct attribute_group hstate_attr_group = {
 	.attrs = hstate_attrs,
 };
 
+static struct attribute *hstate_demote_attrs[] = {
+	&demote_size_attr.attr,
+	&demote_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group hstate_demote_attr_group = {
+	.attrs = hstate_demote_attrs,
+};
+
 static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
 				    struct kobject **hstate_kobjs,
 				    const struct attribute_group *hstate_attr_group)
@@ -3466,6 +3613,12 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
 		hstate_kobjs[hi] = NULL;
 	}
 
+	if (h->demote_order) {
+		if (sysfs_create_group(hstate_kobjs[hi],
+					&hstate_demote_attr_group))
+			pr_warn("HugeTLB unable to create demote interfaces for %s\n", h->name);
+	}
+
 	return retval;
 }
 
-- 
cgit v1.2.3


From 9871e2ded6c1ff61a59988d7a0e975f012105d52 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 5 Nov 2021 13:41:23 -0700
Subject: mm/cma: add cma_pages_valid to determine if pages are in CMA

Add new interface cma_pages_valid() which indicates if the specified
pages are part of a CMA region.  This interface will be used in a
subsequent patch by hugetlb code.

In order to keep the same amount of DEBUG information, a pr_debug() call
was added to cma_pages_valid().  In the case where the page passed to
cma_release is not in cma region, the debug message will be printed from
cma_pages_valid as opposed to cma_release.

Link: https://lkml.kernel.org/r/20211007181918.136982-3-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Nghia Le <nghialm78@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cma.h |  1 +
 mm/cma.c            | 24 ++++++++++++++++++++----
 2 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 53fd8c3cdbd0..bd801023504b 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -46,6 +46,7 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
+extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
 
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
diff --git a/mm/cma.c b/mm/cma.c
index 995e15480937..11152c3fb23c 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -524,6 +524,25 @@ out:
 	return page;
 }
 
+bool cma_pages_valid(struct cma *cma, const struct page *pages,
+		     unsigned long count)
+{
+	unsigned long pfn;
+
+	if (!cma || !pages)
+		return false;
+
+	pfn = page_to_pfn(pages);
+
+	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) {
+		pr_debug("%s(page %p, count %lu)\n", __func__,
+						(void *)pages, count);
+		return false;
+	}
+
+	return true;
+}
+
 /**
  * cma_release() - release allocated pages
  * @cma:   Contiguous memory region for which the allocation is performed.
@@ -539,16 +558,13 @@ bool cma_release(struct cma *cma, const struct page *pages,
 {
 	unsigned long pfn;
 
-	if (!cma || !pages)
+	if (!cma_pages_valid(cma, pages, count))
 		return false;
 
 	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
 
 	pfn = page_to_pfn(pages);
 
-	if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count)
-		return false;
-
 	VM_BUG_ON(pfn + count > cma->base_pfn + cma->count);
 
 	free_contig_range(pfn, count);
-- 
cgit v1.2.3


From bd3400ea173fb611cdf2030d03620185ff6c0b0e Mon Sep 17 00:00:00 2001
From: Liangcai Fan <liangcaifan19@gmail.com>
Date: Fri, 5 Nov 2021 13:41:36 -0700
Subject: mm: khugepaged: recalculate min_free_kbytes after stopping khugepaged

When initializing transparent huge pages, min_free_kbytes would be
calculated according to what khugepaged expected.

So when transparent huge pages get disabled, min_free_kbytes should be
recalculated instead of the higher value set by khugepaged.

Link: https://lkml.kernel.org/r/1633937809-16558-1-git-send-email-liangcaifan19@gmail.com
Signed-off-by: Liangcai Fan <liangcaifan19@gmail.com>
Signed-off-by: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  1 +
 mm/khugepaged.c    | 10 ++++++++--
 mm/page_alloc.c    |  7 ++++++-
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6e29deb1e0d4..7e37c726b9db 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2453,6 +2453,7 @@ extern void memmap_init_range(unsigned long, int, unsigned long,
 		unsigned long, unsigned long, enum meminit_context,
 		struct vmem_altmap *, int migratetype);
 extern void setup_per_zone_wmarks(void);
+extern void calculate_min_free_kbytes(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
 extern void __init mmap_init(void);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 8a8b3aa92937..629961966854 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2299,6 +2299,11 @@ static void set_recommended_min_free_kbytes(void)
 	int nr_zones = 0;
 	unsigned long recommended_min;
 
+	if (!khugepaged_enabled()) {
+		calculate_min_free_kbytes();
+		goto update_wmarks;
+	}
+
 	for_each_populated_zone(zone) {
 		/*
 		 * We don't need to worry about fragmentation of
@@ -2334,6 +2339,8 @@ static void set_recommended_min_free_kbytes(void)
 
 		min_free_kbytes = recommended_min;
 	}
+
+update_wmarks:
 	setup_per_zone_wmarks();
 }
 
@@ -2355,12 +2362,11 @@ int start_stop_khugepaged(void)
 
 		if (!list_empty(&khugepaged_scan.mm_head))
 			wake_up_interruptible(&khugepaged_wait);
-
-		set_recommended_min_free_kbytes();
 	} else if (khugepaged_thread) {
 		kthread_stop(khugepaged_thread);
 		khugepaged_thread = NULL;
 	}
+	set_recommended_min_free_kbytes();
 fail:
 	mutex_unlock(&khugepaged_mutex);
 	return err;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a7035467bf6d..09a0f1c5d5d2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8469,7 +8469,7 @@ void setup_per_zone_wmarks(void)
  * 8192MB:	11584k
  * 16384MB:	16384k
  */
-int __meminit init_per_zone_wmark_min(void)
+void calculate_min_free_kbytes(void)
 {
 	unsigned long lowmem_kbytes;
 	int new_min_free_kbytes;
@@ -8483,6 +8483,11 @@ int __meminit init_per_zone_wmark_min(void)
 		pr_warn("min_free_kbytes is not updated to %d because user defined value %d is preferred\n",
 				new_min_free_kbytes, user_min_free_kbytes);
 
+}
+
+int __meminit init_per_zone_wmark_min(void)
+{
+	calculate_min_free_kbytes();
 	setup_per_zone_wmarks();
 	refresh_zone_stat_thresholds();
 	setup_per_zone_lowmem_reserve();
-- 
cgit v1.2.3


From 550a7d60bd5e35a56942dba6d8a26752beb26c9f Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Fri, 5 Nov 2021 13:41:40 -0700
Subject: mm, hugepages: add mremap() support for hugepage backed vma

Support mremap() for hugepage backed vma segment by simply repositioning
page table entries.  The page table entries are repositioned to the new
virtual address on mremap().

Hugetlb mremap() support is of course generic; my motivating use case is
a library (hugepage_text), which reloads the ELF text of executables in
hugepages.  This significantly increases the execution performance of
said executables.

Restrict the mremap operation on hugepages to up to the size of the
original mapping as the underlying hugetlb reservation is not yet
capable of handling remapping to a larger size.

During the mremap() operation we detect pmd_share'd mappings and we
unshare those during the mremap().  On access and fault the sharing is
established again.

Link: https://lkml.kernel.org/r/20211013195825.3058275-1-almasrymina@google.com
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Ken Chen <kenchen@google.com>
Cc: Chris Kennelly <ckennelly@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Kirill Shutemov <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h |  19 +++++++++
 mm/hugetlb.c            | 111 +++++++++++++++++++++++++++++++++++++++++++++---
 mm/mremap.c             |  36 ++++++++++++++--
 3 files changed, 157 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 2bddd6c38204..c0a20781b28e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -124,6 +124,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
+void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
@@ -132,6 +133,10 @@ int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *,
 int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *,
 		loff_t *);
 
+int move_hugetlb_page_tables(struct vm_area_struct *vma,
+			     struct vm_area_struct *new_vma,
+			     unsigned long old_addr, unsigned long new_addr,
+			     unsigned long len);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			 struct page **, struct vm_area_struct **,
@@ -215,6 +220,10 @@ static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
 }
 
+static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+}
+
 static inline unsigned long hugetlb_total_pages(void)
 {
 	return 0;
@@ -262,6 +271,16 @@ static inline int copy_hugetlb_page_range(struct mm_struct *dst,
 	return 0;
 }
 
+static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
+					   struct vm_area_struct *new_vma,
+					   unsigned long old_addr,
+					   unsigned long new_addr,
+					   unsigned long len)
+{
+	BUG();
+	return 0;
+}
+
 static inline void hugetlb_report_meminfo(struct seq_file *m)
 {
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1835d548fecc..8028fb7677eb 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1014,6 +1014,35 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 		vma->vm_private_data = (void *)0;
 }
 
+/*
+ * Reset and decrement one ref on hugepage private reservation.
+ * Called with mm->mmap_sem writer semaphore held.
+ * This function should be only used by move_vma() and operate on
+ * same sized vma. It should never come here with last ref on the
+ * reservation.
+ */
+void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
+{
+	/*
+	 * Clear the old hugetlb private page reservation.
+	 * It has already been transferred to new_vma.
+	 *
+	 * During a mremap() operation of a hugetlb vma we call move_vma()
+	 * which copies vma into new_vma and unmaps vma. After the copy
+	 * operation both new_vma and vma share a reference to the resv_map
+	 * struct, and at that point vma is about to be unmapped. We don't
+	 * want to return the reservation to the pool at unmap of vma because
+	 * the reservation still lives on in new_vma, so simply decrement the
+	 * ref here and remove the resv_map reference from this vma.
+	 */
+	struct resv_map *reservations = vma_resv_map(vma);
+
+	if (reservations && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+		kref_put(&reservations->refs, resv_map_release);
+
+	reset_vma_resv_huge_pages(vma);
+}
+
 /* Returns true if the VMA has associated reserve pages */
 static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
 {
@@ -4718,6 +4747,82 @@ again:
 	return ret;
 }
 
+static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
+			  unsigned long new_addr, pte_t *src_pte)
+{
+	struct hstate *h = hstate_vma(vma);
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t *dst_pte, pte;
+	spinlock_t *src_ptl, *dst_ptl;
+
+	dst_pte = huge_pte_offset(mm, new_addr, huge_page_size(h));
+	dst_ptl = huge_pte_lock(h, mm, dst_pte);
+	src_ptl = huge_pte_lockptr(h, mm, src_pte);
+
+	/*
+	 * We don't have to worry about the ordering of src and dst ptlocks
+	 * because exclusive mmap_sem (or the i_mmap_lock) prevents deadlock.
+	 */
+	if (src_ptl != dst_ptl)
+		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+
+	pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
+	set_huge_pte_at(mm, new_addr, dst_pte, pte);
+
+	if (src_ptl != dst_ptl)
+		spin_unlock(src_ptl);
+	spin_unlock(dst_ptl);
+}
+
+int move_hugetlb_page_tables(struct vm_area_struct *vma,
+			     struct vm_area_struct *new_vma,
+			     unsigned long old_addr, unsigned long new_addr,
+			     unsigned long len)
+{
+	struct hstate *h = hstate_vma(vma);
+	struct address_space *mapping = vma->vm_file->f_mapping;
+	unsigned long sz = huge_page_size(h);
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long old_end = old_addr + len;
+	unsigned long old_addr_copy;
+	pte_t *src_pte, *dst_pte;
+	struct mmu_notifier_range range;
+
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, old_addr,
+				old_end);
+	adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
+	mmu_notifier_invalidate_range_start(&range);
+	/* Prevent race with file truncation */
+	i_mmap_lock_write(mapping);
+	for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
+		src_pte = huge_pte_offset(mm, old_addr, sz);
+		if (!src_pte)
+			continue;
+		if (huge_pte_none(huge_ptep_get(src_pte)))
+			continue;
+
+		/* old_addr arg to huge_pmd_unshare() is a pointer and so the
+		 * arg may be modified. Pass a copy instead to preserve the
+		 * value in old_addr.
+		 */
+		old_addr_copy = old_addr;
+
+		if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte))
+			continue;
+
+		dst_pte = huge_pte_alloc(mm, new_vma, new_addr, sz);
+		if (!dst_pte)
+			break;
+
+		move_huge_pte(vma, old_addr, new_addr, src_pte);
+	}
+	i_mmap_unlock_write(mapping);
+	flush_tlb_range(vma, old_end - len, old_end);
+	mmu_notifier_invalidate_range_end(&range);
+
+	return len + old_addr - old_end;
+}
+
 static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end,
 				   struct page *ref_page)
@@ -6257,12 +6362,6 @@ void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
  * sharing is possible.  For hugetlbfs, this prevents removal of any page
  * table entries associated with the address space.  This is important as we
  * are setting up sharing based on existing page table entries (mappings).
- *
- * NOTE: This routine is only called from huge_pte_alloc.  Some callers of
- * huge_pte_alloc know that sharing is not possible and do not take
- * i_mmap_rwsem as a performance optimization.  This is handled by the
- * if !vma_shareable check at the beginning of the routine. i_mmap_rwsem is
- * only required for subsequent processing.
  */
 pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 		      unsigned long addr, pud_t *pud)
diff --git a/mm/mremap.c b/mm/mremap.c
index c0b6c41b7b78..002eec83e91e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -489,6 +489,10 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 	old_end = old_addr + len;
 	flush_cache_range(vma, old_addr, old_end);
 
+	if (is_vm_hugetlb_page(vma))
+		return move_hugetlb_page_tables(vma, new_vma, old_addr,
+						new_addr, len);
+
 	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
 				old_addr, old_end);
 	mmu_notifier_invalidate_range_start(&range);
@@ -646,6 +650,10 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		mremap_userfaultfd_prep(new_vma, uf);
 	}
 
+	if (is_vm_hugetlb_page(vma)) {
+		clear_vma_resv_huge_pages(vma);
+	}
+
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
 	if (vm_flags & VM_ACCOUNT && !(flags & MREMAP_DONTUNMAP)) {
 		vma->vm_flags &= ~VM_ACCOUNT;
@@ -739,9 +747,6 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 			(vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)))
 		return ERR_PTR(-EINVAL);
 
-	if (is_vm_hugetlb_page(vma))
-		return ERR_PTR(-EINVAL);
-
 	/* We can't remap across vm area boundaries */
 	if (old_len > vma->vm_end - addr)
 		return ERR_PTR(-EFAULT);
@@ -937,6 +942,31 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 
 	if (mmap_write_lock_killable(current->mm))
 		return -EINTR;
+	vma = find_vma(mm, addr);
+	if (!vma || vma->vm_start > addr) {
+		ret = EFAULT;
+		goto out;
+	}
+
+	if (is_vm_hugetlb_page(vma)) {
+		struct hstate *h __maybe_unused = hstate_vma(vma);
+
+		old_len = ALIGN(old_len, huge_page_size(h));
+		new_len = ALIGN(new_len, huge_page_size(h));
+
+		/* addrs must be huge page aligned */
+		if (addr & ~huge_page_mask(h))
+			goto out;
+		if (new_addr & ~huge_page_mask(h))
+			goto out;
+
+		/*
+		 * Don't allow remap expansion, because the underlying hugetlb
+		 * reservation is not yet capable to handle split reservation.
+		 */
+		if (new_len > old_len)
+			goto out;
+	}
 
 	if (flags & (MREMAP_FIXED | MREMAP_DONTUNMAP)) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-- 
cgit v1.2.3


From 8cd7c588decf470bf7e14f2be93b709f839a965e Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 5 Nov 2021 13:42:25 -0700
Subject: mm/vmscan: throttle reclaim until some writeback completes if
 congested

Patch series "Remove dependency on congestion_wait in mm/", v5.

This series that removes all calls to congestion_wait in mm/ and deletes
wait_iff_congested.  It's not a clever implementation but
congestion_wait has been broken for a long time [1].

Even if congestion throttling worked, it was never a great idea.  While
excessive dirty/writeback pages at the tail of the LRU is one
possibility that reclaim may be slow, there is also the problem of too
many pages being isolated and reclaim failing for other reasons
(elevated references, too many pages isolated, excessive LRU contention
etc).

This series replaces the "congestion" throttling with 3 different types.

 - If there are too many dirty/writeback pages, sleep until a timeout or
   enough pages get cleaned

 - If too many pages are isolated, sleep until enough isolated pages are
   either reclaimed or put back on the LRU

 - If no progress is being made, direct reclaim tasks sleep until
   another task makes progress with acceptable efficiency.

This was initially tested with a mix of workloads that used to trigger
corner cases that no longer work.  A new test case was created called
"stutterp" (pagereclaim-stutterp-noreaders in mmtests) using a freshly
created XFS filesystem.  Note that it may be necessary to increase the
timeout of ssh if executing remotely as ssh itself can get throttled and
the connection may timeout.

stutterp varies the number of "worker" processes from 4 up to NR_CPUS*4
to check the impact as the number of direct reclaimers increase.  It has
four types of worker.

 - One "anon latency" worker creates small mappings with mmap() and
   times how long it takes to fault the mapping reading it 4K at a time

 - X file writers which is fio randomly writing X files where the total
   size of the files add up to the allowed dirty_ratio. fio is allowed
   to run for a warmup period to allow some file-backed pages to
   accumulate. The duration of the warmup is based on the best-case
   linear write speed of the storage.

 - Y file readers which is fio randomly reading small files

 - Z anon memory hogs which continually map (100-dirty_ratio)% of memory

 - Total estimated WSS = (100+dirty_ration) percentage of memory

X+Y+Z+1 == NR_WORKERS varying from 4 up to NR_CPUS*4

The intent is to maximise the total WSS with a mix of file and anon
memory where some anonymous memory must be swapped and there is a high
likelihood of dirty/writeback pages reaching the end of the LRU.

The test can be configured to have no background readers to stress
dirty/writeback pages.  The results below are based on having zero
readers.

The short summary of the results is that the series works and stalls
until some event occurs but the timeouts may need adjustment.

The test results are not broken down by patch as the series should be
treated as one block that replaces a broken throttling mechanism with a
working one.

Finally, three machines were tested but I'm reporting the worst set of
results.  The other two machines had much better latencies for example.

First the results of the "anon latency" latency

  stutterp
                                5.15.0-rc1             5.15.0-rc1
                                   vanilla mm-reclaimcongest-v5r4
  Amean     mmap-4      31.4003 (   0.00%)   2661.0198 (-8374.52%)
  Amean     mmap-7      38.1641 (   0.00%)    149.2891 (-291.18%)
  Amean     mmap-12     60.0981 (   0.00%)    187.8105 (-212.51%)
  Amean     mmap-21    161.2699 (   0.00%)    213.9107 ( -32.64%)
  Amean     mmap-30    174.5589 (   0.00%)    377.7548 (-116.41%)
  Amean     mmap-48   8106.8160 (   0.00%)   1070.5616 (  86.79%)
  Stddev    mmap-4      41.3455 (   0.00%)  27573.9676 (-66591.66%)
  Stddev    mmap-7      53.5556 (   0.00%)   4608.5860 (-8505.23%)
  Stddev    mmap-12    171.3897 (   0.00%)   5559.4542 (-3143.75%)
  Stddev    mmap-21   1506.6752 (   0.00%)   5746.2507 (-281.39%)
  Stddev    mmap-30    557.5806 (   0.00%)   7678.1624 (-1277.05%)
  Stddev    mmap-48  61681.5718 (   0.00%)  14507.2830 (  76.48%)
  Max-90    mmap-4      31.4243 (   0.00%)     83.1457 (-164.59%)
  Max-90    mmap-7      41.0410 (   0.00%)     41.0720 (  -0.08%)
  Max-90    mmap-12     66.5255 (   0.00%)     53.9073 (  18.97%)
  Max-90    mmap-21    146.7479 (   0.00%)    105.9540 (  27.80%)
  Max-90    mmap-30    193.9513 (   0.00%)     64.3067 (  66.84%)
  Max-90    mmap-48    277.9137 (   0.00%)    591.0594 (-112.68%)
  Max       mmap-4    1913.8009 (   0.00%) 299623.9695 (-15555.96%)
  Max       mmap-7    2423.9665 (   0.00%) 204453.1708 (-8334.65%)
  Max       mmap-12   6845.6573 (   0.00%) 221090.3366 (-3129.64%)
  Max       mmap-21  56278.6508 (   0.00%) 213877.3496 (-280.03%)
  Max       mmap-30  19716.2990 (   0.00%) 216287.6229 (-997.00%)
  Max       mmap-48 477923.9400 (   0.00%) 245414.8238 (  48.65%)

For most thread counts, the time to mmap() is unfortunately increased.
In earlier versions of the series, this was lower but a large number of
throttling events were reaching their timeout increasing the amount of
inefficient scanning of the LRU.  There is no prioritisation of reclaim
tasks making progress based on each tasks rate of page allocation versus
progress of reclaim.  The variance is also impacted for high worker
counts but in all cases, the differences in latency are not
statistically significant due to very large maximum outliers.  Max-90
shows that 90% of the stalls are comparable but the Max results show the
massive outliers which are increased to to stalling.

It is expected that this will be very machine dependant.  Due to the
test design, reclaim is difficult so allocations stall and there are
variances depending on whether THPs can be allocated or not.  The amount
of memory will affect exactly how bad the corner cases are and how often
they trigger.  The warmup period calculation is not ideal as it's based
on linear writes where as fio is randomly writing multiple files from
multiple tasks so the start state of the test is variable.  For example,
these are the latencies on a single-socket machine that had more memory

  Amean     mmap-4      42.2287 (   0.00%)     49.6838 * -17.65%*
  Amean     mmap-7     216.4326 (   0.00%)     47.4451 *  78.08%*
  Amean     mmap-12   2412.0588 (   0.00%)     51.7497 (  97.85%)
  Amean     mmap-21   5546.2548 (   0.00%)     51.8862 (  99.06%)
  Amean     mmap-30   1085.3121 (   0.00%)     72.1004 (  93.36%)

The overall system CPU usage and elapsed time is as follows

                    5.15.0-rc3  5.15.0-rc3
                       vanilla mm-reclaimcongest-v5r4
  Duration User        6989.03      983.42
  Duration System      7308.12      799.68
  Duration Elapsed     2277.67     2092.98

The patches reduce system CPU usage by 89% as the vanilla kernel is rarely
stalling.

The high-level /proc/vmstats show

                                       5.15.0-rc1     5.15.0-rc1
                                          vanilla mm-reclaimcongest-v5r2
  Ops Direct pages scanned          1056608451.00   503594991.00
  Ops Kswapd pages scanned           109795048.00   147289810.00
  Ops Kswapd pages reclaimed          63269243.00    31036005.00
  Ops Direct pages reclaimed          10803973.00     6328887.00
  Ops Kswapd efficiency %                   57.62          21.07
  Ops Kswapd velocity                    48204.98       57572.86
  Ops Direct efficiency %                    1.02           1.26
  Ops Direct velocity                   463898.83      196845.97

Kswapd scanned less pages but the detailed pattern is different.  The
vanilla kernel scans slowly over time where as the patches exhibits
burst patterns of scan activity.  Direct reclaim scanning is reduced by
52% due to stalling.

The pattern for stealing pages is also slightly different.  Both kernels
exhibit spikes but the vanilla kernel when reclaiming shows pages being
reclaimed over a period of time where as the patches tend to reclaim in
spikes.  The difference is that vanilla is not throttling and instead
scanning constantly finding some pages over time where as the patched
kernel throttles and reclaims in spikes.

  Ops Percentage direct scans               90.59          77.37

For direct reclaim, vanilla scanned 90.59% of pages where as with the
patches, 77.37% were direct reclaim due to throttling

  Ops Page writes by reclaim           2613590.00     1687131.00

Page writes from reclaim context are reduced.

  Ops Page writes anon                 2932752.00     1917048.00

And there is less swapping.

  Ops Page reclaim immediate         996248528.00   107664764.00

The number of pages encountered at the tail of the LRU tagged for
immediate reclaim but still dirty/writeback is reduced by 89%.

  Ops Slabs scanned                     164284.00      153608.00

Slab scan activity is similar.

ftrace was used to gather stall activity

  Vanilla
  -------
      1 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=16000
      2 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=12000
      8 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=8000
     29 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=4000
  82394 writeback_wait_iff_congested: usec_timeout=100000 usec_delayed=0

The fast majority of wait_iff_congested calls do not stall at all.  What
is likely happening is that cond_resched() reschedules the task for a
short period when the BDI is not registering congestion (which it never
will in this test setup).

      1 writeback_congestion_wait: usec_timeout=100000 usec_delayed=120000
      2 writeback_congestion_wait: usec_timeout=100000 usec_delayed=132000
      4 writeback_congestion_wait: usec_timeout=100000 usec_delayed=112000
    380 writeback_congestion_wait: usec_timeout=100000 usec_delayed=108000
    778 writeback_congestion_wait: usec_timeout=100000 usec_delayed=104000

congestion_wait if called always exceeds the timeout as there is no
trigger to wake it up.

Bottom line: Vanilla will throttle but it's not effective.

Patch series
------------

Kswapd throttle activity was always due to scanning pages tagged for
immediate reclaim at the tail of the LRU

      1 usec_timeout=100000 usect_delayed=72000 reason=VMSCAN_THROTTLE_WRITEBACK
      4 usec_timeout=100000 usect_delayed=20000 reason=VMSCAN_THROTTLE_WRITEBACK
      5 usec_timeout=100000 usect_delayed=12000 reason=VMSCAN_THROTTLE_WRITEBACK
      6 usec_timeout=100000 usect_delayed=16000 reason=VMSCAN_THROTTLE_WRITEBACK
     11 usec_timeout=100000 usect_delayed=100000 reason=VMSCAN_THROTTLE_WRITEBACK
     11 usec_timeout=100000 usect_delayed=8000 reason=VMSCAN_THROTTLE_WRITEBACK
     94 usec_timeout=100000 usect_delayed=0 reason=VMSCAN_THROTTLE_WRITEBACK
    112 usec_timeout=100000 usect_delayed=4000 reason=VMSCAN_THROTTLE_WRITEBACK

The majority of events did not stall or stalled for a short period.
Roughly 16% of stalls reached the timeout before expiry.  For direct
reclaim, the number of times stalled for each reason were

   6624 reason=VMSCAN_THROTTLE_ISOLATED
  93246 reason=VMSCAN_THROTTLE_NOPROGRESS
  96934 reason=VMSCAN_THROTTLE_WRITEBACK

The most common reason to stall was due to excessive pages tagged for
immediate reclaim at the tail of the LRU followed by a failure to make
forward.  A relatively small number were due to too many pages isolated
from the LRU by parallel threads

For VMSCAN_THROTTLE_ISOLATED, the breakdown of delays was

      9 usec_timeout=20000 usect_delayed=4000 reason=VMSCAN_THROTTLE_ISOLATED
     12 usec_timeout=20000 usect_delayed=16000 reason=VMSCAN_THROTTLE_ISOLATED
     83 usec_timeout=20000 usect_delayed=20000 reason=VMSCAN_THROTTLE_ISOLATED
   6520 usec_timeout=20000 usect_delayed=0 reason=VMSCAN_THROTTLE_ISOLATED

Most did not stall at all.  A small number reached the timeout.

For VMSCAN_THROTTLE_NOPROGRESS, the breakdown of stalls were all over
the map

      1 usec_timeout=500000 usect_delayed=324000 reason=VMSCAN_THROTTLE_NOPROGRESS
      1 usec_timeout=500000 usect_delayed=332000 reason=VMSCAN_THROTTLE_NOPROGRESS
      1 usec_timeout=500000 usect_delayed=348000 reason=VMSCAN_THROTTLE_NOPROGRESS
      1 usec_timeout=500000 usect_delayed=360000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=228000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=260000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=340000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=364000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=372000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=428000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=460000 reason=VMSCAN_THROTTLE_NOPROGRESS
      2 usec_timeout=500000 usect_delayed=464000 reason=VMSCAN_THROTTLE_NOPROGRESS
      3 usec_timeout=500000 usect_delayed=244000 reason=VMSCAN_THROTTLE_NOPROGRESS
      3 usec_timeout=500000 usect_delayed=252000 reason=VMSCAN_THROTTLE_NOPROGRESS
      3 usec_timeout=500000 usect_delayed=272000 reason=VMSCAN_THROTTLE_NOPROGRESS
      4 usec_timeout=500000 usect_delayed=188000 reason=VMSCAN_THROTTLE_NOPROGRESS
      4 usec_timeout=500000 usect_delayed=268000 reason=VMSCAN_THROTTLE_NOPROGRESS
      4 usec_timeout=500000 usect_delayed=328000 reason=VMSCAN_THROTTLE_NOPROGRESS
      4 usec_timeout=500000 usect_delayed=380000 reason=VMSCAN_THROTTLE_NOPROGRESS
      4 usec_timeout=500000 usect_delayed=392000 reason=VMSCAN_THROTTLE_NOPROGRESS
      4 usec_timeout=500000 usect_delayed=432000 reason=VMSCAN_THROTTLE_NOPROGRESS
      5 usec_timeout=500000 usect_delayed=204000 reason=VMSCAN_THROTTLE_NOPROGRESS
      5 usec_timeout=500000 usect_delayed=220000 reason=VMSCAN_THROTTLE_NOPROGRESS
      5 usec_timeout=500000 usect_delayed=412000 reason=VMSCAN_THROTTLE_NOPROGRESS
      5 usec_timeout=500000 usect_delayed=436000 reason=VMSCAN_THROTTLE_NOPROGRESS
      6 usec_timeout=500000 usect_delayed=488000 reason=VMSCAN_THROTTLE_NOPROGRESS
      7 usec_timeout=500000 usect_delayed=212000 reason=VMSCAN_THROTTLE_NOPROGRESS
      7 usec_timeout=500000 usect_delayed=300000 reason=VMSCAN_THROTTLE_NOPROGRESS
      7 usec_timeout=500000 usect_delayed=316000 reason=VMSCAN_THROTTLE_NOPROGRESS
      7 usec_timeout=500000 usect_delayed=472000 reason=VMSCAN_THROTTLE_NOPROGRESS
      8 usec_timeout=500000 usect_delayed=248000 reason=VMSCAN_THROTTLE_NOPROGRESS
      8 usec_timeout=500000 usect_delayed=356000 reason=VMSCAN_THROTTLE_NOPROGRESS
      8 usec_timeout=500000 usect_delayed=456000 reason=VMSCAN_THROTTLE_NOPROGRESS
      9 usec_timeout=500000 usect_delayed=124000 reason=VMSCAN_THROTTLE_NOPROGRESS
      9 usec_timeout=500000 usect_delayed=376000 reason=VMSCAN_THROTTLE_NOPROGRESS
      9 usec_timeout=500000 usect_delayed=484000 reason=VMSCAN_THROTTLE_NOPROGRESS
     10 usec_timeout=500000 usect_delayed=172000 reason=VMSCAN_THROTTLE_NOPROGRESS
     10 usec_timeout=500000 usect_delayed=420000 reason=VMSCAN_THROTTLE_NOPROGRESS
     10 usec_timeout=500000 usect_delayed=452000 reason=VMSCAN_THROTTLE_NOPROGRESS
     11 usec_timeout=500000 usect_delayed=256000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=112000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=116000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=144000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=152000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=264000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=384000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=424000 reason=VMSCAN_THROTTLE_NOPROGRESS
     12 usec_timeout=500000 usect_delayed=492000 reason=VMSCAN_THROTTLE_NOPROGRESS
     13 usec_timeout=500000 usect_delayed=184000 reason=VMSCAN_THROTTLE_NOPROGRESS
     13 usec_timeout=500000 usect_delayed=444000 reason=VMSCAN_THROTTLE_NOPROGRESS
     14 usec_timeout=500000 usect_delayed=308000 reason=VMSCAN_THROTTLE_NOPROGRESS
     14 usec_timeout=500000 usect_delayed=440000 reason=VMSCAN_THROTTLE_NOPROGRESS
     14 usec_timeout=500000 usect_delayed=476000 reason=VMSCAN_THROTTLE_NOPROGRESS
     16 usec_timeout=500000 usect_delayed=140000 reason=VMSCAN_THROTTLE_NOPROGRESS
     17 usec_timeout=500000 usect_delayed=232000 reason=VMSCAN_THROTTLE_NOPROGRESS
     17 usec_timeout=500000 usect_delayed=240000 reason=VMSCAN_THROTTLE_NOPROGRESS
     17 usec_timeout=500000 usect_delayed=280000 reason=VMSCAN_THROTTLE_NOPROGRESS
     18 usec_timeout=500000 usect_delayed=404000 reason=VMSCAN_THROTTLE_NOPROGRESS
     20 usec_timeout=500000 usect_delayed=148000 reason=VMSCAN_THROTTLE_NOPROGRESS
     20 usec_timeout=500000 usect_delayed=216000 reason=VMSCAN_THROTTLE_NOPROGRESS
     20 usec_timeout=500000 usect_delayed=468000 reason=VMSCAN_THROTTLE_NOPROGRESS
     21 usec_timeout=500000 usect_delayed=448000 reason=VMSCAN_THROTTLE_NOPROGRESS
     23 usec_timeout=500000 usect_delayed=168000 reason=VMSCAN_THROTTLE_NOPROGRESS
     23 usec_timeout=500000 usect_delayed=296000 reason=VMSCAN_THROTTLE_NOPROGRESS
     25 usec_timeout=500000 usect_delayed=132000 reason=VMSCAN_THROTTLE_NOPROGRESS
     25 usec_timeout=500000 usect_delayed=352000 reason=VMSCAN_THROTTLE_NOPROGRESS
     26 usec_timeout=500000 usect_delayed=180000 reason=VMSCAN_THROTTLE_NOPROGRESS
     27 usec_timeout=500000 usect_delayed=284000 reason=VMSCAN_THROTTLE_NOPROGRESS
     28 usec_timeout=500000 usect_delayed=164000 reason=VMSCAN_THROTTLE_NOPROGRESS
     29 usec_timeout=500000 usect_delayed=136000 reason=VMSCAN_THROTTLE_NOPROGRESS
     30 usec_timeout=500000 usect_delayed=200000 reason=VMSCAN_THROTTLE_NOPROGRESS
     30 usec_timeout=500000 usect_delayed=400000 reason=VMSCAN_THROTTLE_NOPROGRESS
     31 usec_timeout=500000 usect_delayed=196000 reason=VMSCAN_THROTTLE_NOPROGRESS
     32 usec_timeout=500000 usect_delayed=156000 reason=VMSCAN_THROTTLE_NOPROGRESS
     33 usec_timeout=500000 usect_delayed=224000 reason=VMSCAN_THROTTLE_NOPROGRESS
     35 usec_timeout=500000 usect_delayed=128000 reason=VMSCAN_THROTTLE_NOPROGRESS
     35 usec_timeout=500000 usect_delayed=176000 reason=VMSCAN_THROTTLE_NOPROGRESS
     36 usec_timeout=500000 usect_delayed=368000 reason=VMSCAN_THROTTLE_NOPROGRESS
     36 usec_timeout=500000 usect_delayed=496000 reason=VMSCAN_THROTTLE_NOPROGRESS
     37 usec_timeout=500000 usect_delayed=312000 reason=VMSCAN_THROTTLE_NOPROGRESS
     38 usec_timeout=500000 usect_delayed=304000 reason=VMSCAN_THROTTLE_NOPROGRESS
     40 usec_timeout=500000 usect_delayed=288000 reason=VMSCAN_THROTTLE_NOPROGRESS
     43 usec_timeout=500000 usect_delayed=408000 reason=VMSCAN_THROTTLE_NOPROGRESS
     55 usec_timeout=500000 usect_delayed=416000 reason=VMSCAN_THROTTLE_NOPROGRESS
     56 usec_timeout=500000 usect_delayed=76000 reason=VMSCAN_THROTTLE_NOPROGRESS
     58 usec_timeout=500000 usect_delayed=120000 reason=VMSCAN_THROTTLE_NOPROGRESS
     59 usec_timeout=500000 usect_delayed=208000 reason=VMSCAN_THROTTLE_NOPROGRESS
     61 usec_timeout=500000 usect_delayed=68000 reason=VMSCAN_THROTTLE_NOPROGRESS
     71 usec_timeout=500000 usect_delayed=192000 reason=VMSCAN_THROTTLE_NOPROGRESS
     71 usec_timeout=500000 usect_delayed=480000 reason=VMSCAN_THROTTLE_NOPROGRESS
     79 usec_timeout=500000 usect_delayed=60000 reason=VMSCAN_THROTTLE_NOPROGRESS
     82 usec_timeout=500000 usect_delayed=320000 reason=VMSCAN_THROTTLE_NOPROGRESS
     82 usec_timeout=500000 usect_delayed=92000 reason=VMSCAN_THROTTLE_NOPROGRESS
     85 usec_timeout=500000 usect_delayed=64000 reason=VMSCAN_THROTTLE_NOPROGRESS
     85 usec_timeout=500000 usect_delayed=80000 reason=VMSCAN_THROTTLE_NOPROGRESS
     88 usec_timeout=500000 usect_delayed=84000 reason=VMSCAN_THROTTLE_NOPROGRESS
     90 usec_timeout=500000 usect_delayed=160000 reason=VMSCAN_THROTTLE_NOPROGRESS
     90 usec_timeout=500000 usect_delayed=292000 reason=VMSCAN_THROTTLE_NOPROGRESS
     94 usec_timeout=500000 usect_delayed=56000 reason=VMSCAN_THROTTLE_NOPROGRESS
    118 usec_timeout=500000 usect_delayed=88000 reason=VMSCAN_THROTTLE_NOPROGRESS
    119 usec_timeout=500000 usect_delayed=72000 reason=VMSCAN_THROTTLE_NOPROGRESS
    126 usec_timeout=500000 usect_delayed=108000 reason=VMSCAN_THROTTLE_NOPROGRESS
    146 usec_timeout=500000 usect_delayed=52000 reason=VMSCAN_THROTTLE_NOPROGRESS
    148 usec_timeout=500000 usect_delayed=36000 reason=VMSCAN_THROTTLE_NOPROGRESS
    148 usec_timeout=500000 usect_delayed=48000 reason=VMSCAN_THROTTLE_NOPROGRESS
    159 usec_timeout=500000 usect_delayed=28000 reason=VMSCAN_THROTTLE_NOPROGRESS
    178 usec_timeout=500000 usect_delayed=44000 reason=VMSCAN_THROTTLE_NOPROGRESS
    183 usec_timeout=500000 usect_delayed=40000 reason=VMSCAN_THROTTLE_NOPROGRESS
    237 usec_timeout=500000 usect_delayed=100000 reason=VMSCAN_THROTTLE_NOPROGRESS
    266 usec_timeout=500000 usect_delayed=32000 reason=VMSCAN_THROTTLE_NOPROGRESS
    313 usec_timeout=500000 usect_delayed=24000 reason=VMSCAN_THROTTLE_NOPROGRESS
    347 usec_timeout=500000 usect_delayed=96000 reason=VMSCAN_THROTTLE_NOPROGRESS
    470 usec_timeout=500000 usect_delayed=20000 reason=VMSCAN_THROTTLE_NOPROGRESS
    559 usec_timeout=500000 usect_delayed=16000 reason=VMSCAN_THROTTLE_NOPROGRESS
    964 usec_timeout=500000 usect_delayed=12000 reason=VMSCAN_THROTTLE_NOPROGRESS
   2001 usec_timeout=500000 usect_delayed=104000 reason=VMSCAN_THROTTLE_NOPROGRESS
   2447 usec_timeout=500000 usect_delayed=8000 reason=VMSCAN_THROTTLE_NOPROGRESS
   7888 usec_timeout=500000 usect_delayed=4000 reason=VMSCAN_THROTTLE_NOPROGRESS
  22727 usec_timeout=500000 usect_delayed=0 reason=VMSCAN_THROTTLE_NOPROGRESS
  51305 usec_timeout=500000 usect_delayed=500000 reason=VMSCAN_THROTTLE_NOPROGRESS

The full timeout is often hit but a large number also do not stall at
all.  The remainder slept a little allowing other reclaim tasks to make
progress.

While this timeout could be further increased, it could also negatively
impact worst-case behaviour when there is no prioritisation of what task
should make progress.

For VMSCAN_THROTTLE_WRITEBACK, the breakdown was

      1 usec_timeout=100000 usect_delayed=44000 reason=VMSCAN_THROTTLE_WRITEBACK
      2 usec_timeout=100000 usect_delayed=76000 reason=VMSCAN_THROTTLE_WRITEBACK
      3 usec_timeout=100000 usect_delayed=80000 reason=VMSCAN_THROTTLE_WRITEBACK
      5 usec_timeout=100000 usect_delayed=48000 reason=VMSCAN_THROTTLE_WRITEBACK
      5 usec_timeout=100000 usect_delayed=84000 reason=VMSCAN_THROTTLE_WRITEBACK
      6 usec_timeout=100000 usect_delayed=72000 reason=VMSCAN_THROTTLE_WRITEBACK
      7 usec_timeout=100000 usect_delayed=88000 reason=VMSCAN_THROTTLE_WRITEBACK
     11 usec_timeout=100000 usect_delayed=56000 reason=VMSCAN_THROTTLE_WRITEBACK
     12 usec_timeout=100000 usect_delayed=64000 reason=VMSCAN_THROTTLE_WRITEBACK
     16 usec_timeout=100000 usect_delayed=92000 reason=VMSCAN_THROTTLE_WRITEBACK
     24 usec_timeout=100000 usect_delayed=68000 reason=VMSCAN_THROTTLE_WRITEBACK
     28 usec_timeout=100000 usect_delayed=32000 reason=VMSCAN_THROTTLE_WRITEBACK
     30 usec_timeout=100000 usect_delayed=60000 reason=VMSCAN_THROTTLE_WRITEBACK
     30 usec_timeout=100000 usect_delayed=96000 reason=VMSCAN_THROTTLE_WRITEBACK
     32 usec_timeout=100000 usect_delayed=52000 reason=VMSCAN_THROTTLE_WRITEBACK
     42 usec_timeout=100000 usect_delayed=40000 reason=VMSCAN_THROTTLE_WRITEBACK
     77 usec_timeout=100000 usect_delayed=28000 reason=VMSCAN_THROTTLE_WRITEBACK
     99 usec_timeout=100000 usect_delayed=36000 reason=VMSCAN_THROTTLE_WRITEBACK
    137 usec_timeout=100000 usect_delayed=24000 reason=VMSCAN_THROTTLE_WRITEBACK
    190 usec_timeout=100000 usect_delayed=20000 reason=VMSCAN_THROTTLE_WRITEBACK
    339 usec_timeout=100000 usect_delayed=16000 reason=VMSCAN_THROTTLE_WRITEBACK
    518 usec_timeout=100000 usect_delayed=12000 reason=VMSCAN_THROTTLE_WRITEBACK
    852 usec_timeout=100000 usect_delayed=8000 reason=VMSCAN_THROTTLE_WRITEBACK
   3359 usec_timeout=100000 usect_delayed=4000 reason=VMSCAN_THROTTLE_WRITEBACK
   7147 usec_timeout=100000 usect_delayed=0 reason=VMSCAN_THROTTLE_WRITEBACK
  83962 usec_timeout=100000 usect_delayed=100000 reason=VMSCAN_THROTTLE_WRITEBACK

The majority hit the timeout in direct reclaim context although a
sizable number did not stall at all.  This is very different to kswapd
where only a tiny percentage of stalls due to writeback reached the
timeout.

Bottom line, the throttling appears to work and the wakeup events may
limit worst case stalls.  There might be some grounds for adjusting
timeouts but it's likely futile as the worst-case scenarios depend on
the workload, memory size and the speed of the storage.  A better
approach to improve the series further would be to prioritise tasks
based on their rate of allocation with the caveat that it may be very
expensive to track.

This patch (of 5):

Page reclaim throttles on wait_iff_congested under the following
conditions:

 - kswapd is encountering pages under writeback and marked for immediate
   reclaim implying that pages are cycling through the LRU faster than
   pages can be cleaned.

 - Direct reclaim will stall if all dirty pages are backed by congested
   inodes.

wait_iff_congested is almost completely broken with few exceptions.
This patch adds a new node-based workqueue and tracks the number of
throttled tasks and pages written back since throttling started.  If
enough pages belonging to the node are written back then the throttled
tasks will wake early.  If not, the throttled tasks sleeps until the
timeout expires.

[neilb@suse.de: Uninterruptible sleep and simpler wakeups]
[hdanton@sina.com: Avoid race when reclaim starts]
[vbabka@suse.cz: vmstat irq-safe api, clarifications]

Link: https://lore.kernel.org/linux-mm/45d8b7a6-8548-65f5-cccf-9f451d4ae3d4@kernel.dk/ [1]
Link: https://lkml.kernel.org/r/20211022144651.19914-1-mgorman@techsingularity.net
Link: https://lkml.kernel.org/r/20211022144651.19914-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: NeilBrown <neilb@suse.de>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J . Wong" <djwong@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/backing-dev.h      |  1 -
 include/linux/mmzone.h           | 13 +++++++
 include/trace/events/vmscan.h    | 34 +++++++++++++++++
 include/trace/events/writeback.h |  7 ----
 mm/backing-dev.c                 | 48 -----------------------
 mm/filemap.c                     |  1 +
 mm/internal.h                    | 11 ++++++
 mm/page_alloc.c                  |  5 +++
 mm/vmscan.c                      | 82 ++++++++++++++++++++++++++++++++++------
 mm/vmstat.c                      |  1 +
 10 files changed, 135 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ac7f231b8825..9fb1f0ae273c 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -154,7 +154,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 }
 
 long congestion_wait(int sync, long timeout);
-long wait_iff_congested(int sync, long timeout);
 
 static inline bool mapping_can_writeback(struct address_space *mapping)
 {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fb36a29e3aae..5a5e19b90dab 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -199,6 +199,7 @@ enum node_stat_item {
 	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
+	NR_THROTTLED_WRITTEN,	/* NR_WRITTEN while reclaim throttled */
 	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
 	NR_FOLL_PIN_ACQUIRED,	/* via: pin_user_page(), gup flag: FOLL_PIN */
 	NR_FOLL_PIN_RELEASED,	/* pages returned via unpin_user_page() */
@@ -272,6 +273,11 @@ enum lru_list {
 	NR_LRU_LISTS
 };
 
+enum vmscan_throttle_state {
+	VMSCAN_THROTTLE_WRITEBACK,
+	NR_VMSCAN_THROTTLE,
+};
+
 #define for_each_lru(lru) for (lru = 0; lru < NR_LRU_LISTS; lru++)
 
 #define for_each_evictable_lru(lru) for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)
@@ -841,6 +847,13 @@ typedef struct pglist_data {
 	int node_id;
 	wait_queue_head_t kswapd_wait;
 	wait_queue_head_t pfmemalloc_wait;
+
+	/* workqueues for throttling reclaim for different reasons. */
+	wait_queue_head_t reclaim_wait[NR_VMSCAN_THROTTLE];
+
+	atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */
+	unsigned long nr_reclaim_start;	/* nr pages written while throttled
+					 * when throttling started. */
 	struct task_struct *kswapd;	/* Protected by
 					   mem_hotplug_begin/end() */
 	int kswapd_order;
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 88faf2400ec2..c317f9fe0d17 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -27,6 +27,14 @@
 		{RECLAIM_WB_ASYNC,	"RECLAIM_WB_ASYNC"}	\
 		) : "RECLAIM_WB_NONE"
 
+#define _VMSCAN_THROTTLE_WRITEBACK	(1 << VMSCAN_THROTTLE_WRITEBACK)
+
+#define show_throttle_flags(flags)						\
+	(flags) ? __print_flags(flags, "|",					\
+		{_VMSCAN_THROTTLE_WRITEBACK,	"VMSCAN_THROTTLE_WRITEBACK"}	\
+		) : "VMSCAN_THROTTLE_NONE"
+
+
 #define trace_reclaim_flags(file) ( \
 	(file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
 	(RECLAIM_WB_ASYNC) \
@@ -454,6 +462,32 @@ DEFINE_EVENT(mm_vmscan_direct_reclaim_end_template, mm_vmscan_node_reclaim_end,
 	TP_ARGS(nr_reclaimed)
 );
 
+TRACE_EVENT(mm_vmscan_throttled,
+
+	TP_PROTO(int nid, int usec_timeout, int usec_delayed, int reason),
+
+	TP_ARGS(nid, usec_timeout, usec_delayed, reason),
+
+	TP_STRUCT__entry(
+		__field(int, nid)
+		__field(int, usec_timeout)
+		__field(int, usec_delayed)
+		__field(int, reason)
+	),
+
+	TP_fast_assign(
+		__entry->nid = nid;
+		__entry->usec_timeout = usec_timeout;
+		__entry->usec_delayed = usec_delayed;
+		__entry->reason = 1U << reason;
+	),
+
+	TP_printk("nid=%d usec_timeout=%d usect_delayed=%d reason=%s",
+		__entry->nid,
+		__entry->usec_timeout,
+		__entry->usec_delayed,
+		show_throttle_flags(__entry->reason))
+);
 #endif /* _TRACE_VMSCAN_H */
 
 /* This part must be outside protection */
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 840d1ba84cf5..3bc759b81897 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -763,13 +763,6 @@ DEFINE_EVENT(writeback_congest_waited_template, writeback_congestion_wait,
 	TP_ARGS(usec_timeout, usec_delayed)
 );
 
-DEFINE_EVENT(writeback_congest_waited_template, writeback_wait_iff_congested,
-
-	TP_PROTO(unsigned int usec_timeout, unsigned int usec_delayed),
-
-	TP_ARGS(usec_timeout, usec_delayed)
-);
-
 DECLARE_EVENT_CLASS(writeback_single_inode_template,
 
 	TP_PROTO(struct inode *inode,
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 5ccb25089808..3d2983752e24 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1038,51 +1038,3 @@ long congestion_wait(int sync, long timeout)
 	return ret;
 }
 EXPORT_SYMBOL(congestion_wait);
-
-/**
- * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
- * @sync: SYNC or ASYNC IO
- * @timeout: timeout in jiffies
- *
- * In the event of a congested backing_dev (any backing_dev) this waits
- * for up to @timeout jiffies for either a BDI to exit congestion of the
- * given @sync queue or a write to complete.
- *
- * The return value is 0 if the sleep is for the full timeout. Otherwise,
- * it is the number of jiffies that were still remaining when the function
- * returned. return_value == timeout implies the function did not sleep.
- */
-long wait_iff_congested(int sync, long timeout)
-{
-	long ret;
-	unsigned long start = jiffies;
-	DEFINE_WAIT(wait);
-	wait_queue_head_t *wqh = &congestion_wqh[sync];
-
-	/*
-	 * If there is no congestion, yield if necessary instead
-	 * of sleeping on the congestion queue
-	 */
-	if (atomic_read(&nr_wb_congested[sync]) == 0) {
-		cond_resched();
-
-		/* In case we scheduled, work out time remaining */
-		ret = timeout - (jiffies - start);
-		if (ret < 0)
-			ret = 0;
-
-		goto out;
-	}
-
-	/* Sleep until uncongested or a write happens */
-	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-	ret = io_schedule_timeout(timeout);
-	finish_wait(wqh, &wait);
-
-out:
-	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
-					jiffies_to_usecs(jiffies - start));
-
-	return ret;
-}
-EXPORT_SYMBOL(wait_iff_congested);
diff --git a/mm/filemap.c b/mm/filemap.c
index 6f2c4bd6a571..b6140debc2da 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1612,6 +1612,7 @@ void end_page_writeback(struct page *page)
 
 	smp_mb__after_atomic();
 	wake_up_page(page, PG_writeback);
+	acct_reclaim_writeback(page);
 	put_page(page);
 }
 EXPORT_SYMBOL(end_page_writeback);
diff --git a/mm/internal.h b/mm/internal.h
index 6c3e1a9f8c5a..a59b5626f968 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -34,6 +34,17 @@
 
 void page_writeback_init(void);
 
+void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
+						int nr_throttled);
+static inline void acct_reclaim_writeback(struct page *page)
+{
+	pg_data_t *pgdat = page_pgdat(page);
+	int nr_throttled = atomic_read(&pgdat->nr_writeback_throttled);
+
+	if (nr_throttled)
+		__acct_reclaim_writeback(pgdat, page, nr_throttled);
+}
+
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 09a0f1c5d5d2..0f1f1f353211 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7408,6 +7408,8 @@ static void pgdat_init_kcompactd(struct pglist_data *pgdat) {}
 
 static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 {
+	int i;
+
 	pgdat_resize_init(pgdat);
 
 	pgdat_init_split_queue(pgdat);
@@ -7416,6 +7418,9 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 	init_waitqueue_head(&pgdat->kswapd_wait);
 	init_waitqueue_head(&pgdat->pfmemalloc_wait);
 
+	for (i = 0; i < NR_VMSCAN_THROTTLE; i++)
+		init_waitqueue_head(&pgdat->reclaim_wait[i]);
+
 	pgdat_page_ext_init(pgdat);
 	lruvec_init(&pgdat->__lruvec);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9483dd6af550..09beeb55546c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1006,6 +1006,64 @@ static void handle_write_error(struct address_space *mapping,
 	unlock_page(page);
 }
 
+static void
+reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
+							long timeout)
+{
+	wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
+	long ret;
+	DEFINE_WAIT(wait);
+
+	/*
+	 * Do not throttle IO workers, kthreads other than kswapd or
+	 * workqueues. They may be required for reclaim to make
+	 * forward progress (e.g. journalling workqueues or kthreads).
+	 */
+	if (!current_is_kswapd() &&
+	    current->flags & (PF_IO_WORKER|PF_KTHREAD))
+		return;
+
+	if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
+		WRITE_ONCE(pgdat->nr_reclaim_start,
+			node_page_state(pgdat, NR_THROTTLED_WRITTEN));
+	}
+
+	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+	ret = schedule_timeout(timeout);
+	finish_wait(wqh, &wait);
+	atomic_dec(&pgdat->nr_writeback_throttled);
+
+	trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
+				jiffies_to_usecs(timeout - ret),
+				reason);
+}
+
+/*
+ * Account for pages written if tasks are throttled waiting on dirty
+ * pages to clean. If enough pages have been cleaned since throttling
+ * started then wakeup the throttled tasks.
+ */
+void __acct_reclaim_writeback(pg_data_t *pgdat, struct page *page,
+							int nr_throttled)
+{
+	unsigned long nr_written;
+
+	inc_node_page_state(page, NR_THROTTLED_WRITTEN);
+
+	/*
+	 * This is an inaccurate read as the per-cpu deltas may not
+	 * be synchronised. However, given that the system is
+	 * writeback throttled, it is not worth taking the penalty
+	 * of getting an accurate count. At worst, the throttle
+	 * timeout guarantees forward progress.
+	 */
+	nr_written = node_page_state(pgdat, NR_THROTTLED_WRITTEN) -
+		READ_ONCE(pgdat->nr_reclaim_start);
+
+	if (nr_written > SWAP_CLUSTER_MAX * nr_throttled)
+		wake_up(&pgdat->reclaim_wait[VMSCAN_THROTTLE_WRITEBACK]);
+}
+
 /* possible outcome of pageout() */
 typedef enum {
 	/* failed to write page out, page is locked */
@@ -1411,9 +1469,8 @@ retry:
 
 		/*
 		 * The number of dirty pages determines if a node is marked
-		 * reclaim_congested which affects wait_iff_congested. kswapd
-		 * will stall and start writing pages if the tail of the LRU
-		 * is all dirty unqueued pages.
+		 * reclaim_congested. kswapd will stall and start writing
+		 * pages if the tail of the LRU is all dirty unqueued pages.
 		 */
 		page_check_dirty_writeback(page, &dirty, &writeback);
 		if (dirty || writeback)
@@ -3179,19 +3236,19 @@ again:
 		 * If kswapd scans pages marked for immediate
 		 * reclaim and under writeback (nr_immediate), it
 		 * implies that pages are cycling through the LRU
-		 * faster than they are written so also forcibly stall.
+		 * faster than they are written so forcibly stall
+		 * until some pages complete writeback.
 		 */
 		if (sc->nr.immediate)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+			reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
 	}
 
 	/*
-	 * Tag a node/memcg as congested if all the dirty pages
-	 * scanned were backed by a congested BDI and
-	 * wait_iff_congested will stall.
+	 * Tag a node/memcg as congested if all the dirty pages were marked
+	 * for writeback and immediate reclaim (counted in nr.congested).
 	 *
 	 * Legacy memcg will stall in page writeback so avoid forcibly
-	 * stalling in wait_iff_congested().
+	 * stalling in reclaim_throttle().
 	 */
 	if ((current_is_kswapd() ||
 	     (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) &&
@@ -3199,15 +3256,15 @@ again:
 		set_bit(LRUVEC_CONGESTED, &target_lruvec->flags);
 
 	/*
-	 * Stall direct reclaim for IO completions if underlying BDIs
-	 * and node is congested. Allow kswapd to continue until it
+	 * Stall direct reclaim for IO completions if the lruvec is
+	 * node is congested. Allow kswapd to continue until it
 	 * starts encountering unqueued dirty pages or cycling through
 	 * the LRU too quickly.
 	 */
 	if (!current_is_kswapd() && current_may_throttle() &&
 	    !sc->hibernation_mode &&
 	    test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
-		wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK, HZ/10);
 
 	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 				    sc))
@@ -4285,6 +4342,7 @@ static int kswapd(void *p)
 
 	WRITE_ONCE(pgdat->kswapd_order, 0);
 	WRITE_ONCE(pgdat->kswapd_highest_zoneidx, MAX_NR_ZONES);
+	atomic_set(&pgdat->nr_writeback_throttled, 0);
 	for ( ; ; ) {
 		bool ret;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index ae87c90e0b4e..0f4643e5324d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1225,6 +1225,7 @@ const char * const vmstat_text[] = {
 	"nr_vmscan_immediate_reclaim",
 	"nr_dirtied",
 	"nr_written",
+	"nr_throttled_written",
 	"nr_kernel_misc_reclaimable",
 	"nr_foll_pin_acquired",
 	"nr_foll_pin_released",
-- 
cgit v1.2.3


From d818fca1cac31b1fc9301bda83e195a46fb4ebaa Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 5 Nov 2021 13:42:29 -0700
Subject: mm/vmscan: throttle reclaim and compaction when too may pages are
 isolated

Page reclaim throttles on congestion if too many parallel reclaim
instances have isolated too many pages.  This makes no sense, excessive
parallelisation has nothing to do with writeback or congestion.

This patch creates an additional workqueue to sleep on when too many
pages are isolated.  The throttled tasks are woken when the number of
isolated pages is reduced or a timeout occurs.  There may be some false
positive wakeups for GFP_NOIO/GFP_NOFS callers but the tasks will
throttle again if necessary.

[shy828301@gmail.com: Wake up from compaction context]
[vbabka@suse.cz: Account number of throttled tasks only for writeback]

Link: https://lkml.kernel.org/r/20211022144651.19914-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J . Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h        |  1 +
 include/trace/events/vmscan.h |  4 +++-
 mm/compaction.c               | 10 ++++++++--
 mm/internal.h                 | 11 +++++++++++
 mm/vmscan.c                   | 22 ++++++++++++++++------
 5 files changed, 39 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5a5e19b90dab..312c1ea9aafa 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -275,6 +275,7 @@ enum lru_list {
 
 enum vmscan_throttle_state {
 	VMSCAN_THROTTLE_WRITEBACK,
+	VMSCAN_THROTTLE_ISOLATED,
 	NR_VMSCAN_THROTTLE,
 };
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index c317f9fe0d17..d4905bd9e9c4 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -28,10 +28,12 @@
 		) : "RECLAIM_WB_NONE"
 
 #define _VMSCAN_THROTTLE_WRITEBACK	(1 << VMSCAN_THROTTLE_WRITEBACK)
+#define _VMSCAN_THROTTLE_ISOLATED	(1 << VMSCAN_THROTTLE_ISOLATED)
 
 #define show_throttle_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",					\
-		{_VMSCAN_THROTTLE_WRITEBACK,	"VMSCAN_THROTTLE_WRITEBACK"}	\
+		{_VMSCAN_THROTTLE_WRITEBACK,	"VMSCAN_THROTTLE_WRITEBACK"},	\
+		{_VMSCAN_THROTTLE_ISOLATED,	"VMSCAN_THROTTLE_ISOLATED"}	\
 		) : "VMSCAN_THROTTLE_NONE"
 
 
diff --git a/mm/compaction.c b/mm/compaction.c
index bfc93da1c2c7..7359093d8ac0 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -761,6 +761,8 @@ isolate_freepages_range(struct compact_control *cc,
 /* Similar to reclaim, but different enough that they don't share logic */
 static bool too_many_isolated(pg_data_t *pgdat)
 {
+	bool too_many;
+
 	unsigned long active, inactive, isolated;
 
 	inactive = node_page_state(pgdat, NR_INACTIVE_FILE) +
@@ -770,7 +772,11 @@ static bool too_many_isolated(pg_data_t *pgdat)
 	isolated = node_page_state(pgdat, NR_ISOLATED_FILE) +
 			node_page_state(pgdat, NR_ISOLATED_ANON);
 
-	return isolated > (inactive + active) / 2;
+	too_many = isolated > (inactive + active) / 2;
+	if (!too_many)
+		wake_throttle_isolated(pgdat);
+
+	return too_many;
 }
 
 /**
@@ -822,7 +828,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (cc->mode == MIGRATE_ASYNC)
 			return -EAGAIN;
 
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
 
 		if (fatal_signal_pending(current))
 			return -EINTR;
diff --git a/mm/internal.h b/mm/internal.h
index a59b5626f968..7dfe74f827bf 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -45,6 +45,15 @@ static inline void acct_reclaim_writeback(struct page *page)
 		__acct_reclaim_writeback(pgdat, page, nr_throttled);
 }
 
+static inline void wake_throttle_isolated(pg_data_t *pgdat)
+{
+	wait_queue_head_t *wqh;
+
+	wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_ISOLATED];
+	if (waitqueue_active(wqh))
+		wake_up(wqh);
+}
+
 vm_fault_t do_swap_page(struct vm_fault *vmf);
 
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
@@ -121,6 +130,8 @@ extern unsigned long highest_memmap_pfn;
  */
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
+extern void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
+								long timeout);
 
 /*
  * in mm/rmap.c:
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 09beeb55546c..7bfd62f81e16 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1006,12 +1006,12 @@ static void handle_write_error(struct address_space *mapping,
 	unlock_page(page);
 }
 
-static void
-reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
+void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
 							long timeout)
 {
 	wait_queue_head_t *wqh = &pgdat->reclaim_wait[reason];
 	long ret;
+	bool acct_writeback = (reason == VMSCAN_THROTTLE_WRITEBACK);
 	DEFINE_WAIT(wait);
 
 	/*
@@ -1023,7 +1023,8 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
 	    current->flags & (PF_IO_WORKER|PF_KTHREAD))
 		return;
 
-	if (atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
+	if (acct_writeback &&
+	    atomic_inc_return(&pgdat->nr_writeback_throttled) == 1) {
 		WRITE_ONCE(pgdat->nr_reclaim_start,
 			node_page_state(pgdat, NR_THROTTLED_WRITTEN));
 	}
@@ -1031,7 +1032,9 @@ reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason,
 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
 	ret = schedule_timeout(timeout);
 	finish_wait(wqh, &wait);
-	atomic_dec(&pgdat->nr_writeback_throttled);
+
+	if (acct_writeback)
+		atomic_dec(&pgdat->nr_writeback_throttled);
 
 	trace_mm_vmscan_throttled(pgdat->node_id, jiffies_to_usecs(timeout),
 				jiffies_to_usecs(timeout - ret),
@@ -2175,6 +2178,7 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 		struct scan_control *sc)
 {
 	unsigned long inactive, isolated;
+	bool too_many;
 
 	if (current_is_kswapd())
 		return 0;
@@ -2198,7 +2202,13 @@ static int too_many_isolated(struct pglist_data *pgdat, int file,
 	if ((sc->gfp_mask & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS))
 		inactive >>= 3;
 
-	return isolated > inactive;
+	too_many = isolated > inactive;
+
+	/* Wake up tasks throttled due to too_many_isolated. */
+	if (!too_many)
+		wake_throttle_isolated(pgdat);
+
+	return too_many;
 }
 
 /*
@@ -2307,8 +2317,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 			return 0;
 
 		/* wait a bit for the reclaimer. */
-		msleep(100);
 		stalled = true;
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_ISOLATED, HZ/10);
 
 		/* We are about to die and free our memory. Return now. */
 		if (fatal_signal_pending(current))
-- 
cgit v1.2.3


From 69392a403f49e6e33f9dfb1d6edb87c8006f83c2 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 5 Nov 2021 13:42:32 -0700
Subject: mm/vmscan: throttle reclaim when no progress is being made

Memcg reclaim throttles on congestion if no reclaim progress is made.
This makes little sense, it might be due to writeback or a host of other
factors.

For !memcg reclaim, it's messy.  Direct reclaim primarily is throttled
in the page allocator if it is failing to make progress.  Kswapd
throttles if too many pages are under writeback and marked for immediate
reclaim.

This patch explicitly throttles if reclaim is failing to make progress.

[vbabka@suse.cz: Remove redundant code]

Link: https://lkml.kernel.org/r/20211022144651.19914-4-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: "Darrick J . Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Rik van Riel <riel@surriel.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h        |  1 +
 include/trace/events/vmscan.h |  4 +++-
 mm/memcontrol.c               | 10 +---------
 mm/vmscan.c                   | 28 ++++++++++++++++++++++++++++
 4 files changed, 33 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 312c1ea9aafa..58e744b78c2c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -276,6 +276,7 @@ enum lru_list {
 enum vmscan_throttle_state {
 	VMSCAN_THROTTLE_WRITEBACK,
 	VMSCAN_THROTTLE_ISOLATED,
+	VMSCAN_THROTTLE_NOPROGRESS,
 	NR_VMSCAN_THROTTLE,
 };
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index d4905bd9e9c4..f25a6149d3ba 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -29,11 +29,13 @@
 
 #define _VMSCAN_THROTTLE_WRITEBACK	(1 << VMSCAN_THROTTLE_WRITEBACK)
 #define _VMSCAN_THROTTLE_ISOLATED	(1 << VMSCAN_THROTTLE_ISOLATED)
+#define _VMSCAN_THROTTLE_NOPROGRESS	(1 << VMSCAN_THROTTLE_NOPROGRESS)
 
 #define show_throttle_flags(flags)						\
 	(flags) ? __print_flags(flags, "|",					\
 		{_VMSCAN_THROTTLE_WRITEBACK,	"VMSCAN_THROTTLE_WRITEBACK"},	\
-		{_VMSCAN_THROTTLE_ISOLATED,	"VMSCAN_THROTTLE_ISOLATED"}	\
+		{_VMSCAN_THROTTLE_ISOLATED,	"VMSCAN_THROTTLE_ISOLATED"},	\
+		{_VMSCAN_THROTTLE_NOPROGRESS,	"VMSCAN_THROTTLE_NOPROGRESS"}	\
 		) : "VMSCAN_THROTTLE_NONE"
 
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cf0321d7a784..965b3cf7046b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3487,19 +3487,11 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 
 	/* try to free all pages in this cgroup */
 	while (nr_retries && page_counter_read(&memcg->memory)) {
-		int progress;
-
 		if (signal_pending(current))
 			return -EINTR;
 
-		progress = try_to_free_mem_cgroup_pages(memcg, 1,
-							GFP_KERNEL, true);
-		if (!progress) {
+		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true))
 			nr_retries--;
-			/* maybe some writeback is necessary */
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
-		}
-
 	}
 
 	return 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7bfd62f81e16..7d3fe5938e3b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3322,6 +3322,33 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	return zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx);
 }
 
+static void consider_reclaim_throttle(pg_data_t *pgdat, struct scan_control *sc)
+{
+	/* If reclaim is making progress, wake any throttled tasks. */
+	if (sc->nr_reclaimed) {
+		wait_queue_head_t *wqh;
+
+		wqh = &pgdat->reclaim_wait[VMSCAN_THROTTLE_NOPROGRESS];
+		if (waitqueue_active(wqh))
+			wake_up(wqh);
+
+		return;
+	}
+
+	/*
+	 * Do not throttle kswapd on NOPROGRESS as it will throttle on
+	 * VMSCAN_THROTTLE_WRITEBACK if there are too many pages under
+	 * writeback and marked for immediate reclaim at the tail of
+	 * the LRU.
+	 */
+	if (current_is_kswapd())
+		return;
+
+	/* Throttle if making no progress at high prioities. */
+	if (sc->priority < DEF_PRIORITY - 2)
+		reclaim_throttle(pgdat, VMSCAN_THROTTLE_NOPROGRESS, HZ/10);
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -3406,6 +3433,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 			continue;
 		last_pgdat = zone->zone_pgdat;
 		shrink_node(zone->zone_pgdat, sc);
+		consider_reclaim_throttle(zone->zone_pgdat, sc);
 	}
 
 	/*
-- 
cgit v1.2.3


From 7e6ec49c18988f1b8dab0677271dafde5f8d9a43 Mon Sep 17 00:00:00 2001
From: Yuanzheng Song <songyuanzheng@huawei.com>
Date: Fri, 5 Nov 2021 13:42:52 -0700
Subject: mm/vmpressure: fix data-race with memcg->socket_pressure

When reading memcg->socket_pressure in mem_cgroup_under_socket_pressure()
and writing memcg->socket_pressure in vmpressure() at the same time, the
following data-race occurs:

  BUG: KCSAN: data-race in __sk_mem_reduce_allocated / vmpressure

  write to 0xffff8881286f4938 of 8 bytes by task 24550 on cpu 3:
   vmpressure+0x218/0x230 mm/vmpressure.c:307
   shrink_node_memcgs+0x2b9/0x410 mm/vmscan.c:2658
   shrink_node+0x9d2/0x11d0 mm/vmscan.c:2769
   shrink_zones+0x29f/0x470 mm/vmscan.c:2972
   do_try_to_free_pages+0x193/0x6e0 mm/vmscan.c:3027
   try_to_free_mem_cgroup_pages+0x1c0/0x3f0 mm/vmscan.c:3345
   reclaim_high mm/memcontrol.c:2440 [inline]
   mem_cgroup_handle_over_high+0x18b/0x4d0 mm/memcontrol.c:2624
   tracehook_notify_resume include/linux/tracehook.h:197 [inline]
   exit_to_user_mode_loop kernel/entry/common.c:164 [inline]
   exit_to_user_mode_prepare+0x110/0x170 kernel/entry/common.c:191
   syscall_exit_to_user_mode+0x16/0x30 kernel/entry/common.c:266
   ret_from_fork+0x15/0x30 arch/x86/entry/entry_64.S:289

  read to 0xffff8881286f4938 of 8 bytes by interrupt on cpu 1:
   mem_cgroup_under_socket_pressure include/linux/memcontrol.h:1483 [inline]
   sk_under_memory_pressure include/net/sock.h:1314 [inline]
   __sk_mem_reduce_allocated+0x1d2/0x270 net/core/sock.c:2696
   __sk_mem_reclaim+0x44/0x50 net/core/sock.c:2711
   sk_mem_reclaim include/net/sock.h:1490 [inline]
   ......
   net_rx_action+0x17a/0x480 net/core/dev.c:6864
   __do_softirq+0x12c/0x2af kernel/softirq.c:298
   run_ksoftirqd+0x13/0x20 kernel/softirq.c:653
   smpboot_thread_fn+0x33f/0x510 kernel/smpboot.c:165
   kthread+0x1fc/0x220 kernel/kthread.c:292
   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:296

Fix it by using READ_ONCE() and WRITE_ONCE() to read and write
memcg->socket_pressure.

Link: https://lkml.kernel.org/r/20211025082843.671690-1-songyuanzheng@huawei.com
Signed-off-by: Yuanzheng Song <songyuanzheng@huawei.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Alex Shi <alexs@kernel.org>
Cc: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 2 +-
 mm/vmpressure.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f207f98bdb76..9d96238d9c21 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1606,7 +1606,7 @@ static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure)
 		return true;
 	do {
-		if (time_before(jiffies, memcg->socket_pressure))
+		if (time_before(jiffies, READ_ONCE(memcg->socket_pressure)))
 			return true;
 	} while ((memcg = parent_mem_cgroup(memcg)));
 	return false;
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 76518e4166dc..b52644771cc4 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -308,7 +308,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
 			 * asserted for a second in which subsequent
 			 * pressure events can occur.
 			 */
-			memcg->socket_pressure = jiffies + HZ;
+			WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
 		}
 	}
 }
-- 
cgit v1.2.3


From fa27717110ae51b9b9013ced0b5143888257bb79 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Fri, 5 Nov 2021 13:43:13 -0700
Subject: memblock: drop memblock_free_early_nid() and memblock_free_early()

memblock_free_early_nid() is unused and memblock_free_early() is an
alias for memblock_free().

Replace calls to memblock_free_early() with calls to memblock_free() and
remove memblock_free_early() and memblock_free_early_nid().

Link: https://lkml.kernel.org/r/20210930185031.18648-4-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Juergen Gross <jgross@suse.com>
Cc: Shahab Vahedi <Shahab.Vahedi@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/mips/mm/init.c                  |  2 +-
 arch/powerpc/platforms/pseries/svm.c |  3 +--
 arch/s390/kernel/smp.c               |  2 +-
 drivers/base/arch_numa.c             |  2 +-
 drivers/s390/char/sclp_early.c       |  2 +-
 include/linux/memblock.h             | 12 ------------
 kernel/dma/swiotlb.c                 |  2 +-
 lib/cpumask.c                        |  2 +-
 mm/percpu.c                          |  8 ++++----
 mm/sparse.c                          |  2 +-
 10 files changed, 12 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 19347dc6bbf8..21a5a7ac0037 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -529,7 +529,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_free_early(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index 87f001b4c4e4..f12229ce7301 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -56,8 +56,7 @@ void __init svm_swiotlb_init(void)
 		return;
 
 
-	memblock_free_early(__pa(vstart),
-			    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+	memblock_free(__pa(vstart), PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	panic("SVM: Cannot allocate SWIOTLB buffer");
 }
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1a04e5bdf655..066efd6d9345 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -880,7 +880,7 @@ void __init smp_detect_cpus(void)
 
 	/* Add CPUs present at boot */
 	__smp_rescan_cpus(info, true);
-	memblock_free_early((unsigned long)info, sizeof(*info));
+	memblock_free((unsigned long)info, sizeof(*info));
 }
 
 /*
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index a6491673dc1f..ade8934764f6 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -166,7 +166,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_free_early(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index f3d5c7f4c13d..f01d942e1c1d 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -139,7 +139,7 @@ int __init sclp_early_get_core_info(struct sclp_core_info *info)
 	}
 	sclp_fill_core_info(info, sccb);
 out:
-	memblock_free_early((unsigned long)sccb, length);
+	memblock_free((unsigned long)sccb, length);
 	return rc;
 }
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 34de69b3b8ba..fc8183be340c 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -441,18 +441,6 @@ static inline void *memblock_alloc_node(phys_addr_t size,
 				      MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
-static inline void memblock_free_early(phys_addr_t base,
-					      phys_addr_t size)
-{
-	memblock_free(base, size);
-}
-
-static inline void memblock_free_early_nid(phys_addr_t base,
-						  phys_addr_t size, int nid)
-{
-	memblock_free(base, size);
-}
-
 static inline void memblock_free_late(phys_addr_t base, phys_addr_t size)
 {
 	__memblock_free_late(base, size);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 87c40517e822..430d2f78d540 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -247,7 +247,7 @@ swiotlb_init(int verbose)
 	return;
 
 fail_free_mem:
-	memblock_free_early(__pa(tlb), bytes);
+	memblock_free(__pa(tlb), bytes);
 fail:
 	pr_warn("Cannot allocate buffer");
 }
diff --git a/lib/cpumask.c b/lib/cpumask.c
index c3c76b833384..045779446a18 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -188,7 +188,7 @@ EXPORT_SYMBOL(free_cpumask_var);
  */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
-	memblock_free_early(__pa(mask), cpumask_size());
+	memblock_free(__pa(mask), cpumask_size());
 }
 #endif
 
diff --git a/mm/percpu.c b/mm/percpu.c
index e0a986818903..f58318cb04c0 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2472,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
  */
 void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 {
-	memblock_free_early(__pa(ai), ai->__ai_size);
+	memblock_free(__pa(ai), ai->__ai_size);
 }
 
 /**
@@ -3134,7 +3134,7 @@ out_free_areas:
 out_free:
 	pcpu_free_alloc_info(ai);
 	if (areas)
-		memblock_free_early(__pa(areas), areas_size);
+		memblock_free(__pa(areas), areas_size);
 	return rc;
 }
 #endif /* BUILD_EMBED_FIRST_CHUNK */
@@ -3256,7 +3256,7 @@ enomem:
 		free_fn(page_address(pages[j]), PAGE_SIZE);
 	rc = -ENOMEM;
 out_free_ar:
-	memblock_free_early(__pa(pages), pages_size);
+	memblock_free(__pa(pages), pages_size);
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
@@ -3286,7 +3286,7 @@ static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
 {
-	memblock_free_early(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/mm/sparse.c b/mm/sparse.c
index 120bc8ea5293..55fea0c2f927 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata;
 static inline void __meminit sparse_buffer_free(unsigned long size)
 {
 	WARN_ON(!sparsemap_buf || size == 0);
-	memblock_free_early(__pa(sparsemap_buf), size);
+	memblock_free(__pa(sparsemap_buf), size);
 }
 
 static void __init sparse_buffer_init(unsigned long size, int nid)
-- 
cgit v1.2.3


From 621d973901cf9fa6c6e31b31bdd36c5c5f3c9c9e Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Fri, 5 Nov 2021 13:43:16 -0700
Subject: memblock: stop aliasing __memblock_free_late with memblock_free_late

memblock_free_late() is a NOP wrapper for __memblock_free_late(), there
is no point to keep this indirection.

Drop the wrapper and rename __memblock_free_late() to
memblock_free_late().

Link: https://lkml.kernel.org/r/20210930185031.18648-5-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Juergen Gross <jgross@suse.com>
Cc: Shahab Vahedi <Shahab.Vahedi@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 7 +------
 mm/memblock.c            | 8 ++++----
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index fc8183be340c..e25f964fdd60 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -133,7 +133,7 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags,
 			  struct memblock_type *type_b, phys_addr_t *out_start,
 			  phys_addr_t *out_end, int *out_nid);
 
-void __memblock_free_late(phys_addr_t base, phys_addr_t size);
+void memblock_free_late(phys_addr_t base, phys_addr_t size);
 
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
 static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
@@ -441,11 +441,6 @@ static inline void *memblock_alloc_node(phys_addr_t size,
 				      MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
-static inline void memblock_free_late(phys_addr_t base, phys_addr_t size)
-{
-	__memblock_free_late(base, size);
-}
-
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
diff --git a/mm/memblock.c b/mm/memblock.c
index 5096500b2647..849060013d3c 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -366,14 +366,14 @@ void __init memblock_discard(void)
 		addr = __pa(memblock.reserved.regions);
 		size = PAGE_ALIGN(sizeof(struct memblock_region) *
 				  memblock.reserved.max);
-		__memblock_free_late(addr, size);
+		memblock_free_late(addr, size);
 	}
 
 	if (memblock.memory.regions != memblock_memory_init_regions) {
 		addr = __pa(memblock.memory.regions);
 		size = PAGE_ALIGN(sizeof(struct memblock_region) *
 				  memblock.memory.max);
-		__memblock_free_late(addr, size);
+		memblock_free_late(addr, size);
 	}
 
 	memblock_memory = NULL;
@@ -1589,7 +1589,7 @@ void * __init memblock_alloc_try_nid(
 }
 
 /**
- * __memblock_free_late - free pages directly to buddy allocator
+ * memblock_free_late - free pages directly to buddy allocator
  * @base: phys starting address of the  boot memory block
  * @size: size of the boot memory block in bytes
  *
@@ -1597,7 +1597,7 @@ void * __init memblock_alloc_try_nid(
  * down, but we are still initializing the system.  Pages are released directly
  * to the buddy allocator.
  */
-void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
+void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
 {
 	phys_addr_t cursor, end;
 
-- 
cgit v1.2.3


From 3ecc68349bbab6bff1d12cbc7951ca6019b2faf6 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Fri, 5 Nov 2021 13:43:19 -0700
Subject: memblock: rename memblock_free to memblock_phys_free

Since memblock_free() operates on a physical range, make its name
reflect it and rename it to memblock_phys_free(), so it will be a
logical counterpart to memblock_phys_alloc().

The callers are updated with the below semantic patch:

    @@
    expression addr;
    expression size;
    @@
    - memblock_free(addr, size);
    + memblock_phys_free(addr, size);

Link: https://lkml.kernel.org/r/20210930185031.18648-6-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Juergen Gross <jgross@suse.com>
Cc: Shahab Vahedi <Shahab.Vahedi@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_irongate.c         |  3 ++-
 arch/arc/mm/init.c                        |  2 +-
 arch/arm/mach-hisi/platmcpm.c             |  2 +-
 arch/arm/mm/init.c                        |  2 +-
 arch/arm64/mm/mmu.c                       |  4 ++--
 arch/mips/mm/init.c                       |  2 +-
 arch/mips/sgi-ip30/ip30-setup.c           |  6 +++---
 arch/powerpc/kernel/dt_cpu_ftrs.c         |  4 ++--
 arch/powerpc/kernel/paca.c                |  8 ++++----
 arch/powerpc/kernel/setup-common.c        |  2 +-
 arch/powerpc/kernel/setup_64.c            |  2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  2 +-
 arch/powerpc/platforms/pseries/svm.c      |  3 ++-
 arch/riscv/kernel/setup.c                 |  5 +++--
 arch/s390/kernel/setup.c                  |  8 ++++----
 arch/s390/kernel/smp.c                    |  4 ++--
 arch/s390/kernel/uv.c                     |  2 +-
 arch/s390/mm/kasan_init.c                 |  2 +-
 arch/sh/boards/mach-ap325rxa/setup.c      |  2 +-
 arch/sh/boards/mach-ecovec24/setup.c      |  4 ++--
 arch/sh/boards/mach-kfr2r09/setup.c       |  2 +-
 arch/sh/boards/mach-migor/setup.c         |  2 +-
 arch/sh/boards/mach-se/7724/setup.c       |  4 ++--
 arch/sparc/kernel/smp_64.c                |  2 +-
 arch/um/kernel/mem.c                      |  2 +-
 arch/x86/kernel/setup.c                   |  4 ++--
 arch/x86/mm/init.c                        |  2 +-
 arch/x86/xen/mmu_pv.c                     |  6 +++---
 arch/x86/xen/setup.c                      |  6 +++---
 drivers/base/arch_numa.c                  |  2 +-
 drivers/firmware/efi/memmap.c             |  2 +-
 drivers/of/kexec.c                        |  3 +--
 drivers/of/of_reserved_mem.c              |  5 +++--
 drivers/s390/char/sclp_early.c            |  2 +-
 drivers/usb/early/xhci-dbc.c              | 10 +++++-----
 drivers/xen/swiotlb-xen.c                 |  2 +-
 include/linux/memblock.h                  |  2 +-
 init/initramfs.c                          |  2 +-
 kernel/dma/swiotlb.c                      |  2 +-
 lib/cpumask.c                             |  2 +-
 mm/cma.c                                  |  2 +-
 mm/memblock.c                             |  8 ++++----
 mm/memory_hotplug.c                       |  2 +-
 mm/percpu.c                               |  8 ++++----
 mm/sparse.c                               |  2 +-
 45 files changed, 79 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index 72af1e72d833..ee26dcc49418 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -233,7 +233,8 @@ albacore_init_arch(void)
 			unsigned long size;
 
 			size = initrd_end - initrd_start;
-			memblock_free(__pa(initrd_start), PAGE_ALIGN(size));
+			memblock_phys_free(__pa(initrd_start),
+					   PAGE_ALIGN(size));
 			if (!move_initrd(pci_mem))
 				printk("irongate_init_arch: initrd too big "
 				       "(%ldK)\ndisabling initrd\n",
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 699ecf119641..59408f6a02d4 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -173,7 +173,7 @@ static void __init highmem_init(void)
 #ifdef CONFIG_HIGHMEM
 	unsigned long tmp;
 
-	memblock_free(high_mem_start, high_mem_sz);
+	memblock_phys_free(high_mem_start, high_mem_sz);
 	for (tmp = min_high_pfn; tmp < max_high_pfn; tmp++)
 		free_highmem_page(pfn_to_page(tmp));
 #endif
diff --git a/arch/arm/mach-hisi/platmcpm.c b/arch/arm/mach-hisi/platmcpm.c
index 96a484095194..258586e31333 100644
--- a/arch/arm/mach-hisi/platmcpm.c
+++ b/arch/arm/mach-hisi/platmcpm.c
@@ -339,7 +339,7 @@ err_fabric:
 err_sysctrl:
 	iounmap(relocation);
 err_reloc:
-	memblock_free(hip04_boot_method[0], hip04_boot_method[1]);
+	memblock_phys_free(hip04_boot_method[0], hip04_boot_method[1]);
 err:
 	return ret;
 }
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 6162a070a410..6d0cb0f7bc54 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -158,7 +158,7 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align)
 		panic("Failed to steal %pa bytes at %pS\n",
 		      &size, (void *)_RET_IP_);
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 
 	return phys;
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index cfd9deb347c3..f68c2d953617 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -738,8 +738,8 @@ void __init paging_init(void)
 	cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 	init_mm.pgd = swapper_pg_dir;
 
-	memblock_free(__pa_symbol(init_pg_dir),
-		      __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
+	memblock_phys_free(__pa_symbol(init_pg_dir),
+			   __pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
 
 	memblock_allow_resize();
 }
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 21a5a7ac0037..3be1c29084fa 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -529,7 +529,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_free(__pa(ptr), size);
+	memblock_phys_free(__pa(ptr), size);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/arch/mips/sgi-ip30/ip30-setup.c b/arch/mips/sgi-ip30/ip30-setup.c
index 44b1607e964d..75a34684e704 100644
--- a/arch/mips/sgi-ip30/ip30-setup.c
+++ b/arch/mips/sgi-ip30/ip30-setup.c
@@ -69,10 +69,10 @@ static void __init ip30_mem_init(void)
 		total_mem += size;
 
 		if (addr >= IP30_REAL_MEMORY_START)
-			memblock_free(addr, size);
+			memblock_phys_free(addr, size);
 		else if ((addr + size) > IP30_REAL_MEMORY_START)
-			memblock_free(IP30_REAL_MEMORY_START,
-				     size - IP30_MAX_PROM_MEMORY);
+			memblock_phys_free(IP30_REAL_MEMORY_START,
+					   size - IP30_MAX_PROM_MEMORY);
 	}
 	pr_info("Detected %luMB of physical memory.\n", MEM_SHIFT(total_mem));
 }
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 358aee7c2d79..42839d6bd486 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -1095,8 +1095,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
 
 	cpufeatures_setup_finished();
 
-	memblock_free(__pa(dt_cpu_features),
-			sizeof(struct dt_cpu_feature)*nr_dt_cpu_features);
+	memblock_phys_free(__pa(dt_cpu_features),
+			   sizeof(struct dt_cpu_feature) * nr_dt_cpu_features);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 9bd30cac852b..4208b4044d12 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -322,8 +322,8 @@ void __init free_unused_pacas(void)
 
 	new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
 	if (new_ptrs_size < paca_ptrs_size)
-		memblock_free(__pa(paca_ptrs) + new_ptrs_size,
-					paca_ptrs_size - new_ptrs_size);
+		memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size,
+				   paca_ptrs_size - new_ptrs_size);
 
 	paca_nr_cpu_ids = nr_cpu_ids;
 	paca_ptrs_size = new_ptrs_size;
@@ -331,8 +331,8 @@ void __init free_unused_pacas(void)
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (early_radix_enabled()) {
 		/* Ugly fixup, see new_slb_shadow() */
-		memblock_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
-				sizeof(struct slb_shadow));
+		memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+				   sizeof(struct slb_shadow));
 		paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
 	}
 #endif
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index b1e43b69a559..5af8993a8e6d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -825,7 +825,7 @@ static void __init smp_setup_pacas(void)
 		set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
 	}
 
-	memblock_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+	memblock_phys_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
 	cpu_to_phys_id = NULL;
 }
 #endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index eaa79a0996d1..75bc294ac40d 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -812,7 +812,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 
 static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	memblock_free(__pa(ptr), size);
+	memblock_phys_free(__pa(ptr), size);
 }
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3dd35c327d1c..b5a9d343b720 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2981,7 +2981,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	if (!phb->hose) {
 		pr_err("  Can't allocate PCI controller for %pOF\n",
 		       np);
-		memblock_free(__pa(phb), sizeof(struct pnv_phb));
+		memblock_phys_free(__pa(phb), sizeof(struct pnv_phb));
 		return;
 	}
 
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index f12229ce7301..b7c017bb40f7 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -56,7 +56,8 @@ void __init svm_swiotlb_init(void)
 		return;
 
 
-	memblock_free(__pa(vstart), PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+	memblock_phys_free(__pa(vstart),
+			   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	panic("SVM: Cannot allocate SWIOTLB buffer");
 }
 
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index b9620e5f00ba..6ea7c53b82cd 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -230,13 +230,14 @@ static void __init init_resources(void)
 
 	/* Clean-up any unused pre-allocated resources */
 	if (res_idx >= 0)
-		memblock_free(__pa(mem_res), (res_idx + 1) * sizeof(*mem_res));
+		memblock_phys_free(__pa(mem_res),
+				   (res_idx + 1) * sizeof(*mem_res));
 	return;
 
  error:
 	/* Better an empty resource tree than an inconsistent one */
 	release_child_resources(&iomem_resource);
-	memblock_free(__pa(mem_res), mem_res_sz);
+	memblock_phys_free(__pa(mem_res), mem_res_sz);
 }
 
 
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 67e5fff96ee0..7fc836e9e194 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -693,7 +693,7 @@ static void __init reserve_crashkernel(void)
 	}
 
 	if (register_memory_notifier(&kdump_mem_nb)) {
-		memblock_free(crash_base, crash_size);
+		memblock_phys_free(crash_base, crash_size);
 		return;
 	}
 
@@ -748,7 +748,7 @@ static void __init free_mem_detect_info(void)
 
 	get_mem_detect_reserved(&start, &size);
 	if (size)
-		memblock_free(start, size);
+		memblock_phys_free(start, size);
 }
 
 static const char * __init get_mem_info_source(void)
@@ -793,7 +793,7 @@ static void __init check_initrd(void)
 	if (initrd_data.start && initrd_data.size &&
 	    !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
 		pr_err("The initial RAM disk does not fit into the memory\n");
-		memblock_free(initrd_data.start, initrd_data.size);
+		memblock_phys_free(initrd_data.start, initrd_data.size);
 		initrd_start = initrd_end = 0;
 	}
 #endif
@@ -890,7 +890,7 @@ static void __init setup_randomness(void)
 
 	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
 		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
-	memblock_free((unsigned long) vmms, PAGE_SIZE);
+	memblock_phys_free((unsigned long)vmms, PAGE_SIZE);
 }
 
 /*
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 066efd6d9345..78a8ea6fd582 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -723,7 +723,7 @@ void __init smp_save_dump_cpus(void)
 			/* Get the CPU registers */
 			smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
 	}
-	memblock_free(page, PAGE_SIZE);
+	memblock_phys_free(page, PAGE_SIZE);
 	diag_amode31_ops.diag308_reset();
 	pcpu_set_smt(0);
 }
@@ -880,7 +880,7 @@ void __init smp_detect_cpus(void)
 
 	/* Add CPUs present at boot */
 	__smp_rescan_cpus(info, true);
-	memblock_free((unsigned long)info, sizeof(*info));
+	memblock_phys_free((unsigned long)info, sizeof(*info));
 }
 
 /*
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 5a656c7b7a67..d57457b16fe5 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -64,7 +64,7 @@ void __init setup_uv(void)
 	}
 
 	if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {
-		memblock_free(uv_stor_base, uv_info.uv_base_stor_len);
+		memblock_phys_free(uv_stor_base, uv_info.uv_base_stor_len);
 		goto fail;
 	}
 
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 3e4735168019..483b9dbe0970 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -399,5 +399,5 @@ void __init kasan_copy_shadow_mapping(void)
 
 void __init kasan_free_early_identity(void)
 {
-	memblock_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
+	memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
 }
diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c
index bac8a058ebd7..c77b5f00a66a 100644
--- a/arch/sh/boards/mach-ap325rxa/setup.c
+++ b/arch/sh/boards/mach-ap325rxa/setup.c
@@ -560,7 +560,7 @@ static void __init ap325rxa_mv_mem_reserve(void)
 	if (!phys)
 		panic("Failed to allocate CEU memory\n");
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 
 	ceu_dma_membase = phys;
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index bab91a99124e..2b22ce792147 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -1502,7 +1502,7 @@ static void __init ecovec_mv_mem_reserve(void)
 	if (!phys)
 		panic("Failed to allocate CEU0 memory\n");
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 	ceu0_dma_membase = phys;
 
@@ -1510,7 +1510,7 @@ static void __init ecovec_mv_mem_reserve(void)
 	if (!phys)
 		panic("Failed to allocate CEU1 memory\n");
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 	ceu1_dma_membase = phys;
 }
diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c
index eeb5ce341efd..20f4db778ed6 100644
--- a/arch/sh/boards/mach-kfr2r09/setup.c
+++ b/arch/sh/boards/mach-kfr2r09/setup.c
@@ -633,7 +633,7 @@ static void __init kfr2r09_mv_mem_reserve(void)
 	if (!phys)
 		panic("Failed to allocate CEU memory\n");
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 
 	ceu_dma_membase = phys;
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 6703a2122c0d..f60061283c48 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -633,7 +633,7 @@ static void __init migor_mv_mem_reserve(void)
 	if (!phys)
 		panic("Failed to allocate CEU memory\n");
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 
 	ceu_dma_membase = phys;
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index 8d6541ba0186..8bbf5a6aa423 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -966,7 +966,7 @@ static void __init ms7724se_mv_mem_reserve(void)
 	if (!phys)
 		panic("Failed to allocate CEU0 memory\n");
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 	ceu0_dma_membase = phys;
 
@@ -974,7 +974,7 @@ static void __init ms7724se_mv_mem_reserve(void)
 	if (!phys)
 		panic("Failed to allocate CEU1 memory\n");
 
-	memblock_free(phys, size);
+	memblock_phys_free(phys, size);
 	memblock_remove(phys, size);
 	ceu1_dma_membase = phys;
 }
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 0224d8f19ed6..2507549538df 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1567,7 +1567,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 
 static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	memblock_free(__pa(ptr), size);
+	memblock_phys_free(__pa(ptr), size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 8e636ce02949..d1710ebb44f4 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -47,7 +47,7 @@ void __init mem_init(void)
 	 */
 	brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
 	map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
-	memblock_free(__pa(brk_end), uml_reserved - brk_end);
+	memblock_phys_free(__pa(brk_end), uml_reserved - brk_end);
 	uml_reserved = brk_end;
 
 	/* this will put all low memory onto the freelists */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 40ed44ead063..49b596db5631 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -322,7 +322,7 @@ static void __init reserve_initrd(void)
 
 	relocate_initrd();
 
-	memblock_free(ramdisk_image, ramdisk_end - ramdisk_image);
+	memblock_phys_free(ramdisk_image, ramdisk_end - ramdisk_image);
 }
 
 #else
@@ -521,7 +521,7 @@ static void __init reserve_crashkernel(void)
 	}
 
 	if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
-		memblock_free(crash_base, crash_size);
+		memblock_phys_free(crash_base, crash_size);
 		return;
 	}
 
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 23a14d82e783..1895986842b9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -618,7 +618,7 @@ static void __init memory_map_top_down(unsigned long map_start,
 	 */
 	addr = memblock_phys_alloc_range(PMD_SIZE, PMD_SIZE, map_start,
 					 map_end);
-	memblock_free(addr, PMD_SIZE);
+	memblock_phys_free(addr, PMD_SIZE);
 	real_end = addr + PMD_SIZE;
 
 	/* step_size need to be small so pgt_buf from BRK could cover it */
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 3359c23573c5..676d8d292f8a 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1025,7 +1025,7 @@ static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
 	for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
 		make_lowmem_page_readwrite(vaddr);
 
-	memblock_free(paddr, size);
+	memblock_phys_free(paddr, size);
 }
 
 static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
@@ -1151,7 +1151,7 @@ static void __init xen_pagetable_p2m_free(void)
 		xen_cleanhighmap(addr, addr + size);
 		size = PAGE_ALIGN(xen_start_info->nr_pages *
 				  sizeof(unsigned long));
-		memblock_free(__pa(addr), size);
+		memblock_phys_free(__pa(addr), size);
 	} else {
 		xen_cleanmfnmap(addr);
 	}
@@ -1955,7 +1955,7 @@ void __init xen_relocate_p2m(void)
 		pfn_end = p2m_pfn_end;
 	}
 
-	memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
+	memblock_phys_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
 	while (pfn < pfn_end) {
 		if (pfn == p2m_pfn) {
 			pfn = p2m_pfn_end;
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 8bfc10330107..f387fc7e5250 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -153,7 +153,7 @@ static void __init xen_del_extra_mem(unsigned long start_pfn,
 			break;
 		}
 	}
-	memblock_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
+	memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
 }
 
 /*
@@ -719,7 +719,7 @@ static void __init xen_reserve_xen_mfnlist(void)
 		return;
 
 	xen_relocate_p2m();
-	memblock_free(start, size);
+	memblock_phys_free(start, size);
 }
 
 /**
@@ -885,7 +885,7 @@ char * __init xen_memory_setup(void)
 		xen_phys_memcpy(new_area, start, size);
 		pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
 			start, start + size, new_area, new_area + size);
-		memblock_free(start, size);
+		memblock_phys_free(start, size);
 		boot_params.hdr.ramdisk_image = new_area;
 		boot_params.ext_ramdisk_image = new_area >> 32;
 	}
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index ade8934764f6..712edef03929 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -166,7 +166,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_free(__pa(ptr), size);
+	memblock_phys_free(__pa(ptr), size);
 }
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index 2ff1883dc788..4df55a55da84 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -35,7 +35,7 @@ void __init __efi_memmap_free(u64 phys, unsigned long size, unsigned long flags)
 		if (slab_is_available())
 			memblock_free_late(phys, size);
 		else
-			memblock_free(phys, size);
+			memblock_phys_free(phys, size);
 	} else if (flags & EFI_MEMMAP_SLAB) {
 		struct page *p = pfn_to_page(PHYS_PFN(phys));
 		unsigned int order = get_order(size);
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index 053e241f593c..b9bd1cff1793 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -171,8 +171,7 @@ int ima_free_kexec_buffer(void)
 	if (ret)
 		return ret;
 
-	return memblock_free(addr, size);
-
+	return memblock_phys_free(addr, size);
 }
 
 /**
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 9da8835ba5a5..9c0fb962c22b 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -46,7 +46,7 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 	if (nomap) {
 		err = memblock_mark_nomap(base, size);
 		if (err)
-			memblock_free(base, size);
+			memblock_phys_free(base, size);
 		kmemleak_ignore_phys(base);
 	}
 
@@ -284,7 +284,8 @@ void __init fdt_init_reserved_mem(void)
 				if (nomap)
 					memblock_clear_nomap(rmem->base, rmem->size);
 				else
-					memblock_free(rmem->base, rmem->size);
+					memblock_phys_free(rmem->base,
+							   rmem->size);
 			}
 		}
 	}
diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c
index f01d942e1c1d..c0052655fc4f 100644
--- a/drivers/s390/char/sclp_early.c
+++ b/drivers/s390/char/sclp_early.c
@@ -139,7 +139,7 @@ int __init sclp_early_get_core_info(struct sclp_core_info *info)
 	}
 	sclp_fill_core_info(info, sccb);
 out:
-	memblock_free((unsigned long)sccb, length);
+	memblock_phys_free((unsigned long)sccb, length);
 	return rc;
 }
 
diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index be4ecbabdd58..933d77ad0a64 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -185,7 +185,7 @@ static void __init xdbc_free_ring(struct xdbc_ring *ring)
 	if (!seg)
 		return;
 
-	memblock_free(seg->dma, PAGE_SIZE);
+	memblock_phys_free(seg->dma, PAGE_SIZE);
 	ring->segment = NULL;
 }
 
@@ -665,10 +665,10 @@ int __init early_xdbc_setup_hardware(void)
 		xdbc_free_ring(&xdbc.in_ring);
 
 		if (xdbc.table_dma)
-			memblock_free(xdbc.table_dma, PAGE_SIZE);
+			memblock_phys_free(xdbc.table_dma, PAGE_SIZE);
 
 		if (xdbc.out_dma)
-			memblock_free(xdbc.out_dma, PAGE_SIZE);
+			memblock_phys_free(xdbc.out_dma, PAGE_SIZE);
 
 		xdbc.table_base = NULL;
 		xdbc.out_buf = NULL;
@@ -987,8 +987,8 @@ free_and_quit:
 	xdbc_free_ring(&xdbc.evt_ring);
 	xdbc_free_ring(&xdbc.out_ring);
 	xdbc_free_ring(&xdbc.in_ring);
-	memblock_free(xdbc.table_dma, PAGE_SIZE);
-	memblock_free(xdbc.out_dma, PAGE_SIZE);
+	memblock_phys_free(xdbc.table_dma, PAGE_SIZE);
+	memblock_phys_free(xdbc.out_dma, PAGE_SIZE);
 	writel(0, &xdbc.xdbc_reg->control);
 	early_iounmap(xdbc.xhci_base, xdbc.xhci_length);
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index e56a5faac395..4b671cc0a7ea 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -241,7 +241,7 @@ retry:
 	 */
 	rc = xen_swiotlb_fixup(start, nslabs);
 	if (rc) {
-		memblock_free(__pa(start), PAGE_ALIGN(bytes));
+		memblock_phys_free(__pa(start), PAGE_ALIGN(bytes));
 		if (nslabs > 1024 && repeat--) {
 			/* Min is 2MB */
 			nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e25f964fdd60..d32d41709513 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -103,7 +103,7 @@ void memblock_allow_resize(void);
 int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
 int memblock_add(phys_addr_t base, phys_addr_t size);
 int memblock_remove(phys_addr_t base, phys_addr_t size);
-int memblock_free(phys_addr_t base, phys_addr_t size);
+int memblock_phys_free(phys_addr_t base, phys_addr_t size);
 int memblock_reserve(phys_addr_t base, phys_addr_t size);
 #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
 int memblock_physmem_add(phys_addr_t base, phys_addr_t size);
diff --git a/init/initramfs.c b/init/initramfs.c
index a842c0544745..1a971f070dd4 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -607,7 +607,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
 	unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
 	unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
 
-	memblock_free(__pa(aligned_start), aligned_end - aligned_start);
+	memblock_phys_free(__pa(aligned_start), aligned_end - aligned_start);
 #endif
 
 	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 430d2f78d540..b9fa173e5e56 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -247,7 +247,7 @@ swiotlb_init(int verbose)
 	return;
 
 fail_free_mem:
-	memblock_free(__pa(tlb), bytes);
+	memblock_phys_free(__pa(tlb), bytes);
 fail:
 	pr_warn("Cannot allocate buffer");
 }
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 045779446a18..a90786b77c1c 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -188,7 +188,7 @@ EXPORT_SYMBOL(free_cpumask_var);
  */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
-	memblock_free(__pa(mask), cpumask_size());
+	memblock_phys_free(__pa(mask), cpumask_size());
 }
 #endif
 
diff --git a/mm/cma.c b/mm/cma.c
index 11152c3fb23c..bc9ca8f3c487 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -378,7 +378,7 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
 	return 0;
 
 free_mem:
-	memblock_free(base, size);
+	memblock_phys_free(base, size);
 err:
 	pr_err("Failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M);
 	return ret;
diff --git a/mm/memblock.c b/mm/memblock.c
index 849060013d3c..52e34abc4abe 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -806,18 +806,18 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
 void __init_memblock memblock_free_ptr(void *ptr, size_t size)
 {
 	if (ptr)
-		memblock_free(__pa(ptr), size);
+		memblock_phys_free(__pa(ptr), size);
 }
 
 /**
- * memblock_free - free boot memory block
+ * memblock_phys_free - free boot memory block
  * @base: phys starting address of the  boot memory block
  * @size: size of the boot memory block in bytes
  *
  * Free boot memory block previously allocated by memblock_alloc_xx() API.
  * The freeing memory will not be released to the buddy allocator.
  */
-int __init_memblock memblock_free(phys_addr_t base, phys_addr_t size)
+int __init_memblock memblock_phys_free(phys_addr_t base, phys_addr_t size)
 {
 	phys_addr_t end = base + size - 1;
 
@@ -1937,7 +1937,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * memmap array.
 	 */
 	if (pg < pgend)
-		memblock_free(pg, pgend - pg);
+		memblock_phys_free(pg, pgend - pg);
 }
 
 /*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9fd0be32a281..feffaa9423fe 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -2204,7 +2204,7 @@ static int __ref try_remove_memory(u64 start, u64 size)
 	arch_remove_memory(start, size, altmap);
 
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
-		memblock_free(start, size);
+		memblock_phys_free(start, size);
 		memblock_remove(start, size);
 	}
 
diff --git a/mm/percpu.c b/mm/percpu.c
index f58318cb04c0..d65ddf6f2a35 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2472,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
  */
 void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 {
-	memblock_free(__pa(ai), ai->__ai_size);
+	memblock_phys_free(__pa(ai), ai->__ai_size);
 }
 
 /**
@@ -3134,7 +3134,7 @@ out_free_areas:
 out_free:
 	pcpu_free_alloc_info(ai);
 	if (areas)
-		memblock_free(__pa(areas), areas_size);
+		memblock_phys_free(__pa(areas), areas_size);
 	return rc;
 }
 #endif /* BUILD_EMBED_FIRST_CHUNK */
@@ -3256,7 +3256,7 @@ enomem:
 		free_fn(page_address(pages[j]), PAGE_SIZE);
 	rc = -ENOMEM;
 out_free_ar:
-	memblock_free(__pa(pages), pages_size);
+	memblock_phys_free(__pa(pages), pages_size);
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
@@ -3286,7 +3286,7 @@ static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
 {
-	memblock_free(__pa(ptr), size);
+	memblock_phys_free(__pa(ptr), size);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/mm/sparse.c b/mm/sparse.c
index 55fea0c2f927..fc3ab8d3b6bc 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata;
 static inline void __meminit sparse_buffer_free(unsigned long size)
 {
 	WARN_ON(!sparsemap_buf || size == 0);
-	memblock_free(__pa(sparsemap_buf), size);
+	memblock_phys_free(__pa(sparsemap_buf), size);
 }
 
 static void __init sparse_buffer_init(unsigned long size, int nid)
-- 
cgit v1.2.3


From 4421cca0a3e4833b3bf0f20de98eb580ab8c7290 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.ibm.com>
Date: Fri, 5 Nov 2021 13:43:22 -0700
Subject: memblock: use memblock_free for freeing virtual pointers

Rename memblock_free_ptr() to memblock_free() and use memblock_free()
when freeing a virtual pointer so that memblock_free() will be a
counterpart of memblock_alloc()

The callers are updated with the below semantic patch and manual
addition of (void *) casting to pointers that are represented by
unsigned long variables.

    @@
    identifier vaddr;
    expression size;
    @@
    (
    - memblock_phys_free(__pa(vaddr), size);
    + memblock_free(vaddr, size);
    |
    - memblock_free_ptr(vaddr, size);
    + memblock_free(vaddr, size);
    )

[sfr@canb.auug.org.au: fixup]
  Link: https://lkml.kernel.org/r/20211018192940.3d1d532f@canb.auug.org.au

Link: https://lkml.kernel.org/r/20210930185031.18648-7-rppt@kernel.org
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Juergen Gross <jgross@suse.com>
Cc: Shahab Vahedi <Shahab.Vahedi@synopsys.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_irongate.c         | 3 +--
 arch/mips/mm/init.c                       | 2 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c         | 4 ++--
 arch/powerpc/kernel/setup-common.c        | 2 +-
 arch/powerpc/kernel/setup_64.c            | 2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c | 2 +-
 arch/powerpc/platforms/pseries/svm.c      | 3 +--
 arch/riscv/kernel/setup.c                 | 5 ++---
 arch/sparc/kernel/smp_64.c                | 2 +-
 arch/um/kernel/mem.c                      | 2 +-
 arch/x86/kernel/setup_percpu.c            | 2 +-
 arch/x86/mm/kasan_init_64.c               | 4 ++--
 arch/x86/mm/numa.c                        | 2 +-
 arch/x86/mm/numa_emulation.c              | 2 +-
 arch/x86/xen/mmu_pv.c                     | 2 +-
 arch/x86/xen/p2m.c                        | 2 +-
 drivers/base/arch_numa.c                  | 4 ++--
 drivers/macintosh/smu.c                   | 2 +-
 drivers/xen/swiotlb-xen.c                 | 2 +-
 include/linux/memblock.h                  | 2 +-
 init/initramfs.c                          | 2 +-
 init/main.c                               | 4 ++--
 kernel/dma/swiotlb.c                      | 2 +-
 kernel/printk/printk.c                    | 4 ++--
 lib/bootconfig.c                          | 2 +-
 lib/cpumask.c                             | 2 +-
 mm/memblock.c                             | 6 +++---
 mm/percpu.c                               | 8 ++++----
 mm/sparse.c                               | 2 +-
 29 files changed, 40 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index ee26dcc49418..6b8ed12936b6 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -233,8 +233,7 @@ albacore_init_arch(void)
 			unsigned long size;
 
 			size = initrd_end - initrd_start;
-			memblock_phys_free(__pa(initrd_start),
-					   PAGE_ALIGN(size));
+			memblock_free((void *)initrd_start, PAGE_ALIGN(size));
 			if (!move_initrd(pci_mem))
 				printk("irongate_init_arch: initrd too big "
 				       "(%ldK)\ndisabling initrd\n",
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 3be1c29084fa..325e1552cbea 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -529,7 +529,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_phys_free(__pa(ptr), size);
+	memblock_free(ptr, size);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 42839d6bd486..ba527fb52993 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -1095,8 +1095,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
 
 	cpufeatures_setup_finished();
 
-	memblock_phys_free(__pa(dt_cpu_features),
-			   sizeof(struct dt_cpu_feature) * nr_dt_cpu_features);
+	memblock_free(dt_cpu_features,
+		      sizeof(struct dt_cpu_feature) * nr_dt_cpu_features);
 
 	return 0;
 }
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 5af8993a8e6d..6b1338db8779 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -825,7 +825,7 @@ static void __init smp_setup_pacas(void)
 		set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
 	}
 
-	memblock_phys_free(__pa(cpu_to_phys_id), nr_cpu_ids * sizeof(u32));
+	memblock_free(cpu_to_phys_id, nr_cpu_ids * sizeof(u32));
 	cpu_to_phys_id = NULL;
 }
 #endif
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 75bc294ac40d..1777e992b20b 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -812,7 +812,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 
 static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	memblock_phys_free(__pa(ptr), size);
+	memblock_free(ptr, size);
 }
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b5a9d343b720..004cd6a96c8a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2981,7 +2981,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	if (!phb->hose) {
 		pr_err("  Can't allocate PCI controller for %pOF\n",
 		       np);
-		memblock_phys_free(__pa(phb), sizeof(struct pnv_phb));
+		memblock_free(phb, sizeof(struct pnv_phb));
 		return;
 	}
 
diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c
index b7c017bb40f7..6332365d2891 100644
--- a/arch/powerpc/platforms/pseries/svm.c
+++ b/arch/powerpc/platforms/pseries/svm.c
@@ -56,8 +56,7 @@ void __init svm_swiotlb_init(void)
 		return;
 
 
-	memblock_phys_free(__pa(vstart),
-			   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+	memblock_free(vstart, PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
 	panic("SVM: Cannot allocate SWIOTLB buffer");
 }
 
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 6ea7c53b82cd..b42bfdc67482 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -230,14 +230,13 @@ static void __init init_resources(void)
 
 	/* Clean-up any unused pre-allocated resources */
 	if (res_idx >= 0)
-		memblock_phys_free(__pa(mem_res),
-				   (res_idx + 1) * sizeof(*mem_res));
+		memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res));
 	return;
 
  error:
 	/* Better an empty resource tree than an inconsistent one */
 	release_child_resources(&iomem_resource);
-	memblock_phys_free(__pa(mem_res), mem_res_sz);
+	memblock_free(mem_res, mem_res_sz);
 }
 
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 2507549538df..b98a7bbe6728 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1567,7 +1567,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 
 static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	memblock_phys_free(__pa(ptr), size);
+	memblock_free(ptr, size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index d1710ebb44f4..0039771eb01c 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -47,7 +47,7 @@ void __init mem_init(void)
 	 */
 	brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
 	map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
-	memblock_phys_free(__pa(brk_end), uml_reserved - brk_end);
+	memblock_free((void *)brk_end, uml_reserved - brk_end);
 	uml_reserved = brk_end;
 
 	/* this will put all low memory onto the freelists */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5afd98559193..7b65275544b2 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -135,7 +135,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_free_ptr(ptr, size);
+	memblock_free(ptr, size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index ef885370719a..e7b9b464a82f 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -49,7 +49,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
 			p = early_alloc(PMD_SIZE, nid, false);
 			if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
 				return;
-			memblock_free_ptr(p, PMD_SIZE);
+			memblock_free(p, PMD_SIZE);
 		}
 
 		p = early_alloc(PAGE_SIZE, nid, true);
@@ -85,7 +85,7 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
 			p = early_alloc(PUD_SIZE, nid, false);
 			if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
 				return;
-			memblock_free_ptr(p, PUD_SIZE);
+			memblock_free(p, PUD_SIZE);
 		}
 
 		p = early_alloc(PAGE_SIZE, nid, true);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 1e9b93b088db..c6b1213086d6 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -355,7 +355,7 @@ void __init numa_reset_distance(void)
 
 	/* numa_distance could be 1LU marking allocation failure, test cnt */
 	if (numa_distance_cnt)
-		memblock_free_ptr(numa_distance, size);
+		memblock_free(numa_distance, size);
 	numa_distance_cnt = 0;
 	numa_distance = NULL;	/* enable table creation */
 }
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index e801e30089c4..1a02b791d273 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -517,7 +517,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
 	}
 
 	/* free the copied physical distance table */
-	memblock_free_ptr(phys_dist, phys_size);
+	memblock_free(phys_dist, phys_size);
 	return;
 
 no_emu:
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 676d8d292f8a..173de1e29bda 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1151,7 +1151,7 @@ static void __init xen_pagetable_p2m_free(void)
 		xen_cleanhighmap(addr, addr + size);
 		size = PAGE_ALIGN(xen_start_info->nr_pages *
 				  sizeof(unsigned long));
-		memblock_phys_free(__pa(addr), size);
+		memblock_free((void *)addr, size);
 	} else {
 		xen_cleanmfnmap(addr);
 	}
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 141bb9dbd2fb..58db86f7b384 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -197,7 +197,7 @@ static void * __ref alloc_p2m_page(void)
 static void __ref free_p2m_page(void *p)
 {
 	if (unlikely(!slab_is_available())) {
-		memblock_free_ptr(p, PAGE_SIZE);
+		memblock_free(p, PAGE_SIZE);
 		return;
 	}
 
diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
index 712edef03929..bc1876915457 100644
--- a/drivers/base/arch_numa.c
+++ b/drivers/base/arch_numa.c
@@ -166,7 +166,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	memblock_phys_free(__pa(ptr), size);
+	memblock_free(ptr, size);
 }
 
 #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
@@ -326,7 +326,7 @@ void __init numa_free_distance(void)
 	size = numa_distance_cnt * numa_distance_cnt *
 		sizeof(numa_distance[0]);
 
-	memblock_free_ptr(numa_distance, size);
+	memblock_free(numa_distance, size);
 	numa_distance_cnt = 0;
 	numa_distance = NULL;
 }
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index fe63d5ee201b..f62152111236 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -570,7 +570,7 @@ fail_msg_node:
 fail_db_node:
 	of_node_put(smu->db_node);
 fail_bootmem:
-	memblock_free_ptr(smu, sizeof(struct smu_device));
+	memblock_free(smu, sizeof(struct smu_device));
 	smu = NULL;
 fail_np:
 	of_node_put(np);
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 4b671cc0a7ea..f083194e2634 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -241,7 +241,7 @@ retry:
 	 */
 	rc = xen_swiotlb_fixup(start, nslabs);
 	if (rc) {
-		memblock_phys_free(__pa(start), PAGE_ALIGN(bytes));
+		memblock_free(start, PAGE_ALIGN(bytes));
 		if (nslabs > 1024 && repeat--) {
 			/* Min is 2MB */
 			nslabs = max(1024UL, ALIGN(nslabs >> 1, IO_TLB_SEGSIZE));
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index d32d41709513..484650681bee 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -118,7 +118,7 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 
 void memblock_free_all(void);
-void memblock_free_ptr(void *ptr, size_t size);
+void memblock_free(void *ptr, size_t size);
 void reset_node_managed_pages(pg_data_t *pgdat);
 void reset_all_zones_managed_pages(void);
 
diff --git a/init/initramfs.c b/init/initramfs.c
index 1a971f070dd4..2f3d96dc3db6 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -607,7 +607,7 @@ void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
 	unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE);
 	unsigned long aligned_end = ALIGN(end, PAGE_SIZE);
 
-	memblock_phys_free(__pa(aligned_start), aligned_end - aligned_start);
+	memblock_free((void *)aligned_start, aligned_end - aligned_start);
 #endif
 
 	free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
diff --git a/init/main.c b/init/main.c
index 767ee2672176..f0001af8ebb9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -382,7 +382,7 @@ static char * __init xbc_make_cmdline(const char *key)
 	ret = xbc_snprint_cmdline(new_cmdline, len + 1, root);
 	if (ret < 0 || ret > len) {
 		pr_err("Failed to print extra kernel cmdline.\n");
-		memblock_free_ptr(new_cmdline, len + 1);
+		memblock_free(new_cmdline, len + 1);
 		return NULL;
 	}
 
@@ -925,7 +925,7 @@ static void __init print_unknown_bootoptions(void)
 		end += sprintf(end, " %s", *p);
 
 	pr_notice("Unknown command line parameters:%s\n", unknown_options);
-	memblock_free_ptr(unknown_options, len);
+	memblock_free(unknown_options, len);
 }
 
 asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index b9fa173e5e56..02656d7ccbfd 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -247,7 +247,7 @@ swiotlb_init(int verbose)
 	return;
 
 fail_free_mem:
-	memblock_phys_free(__pa(tlb), bytes);
+	memblock_free(tlb, bytes);
 fail:
 	pr_warn("Cannot allocate buffer");
 }
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a8d0a58deebc..2cae1bfa6be7 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1166,9 +1166,9 @@ void __init setup_log_buf(int early)
 	return;
 
 err_free_descs:
-	memblock_free_ptr(new_descs, new_descs_size);
+	memblock_free(new_descs, new_descs_size);
 err_free_log_buf:
-	memblock_free_ptr(new_log_buf, new_log_buf_len);
+	memblock_free(new_log_buf, new_log_buf_len);
 }
 
 static bool __read_mostly ignore_loglevel;
diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 5ae248b29373..547558d80e64 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -792,7 +792,7 @@ void __init xbc_destroy_all(void)
 	xbc_data = NULL;
 	xbc_data_size = 0;
 	xbc_node_num = 0;
-	memblock_free_ptr(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
+	memblock_free(xbc_nodes, sizeof(struct xbc_node) * XBC_NODE_MAX);
 	xbc_nodes = NULL;
 	brace_index = 0;
 }
diff --git a/lib/cpumask.c b/lib/cpumask.c
index a90786b77c1c..a971a82d2f43 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -188,7 +188,7 @@ EXPORT_SYMBOL(free_cpumask_var);
  */
 void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 {
-	memblock_phys_free(__pa(mask), cpumask_size());
+	memblock_free(mask, cpumask_size());
 }
 #endif
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 52e34abc4abe..fb0c7f48e627 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -472,7 +472,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 		kfree(old_array);
 	else if (old_array != memblock_memory_init_regions &&
 		 old_array != memblock_reserved_init_regions)
-		memblock_free_ptr(old_array, old_alloc_size);
+		memblock_free(old_array, old_alloc_size);
 
 	/*
 	 * Reserve the new array if that comes from the memblock.  Otherwise, we
@@ -796,14 +796,14 @@ int __init_memblock memblock_remove(phys_addr_t base, phys_addr_t size)
 }
 
 /**
- * memblock_free_ptr - free boot memory allocation
+ * memblock_free - free boot memory allocation
  * @ptr: starting address of the  boot memory allocation
  * @size: size of the boot memory block in bytes
  *
  * Free boot memory block previously allocated by memblock_alloc_xx() API.
  * The freeing memory will not be released to the buddy allocator.
  */
-void __init_memblock memblock_free_ptr(void *ptr, size_t size)
+void __init_memblock memblock_free(void *ptr, size_t size)
 {
 	if (ptr)
 		memblock_phys_free(__pa(ptr), size);
diff --git a/mm/percpu.c b/mm/percpu.c
index d65ddf6f2a35..f5b2c2ea5a54 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -2472,7 +2472,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
  */
 void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai)
 {
-	memblock_phys_free(__pa(ai), ai->__ai_size);
+	memblock_free(ai, ai->__ai_size);
 }
 
 /**
@@ -3134,7 +3134,7 @@ out_free_areas:
 out_free:
 	pcpu_free_alloc_info(ai);
 	if (areas)
-		memblock_phys_free(__pa(areas), areas_size);
+		memblock_free(areas, areas_size);
 	return rc;
 }
 #endif /* BUILD_EMBED_FIRST_CHUNK */
@@ -3256,7 +3256,7 @@ enomem:
 		free_fn(page_address(pages[j]), PAGE_SIZE);
 	rc = -ENOMEM;
 out_free_ar:
-	memblock_phys_free(__pa(pages), pages_size);
+	memblock_free(pages, pages_size);
 	pcpu_free_alloc_info(ai);
 	return rc;
 }
@@ -3286,7 +3286,7 @@ static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
 
 static void __init pcpu_dfl_fc_free(void *ptr, size_t size)
 {
-	memblock_phys_free(__pa(ptr), size);
+	memblock_free(ptr, size);
 }
 
 void __init setup_per_cpu_areas(void)
diff --git a/mm/sparse.c b/mm/sparse.c
index fc3ab8d3b6bc..e5c84b0cf0c9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -451,7 +451,7 @@ static void *sparsemap_buf_end __meminitdata;
 static inline void __meminit sparse_buffer_free(unsigned long size)
 {
 	WARN_ON(!sparsemap_buf || size == 0);
-	memblock_phys_free(__pa(sparsemap_buf), size);
+	memblock_free(sparsemap_buf, size);
 }
 
 static void __init sparse_buffer_init(unsigned long size, int nid)
-- 
cgit v1.2.3


From b5389086ad7be0453c55e0069a89856d1fbdf605 Mon Sep 17 00:00:00 2001
From: Zhenguo Yao <yaozhenguo1@gmail.com>
Date: Fri, 5 Nov 2021 13:43:28 -0700
Subject: hugetlbfs: extend the definition of hugepages parameter to support
 node allocation

We can specify the number of hugepages to allocate at boot.  But the
hugepages is balanced in all nodes at present.  In some scenarios, we
only need hugepages in one node.  For example: DPDK needs hugepages
which are in the same node as NIC.

If DPDK needs four hugepages of 1G size in node1 and system has 16 numa
nodes we must reserve 64 hugepages on the kernel cmdline.  But only four
hugepages are used.  The others should be free after boot.  If the
system memory is low(for example: 64G), it will be an impossible task.

So extend the hugepages parameter to support specifying hugepages on a
specific node.  For example add following parameter:

  hugepagesz=1G hugepages=0:1,1:3

It will allocate 1 hugepage in node0 and 3 hugepages in node1.

Link: https://lkml.kernel.org/r/20211005054729.86457-1-yaozhenguo1@gmail.com
Signed-off-by: Zhenguo Yao <yaozhenguo1@gmail.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Zhenguo Yao <yaozhenguo1@gmail.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt |   8 +-
 Documentation/admin-guide/mm/hugetlbpage.rst    |  12 +-
 arch/powerpc/mm/hugetlbpage.c                   |   9 +-
 include/linux/hugetlb.h                         |   6 +-
 mm/hugetlb.c                                    | 153 ++++++++++++++++++++----
 5 files changed, 155 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e7f7904edf20..02eeda2a11ad 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1601,9 +1601,11 @@
 			the number of pages of hugepagesz to be allocated.
 			If this is the first HugeTLB parameter on the command
 			line, it specifies the number of pages to allocate for
-			the default huge page size.  See also
-			Documentation/admin-guide/mm/hugetlbpage.rst.
-			Format: <integer>
+			the default huge page size. If using node format, the
+			number of pages to allocate per-node can be specified.
+			See also Documentation/admin-guide/mm/hugetlbpage.rst.
+			Format: <integer> or (node format)
+				<node>:<integer>[,<node>:<integer>]
 
 	hugepagesz=
 			[HW] The size of the HugeTLB pages.  This is used in
diff --git a/Documentation/admin-guide/mm/hugetlbpage.rst b/Documentation/admin-guide/mm/hugetlbpage.rst
index bb90de3885d1..0166f9de3428 100644
--- a/Documentation/admin-guide/mm/hugetlbpage.rst
+++ b/Documentation/admin-guide/mm/hugetlbpage.rst
@@ -128,7 +128,9 @@ hugepages
 	implicitly specifies the number of huge pages of default size to
 	allocate.  If the number of huge pages of default size is implicitly
 	specified, it can not be overwritten by a hugepagesz,hugepages
-	parameter pair for the default size.
+	parameter pair for the default size.  This parameter also has a
+	node format.  The node format specifies the number of huge pages
+	to allocate on specific nodes.
 
 	For example, on an architecture with 2M default huge page size::
 
@@ -138,6 +140,14 @@ hugepages
 	indicating that the hugepages=512 parameter is ignored.  If a hugepages
 	parameter is preceded by an invalid hugepagesz parameter, it will
 	be ignored.
+
+	Node format example::
+
+		hugepagesz=2M hugepages=0:1,1:2
+
+	It will allocate 1 2M hugepage on node0 and 2 2M hugepages on node1.
+	If the node number is invalid,  the parameter will be ignored.
+
 default_hugepagesz
 	Specify the default huge page size.  This parameter can
 	only be specified once on the command line.  default_hugepagesz can
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9a75ba078e1b..82d8b368ca6d 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -229,17 +229,22 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
 	m->hstate = hstate;
 	return 1;
 }
+
+bool __init hugetlb_node_alloc_supported(void)
+{
+	return false;
+}
 #endif
 
 
-int __init alloc_bootmem_huge_page(struct hstate *h)
+int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
 		return pseries_alloc_bootmem_huge_page(h);
 #endif
-	return __alloc_bootmem_huge_page(h);
+	return __alloc_bootmem_huge_page(h, nid);
 }
 
 #ifndef CONFIG_PPC_BOOK3S_64
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c0a20781b28e..44c2ab0dfa59 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -615,6 +615,7 @@ struct hstate {
 	unsigned long nr_overcommit_huge_pages;
 	struct list_head hugepage_activelist;
 	struct list_head hugepage_freelists[MAX_NUMNODES];
+	unsigned int max_huge_pages_node[MAX_NUMNODES];
 	unsigned int nr_huge_pages_node[MAX_NUMNODES];
 	unsigned int free_huge_pages_node[MAX_NUMNODES];
 	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
@@ -647,8 +648,9 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address, struct page *page);
 
 /* arch callback */
-int __init __alloc_bootmem_huge_page(struct hstate *h);
-int __init alloc_bootmem_huge_page(struct hstate *h);
+int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
+int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
+bool __init hugetlb_node_alloc_supported(void);
 
 void __init hugetlb_add_hstate(unsigned order);
 bool __init arch_hugetlb_valid_size(unsigned long size);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5eab627a170b..24543dab9c4b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -77,6 +77,7 @@ static struct hstate * __initdata parsed_hstate;
 static unsigned long __initdata default_hstate_max_huge_pages;
 static bool __initdata parsed_valid_hugepagesz = true;
 static bool __initdata parsed_default_hugepagesz;
+static unsigned int default_hugepages_in_node[MAX_NUMNODES] __initdata;
 
 /*
  * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages,
@@ -2963,33 +2964,39 @@ out_subpool_put:
 	return ERR_PTR(-ENOSPC);
 }
 
-int alloc_bootmem_huge_page(struct hstate *h)
+int alloc_bootmem_huge_page(struct hstate *h, int nid)
 	__attribute__ ((weak, alias("__alloc_bootmem_huge_page")));
-int __alloc_bootmem_huge_page(struct hstate *h)
+int __alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
-	struct huge_bootmem_page *m;
+	struct huge_bootmem_page *m = NULL; /* initialize for clang */
 	int nr_nodes, node;
 
+	if (nid >= nr_online_nodes)
+		return 0;
+	/* do node specific alloc */
+	if (nid != NUMA_NO_NODE) {
+		m = memblock_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h),
+				0, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+		if (!m)
+			return 0;
+		goto found;
+	}
+	/* allocate from next node when distributing huge pages */
 	for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
-		void *addr;
-
-		addr = memblock_alloc_try_nid_raw(
+		m = memblock_alloc_try_nid_raw(
 				huge_page_size(h), huge_page_size(h),
 				0, MEMBLOCK_ALLOC_ACCESSIBLE, node);
-		if (addr) {
-			/*
-			 * Use the beginning of the huge page to store the
-			 * huge_bootmem_page struct (until gather_bootmem
-			 * puts them into the mem_map).
-			 */
-			m = addr;
-			goto found;
-		}
+		/*
+		 * Use the beginning of the huge page to store the
+		 * huge_bootmem_page struct (until gather_bootmem
+		 * puts them into the mem_map).
+		 */
+		if (!m)
+			return 0;
+		goto found;
 	}
-	return 0;
 
 found:
-	BUG_ON(!IS_ALIGNED(virt_to_phys(m), huge_page_size(h)));
 	/* Put them into a private list first because mem_map is not up yet */
 	INIT_LIST_HEAD(&m->list);
 	list_add(&m->list, &huge_boot_pages);
@@ -3029,12 +3036,61 @@ static void __init gather_bootmem_prealloc(void)
 		cond_resched();
 	}
 }
+static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
+{
+	unsigned long i;
+	char buf[32];
+
+	for (i = 0; i < h->max_huge_pages_node[nid]; ++i) {
+		if (hstate_is_gigantic(h)) {
+			if (!alloc_bootmem_huge_page(h, nid))
+				break;
+		} else {
+			struct page *page;
+			gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
+
+			page = alloc_fresh_huge_page(h, gfp_mask, nid,
+					&node_states[N_MEMORY], NULL);
+			if (!page)
+				break;
+			put_page(page); /* free it into the hugepage allocator */
+		}
+		cond_resched();
+	}
+	if (i == h->max_huge_pages_node[nid])
+		return;
+
+	string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32);
+	pr_warn("HugeTLB: allocating %u of page size %s failed node%d.  Only allocated %lu hugepages.\n",
+		h->max_huge_pages_node[nid], buf, nid, i);
+	h->max_huge_pages -= (h->max_huge_pages_node[nid] - i);
+	h->max_huge_pages_node[nid] = i;
+}
 
 static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 {
 	unsigned long i;
 	nodemask_t *node_alloc_noretry;
+	bool node_specific_alloc = false;
 
+	/* skip gigantic hugepages allocation if hugetlb_cma enabled */
+	if (hstate_is_gigantic(h) && hugetlb_cma_size) {
+		pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
+		return;
+	}
+
+	/* do node specific alloc */
+	for (i = 0; i < nr_online_nodes; i++) {
+		if (h->max_huge_pages_node[i] > 0) {
+			hugetlb_hstate_alloc_pages_onenode(h, i);
+			node_specific_alloc = true;
+		}
+	}
+
+	if (node_specific_alloc)
+		return;
+
+	/* below will do all node balanced alloc */
 	if (!hstate_is_gigantic(h)) {
 		/*
 		 * Bit mask controlling how hard we retry per-node allocations.
@@ -3055,11 +3111,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 
 	for (i = 0; i < h->max_huge_pages; ++i) {
 		if (hstate_is_gigantic(h)) {
-			if (hugetlb_cma_size) {
-				pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
-				goto free;
-			}
-			if (!alloc_bootmem_huge_page(h))
+			if (!alloc_bootmem_huge_page(h, NUMA_NO_NODE))
 				break;
 		} else if (!alloc_pool_huge_page(h,
 					 &node_states[N_MEMORY],
@@ -3075,7 +3127,6 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 			h->max_huge_pages, buf, i);
 		h->max_huge_pages = i;
 	}
-free:
 	kfree(node_alloc_noretry);
 }
 
@@ -3990,6 +4041,10 @@ static int __init hugetlb_init(void)
 			}
 			default_hstate.max_huge_pages =
 				default_hstate_max_huge_pages;
+
+			for (i = 0; i < nr_online_nodes; i++)
+				default_hstate.max_huge_pages_node[i] =
+					default_hugepages_in_node[i];
 		}
 	}
 
@@ -4050,6 +4105,10 @@ void __init hugetlb_add_hstate(unsigned int order)
 	parsed_hstate = h;
 }
 
+bool __init __weak hugetlb_node_alloc_supported(void)
+{
+	return true;
+}
 /*
  * hugepages command line processing
  * hugepages normally follows a valid hugepagsz or default_hugepagsz
@@ -4061,6 +4120,10 @@ static int __init hugepages_setup(char *s)
 {
 	unsigned long *mhp;
 	static unsigned long *last_mhp;
+	int node = NUMA_NO_NODE;
+	int count;
+	unsigned long tmp;
+	char *p = s;
 
 	if (!parsed_valid_hugepagesz) {
 		pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s);
@@ -4084,8 +4147,40 @@ static int __init hugepages_setup(char *s)
 		return 0;
 	}
 
-	if (sscanf(s, "%lu", mhp) <= 0)
-		*mhp = 0;
+	while (*p) {
+		count = 0;
+		if (sscanf(p, "%lu%n", &tmp, &count) != 1)
+			goto invalid;
+		/* Parameter is node format */
+		if (p[count] == ':') {
+			if (!hugetlb_node_alloc_supported()) {
+				pr_warn("HugeTLB: architecture can't support node specific alloc, ignoring!\n");
+				return 0;
+			}
+			node = tmp;
+			p += count + 1;
+			if (node < 0 || node >= nr_online_nodes)
+				goto invalid;
+			/* Parse hugepages */
+			if (sscanf(p, "%lu%n", &tmp, &count) != 1)
+				goto invalid;
+			if (!hugetlb_max_hstate)
+				default_hugepages_in_node[node] = tmp;
+			else
+				parsed_hstate->max_huge_pages_node[node] = tmp;
+			*mhp += tmp;
+			/* Go to parse next node*/
+			if (p[count] == ',')
+				p += count + 1;
+			else
+				break;
+		} else {
+			if (p != s)
+				goto invalid;
+			*mhp = tmp;
+			break;
+		}
+	}
 
 	/*
 	 * Global state is always initialized later in hugetlb_init.
@@ -4098,6 +4193,10 @@ static int __init hugepages_setup(char *s)
 	last_mhp = mhp;
 
 	return 1;
+
+invalid:
+	pr_warn("HugeTLB: Invalid hugepages parameter %s\n", p);
+	return 0;
 }
 __setup("hugepages=", hugepages_setup);
 
@@ -4159,6 +4258,7 @@ __setup("hugepagesz=", hugepagesz_setup);
 static int __init default_hugepagesz_setup(char *s)
 {
 	unsigned long size;
+	int i;
 
 	parsed_valid_hugepagesz = false;
 	if (parsed_default_hugepagesz) {
@@ -4187,6 +4287,9 @@ static int __init default_hugepagesz_setup(char *s)
 	 */
 	if (default_hstate_max_huge_pages) {
 		default_hstate.max_huge_pages = default_hstate_max_huge_pages;
+		for (i = 0; i < nr_online_nodes; i++)
+			default_hstate.max_huge_pages_node[i] =
+				default_hugepages_in_node[i];
 		if (hstate_is_gigantic(&default_hstate))
 			hugetlb_hstate_alloc_pages(&default_hstate);
 		default_hstate_max_huge_pages = 0;
-- 
cgit v1.2.3


From 8eb42beac8d369f5443addd469879d152422a28d Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Fri, 5 Nov 2021 13:43:32 -0700
Subject: mm/migrate: de-duplicate migrate_reason strings

In order to remove the need to manually keep three different files in
synch, provide a common definition of the mapping between enum
migrate_reason, and the associated strings for each enum item.

1. Use the tracing system's mapping of enums to strings, by redefining
   and reusing the MIGRATE_REASON and supporting macros, and using that
   to populate the string array in mm/debug.c.

2. Move enum migrate_reason to migrate_mode.h. This is not strictly
   necessary for this patch, but migrate mode and migrate reason go
   together, so this will slightly clarify things.

Link: https://lkml.kernel.org/r/20210922041755.141817-2-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Weizhao Ouyang <o451686892@gmail.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/migrate.h      | 19 +------------------
 include/linux/migrate_mode.h | 13 +++++++++++++
 mm/debug.c                   | 20 +++++++++++---------
 3 files changed, 25 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index c8077e936691..3d154fe03c96 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -19,24 +19,7 @@ struct migration_target_control;
  */
 #define MIGRATEPAGE_SUCCESS		0
 
-/*
- * Keep sync with:
- * - macro MIGRATE_REASON in include/trace/events/migrate.h
- * - migrate_reason_names[MR_TYPES] in mm/debug.c
- */
-enum migrate_reason {
-	MR_COMPACTION,
-	MR_MEMORY_FAILURE,
-	MR_MEMORY_HOTPLUG,
-	MR_SYSCALL,		/* also applies to cpusets */
-	MR_MEMPOLICY_MBIND,
-	MR_NUMA_MISPLACED,
-	MR_CONTIG_RANGE,
-	MR_LONGTERM_PIN,
-	MR_DEMOTION,
-	MR_TYPES
-};
-
+/* Defined in mm/debug.c: */
 extern const char *migrate_reason_names[MR_TYPES];
 
 #ifdef CONFIG_MIGRATION
diff --git a/include/linux/migrate_mode.h b/include/linux/migrate_mode.h
index 883c99249033..f37cc03f9369 100644
--- a/include/linux/migrate_mode.h
+++ b/include/linux/migrate_mode.h
@@ -19,4 +19,17 @@ enum migrate_mode {
 	MIGRATE_SYNC_NO_COPY,
 };
 
+enum migrate_reason {
+	MR_COMPACTION,
+	MR_MEMORY_FAILURE,
+	MR_MEMORY_HOTPLUG,
+	MR_SYSCALL,		/* also applies to cpusets */
+	MR_MEMPOLICY_MBIND,
+	MR_NUMA_MISPLACED,
+	MR_CONTIG_RANGE,
+	MR_LONGTERM_PIN,
+	MR_DEMOTION,
+	MR_TYPES
+};
+
 #endif		/* MIGRATE_MODE_H_INCLUDED */
diff --git a/mm/debug.c b/mm/debug.c
index fae0f81ad831..4333b6784a20 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -16,17 +16,19 @@
 #include <linux/ctype.h>
 
 #include "internal.h"
+#include <trace/events/migrate.h>
+
+/*
+ * Define EM() and EMe() so that MIGRATE_REASON from trace/events/migrate.h can
+ * be used to populate migrate_reason_names[].
+ */
+#undef EM
+#undef EMe
+#define EM(a, b)	b,
+#define EMe(a, b)	b
 
 const char *migrate_reason_names[MR_TYPES] = {
-	"compaction",
-	"memory_failure",
-	"memory_hotplug",
-	"syscall_or_cpuset",
-	"mempolicy_mbind",
-	"numa_misplaced",
-	"contig_range",
-	"longterm_pin",
-	"demotion",
+	MIGRATE_REASON
 };
 
 const struct trace_print_flags pageflag_names[] = {
-- 
cgit v1.2.3


From 20f9ba4f995247bb79e243741b8fdddbd76dd923 Mon Sep 17 00:00:00 2001
From: Yang Shi <shy828301@gmail.com>
Date: Fri, 5 Nov 2021 13:43:35 -0700
Subject: mm: migrate: make demotion knob depend on migration

The memory demotion needs to call migrate_pages() to do the jobs.  And
it is controlled by a knob, however, the knob doesn't depend on
CONFIG_MIGRATION.  The knob could be truned on even though MIGRATION is
disabled, this will not cause any crash since migrate_pages() would just
return -ENOSYS.  But it is definitely not optimal to go through demotion
path then retry regular swap every time.

And it doesn't make too much sense to have the knob visible to the users
when !MIGRATION.  Move the related code from mempolicy.[h|c] to
migrate.[h|c].

Link: https://lkml.kernel.org/r/20211015005559.246709-1-shy828301@gmail.com
Signed-off-by: Yang Shi <shy828301@gmail.com>
Acked-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  4 ----
 include/linux/migrate.h   |  4 ++++
 mm/mempolicy.c            | 61 -----------------------------------------------
 mm/migrate.c              | 61 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 097bbbb37493..3c7595e81150 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -183,8 +183,6 @@ extern bool vma_migratable(struct vm_area_struct *vma);
 extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long);
 extern void mpol_put_task_policy(struct task_struct *);
 
-extern bool numa_demotion_enabled;
-
 static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 {
 	return  (pol->mode == MPOL_PREFERRED_MANY);
@@ -300,8 +298,6 @@ static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
 	return NULL;
 }
 
-#define numa_demotion_enabled	false
-
 static inline bool mpol_is_preferred_many(struct mempolicy *pol)
 {
 	return  false;
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 3d154fe03c96..2d8130e05dc0 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -40,6 +40,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
 extern int migrate_page_move_mapping(struct address_space *mapping,
 		struct page *newpage, struct page *page, int extra_count);
+
+extern bool numa_demotion_enabled;
 #else
 
 static inline void putback_movable_pages(struct list_head *l) {}
@@ -65,6 +67,8 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
 {
 	return -ENOSYS;
 }
+
+#define numa_demotion_enabled	false
 #endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_COMPACTION
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index ce722333d9f6..f1080e0a566a 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -3057,64 +3057,3 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
 		p += scnprintf(p, buffer + maxlen - p, ":%*pbl",
 			       nodemask_pr_args(&nodes));
 }
-
-bool numa_demotion_enabled = false;
-
-#ifdef CONFIG_SYSFS
-static ssize_t numa_demotion_enabled_show(struct kobject *kobj,
-					  struct kobj_attribute *attr, char *buf)
-{
-	return sysfs_emit(buf, "%s\n",
-			  numa_demotion_enabled? "true" : "false");
-}
-
-static ssize_t numa_demotion_enabled_store(struct kobject *kobj,
-					   struct kobj_attribute *attr,
-					   const char *buf, size_t count)
-{
-	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
-		numa_demotion_enabled = true;
-	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
-		numa_demotion_enabled = false;
-	else
-		return -EINVAL;
-
-	return count;
-}
-
-static struct kobj_attribute numa_demotion_enabled_attr =
-	__ATTR(demotion_enabled, 0644, numa_demotion_enabled_show,
-	       numa_demotion_enabled_store);
-
-static struct attribute *numa_attrs[] = {
-	&numa_demotion_enabled_attr.attr,
-	NULL,
-};
-
-static const struct attribute_group numa_attr_group = {
-	.attrs = numa_attrs,
-};
-
-static int __init numa_init_sysfs(void)
-{
-	int err;
-	struct kobject *numa_kobj;
-
-	numa_kobj = kobject_create_and_add("numa", mm_kobj);
-	if (!numa_kobj) {
-		pr_err("failed to create numa kobject\n");
-		return -ENOMEM;
-	}
-	err = sysfs_create_group(numa_kobj, &numa_attr_group);
-	if (err) {
-		pr_err("failed to register numa group\n");
-		goto delete_obj;
-	}
-	return 0;
-
-delete_obj:
-	kobject_put(numa_kobj);
-	return err;
-}
-subsys_initcall(numa_init_sysfs);
-#endif
diff --git a/mm/migrate.c b/mm/migrate.c
index 1852d787e6ab..51e310a94516 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -3306,3 +3306,64 @@ static int __init migrate_on_reclaim_init(void)
 }
 late_initcall(migrate_on_reclaim_init);
 #endif /* CONFIG_HOTPLUG_CPU */
+
+bool numa_demotion_enabled = false;
+
+#ifdef CONFIG_SYSFS
+static ssize_t numa_demotion_enabled_show(struct kobject *kobj,
+					  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n",
+			  numa_demotion_enabled ? "true" : "false");
+}
+
+static ssize_t numa_demotion_enabled_store(struct kobject *kobj,
+					   struct kobj_attribute *attr,
+					   const char *buf, size_t count)
+{
+	if (!strncmp(buf, "true", 4) || !strncmp(buf, "1", 1))
+		numa_demotion_enabled = true;
+	else if (!strncmp(buf, "false", 5) || !strncmp(buf, "0", 1))
+		numa_demotion_enabled = false;
+	else
+		return -EINVAL;
+
+	return count;
+}
+
+static struct kobj_attribute numa_demotion_enabled_attr =
+	__ATTR(demotion_enabled, 0644, numa_demotion_enabled_show,
+	       numa_demotion_enabled_store);
+
+static struct attribute *numa_attrs[] = {
+	&numa_demotion_enabled_attr.attr,
+	NULL,
+};
+
+static const struct attribute_group numa_attr_group = {
+	.attrs = numa_attrs,
+};
+
+static int __init numa_init_sysfs(void)
+{
+	int err;
+	struct kobject *numa_kobj;
+
+	numa_kobj = kobject_create_and_add("numa", mm_kobj);
+	if (!numa_kobj) {
+		pr_err("failed to create numa kobject\n");
+		return -ENOMEM;
+	}
+	err = sysfs_create_group(numa_kobj, &numa_attr_group);
+	if (err) {
+		pr_err("failed to register numa group\n");
+		goto delete_obj;
+	}
+	return 0;
+
+delete_obj:
+	kobject_put(numa_kobj);
+	return err;
+}
+subsys_initcall(numa_init_sysfs);
+#endif
-- 
cgit v1.2.3


From 50f9481ed9fb8a2d2a06a155634c7f9eeff9fa61 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 5 Nov 2021 13:44:24 -0700
Subject: mm/memory_hotplug: remove CONFIG_MEMORY_HOTPLUG_SPARSE

CONFIG_MEMORY_HOTPLUG depends on CONFIG_SPARSEMEM, so there is no need for
CONFIG_MEMORY_HOTPLUG_SPARSE anymore; adjust all instances to use
CONFIG_MEMORY_HOTPLUG and remove CONFIG_MEMORY_HOTPLUG_SPARSE.

Link: https://lkml.kernel.org/r/20210929143600.49379-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>	[kselftest]
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Oscar Salvador <osalvador@suse.de>
Cc: Alex Shi <alexs@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/include/asm/machdep.h            |  2 +-
 arch/powerpc/kernel/setup_64.c                |  2 +-
 arch/powerpc/platforms/powernv/setup.c        |  4 ++--
 arch/powerpc/platforms/pseries/setup.c        |  2 +-
 drivers/base/Makefile                         |  2 +-
 drivers/base/node.c                           |  9 ++++-----
 drivers/virtio/Kconfig                        |  2 +-
 include/linux/memory.h                        | 24 ++++++++++--------------
 include/linux/node.h                          |  4 ++--
 lib/Kconfig.debug                             |  2 +-
 mm/Kconfig                                    |  4 ----
 mm/memory_hotplug.c                           |  2 --
 tools/testing/selftests/memory-hotplug/config |  1 -
 13 files changed, 24 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 764f2732a821..d8a2ca007082 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -32,7 +32,7 @@ struct machdep_calls {
 	void		(*iommu_save)(void);
 	void		(*iommu_restore)(void);
 #endif
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 	unsigned long	(*memory_block_size)(void);
 #endif
 #endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 1777e992b20b..6052f5d5ded3 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -912,7 +912,7 @@ void __init setup_per_cpu_areas(void)
 }
 #endif
 
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 unsigned long memory_block_size_bytes(void)
 {
 	if (ppc_md.memory_block_size)
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a8db3f153063..ad56a54ac9c5 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -440,7 +440,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 }
 #endif /* CONFIG_KEXEC_CORE */
 
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 static unsigned long pnv_memory_block_size(void)
 {
 	/*
@@ -553,7 +553,7 @@ define_machine(powernv) {
 #ifdef CONFIG_KEXEC_CORE
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
 #endif
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 	.memory_block_size	= pnv_memory_block_size,
 #endif
 };
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f79126f16258..d29f6f1f7f37 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -1089,7 +1089,7 @@ define_machine(pseries) {
 	.machine_kexec          = pSeries_machine_kexec,
 	.kexec_cpu_down         = pseries_kexec_cpu_down,
 #endif
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 	.memory_block_size	= pseries_memory_block_size,
 #endif
 };
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index ef8e44a7d288..02f7f1358e86 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -13,7 +13,7 @@ obj-y			+= power/
 obj-$(CONFIG_ISA_BUS_API)	+= isa.o
 obj-y				+= firmware_loader/
 obj-$(CONFIG_NUMA)	+= node.o
-obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
+obj-$(CONFIG_MEMORY_HOTPLUG) += memory.o
 ifeq ($(CONFIG_SYSFS),y)
 obj-$(CONFIG_MODULES)	+= module.o
 endif
diff --git a/drivers/base/node.c b/drivers/base/node.c
index c56d34f8158f..b5a4ba18f9f9 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -629,7 +629,7 @@ static void node_device_release(struct device *dev)
 {
 	struct node *node = to_node(dev);
 
-#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
 	/*
 	 * We schedule the work only when a memory section is
 	 * onlined/offlined on this node. When we come here,
@@ -782,7 +782,7 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
 	return 0;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
 static int __ref get_nid_for_pfn(unsigned long pfn)
 {
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@ -958,10 +958,9 @@ static int node_memory_callback(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 #endif	/* CONFIG_HUGETLBFS */
-#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
+#endif /* CONFIG_MEMORY_HOTPLUG */
 
-#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
-    !defined(CONFIG_HUGETLBFS)
+#if !defined(CONFIG_MEMORY_HOTPLUG) || !defined(CONFIG_HUGETLBFS)
 static inline int node_memory_callback(struct notifier_block *self,
 				unsigned long action, void *arg)
 {
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index ce1b3f6ec325..3654def9915c 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -98,7 +98,7 @@ config VIRTIO_MEM
 	default m
 	depends on X86_64
 	depends on VIRTIO
-	depends on MEMORY_HOTPLUG_SPARSE
+	depends on MEMORY_HOTPLUG
 	depends on MEMORY_HOTREMOVE
 	depends on CONTIG_ALLOC
 	help
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 053a530c7bdd..0328ec039c38 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -110,7 +110,7 @@ struct mem_section;
 #define SLAB_CALLBACK_PRI       1
 #define IPC_CALLBACK_PRI        10
 
-#ifndef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifndef CONFIG_MEMORY_HOTPLUG
 static inline void memory_dev_init(void)
 {
 	return;
@@ -126,7 +126,14 @@ static inline int memory_notify(unsigned long val, void *v)
 {
 	return 0;
 }
-#else
+static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
+{
+	return 0;
+}
+/* These aren't inline functions due to a GCC bug. */
+#define register_hotmemory_notifier(nb)    ({ (void)(nb); 0; })
+#define unregister_hotmemory_notifier(nb)  ({ (void)(nb); })
+#else /* CONFIG_MEMORY_HOTPLUG */
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
 int create_memory_block_devices(unsigned long start, unsigned long size,
@@ -148,9 +155,6 @@ struct memory_group *memory_group_find_by_id(int mgid);
 typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
 int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 			       struct memory_group *excluded, void *arg);
-#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
-
-#ifdef CONFIG_MEMORY_HOTPLUG
 #define hotplug_memory_notifier(fn, pri) ({		\
 	static __meminitdata struct notifier_block fn##_mem_nb =\
 		{ .notifier_call = fn, .priority = pri };\
@@ -158,15 +162,7 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 })
 #define register_hotmemory_notifier(nb)		register_memory_notifier(nb)
 #define unregister_hotmemory_notifier(nb) 	unregister_memory_notifier(nb)
-#else
-static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
-{
-	return 0;
-}
-/* These aren't inline functions due to a GCC bug. */
-#define register_hotmemory_notifier(nb)    ({ (void)(nb); 0; })
-#define unregister_hotmemory_notifier(nb)  ({ (void)(nb); })
-#endif
+#endif	/* CONFIG_MEMORY_HOTPLUG */
 
 /*
  * Kernel text modification mutex, used for code patching. Users of this lock
diff --git a/include/linux/node.h b/include/linux/node.h
index 8e5a29897936..bb21fd631b16 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -85,7 +85,7 @@ struct node {
 	struct device	dev;
 	struct list_head access_list;
 
-#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS)
 	struct work_struct	node_work;
 #endif
 #ifdef CONFIG_HMEM_REPORTING
@@ -98,7 +98,7 @@ struct memory_block;
 extern struct node *node_devices[];
 typedef  void (*node_registration_func_t)(struct node *);
 
-#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
+#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA)
 void link_mem_sections(int nid, unsigned long start_pfn,
 		       unsigned long end_pfn,
 		       enum meminit_context context);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2a9b6dcdac4f..669fee1d26b8 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -877,7 +877,7 @@ config DEBUG_MEMORY_INIT
 
 config MEMORY_NOTIFIER_ERROR_INJECT
 	tristate "Memory hotplug notifier error injection module"
-	depends on MEMORY_HOTPLUG_SPARSE && NOTIFIER_ERROR_INJECTION
+	depends on MEMORY_HOTPLUG && NOTIFIER_ERROR_INJECTION
 	help
 	  This option provides the ability to inject artificial errors to
 	  memory hotplug notifier chain callbacks.  It is controlled through
diff --git a/mm/Kconfig b/mm/Kconfig
index b2bf73c90a38..0148a9c4fa2a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -128,10 +128,6 @@ config MEMORY_HOTPLUG
 	depends on 64BIT || BROKEN
 	select NUMA_KEEP_MEMINFO if NUMA
 
-config MEMORY_HOTPLUG_SPARSE
-	def_bool y
-	depends on SPARSEMEM && MEMORY_HOTPLUG
-
 config MEMORY_HOTPLUG_DEFAULT_ONLINE
 	bool "Online the newly added memory blocks by default"
 	depends on MEMORY_HOTPLUG
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index afaae370b8cd..fc07ce7b5842 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -220,7 +220,6 @@ static void release_memory_resource(struct resource *res)
 	kfree(res);
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
 		const char *reason)
 {
@@ -1163,7 +1162,6 @@ failed_addition:
 	mem_hotplug_done();
 	return ret;
 }
-#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
 static void reset_node_present_pages(pg_data_t *pgdat)
 {
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
index a7e8cd5bb265..1eef042a31e1 100644
--- a/tools/testing/selftests/memory-hotplug/config
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -1,5 +1,4 @@
 CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTPLUG_SPARSE=y
 CONFIG_NOTIFIER_ERROR_INJECTION=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_MEMORY_HOTREMOVE=y
-- 
cgit v1.2.3


From 6b740c6c3aa371cd70ac07f8d071f2a8af28c51c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 5 Nov 2021 13:44:31 -0700
Subject: mm/memory_hotplug: remove HIGHMEM leftovers

We don't support CONFIG_MEMORY_HOTPLUG on 32 bit and consequently not
HIGHMEM.  Let's remove any leftover code -- including the unused
"status_change_nid_high" field part of the memory notifier.

Link: https://lkml.kernel.org/r/20210929143600.49379-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Alex Shi <alexs@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/memory-hotplug.rst          |  3 --
 .../translations/zh_CN/core-api/memory-hotplug.rst |  4 ---
 include/linux/memory.h                             |  1 -
 mm/memory_hotplug.c                                | 36 ++--------------------
 4 files changed, 2 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/memory-hotplug.rst b/Documentation/core-api/memory-hotplug.rst
index de7467e48067..682259ee633a 100644
--- a/Documentation/core-api/memory-hotplug.rst
+++ b/Documentation/core-api/memory-hotplug.rst
@@ -57,7 +57,6 @@ The third argument (arg) passes a pointer of struct memory_notify::
 		unsigned long start_pfn;
 		unsigned long nr_pages;
 		int status_change_nid_normal;
-		int status_change_nid_high;
 		int status_change_nid;
 	}
 
@@ -65,8 +64,6 @@ The third argument (arg) passes a pointer of struct memory_notify::
 - nr_pages is # of pages of online/offline memory.
 - status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
   is (will be) set/clear, if this is -1, then nodemask status is not changed.
-- status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
-  is (will be) set/clear, if this is -1, then nodemask status is not changed.
 - status_change_nid is set node id when N_MEMORY of nodemask is (will be)
   set/clear. It means a new(memoryless) node gets new memory by online and a
   node loses all memory. If this is -1, then nodemask status is not changed.
diff --git a/Documentation/translations/zh_CN/core-api/memory-hotplug.rst b/Documentation/translations/zh_CN/core-api/memory-hotplug.rst
index 161f4d2c18cc..9a204eb196f2 100644
--- a/Documentation/translations/zh_CN/core-api/memory-hotplug.rst
+++ b/Documentation/translations/zh_CN/core-api/memory-hotplug.rst
@@ -63,7 +63,6 @@ memory_notify结构体的指针::
 		unsigned long start_pfn;
 		unsigned long nr_pages;
 		int status_change_nid_normal;
-		int status_change_nid_high;
 		int status_change_nid;
 	}
 
@@ -74,9 +73,6 @@ memory_notify结构体的指针::
 - status_change_nid_normal是当nodemask的N_NORMAL_MEMORY被设置/清除时设置节
   点id，如果是-1，则nodemask状态不改变。
 
-- status_change_nid_high是当nodemask的N_HIGH_MEMORY被设置/清除时设置的节点
-  id，如果这个值为-1，那么nodemask状态不会改变。
-
 - status_change_nid是当nodemask的N_MEMORY被（将）设置/清除时设置的节点id。这
   意味着一个新的（没上线的）节点通过联机获得新的内存，而一个节点失去了所有的内
   存。如果这个值为-1，那么nodemask的状态就不会改变。
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 0328ec039c38..88eb587b5143 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -96,7 +96,6 @@ struct memory_notify {
 	unsigned long start_pfn;
 	unsigned long nr_pages;
 	int status_change_nid_normal;
-	int status_change_nid_high;
 	int status_change_nid;
 };
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index fc07ce7b5842..0aa7ca3dfbc9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -21,7 +21,6 @@
 #include <linux/memory.h>
 #include <linux/memremap.h>
 #include <linux/memory_hotplug.h>
-#include <linux/highmem.h>
 #include <linux/vmalloc.h>
 #include <linux/ioport.h>
 #include <linux/delay.h>
@@ -585,10 +584,6 @@ void generic_online_page(struct page *page, unsigned int order)
 	debug_pagealloc_map_pages(page, 1 << order);
 	__free_pages_core(page, order);
 	totalram_pages_add(1UL << order);
-#ifdef CONFIG_HIGHMEM
-	if (PageHighMem(page))
-		totalhigh_pages_add(1UL << order);
-#endif
 }
 EXPORT_SYMBOL_GPL(generic_online_page);
 
@@ -625,16 +620,11 @@ static void node_states_check_changes_online(unsigned long nr_pages,
 
 	arg->status_change_nid = NUMA_NO_NODE;
 	arg->status_change_nid_normal = NUMA_NO_NODE;
-	arg->status_change_nid_high = NUMA_NO_NODE;
 
 	if (!node_state(nid, N_MEMORY))
 		arg->status_change_nid = nid;
 	if (zone_idx(zone) <= ZONE_NORMAL && !node_state(nid, N_NORMAL_MEMORY))
 		arg->status_change_nid_normal = nid;
-#ifdef CONFIG_HIGHMEM
-	if (zone_idx(zone) <= ZONE_HIGHMEM && !node_state(nid, N_HIGH_MEMORY))
-		arg->status_change_nid_high = nid;
-#endif
 }
 
 static void node_states_set_node(int node, struct memory_notify *arg)
@@ -642,9 +632,6 @@ static void node_states_set_node(int node, struct memory_notify *arg)
 	if (arg->status_change_nid_normal >= 0)
 		node_set_state(node, N_NORMAL_MEMORY);
 
-	if (arg->status_change_nid_high >= 0)
-		node_set_state(node, N_HIGH_MEMORY);
-
 	if (arg->status_change_nid >= 0)
 		node_set_state(node, N_MEMORY);
 }
@@ -1801,7 +1788,6 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
 
 	arg->status_change_nid = NUMA_NO_NODE;
 	arg->status_change_nid_normal = NUMA_NO_NODE;
-	arg->status_change_nid_high = NUMA_NO_NODE;
 
 	/*
 	 * Check whether node_states[N_NORMAL_MEMORY] will be changed.
@@ -1816,24 +1802,9 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
 	if (zone_idx(zone) <= ZONE_NORMAL && nr_pages >= present_pages)
 		arg->status_change_nid_normal = zone_to_nid(zone);
 
-#ifdef CONFIG_HIGHMEM
-	/*
-	 * node_states[N_HIGH_MEMORY] contains nodes which
-	 * have normal memory or high memory.
-	 * Here we add the present_pages belonging to ZONE_HIGHMEM.
-	 * If the zone is within the range of [0..ZONE_HIGHMEM), and
-	 * we determine that the zones in that range become empty,
-	 * we need to clear the node for N_HIGH_MEMORY.
-	 */
-	present_pages += pgdat->node_zones[ZONE_HIGHMEM].present_pages;
-	if (zone_idx(zone) <= ZONE_HIGHMEM && nr_pages >= present_pages)
-		arg->status_change_nid_high = zone_to_nid(zone);
-#endif
-
 	/*
-	 * We have accounted the pages from [0..ZONE_NORMAL), and
-	 * in case of CONFIG_HIGHMEM the pages from ZONE_HIGHMEM
-	 * as well.
+	 * We have accounted the pages from [0..ZONE_NORMAL); ZONE_HIGHMEM
+	 * does not apply as we don't support 32bit.
 	 * Here we count the possible pages from ZONE_MOVABLE.
 	 * If after having accounted all the pages, we see that the nr_pages
 	 * to be offlined is over or equal to the accounted pages,
@@ -1851,9 +1822,6 @@ static void node_states_clear_node(int node, struct memory_notify *arg)
 	if (arg->status_change_nid_normal >= 0)
 		node_clear_state(node, N_NORMAL_MEMORY);
 
-	if (arg->status_change_nid_high >= 0)
-		node_clear_state(node, N_HIGH_MEMORY);
-
 	if (arg->status_change_nid >= 0)
 		node_clear_state(node, N_MEMORY);
 }
-- 
cgit v1.2.3


From 43e3aa2a3247c9ca348884866a9846d788ec5ed6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 5 Nov 2021 13:44:35 -0700
Subject: mm/memory_hotplug: remove stale function declarations

These functions no longer exist.

Link: https://lkml.kernel.org/r/20210929143600.49379-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Alex Shi <alexs@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memory_hotplug.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index e5a867c950b2..be48e003a518 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -98,9 +98,6 @@ static inline void zone_seqlock_init(struct zone *zone)
 {
 	seqlock_init(&zone->span_seqlock);
 }
-extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
-extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
-extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
 extern void adjust_present_page_count(struct page *page,
 				      struct memory_group *group,
 				      long nr_pages);
-- 
cgit v1.2.3


From e14b41556d9ec20af60a9f672ac6f64dd450763c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 5 Nov 2021 13:44:46 -0700
Subject: memblock: improve MEMBLOCK_HOTPLUG documentation

The description of MEMBLOCK_HOTPLUG is currently short and consequently
misleading: we're actually dealing with a memory region that might get
hotunplugged later (i.e., the platform+firmware supports it), yet it is
indicated in the firmware-provided memory map as system ram that will
just get used by the system for any purpose when not taking special
care.  The firmware marked this memory region as a hot(un)plugged (e.g.,
hotplugged before reboot), implying that it might get hotunplugged again
later.

Whether we consider this information depends on the "movable_node"
kernel commandline parameter: only with "movable_node" set, we'll try
keeping this memory hotunpluggable, for example, by not serving early
allocations from this memory region and by letting the buddy manage it
using the ZONE_MOVABLE.

Let's make this clearer by extending the documentation.

Note: kexec *has to* indicate this memory to the second kernel.  With
"movable_node" set, we don't want to place kexec-images on this memory.
Without "movable_node" set, we don't care and can place kexec-images on
this memory.  In both cases, after successful memory hotunplug, kexec
has to be re-armed to update the memory map for the second kernel and to
place the kexec-images somewhere else.

Link: https://lkml.kernel.org/r/20211004093605.5830-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Jianyong Wu <Jianyong.Wu@arm.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Shahab Vahedi <shahab@synopsys.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 484650681bee..1d2b3e902a84 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -28,7 +28,11 @@ extern unsigned long long max_possible_pfn;
 /**
  * enum memblock_flags - definition of memory region attributes
  * @MEMBLOCK_NONE: no special request
- * @MEMBLOCK_HOTPLUG: hotpluggable region
+ * @MEMBLOCK_HOTPLUG: memory region indicated in the firmware-provided memory
+ * map during early boot as hot(un)pluggable system RAM (e.g., memory range
+ * that might get hotunplugged later). With "movable_node" set on the kernel
+ * commandline, try keeping this memory region hotunpluggable. Does not apply
+ * to memblocks added ("hotplugged") after early boot.
  * @MEMBLOCK_MIRROR: mirrored region
  * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as
  * reserved in the memory map; refer to memblock_mark_nomap() description
-- 
cgit v1.2.3


From 952eea9b01e4bbb7011329f1b7240844e61e5128 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 5 Nov 2021 13:44:49 -0700
Subject: memblock: allow to specify flags with memblock_add_node()

We want to specify flags when hotplugging memory.  Let's prepare to pass
flags to memblock_add_node() by adjusting all existing users.

Note that when hotplugging memory the system is already up and running
and we might have concurrent memblock users: for example, while we're
hotplugging memory, kexec_file code might search for suitable memory
regions to place kexec images.  It's important to add the memory
directly to memblock via a single call with the right flags, instead of
adding the memory first and apply flags later: otherwise, concurrent
memblock users might temporarily stumble over memblocks with wrong
flags, which will be important in a follow-up patch that introduces a
new flag to properly handle add_memory_driver_managed().

Link: https://lkml.kernel.org/r/20211004093605.5830-4-david@redhat.com
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Shahab Vahedi <shahab@synopsys.com>	[arch/arc]
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Jianyong Wu <Jianyong.Wu@arm.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arc/mm/init.c               | 4 ++--
 arch/ia64/mm/contig.c            | 2 +-
 arch/ia64/mm/init.c              | 2 +-
 arch/m68k/mm/mcfmmu.c            | 3 ++-
 arch/m68k/mm/motorola.c          | 6 ++++--
 arch/mips/loongson64/init.c      | 4 +++-
 arch/mips/sgi-ip27/ip27-memory.c | 3 ++-
 arch/s390/kernel/setup.c         | 3 ++-
 include/linux/memblock.h         | 3 ++-
 include/linux/mm.h               | 2 +-
 mm/memblock.c                    | 9 +++++----
 mm/memory_hotplug.c              | 2 +-
 12 files changed, 26 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 59408f6a02d4..ce4e939a7f07 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -59,13 +59,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
 
 		low_mem_sz = size;
 		in_use = 1;
-		memblock_add_node(base, size, 0);
+		memblock_add_node(base, size, 0, MEMBLOCK_NONE);
 	} else {
 #ifdef CONFIG_HIGHMEM
 		high_mem_start = base;
 		high_mem_sz = size;
 		in_use = 1;
-		memblock_add_node(base, size, 1);
+		memblock_add_node(base, size, 1, MEMBLOCK_NONE);
 		memblock_reserve(base, size);
 #endif
 	}
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 42e025cfbd08..24901d809301 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -153,7 +153,7 @@ find_memory (void)
 	efi_memmap_walk(find_max_min_low_pfn, NULL);
 	max_pfn = max_low_pfn;
 
-	memblock_add_node(0, PFN_PHYS(max_low_pfn), 0);
+	memblock_add_node(0, PFN_PHYS(max_low_pfn), 0, MEMBLOCK_NONE);
 
 	find_initrd();
 
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 5c6da8d83c1a..5d165607bf35 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -378,7 +378,7 @@ int __init register_active_ranges(u64 start, u64 len, int nid)
 #endif
 
 	if (start < end)
-		memblock_add_node(__pa(start), end - start, nid);
+		memblock_add_node(__pa(start), end - start, nid, MEMBLOCK_NONE);
 	return 0;
 }
 
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index eac9dde65193..6f1f25125294 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -174,7 +174,8 @@ void __init cf_bootmem_alloc(void)
 	m68k_memory[0].addr = _rambase;
 	m68k_memory[0].size = _ramend - _rambase;
 
-	memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0);
+	memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0,
+			  MEMBLOCK_NONE);
 
 	/* compute total pages in system */
 	num_pages = PFN_DOWN(_ramend - _rambase);
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 9f3f77785aa7..2b05bb2bac00 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -410,7 +410,8 @@ void __init paging_init(void)
 
 	min_addr = m68k_memory[0].addr;
 	max_addr = min_addr + m68k_memory[0].size;
-	memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0);
+	memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0,
+			  MEMBLOCK_NONE);
 	for (i = 1; i < m68k_num_memory;) {
 		if (m68k_memory[i].addr < min_addr) {
 			printk("Ignoring memory chunk at 0x%lx:0x%lx before the first chunk\n",
@@ -421,7 +422,8 @@ void __init paging_init(void)
 				(m68k_num_memory - i) * sizeof(struct m68k_mem_info));
 			continue;
 		}
-		memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i);
+		memblock_add_node(m68k_memory[i].addr, m68k_memory[i].size, i,
+				  MEMBLOCK_NONE);
 		addr = m68k_memory[i].addr + m68k_memory[i].size;
 		if (addr > max_addr)
 			max_addr = addr;
diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c
index 76e0a9636a0e..4ac5ba80bbf6 100644
--- a/arch/mips/loongson64/init.c
+++ b/arch/mips/loongson64/init.c
@@ -77,7 +77,9 @@ void __init szmem(unsigned int node)
 				(u32)node_id, mem_type, mem_start, mem_size);
 			pr_info("       start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n",
 				start_pfn, end_pfn, num_physpages);
-			memblock_add_node(PFN_PHYS(start_pfn), PFN_PHYS(node_psize), node);
+			memblock_add_node(PFN_PHYS(start_pfn),
+					  PFN_PHYS(node_psize), node,
+					  MEMBLOCK_NONE);
 			break;
 		case SYSTEM_RAM_RESERVED:
 			pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n",
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 6173684b5aaa..adc2faeecf7c 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -341,7 +341,8 @@ static void __init szmem(void)
 				continue;
 			}
 			memblock_add_node(PFN_PHYS(slot_getbasepfn(node, slot)),
-					  PFN_PHYS(slot_psize), node);
+					  PFN_PHYS(slot_psize), node,
+					  MEMBLOCK_NONE);
 		}
 	}
 }
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7fc836e9e194..8a378d426239 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -593,7 +593,8 @@ static void __init setup_resources(void)
 	 * part of the System RAM resource.
 	 */
 	if (crashk_res.end) {
-		memblock_add_node(crashk_res.start, resource_size(&crashk_res), 0);
+		memblock_add_node(crashk_res.start, resource_size(&crashk_res),
+				  0, MEMBLOCK_NONE);
 		memblock_reserve(crashk_res.start, resource_size(&crashk_res));
 		insert_resource(&iomem_resource, &crashk_res);
 	}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 1d2b3e902a84..8b6560dc7a9e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -104,7 +104,8 @@ static inline void memblock_discard(void) {}
 #endif
 
 void memblock_allow_resize(void);
-int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid);
+int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid,
+		      enum memblock_flags flags);
 int memblock_add(phys_addr_t base, phys_addr_t size);
 int memblock_remove(phys_addr_t base, phys_addr_t size);
 int memblock_phys_free(phys_addr_t base, phys_addr_t size);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7e37c726b9db..15058d9cec99 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2425,7 +2425,7 @@ static inline unsigned long get_num_physpages(void)
  * unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
  * 							 max_highmem_pfn};
  * for_each_valid_physical_page_range()
- * 	memblock_add_node(base, size, nid)
+ *	memblock_add_node(base, size, nid, MEMBLOCK_NONE)
  * free_area_init(max_zone_pfns);
  */
 void free_area_init(unsigned long *max_zone_pfn);
diff --git a/mm/memblock.c b/mm/memblock.c
index fb0c7f48e627..1b0cd8a52a13 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -655,6 +655,7 @@ repeat:
  * @base: base address of the new region
  * @size: size of the new region
  * @nid: nid of the new region
+ * @flags: flags of the new region
  *
  * Add new memblock region [@base, @base + @size) to the "memory"
  * type. See memblock_add_range() description for mode details
@@ -663,14 +664,14 @@ repeat:
  * 0 on success, -errno on failure.
  */
 int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
-				       int nid)
+				      int nid, enum memblock_flags flags)
 {
 	phys_addr_t end = base + size - 1;
 
-	memblock_dbg("%s: [%pa-%pa] nid=%d %pS\n", __func__,
-		     &base, &end, nid, (void *)_RET_IP_);
+	memblock_dbg("%s: [%pa-%pa] nid=%d flags=%x %pS\n", __func__,
+		     &base, &end, nid, flags, (void *)_RET_IP_);
 
-	return memblock_add_range(&memblock.memory, base, size, nid, 0);
+	return memblock_add_range(&memblock.memory, base, size, nid, flags);
 }
 
 /**
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9d254e88221e..cedeaa4d6f0e 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1370,7 +1370,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 	mem_hotplug_begin();
 
 	if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
-		ret = memblock_add_node(start, size, nid);
+		ret = memblock_add_node(start, size, nid, MEMBLOCK_NONE);
 		if (ret)
 			goto error_mem_hotplug_end;
 	}
-- 
cgit v1.2.3


From f7892d8e288d4b090176f26d9bf7943dbbb639a6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 5 Nov 2021 13:44:53 -0700
Subject: memblock: add MEMBLOCK_DRIVER_MANAGED to mimic
 IORESOURCE_SYSRAM_DRIVER_MANAGED

Let's add a flag that corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED,
indicating that we're dealing with a memory region that is never
indicated in the firmware-provided memory map, but always detected and
added by a driver.

Similar to MEMBLOCK_HOTPLUG, most infrastructure has to treat such
memory regions like ordinary MEMBLOCK_NONE memory regions -- for
example, when selecting memory regions to add to the vmcore for dumping
in the crashkernel via for_each_mem_range().

However, especially kexec_file is not supposed to select such memblocks
via for_each_free_mem_range() / for_each_free_mem_range_reverse() to
place kexec images, similar to how we handle
IORESOURCE_SYSRAM_DRIVER_MANAGED without CONFIG_ARCH_KEEP_MEMBLOCK.

We'll make sure that memory hotplug code sets the flag where applicable
(IORESOURCE_SYSRAM_DRIVER_MANAGED) next.  This prepares architectures
that need CONFIG_ARCH_KEEP_MEMBLOCK, such as arm64, for virtio-mem
support.

Note that kexec *must not* indicate this memory to the second kernel and
*must not* place kexec-images on this memory.  Let's add a comment to
kexec_walk_memblock(), documenting how we handle MEMBLOCK_DRIVER_MANAGED
now just like using IORESOURCE_SYSRAM_DRIVER_MANAGED in
locate_mem_hole_callback() for kexec_walk_resources().

Also note that MEMBLOCK_HOTPLUG cannot be reused due to different
semantics:
	MEMBLOCK_HOTPLUG: memory is indicated as "System RAM" in the
	firmware-provided memory map and added to the system early during
	boot; kexec *has to* indicate this memory to the second kernel and
	can place kexec-images on this memory. After memory hotunplug,
	kexec has to be re-armed. We mostly ignore this flag when
	"movable_node" is not set on the kernel command line, because
	then we're told to not care about hotunpluggability of such
	memory regions.

	MEMBLOCK_DRIVER_MANAGED: memory is not indicated as "System RAM" in
	the firmware-provided memory map; this memory is always detected
	and added to the system by a driver; memory might not actually be
	physically hotunpluggable. kexec *must not* indicate this memory to
	the second kernel and *must not* place kexec-images on this memory.

Link: https://lkml.kernel.org/r/20211004093605.5830-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Jianyong Wu <Jianyong.Wu@arm.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Shahab Vahedi <shahab@synopsys.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 16 ++++++++++++++--
 kernel/kexec_file.c      |  5 +++++
 mm/memblock.c            |  4 ++++
 3 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8b6560dc7a9e..7df557b16c1e 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -37,12 +37,17 @@ extern unsigned long long max_possible_pfn;
  * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as
  * reserved in the memory map; refer to memblock_mark_nomap() description
  * for further details
+ * @MEMBLOCK_DRIVER_MANAGED: memory region that is always detected and added
+ * via a driver, and never indicated in the firmware-provided memory map as
+ * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the
+ * kernel resource tree.
  */
 enum memblock_flags {
 	MEMBLOCK_NONE		= 0x0,	/* No special request */
 	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
 	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
 	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
+	MEMBLOCK_DRIVER_MANAGED = 0x8,	/* always detected via a driver */
 };
 
 /**
@@ -213,7 +218,8 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range(i, p_start, p_end) \
 	__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,	\
-			     MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
+			     MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED, \
+			     p_start, p_end, NULL)
 
 /**
  * for_each_mem_range_rev - reverse iterate through memblock areas from
@@ -224,7 +230,8 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type,
  */
 #define for_each_mem_range_rev(i, p_start, p_end)			\
 	__for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \
-				 MEMBLOCK_HOTPLUG, p_start, p_end, NULL)
+				 MEMBLOCK_HOTPLUG | MEMBLOCK_DRIVER_MANAGED,\
+				 p_start, p_end, NULL)
 
 /**
  * for_each_reserved_mem_range - iterate over all reserved memblock areas
@@ -254,6 +261,11 @@ static inline bool memblock_is_nomap(struct memblock_region *m)
 	return m->flags & MEMBLOCK_NOMAP;
 }
 
+static inline bool memblock_is_driver_managed(struct memblock_region *m)
+{
+	return m->flags & MEMBLOCK_DRIVER_MANAGED;
+}
+
 int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
 			    unsigned long  *end_pfn);
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 33400ff051a8..8347fc158d2b 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -556,6 +556,11 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf,
 	if (kbuf->image->type == KEXEC_TYPE_CRASH)
 		return func(&crashk_res, kbuf);
 
+	/*
+	 * Using MEMBLOCK_NONE will properly skip MEMBLOCK_DRIVER_MANAGED. See
+	 * IORESOURCE_SYSRAM_DRIVER_MANAGED handling in
+	 * locate_mem_hole_callback().
+	 */
 	if (kbuf->top_down) {
 		for_each_free_mem_range_reverse(i, NUMA_NO_NODE, MEMBLOCK_NONE,
 						&mstart, &mend, NULL) {
diff --git a/mm/memblock.c b/mm/memblock.c
index 1b0cd8a52a13..659bf0ffb086 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -982,6 +982,10 @@ static bool should_skip_region(struct memblock_type *type,
 	if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
 		return true;
 
+	/* skip driver-managed memory unless we were asked for it explicitly */
+	if (!(flags & MEMBLOCK_DRIVER_MANAGED) && memblock_is_driver_managed(m))
+		return true;
+
 	return false;
 }
 
-- 
cgit v1.2.3


From d2c20e51e3966bc668ef1ef21fbe90704286c8d0 Mon Sep 17 00:00:00 2001
From: Ira Weiny <ira.weiny@intel.com>
Date: Fri, 5 Nov 2021 13:45:06 -0700
Subject: mm/highmem: remove deprecated kmap_atomic

kmap_atomic() is being deprecated in favor of kmap_local_page().

Replace the uses of kmap_atomic() within the highmem code.

On profiling clear_huge_page() using ftrace an improvement of 62% was
observed on the below setup.

Setup:-
Below data has been collected on Qualcomm's SM7250 SoC THP enabled
(kernel v4.19.113) with only CPU-0(Cortex-A55) and CPU-7(Cortex-A76)
switched on and set to max frequency, also DDR set to perf governor.

FTRACE Data:-

Base data:-
Number of iterations: 48
Mean of allocation time: 349.5 us
std deviation: 74.5 us

v4 data:-
Number of iterations: 48
Mean of allocation time: 131 us
std deviation: 32.7 us

The following simple userspace experiment to allocate
100MB(BUF_SZ) of pages and writing to it gave us a good insight,
we observed an improvement of 42% in allocation and writing timings.
-------------------------------------------------------------
Test code snippet
-------------------------------------------------------------
      clock_start();
      buf = malloc(BUF_SZ); /* Allocate 100 MB of memory */

        for(i=0; i < BUF_SZ_PAGES; i++)
        {
                *((int *)(buf + (i*PAGE_SIZE))) = 1;
        }
      clock_end();
-------------------------------------------------------------

Malloc test timings for 100MB anon allocation:-

Base data:-
Number of iterations: 100
Mean of allocation time: 31831 us
std deviation: 4286 us

v4 data:-
Number of iterations: 100
Mean of allocation time: 18193 us
std deviation: 4915 us

[willy@infradead.org: fix zero_user_segments()]
  Link: https://lkml.kernel.org/r/YYVhHCJcm2DM2G9u@casper.infradead.org

Link: https://lkml.kernel.org/r/20210204073255.20769-2-prathu.baronia@oneplus.com
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Prathu Baronia <prathu.baronia@oneplus.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/highmem.h | 28 ++++++++++++++--------------
 mm/highmem.c            |  6 +++---
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index b4c49f9cc379..31ebee36f26c 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -143,9 +143,9 @@ static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
 #ifndef clear_user_highpage
 static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
 {
-	void *addr = kmap_atomic(page);
+	void *addr = kmap_local_page(page);
 	clear_user_page(addr, vaddr, page);
-	kunmap_atomic(addr);
+	kunmap_local(addr);
 }
 #endif
 
@@ -177,9 +177,9 @@ alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
 
 static inline void clear_highpage(struct page *page)
 {
-	void *kaddr = kmap_atomic(page);
+	void *kaddr = kmap_local_page(page);
 	clear_page(kaddr);
-	kunmap_atomic(kaddr);
+	kunmap_local(kaddr);
 }
 
 #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
@@ -202,7 +202,7 @@ static inline void zero_user_segments(struct page *page,
 		unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2)
 {
-	void *kaddr = kmap_atomic(page);
+	void *kaddr = kmap_local_page(page);
 	unsigned int i;
 
 	BUG_ON(end1 > page_size(page) || end2 > page_size(page));
@@ -213,7 +213,7 @@ static inline void zero_user_segments(struct page *page,
 	if (end2 > start2)
 		memset(kaddr + start2, 0, end2 - start2);
 
-	kunmap_atomic(kaddr);
+	kunmap_local(kaddr);
 	for (i = 0; i < compound_nr(page); i++)
 		flush_dcache_page(page + i);
 }
@@ -238,11 +238,11 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
 {
 	char *vfrom, *vto;
 
-	vfrom = kmap_atomic(from);
-	vto = kmap_atomic(to);
+	vfrom = kmap_local_page(from);
+	vto = kmap_local_page(to);
 	copy_user_page(vto, vfrom, vaddr, to);
-	kunmap_atomic(vto);
-	kunmap_atomic(vfrom);
+	kunmap_local(vto);
+	kunmap_local(vfrom);
 }
 
 #endif
@@ -253,11 +253,11 @@ static inline void copy_highpage(struct page *to, struct page *from)
 {
 	char *vfrom, *vto;
 
-	vfrom = kmap_atomic(from);
-	vto = kmap_atomic(to);
+	vfrom = kmap_local_page(from);
+	vto = kmap_local_page(to);
 	copy_page(vto, vfrom);
-	kunmap_atomic(vto);
-	kunmap_atomic(vfrom);
+	kunmap_local(vto);
+	kunmap_local(vfrom);
 }
 
 #endif
diff --git a/mm/highmem.c b/mm/highmem.c
index 4212ad0e4a19..eb3b8c288de4 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -383,7 +383,7 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 			unsigned this_end = min_t(unsigned, end1, PAGE_SIZE);
 
 			if (end1 > start1) {
-				kaddr = kmap_atomic(page + i);
+				kaddr = kmap_local_page(page + i);
 				memset(kaddr + start1, 0, this_end - start1);
 			}
 			end1 -= this_end;
@@ -398,7 +398,7 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 
 			if (end2 > start2) {
 				if (!kaddr)
-					kaddr = kmap_atomic(page + i);
+					kaddr = kmap_local_page(page + i);
 				memset(kaddr + start2, 0, this_end - start2);
 			}
 			end2 -= this_end;
@@ -406,7 +406,7 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 		}
 
 		if (kaddr) {
-			kunmap_atomic(kaddr);
+			kunmap_local(kaddr);
 			flush_dcache_page(page + i);
 		}
 
-- 
cgit v1.2.3


From 53944f171a89dff4e2a3d76f42e6eedb551bb861 Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Fri, 5 Nov 2021 13:45:18 -0700
Subject: mm: remove HARDENED_USERCOPY_FALLBACK

This has served its purpose and is no longer used.  All usercopy
violations appear to have been handled by now, any remaining instances
(or new bugs) will cause copies to be rejected.

This isn't a direct revert of commit 2d891fbc3bb6 ("usercopy: Allow
strict enforcement of whitelists"); since usercopy_fallback is
effectively 0, the fallback handling is removed too.

This also removes the usercopy_fallback module parameter on slab_common.

Link: https://github.com/KSPP/linux/issues/153
Link: https://lkml.kernel.org/r/20210921061149.1091163-1-steve@sk2.org
Signed-off-by: Stephen Kitt <steve@sk2.org>
Suggested-by: Kees Cook <keescook@chromium.org>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Joel Stanley <joel@jms.id.au>	[defconfig change]
Acked-by: David Rientjes <rientjes@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: James Morris <jmorris@namei.org>
Cc: "Serge E . Hallyn" <serge@hallyn.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/configs/skiroot_defconfig |  1 -
 include/linux/slab.h                   |  2 --
 mm/slab.c                              | 13 -------------
 mm/slab_common.c                       |  8 --------
 mm/slub.c                              | 14 --------------
 security/Kconfig                       | 14 --------------
 6 files changed, 52 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index b806a5d3a695..c3ba614c973d 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -275,7 +275,6 @@ CONFIG_NLS_UTF8=y
 CONFIG_ENCRYPTED_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_HARDENED_USERCOPY=y
-# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
 CONFIG_HARDENED_USERCOPY_PAGESPAN=y
 CONFIG_FORTIFY_SOURCE=y
 CONFIG_SECURITY_LOCKDOWN_LSM=y
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 837cb16232ef..181045148b06 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -142,8 +142,6 @@ struct mem_cgroup;
 void __init kmem_cache_init(void);
 bool slab_is_available(void);
 
-extern bool usercopy_fallback;
-
 struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
 			unsigned int align, slab_flags_t flags,
 			void (*ctor)(void *));
diff --git a/mm/slab.c b/mm/slab.c
index 64ec17a3bc2b..da132a9ae6f8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -4204,19 +4204,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 	    n <= cachep->useroffset - offset + cachep->usersize)
 		return;
 
-	/*
-	 * If the copy is still within the allocated object, produce
-	 * a warning instead of rejecting the copy. This is intended
-	 * to be a temporary method to find any missing usercopy
-	 * whitelists.
-	 */
-	if (usercopy_fallback &&
-	    offset <= cachep->object_size &&
-	    n <= cachep->object_size - offset) {
-		usercopy_warn("SLAB object", cachep->name, to_user, offset, n);
-		return;
-	}
-
 	usercopy_abort("SLAB object", cachep->name, to_user, offset, n);
 }
 #endif /* CONFIG_HARDENED_USERCOPY */
diff --git a/mm/slab_common.c b/mm/slab_common.c
index ec2bb0beed75..e5d080a93009 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -37,14 +37,6 @@ LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
-#ifdef CONFIG_HARDENED_USERCOPY
-bool usercopy_fallback __ro_after_init =
-		IS_ENABLED(CONFIG_HARDENED_USERCOPY_FALLBACK);
-module_param(usercopy_fallback, bool, 0400);
-MODULE_PARM_DESC(usercopy_fallback,
-		"WARN instead of reject usercopy whitelist violations");
-#endif
-
 static LIST_HEAD(slab_caches_to_rcu_destroy);
 static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work);
 static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
diff --git a/mm/slub.c b/mm/slub.c
index e9a51dcf8bf9..432145d7b4ec 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4489,7 +4489,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 {
 	struct kmem_cache *s;
 	unsigned int offset;
-	size_t object_size;
 	bool is_kfence = is_kfence_address(ptr);
 
 	ptr = kasan_reset_tag(ptr);
@@ -4522,19 +4521,6 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 	    n <= s->useroffset - offset + s->usersize)
 		return;
 
-	/*
-	 * If the copy is still within the allocated object, produce
-	 * a warning instead of rejecting the copy. This is intended
-	 * to be a temporary method to find any missing usercopy
-	 * whitelists.
-	 */
-	object_size = slab_ksize(s);
-	if (usercopy_fallback &&
-	    offset <= object_size && n <= object_size - offset) {
-		usercopy_warn("SLUB object", s->name, to_user, offset, n);
-		return;
-	}
-
 	usercopy_abort("SLUB object", s->name, to_user, offset, n);
 }
 #endif /* CONFIG_HARDENED_USERCOPY */
diff --git a/security/Kconfig b/security/Kconfig
index 0ced7fd33e4d..d9698900c9b7 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -163,20 +163,6 @@ config HARDENED_USERCOPY
 	  or are part of the kernel text. This kills entire classes
 	  of heap overflow exploits and similar kernel memory exposures.
 
-config HARDENED_USERCOPY_FALLBACK
-	bool "Allow usercopy whitelist violations to fallback to object size"
-	depends on HARDENED_USERCOPY
-	default y
-	help
-	  This is a temporary option that allows missing usercopy whitelists
-	  to be discovered via a WARN() to the kernel log, instead of
-	  rejecting the copy, falling back to non-whitelisted hardened
-	  usercopy that checks the slab allocation size instead of the
-	  whitelist size. This option will be removed once it seems like
-	  all missing usercopy whitelists have been identified and fixed.
-	  Booting with "slab_common.usercopy_fallback=Y/N" can change
-	  this setting.
-
 config HARDENED_USERCOPY_PAGESPAN
 	bool "Refuse to copy allocations that span multiple pages"
 	depends on HARDENED_USERCOPY
-- 
cgit v1.2.3


From a1554c002699cbc9ced2e9f44f9c1357181bead3 Mon Sep 17 00:00:00 2001
From: Mianhan Liu <liumh1@shanghaitech.edu.cn>
Date: Fri, 5 Nov 2021 13:45:21 -0700
Subject: include/linux/mm.h: move nr_free_buffer_pages from swap.h to mm.h

nr_free_buffer_pages could be exposed through mm.h instead of swap.h.
The advantage of this change is that it can reduce the obsolete
includes.  For example, net/ipv4/tcp.c wouldn't need swap.h any more
since it has already included mm.h.  Similarly, after checking all the
other files, it comes that tcp.c, udp.c meter.c ,...  follow the same
rule, so these files can have swap.h removed too.

Moreover, after preprocessing all the files that use
nr_free_buffer_pages, it turns out that those files have already
included mm.h.Thus, we can move nr_free_buffer_pages from swap.h to mm.h
safely.  This change will not affect the compilation of other files.

Link: https://lkml.kernel.org/r/20210912133640.1624-1-liumh1@shanghaitech.edu.cn
Signed-off-by: Mianhan Liu <liumh1@shanghaitech.edu.cn>
Cc: Jakub Kicinski <kuba@kernel.org>
CC: Ulf Hansson <ulf.hansson@linaro.org>
Cc: "David S . Miller" <davem@davemloft.net>
Cc: Simon Horman <horms@verge.net.au>
Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Vlad Yasevich <vyasevich@gmail.com>
Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mmc/core/mmc_test.c    | 1 -
 include/linux/mm.h             | 2 ++
 include/linux/swap.h           | 1 -
 net/ipv4/tcp.c                 | 1 -
 net/ipv4/udp.c                 | 1 -
 net/netfilter/ipvs/ip_vs_ctl.c | 1 -
 net/openvswitch/meter.c        | 1 -
 net/sctp/protocol.c            | 1 -
 8 files changed, 2 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c
index 63524551a13a..e6a2fd2c6d5c 100644
--- a/drivers/mmc/core/mmc_test.c
+++ b/drivers/mmc/core/mmc_test.c
@@ -10,7 +10,6 @@
 #include <linux/slab.h>
 
 #include <linux/scatterlist.h>
-#include <linux/swap.h>		/* For nr_free_buffer_pages() */
 #include <linux/list.h>
 
 #include <linux/debugfs.h>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 15058d9cec99..b1720aa63727 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -875,6 +875,8 @@ void put_pages_list(struct list_head *pages);
 void split_page(struct page *page, unsigned int order);
 void copy_huge_page(struct page *dst, struct page *src);
 
+unsigned long nr_free_buffer_pages(void);
+
 /*
  * Compound pages have a destructor function.  Provide a
  * prototype for that function and accessor functions.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba52f3a3478e..53f759a59996 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -335,7 +335,6 @@ void workingset_update_node(struct xa_node *node);
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalreserve_pages;
-extern unsigned long nr_free_buffer_pages(void);
 
 /* Definition of global_zone_page_state not available yet */
 #define nr_free_pages() global_zone_page_state(NR_FREE_PAGES)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f5c336f8b0c8..c5e07a2c6c52 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -260,7 +260,6 @@
 #include <linux/random.h>
 #include <linux/memblock.h>
 #include <linux/highmem.h>
-#include <linux/swap.h>
 #include <linux/cache.h>
 #include <linux/err.h>
 #include <linux/time.h>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2fffcf2b54f3..319dd7bbfe33 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -78,7 +78,6 @@
 #include <asm/ioctls.h>
 #include <linux/memblock.h>
 #include <linux/highmem.h>
-#include <linux/swap.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/module.h>
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 29ec3ef63edc..6ea4b882435e 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -24,7 +24,6 @@
 #include <linux/sysctl.h>
 #include <linux/proc_fs.h>
 #include <linux/workqueue.h>
-#include <linux/swap.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 896b8f5bc885..04a060ac7fdf 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -12,7 +12,6 @@
 #include <linux/openvswitch.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
-#include <linux/swap.h>
 
 #include <net/netlink.h>
 #include <net/genetlink.h>
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index ec0f52567c16..35928fefae33 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -33,7 +33,6 @@
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
 #include <linux/highmem.h>
-#include <linux/swap.h>
 #include <linux/slab.h>
 #include <net/net_namespace.h>
 #include <net/protocol.h>
-- 
cgit v1.2.3


From f39f21b3ddc7fc0f87eb6dc75ddc81b5bbfb7672 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 5 Nov 2021 13:45:25 -0700
Subject: stacktrace: move filter_irq_stacks() to kernel/stacktrace.c

filter_irq_stacks() has little to do with the stackdepot implementation,
except that it is usually used by users (such as KASAN) of stackdepot to
reduce the stack trace.

However, filter_irq_stacks() itself is not useful without a stack trace
as obtained by stack_trace_save() and friends.

Therefore, move filter_irq_stacks() to kernel/stacktrace.c, so that new
users of filter_irq_stacks() do not have to start depending on
STACKDEPOT only for filter_irq_stacks().

Link: https://lkml.kernel.org/r/20210923104803.2620285-1-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Aleksandr Nogikh <nogikh@google.com>
Cc: Taras Madan <tarasmadan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stackdepot.h |  2 --
 include/linux/stacktrace.h |  1 +
 kernel/stacktrace.c        | 30 ++++++++++++++++++++++++++++++
 lib/stackdepot.c           | 24 ------------------------
 4 files changed, 31 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index b2f7e7c6ba54..d29860966bc9 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -25,8 +25,6 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 			       unsigned long **entries);
 
-unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
-
 #ifdef CONFIG_STACKDEPOT
 int stack_depot_init(void);
 #else
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 9edecb494e9e..bef158815e83 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -21,6 +21,7 @@ unsigned int stack_trace_save_tsk(struct task_struct *task,
 unsigned int stack_trace_save_regs(struct pt_regs *regs, unsigned long *store,
 				   unsigned int size, unsigned int skipnr);
 unsigned int stack_trace_save_user(unsigned long *store, unsigned int size);
+unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
 
 /* Internal interfaces. Do not use in generic code */
 #ifdef CONFIG_ARCH_STACKWALK
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index 9f8117c7cfdd..9c625257023d 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
+#include <linux/interrupt.h>
 
 /**
  * stack_trace_print - Print the entries in the stack trace
@@ -373,3 +374,32 @@ unsigned int stack_trace_save_user(unsigned long *store, unsigned int size)
 #endif /* CONFIG_USER_STACKTRACE_SUPPORT */
 
 #endif /* !CONFIG_ARCH_STACKWALK */
+
+static inline bool in_irqentry_text(unsigned long ptr)
+{
+	return (ptr >= (unsigned long)&__irqentry_text_start &&
+		ptr < (unsigned long)&__irqentry_text_end) ||
+		(ptr >= (unsigned long)&__softirqentry_text_start &&
+		 ptr < (unsigned long)&__softirqentry_text_end);
+}
+
+/**
+ * filter_irq_stacks - Find first IRQ stack entry in trace
+ * @entries:	Pointer to stack trace array
+ * @nr_entries:	Number of entries in the storage array
+ *
+ * Return: Number of trace entries until IRQ stack starts.
+ */
+unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr_entries; i++) {
+		if (in_irqentry_text(entries[i])) {
+			/* Include the irqentry function into the stack. */
+			return i + 1;
+		}
+	}
+	return nr_entries;
+}
+EXPORT_SYMBOL_GPL(filter_irq_stacks);
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index bda58597e375..09485dc5bd12 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -20,7 +20,6 @@
  */
 
 #include <linux/gfp.h>
-#include <linux/interrupt.h>
 #include <linux/jhash.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -371,26 +370,3 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 	return __stack_depot_save(entries, nr_entries, alloc_flags, true);
 }
 EXPORT_SYMBOL_GPL(stack_depot_save);
-
-static inline int in_irqentry_text(unsigned long ptr)
-{
-	return (ptr >= (unsigned long)&__irqentry_text_start &&
-		ptr < (unsigned long)&__irqentry_text_end) ||
-		(ptr >= (unsigned long)&__softirqentry_text_start &&
-		 ptr < (unsigned long)&__softirqentry_text_end);
-}
-
-unsigned int filter_irq_stacks(unsigned long *entries,
-					     unsigned int nr_entries)
-{
-	unsigned int i;
-
-	for (i = 0; i < nr_entries; i++) {
-		if (in_irqentry_text(entries[i])) {
-			/* Include the irqentry function into the stack. */
-			return i + 1;
-		}
-	}
-	return nr_entries;
-}
-EXPORT_SYMBOL_GPL(filter_irq_stacks);
-- 
cgit v1.2.3


From 07e8481d3c38f461d7b79c1d5c9afe013b162b0c Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 5 Nov 2021 13:45:46 -0700
Subject: kfence: always use static branches to guard kfence_alloc()

Regardless of KFENCE mode (CONFIG_KFENCE_STATIC_KEYS: either using
static keys to gate allocations, or using a simple dynamic branch),
always use a static branch to avoid the dynamic branch in kfence_alloc()
if KFENCE was disabled at boot.

For CONFIG_KFENCE_STATIC_KEYS=n, this now avoids the dynamic branch if
KFENCE was disabled at boot.

To simplify, also unifies the location where kfence_allocation_gate is
read-checked to just be inline in kfence_alloc().

Link: https://lkml.kernel.org/r/20211019102524.2807208-1-elver@google.com
Signed-off-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kfence.h | 21 +++++++++++----------
 mm/kfence/core.c       | 16 +++++++---------
 2 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfence.h b/include/linux/kfence.h
index 3fe6dd8a18c1..4b5e3679a72c 100644
--- a/include/linux/kfence.h
+++ b/include/linux/kfence.h
@@ -14,6 +14,9 @@
 
 #ifdef CONFIG_KFENCE
 
+#include <linux/atomic.h>
+#include <linux/static_key.h>
+
 /*
  * We allocate an even number of pages, as it simplifies calculations to map
  * address to metadata indices; effectively, the very first page serves as an
@@ -22,13 +25,8 @@
 #define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE)
 extern char *__kfence_pool;
 
-#ifdef CONFIG_KFENCE_STATIC_KEYS
-#include <linux/static_key.h>
 DECLARE_STATIC_KEY_FALSE(kfence_allocation_key);
-#else
-#include <linux/atomic.h>
 extern atomic_t kfence_allocation_gate;
-#endif
 
 /**
  * is_kfence_address() - check if an address belongs to KFENCE pool
@@ -116,13 +114,16 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
  */
 static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 {
-#ifdef CONFIG_KFENCE_STATIC_KEYS
-	if (static_branch_unlikely(&kfence_allocation_key))
+#if defined(CONFIG_KFENCE_STATIC_KEYS) || CONFIG_KFENCE_SAMPLE_INTERVAL == 0
+	if (!static_branch_unlikely(&kfence_allocation_key))
+		return NULL;
 #else
-	if (unlikely(!atomic_read(&kfence_allocation_gate)))
+	if (!static_branch_likely(&kfence_allocation_key))
+		return NULL;
 #endif
-		return __kfence_alloc(s, size, flags);
-	return NULL;
+	if (likely(atomic_read(&kfence_allocation_gate)))
+		return NULL;
+	return __kfence_alloc(s, size, flags);
 }
 
 /**
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 802905b1c89b..09945784df9e 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -104,10 +104,11 @@ struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
 static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
 static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
 
-#ifdef CONFIG_KFENCE_STATIC_KEYS
-/* The static key to set up a KFENCE allocation. */
+/*
+ * The static key to set up a KFENCE allocation; or if static keys are not used
+ * to gate allocations, to avoid a load and compare if KFENCE is disabled.
+ */
 DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
-#endif
 
 /* Gates the allocation, ensuring only one succeeds in a given period. */
 atomic_t kfence_allocation_gate = ATOMIC_INIT(1);
@@ -774,6 +775,8 @@ void __init kfence_init(void)
 		return;
 	}
 
+	if (!IS_ENABLED(CONFIG_KFENCE_STATIC_KEYS))
+		static_branch_enable(&kfence_allocation_key);
 	WRITE_ONCE(kfence_enabled, true);
 	queue_delayed_work(system_unbound_wq, &kfence_timer, 0);
 	pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
@@ -866,12 +869,7 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
 		return NULL;
 	}
 
-	/*
-	 * allocation_gate only needs to become non-zero, so it doesn't make
-	 * sense to continue writing to it and pay the associated contention
-	 * cost, in case we have a large number of concurrent allocations.
-	 */
-	if (atomic_read(&kfence_allocation_gate) || atomic_inc_return(&kfence_allocation_gate) > 1)
+	if (atomic_inc_return(&kfence_allocation_gate) > 1)
 		return NULL;
 #ifdef CONFIG_KFENCE_STATIC_KEYS
 	/*
-- 
cgit v1.2.3


From d2f272b35a84ace2ef04334a9822fd726a7f061b Mon Sep 17 00:00:00 2001
From: SeongJae Park <sjpark@amazon.de>
Date: Fri, 5 Nov 2021 13:46:04 -0700
Subject: include/linux/damon.h: fix kernel-doc comments for 'damon_callback'

A few Kernel-doc comments in 'damon.h' are broken.  This fixes them.

Link: https://lkml.kernel.org/r/20210917123958.3819-5-sj@kernel.org
Signed-off-by: SeongJae Park <sjpark@amazon.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index d68b67b8d458..755d70804705 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -62,7 +62,7 @@ struct damon_target {
 struct damon_ctx;
 
 /**
- * struct damon_primitive	Monitoring primitives for given use cases.
+ * struct damon_primitive - Monitoring primitives for given use cases.
  *
  * @init:			Initialize primitive-internal data structures.
  * @update:			Update primitive-internal data structures.
@@ -108,8 +108,8 @@ struct damon_primitive {
 	void (*cleanup)(struct damon_ctx *context);
 };
 
-/*
- * struct damon_callback	Monitoring events notification callbacks.
+/**
+ * struct damon_callback - Monitoring events notification callbacks.
  *
  * @before_start:	Called before starting the monitoring.
  * @after_sampling:	Called after each sampling.
-- 
cgit v1.2.3


From fda504fade7f124858d7022341dc46ff35b45274 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:46:18 -0700
Subject: mm/damon/core: account age of target regions

Patch series "Implement Data Access Monitoring-based Memory Operation Schemes".

Introduction
============

DAMON[1] can be used as a primitive for data access aware memory
management optimizations.  For that, users who want such optimizations
should run DAMON, read the monitoring results, analyze it, plan a new
memory management scheme, and apply the new scheme by themselves.  Such
efforts will be inevitable for some complicated optimizations.

However, in many other cases, the users would simply want the system to
apply a memory management action to a memory region of a specific size
having a specific access frequency for a specific time.  For example,
"page out a memory region larger than 100 MiB keeping only rare accesses
more than 2 minutes", or "Do not use THP for a memory region larger than
2 MiB rarely accessed for more than 1 seconds".

To make the works easier and non-redundant, this patchset implements a
new feature of DAMON, which is called Data Access Monitoring-based
Operation Schemes (DAMOS).  Using the feature, users can describe the
normal schemes in a simple way and ask DAMON to execute those on its
own.

[1] https://damonitor.github.io

Evaluations
===========

DAMOS is accurate and useful for memory management optimizations.  An
experimental DAMON-based operation scheme for THP, 'ethp', removes
76.15% of THP memory overheads while preserving 51.25% of THP speedup.
Another experimental DAMON-based 'proactive reclamation' implementation,
'prcl', reduces 93.38% of residential sets and 23.63% of system memory
footprint while incurring only 1.22% runtime overhead in the best case
(parsec3/freqmine).

NOTE that the experimental THP optimization and proactive reclamation
are not for production but only for proof of concepts.

Please refer to the showcase web site's evaluation document[1] for
detailed evaluation setup and results.

[1] https://damonitor.github.io/doc/html/v34/vm/damon/eval.html

Long-term Support Trees
-----------------------

For people who want to test DAMON but using LTS kernels, there are
another couple of trees based on two latest LTS kernels respectively and
containing the 'damon/master' backports.

- For v5.4.y: https://git.kernel.org/sj/h/damon/for-v5.4.y
- For v5.10.y: https://git.kernel.org/sj/h/damon/for-v5.10.y

Sequence Of Patches
===================

The 1st patch accounts age of each region.  The 2nd patch implements the
core of the DAMON-based operation schemes feature.  The 3rd patch makes
the default monitoring primitives for virtual address spaces to support
the schemes.  From this point, the kernel space users can use DAMOS.
The 4th patch exports the feature to the user space via the debugfs
interface.  The 5th patch implements schemes statistics feature for
easier tuning of the schemes and runtime access pattern analysis, and
the 6th patch adds selftests for these changes.  Finally, the 7th patch
documents this new feature.

This patch (of 7):

DAMON can be used for data access pattern aware memory management
optimizations.  For that, users should run DAMON, read the monitoring
results, analyze it, plan a new memory management scheme, and apply the
new scheme by themselves.  It would not be too hard, but still require
some level of effort.  For complicated cases, this effort is inevitable.

That said, in many cases, users would simply want to apply an actions to
a memory region of a specific size having a specific access frequency
for a specific time.  For example, "page out a memory region larger than
100 MiB but having a low access frequency more than 10 minutes", or "Use
THP for a memory region larger than 2 MiB having a high access frequency
for more than 2 seconds".

For such optimizations, users will need to first account the age of each
region themselves.  To reduce such efforts, this implements a simple age
account of each region in DAMON.  For each aggregation step, DAMON
compares the access frequency with that from last aggregation and reset
the age of the region if the change is significant.  Else, the age is
incremented.  Also, in case of the merge of regions, the region
size-weighted average of the ages is set as the age of merged new
region.

Link: https://lkml.kernel.org/r/20211001125604.29660-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20211001125604.29660-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Marco Elver <elver@google.com>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Greg Thelen <gthelen@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: David Rienjes <rientjes@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 10 ++++++++++
 mm/damon/core.c       | 13 +++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 755d70804705..3e8215debbd4 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -31,12 +31,22 @@ struct damon_addr_range {
  * @sampling_addr:	Address of the sample for the next access check.
  * @nr_accesses:	Access frequency of this region.
  * @list:		List head for siblings.
+ * @age:		Age of this region.
+ *
+ * @age is initially zero, increased for each aggregation interval, and reset
+ * to zero again if the access frequency is significantly changed.  If two
+ * regions are merged into a new region, both @nr_accesses and @age of the new
+ * region are set as region size-weighted average of those of the two regions.
  */
 struct damon_region {
 	struct damon_addr_range ar;
 	unsigned long sampling_addr;
 	unsigned int nr_accesses;
 	struct list_head list;
+
+	unsigned int age;
+/* private: Internal value for age calculation. */
+	unsigned int last_nr_accesses;
 };
 
 /**
diff --git a/mm/damon/core.c b/mm/damon/core.c
index d993db50280c..3efbe80779db 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -45,6 +45,9 @@ struct damon_region *damon_new_region(unsigned long start, unsigned long end)
 	region->nr_accesses = 0;
 	INIT_LIST_HEAD(&region->list);
 
+	region->age = 0;
+	region->last_nr_accesses = 0;
+
 	return region;
 }
 
@@ -444,6 +447,7 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
 
 		damon_for_each_region(r, t) {
 			trace_damon_aggregated(t, r, damon_nr_regions(t));
+			r->last_nr_accesses = r->nr_accesses;
 			r->nr_accesses = 0;
 		}
 	}
@@ -461,6 +465,7 @@ static void damon_merge_two_regions(struct damon_target *t,
 
 	l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) /
 			(sz_l + sz_r);
+	l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r);
 	l->ar.end = r->ar.end;
 	damon_destroy_region(r, t);
 }
@@ -480,6 +485,11 @@ static void damon_merge_regions_of(struct damon_target *t, unsigned int thres,
 	struct damon_region *r, *prev = NULL, *next;
 
 	damon_for_each_region_safe(r, next, t) {
+		if (diff_of(r->nr_accesses, r->last_nr_accesses) > thres)
+			r->age = 0;
+		else
+			r->age++;
+
 		if (prev && prev->ar.end == r->ar.start &&
 		    diff_of(prev->nr_accesses, r->nr_accesses) <= thres &&
 		    sz_damon_region(prev) + sz_damon_region(r) <= sz_limit)
@@ -527,6 +537,9 @@ static void damon_split_region_at(struct damon_ctx *ctx,
 
 	r->ar.end = new->ar.start;
 
+	new->age = r->age;
+	new->last_nr_accesses = r->last_nr_accesses;
+
 	damon_insert_region(new, r, damon_next_region(r), t);
 }
 
-- 
cgit v1.2.3


From 1f366e421c8f69583ed37b56d86e3747331869c3 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:46:22 -0700
Subject: mm/damon/core: implement DAMON-based Operation Schemes (DAMOS)

In many cases, users might use DAMON for simple data access aware memory
management optimizations such as applying an operation scheme to a
memory region of a specific size having a specific access frequency for
a specific time.  For example, "page out a memory region larger than 100
MiB but having a low access frequency more than 10 minutes", or "Use THP
for a memory region larger than 2 MiB having a high access frequency for
more than 2 seconds".

Most simple form of the solution would be doing offline data access
pattern profiling using DAMON and modifying the application source code
or system configuration based on the profiling results.  Or, developing
a daemon constructed with two modules (one for access monitoring and the
other for applying memory management actions via mlock(), madvise(),
sysctl, etc) is imaginable.

To avoid users spending their time for implementation of such simple
data access monitoring-based operation schemes, this makes DAMON to
handle such schemes directly.  With this change, users can simply
specify their desired schemes to DAMON.  Then, DAMON will automatically
apply the schemes to the user-specified target processes.

Each of the schemes is composed with conditions for filtering of the
target memory regions and desired memory management action for the
target.  Specifically, the format is::

    <min/max size> <min/max access frequency> <min/max age> <action>

The filtering conditions are size of memory region, number of accesses
to the region monitored by DAMON, and the age of the region.  The age of
region is incremented periodically but reset when its addresses or
access frequency has significantly changed or the action of a scheme was
applied.  For the action, current implementation supports a few of
madvise()-like hints, ``WILLNEED``, ``COLD``, ``PAGEOUT``, ``HUGEPAGE``,
and ``NOHUGEPAGE``.

Because DAMON supports various address spaces and application of the
actions to a monitoring target region is dependent to the type of the
target address space, the application code should be implemented by each
primitives and registered to the framework.  Note that this only
implements the framework part.  Following commit will implement the
action applications for virtual address spaces primitives.

Link: https://lkml.kernel.org/r/20211001125604.29660-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rienjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h |  66 ++++++++++++++++++++++++++++++
 mm/damon/core.c       | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 175 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 3e8215debbd4..dbe18b0fb795 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -69,6 +69,48 @@ struct damon_target {
 	struct list_head list;
 };
 
+/**
+ * enum damos_action - Represents an action of a Data Access Monitoring-based
+ * Operation Scheme.
+ *
+ * @DAMOS_WILLNEED:	Call ``madvise()`` for the region with MADV_WILLNEED.
+ * @DAMOS_COLD:		Call ``madvise()`` for the region with MADV_COLD.
+ * @DAMOS_PAGEOUT:	Call ``madvise()`` for the region with MADV_PAGEOUT.
+ * @DAMOS_HUGEPAGE:	Call ``madvise()`` for the region with MADV_HUGEPAGE.
+ * @DAMOS_NOHUGEPAGE:	Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
+ */
+enum damos_action {
+	DAMOS_WILLNEED,
+	DAMOS_COLD,
+	DAMOS_PAGEOUT,
+	DAMOS_HUGEPAGE,
+	DAMOS_NOHUGEPAGE,
+};
+
+/**
+ * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
+ * @min_sz_region:	Minimum size of target regions.
+ * @max_sz_region:	Maximum size of target regions.
+ * @min_nr_accesses:	Minimum ``->nr_accesses`` of target regions.
+ * @max_nr_accesses:	Maximum ``->nr_accesses`` of target regions.
+ * @min_age_region:	Minimum age of target regions.
+ * @max_age_region:	Maximum age of target regions.
+ * @action:		&damo_action to be applied to the target regions.
+ * @list:		List head for siblings.
+ *
+ * Note that both the minimums and the maximums are inclusive.
+ */
+struct damos {
+	unsigned long min_sz_region;
+	unsigned long max_sz_region;
+	unsigned int min_nr_accesses;
+	unsigned int max_nr_accesses;
+	unsigned int min_age_region;
+	unsigned int max_age_region;
+	enum damos_action action;
+	struct list_head list;
+};
+
 struct damon_ctx;
 
 /**
@@ -79,6 +121,7 @@ struct damon_ctx;
  * @prepare_access_checks:	Prepare next access check of target regions.
  * @check_accesses:		Check the accesses to target regions.
  * @reset_aggregated:		Reset aggregated accesses monitoring results.
+ * @apply_scheme:		Apply a DAMON-based operation scheme.
  * @target_valid:		Determine if the target is valid.
  * @cleanup:			Clean up the context.
  *
@@ -104,6 +147,9 @@ struct damon_ctx;
  * of its update.  The value will be used for regions adjustment threshold.
  * @reset_aggregated should reset the access monitoring results that aggregated
  * by @check_accesses.
+ * @apply_scheme is called from @kdamond when a region for user provided
+ * DAMON-based operation scheme is found.  It should apply the scheme's action
+ * to the region.  This is not used for &DAMON_ARBITRARY_TARGET case.
  * @target_valid should check whether the target is still valid for the
  * monitoring.
  * @cleanup is called from @kdamond just before its termination.
@@ -114,6 +160,8 @@ struct damon_primitive {
 	void (*prepare_access_checks)(struct damon_ctx *context);
 	unsigned int (*check_accesses)(struct damon_ctx *context);
 	void (*reset_aggregated)(struct damon_ctx *context);
+	int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t,
+			struct damon_region *r, struct damos *scheme);
 	bool (*target_valid)(void *target);
 	void (*cleanup)(struct damon_ctx *context);
 };
@@ -192,6 +240,7 @@ struct damon_callback {
  * @min_nr_regions:	The minimum number of adaptive monitoring regions.
  * @max_nr_regions:	The maximum number of adaptive monitoring regions.
  * @adaptive_targets:	Head of monitoring targets (&damon_target) list.
+ * @schemes:		Head of schemes (&damos) list.
  */
 struct damon_ctx {
 	unsigned long sample_interval;
@@ -213,6 +262,7 @@ struct damon_ctx {
 	unsigned long min_nr_regions;
 	unsigned long max_nr_regions;
 	struct list_head adaptive_targets;
+	struct list_head schemes;
 };
 
 #define damon_next_region(r) \
@@ -233,6 +283,12 @@ struct damon_ctx {
 #define damon_for_each_target_safe(t, next, ctx)	\
 	list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list)
 
+#define damon_for_each_scheme(s, ctx) \
+	list_for_each_entry(s, &(ctx)->schemes, list)
+
+#define damon_for_each_scheme_safe(s, next, ctx) \
+	list_for_each_entry_safe(s, next, &(ctx)->schemes, list)
+
 #ifdef CONFIG_DAMON
 
 struct damon_region *damon_new_region(unsigned long start, unsigned long end);
@@ -242,6 +298,14 @@ inline void damon_insert_region(struct damon_region *r,
 void damon_add_region(struct damon_region *r, struct damon_target *t);
 void damon_destroy_region(struct damon_region *r, struct damon_target *t);
 
+struct damos *damon_new_scheme(
+		unsigned long min_sz_region, unsigned long max_sz_region,
+		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
+		unsigned int min_age_region, unsigned int max_age_region,
+		enum damos_action action);
+void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
+void damon_destroy_scheme(struct damos *s);
+
 struct damon_target *damon_new_target(unsigned long id);
 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
 void damon_free_target(struct damon_target *t);
@@ -255,6 +319,8 @@ int damon_set_targets(struct damon_ctx *ctx,
 int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 		unsigned long aggr_int, unsigned long primitive_upd_int,
 		unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_set_schemes(struct damon_ctx *ctx,
+			struct damos **schemes, ssize_t nr_schemes);
 int damon_nr_running_ctxs(void);
 
 int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 3efbe80779db..0ed97b21cbb6 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -85,6 +85,50 @@ void damon_destroy_region(struct damon_region *r, struct damon_target *t)
 	damon_free_region(r);
 }
 
+struct damos *damon_new_scheme(
+		unsigned long min_sz_region, unsigned long max_sz_region,
+		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
+		unsigned int min_age_region, unsigned int max_age_region,
+		enum damos_action action)
+{
+	struct damos *scheme;
+
+	scheme = kmalloc(sizeof(*scheme), GFP_KERNEL);
+	if (!scheme)
+		return NULL;
+	scheme->min_sz_region = min_sz_region;
+	scheme->max_sz_region = max_sz_region;
+	scheme->min_nr_accesses = min_nr_accesses;
+	scheme->max_nr_accesses = max_nr_accesses;
+	scheme->min_age_region = min_age_region;
+	scheme->max_age_region = max_age_region;
+	scheme->action = action;
+	INIT_LIST_HEAD(&scheme->list);
+
+	return scheme;
+}
+
+void damon_add_scheme(struct damon_ctx *ctx, struct damos *s)
+{
+	list_add_tail(&s->list, &ctx->schemes);
+}
+
+static void damon_del_scheme(struct damos *s)
+{
+	list_del(&s->list);
+}
+
+static void damon_free_scheme(struct damos *s)
+{
+	kfree(s);
+}
+
+void damon_destroy_scheme(struct damos *s)
+{
+	damon_del_scheme(s);
+	damon_free_scheme(s);
+}
+
 /*
  * Construct a damon_target struct
  *
@@ -156,6 +200,7 @@ struct damon_ctx *damon_new_ctx(void)
 	ctx->max_nr_regions = 1000;
 
 	INIT_LIST_HEAD(&ctx->adaptive_targets);
+	INIT_LIST_HEAD(&ctx->schemes);
 
 	return ctx;
 }
@@ -175,7 +220,13 @@ static void damon_destroy_targets(struct damon_ctx *ctx)
 
 void damon_destroy_ctx(struct damon_ctx *ctx)
 {
+	struct damos *s, *next_s;
+
 	damon_destroy_targets(ctx);
+
+	damon_for_each_scheme_safe(s, next_s, ctx)
+		damon_destroy_scheme(s);
+
 	kfree(ctx);
 }
 
@@ -250,6 +301,30 @@ int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
 	return 0;
 }
 
+/**
+ * damon_set_schemes() - Set data access monitoring based operation schemes.
+ * @ctx:	monitoring context
+ * @schemes:	array of the schemes
+ * @nr_schemes:	number of entries in @schemes
+ *
+ * This function should not be called while the kdamond of the context is
+ * running.
+ *
+ * Return: 0 if success, or negative error code otherwise.
+ */
+int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes,
+			ssize_t nr_schemes)
+{
+	struct damos *s, *next;
+	ssize_t i;
+
+	damon_for_each_scheme_safe(s, next, ctx)
+		damon_destroy_scheme(s);
+	for (i = 0; i < nr_schemes; i++)
+		damon_add_scheme(ctx, schemes[i]);
+	return 0;
+}
+
 /**
  * damon_nr_running_ctxs() - Return number of currently running contexts.
  */
@@ -453,6 +528,39 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
 	}
 }
 
+static void damon_do_apply_schemes(struct damon_ctx *c,
+				   struct damon_target *t,
+				   struct damon_region *r)
+{
+	struct damos *s;
+	unsigned long sz;
+
+	damon_for_each_scheme(s, c) {
+		sz = r->ar.end - r->ar.start;
+		if (sz < s->min_sz_region || s->max_sz_region < sz)
+			continue;
+		if (r->nr_accesses < s->min_nr_accesses ||
+				s->max_nr_accesses < r->nr_accesses)
+			continue;
+		if (r->age < s->min_age_region || s->max_age_region < r->age)
+			continue;
+		if (c->primitive.apply_scheme)
+			c->primitive.apply_scheme(c, t, r, s);
+		r->age = 0;
+	}
+}
+
+static void kdamond_apply_schemes(struct damon_ctx *c)
+{
+	struct damon_target *t;
+	struct damon_region *r;
+
+	damon_for_each_target(t, c) {
+		damon_for_each_region(r, t)
+			damon_do_apply_schemes(c, t, r);
+	}
+}
+
 #define sz_damon_region(r) (r->ar.end - r->ar.start)
 
 /*
@@ -693,6 +801,7 @@ static int kdamond_fn(void *data)
 			if (ctx->callback.after_aggregation &&
 					ctx->callback.after_aggregation(ctx))
 				set_kdamond_stop(ctx);
+			kdamond_apply_schemes(ctx);
 			kdamond_reset_aggregated(ctx);
 			kdamond_split_regions(ctx);
 			if (ctx->primitive.reset_aggregated)
-- 
cgit v1.2.3


From 6dea8add4d2875b80843e4a4c8acd334a4db8c8f Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:46:25 -0700
Subject: mm/damon/vaddr: support DAMON-based Operation Schemes

This makes DAMON's default primitives for virtual address spaces to
support DAMON-based Operation Schemes (DAMOS) by implementing actions
application functions and registering it to the monitoring context.  The
implementation simply links 'madvise()' for related DAMOS actions.  That
is, 'madvise(MADV_WILLNEED)' is called for 'WILLNEED' DAMOS action and
similar for other actions ('COLD', 'PAGEOUT', 'HUGEPAGE', 'NOHUGEPAGE').

So, the kernel space DAMON users can now use the DAMON-based
optimizations with only small amount of code.

Link: https://lkml.kernel.org/r/20211001125604.29660-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rienjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h |  2 ++
 mm/damon/vaddr.c      | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index dbe18b0fb795..be6b6e81e8ee 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -337,6 +337,8 @@ void damon_va_prepare_access_checks(struct damon_ctx *ctx);
 unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
 bool damon_va_target_valid(void *t);
 void damon_va_cleanup(struct damon_ctx *ctx);
+int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme);
 void damon_va_set_primitives(struct damon_ctx *ctx);
 
 #endif	/* CONFIG_DAMON_VADDR */
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 58c1fb2aafa9..3e1c74d36bab 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -7,6 +7,7 @@
 
 #define pr_fmt(fmt) "damon-va: " fmt
 
+#include <asm-generic/mman-common.h>
 #include <linux/damon.h>
 #include <linux/hugetlb.h>
 #include <linux/mm.h>
@@ -658,6 +659,60 @@ bool damon_va_target_valid(void *target)
 	return false;
 }
 
+#ifndef CONFIG_ADVISE_SYSCALLS
+static int damos_madvise(struct damon_target *target, struct damon_region *r,
+			int behavior)
+{
+	return -EINVAL;
+}
+#else
+static int damos_madvise(struct damon_target *target, struct damon_region *r,
+			int behavior)
+{
+	struct mm_struct *mm;
+	int ret = -ENOMEM;
+
+	mm = damon_get_mm(target);
+	if (!mm)
+		goto out;
+
+	ret = do_madvise(mm, PAGE_ALIGN(r->ar.start),
+			PAGE_ALIGN(r->ar.end - r->ar.start), behavior);
+	mmput(mm);
+out:
+	return ret;
+}
+#endif	/* CONFIG_ADVISE_SYSCALLS */
+
+int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme)
+{
+	int madv_action;
+
+	switch (scheme->action) {
+	case DAMOS_WILLNEED:
+		madv_action = MADV_WILLNEED;
+		break;
+	case DAMOS_COLD:
+		madv_action = MADV_COLD;
+		break;
+	case DAMOS_PAGEOUT:
+		madv_action = MADV_PAGEOUT;
+		break;
+	case DAMOS_HUGEPAGE:
+		madv_action = MADV_HUGEPAGE;
+		break;
+	case DAMOS_NOHUGEPAGE:
+		madv_action = MADV_NOHUGEPAGE;
+		break;
+	default:
+		pr_warn("Wrong action %d\n", scheme->action);
+		return -EINVAL;
+	}
+
+	return damos_madvise(t, r, madv_action);
+}
+
 void damon_va_set_primitives(struct damon_ctx *ctx)
 {
 	ctx->primitive.init = damon_va_init;
@@ -667,6 +722,7 @@ void damon_va_set_primitives(struct damon_ctx *ctx)
 	ctx->primitive.reset_aggregated = NULL;
 	ctx->primitive.target_valid = damon_va_target_valid;
 	ctx->primitive.cleanup = NULL;
+	ctx->primitive.apply_scheme = damon_va_apply_scheme;
 }
 
 #include "vaddr-test.h"
-- 
cgit v1.2.3


From 2f0b548c9f03a78f4ce6ab48986e3108028936a6 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:46:32 -0700
Subject: mm/damon/schemes: implement statistics feature

To tune the DAMON-based operation schemes, knowing how many and how
large regions are affected by each of the schemes will be helful.  Those
stats could be used for not only the tuning, but also monitoring of the
working set size and the number of regions, if the scheme does not
change the program behavior too much.

For the reason, this implements the statistics for the schemes.  The
total number and size of the regions that each scheme is applied are
exported to users via '->stat_count' and '->stat_sz' of 'struct damos'.
Admins can also check the number by reading 'schemes' debugfs file.  The
last two integers now represents the stats.  To allow collecting the
stats without changing the program behavior, this also adds new scheme
action, 'DAMOS_STAT'.  Note that 'DAMOS_STAT' is not only making no
memory operation actions, but also does not reset the age of regions.

Link: https://lkml.kernel.org/r/20211001125604.29660-6-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rienjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 10 +++++++++-
 mm/damon/core.c       |  7 ++++++-
 mm/damon/dbgfs.c      |  5 +++--
 mm/damon/vaddr.c      |  2 ++
 4 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index be6b6e81e8ee..f301bb53381c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -78,6 +78,7 @@ struct damon_target {
  * @DAMOS_PAGEOUT:	Call ``madvise()`` for the region with MADV_PAGEOUT.
  * @DAMOS_HUGEPAGE:	Call ``madvise()`` for the region with MADV_HUGEPAGE.
  * @DAMOS_NOHUGEPAGE:	Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
+ * @DAMOS_STAT:		Do nothing but count the stat.
  */
 enum damos_action {
 	DAMOS_WILLNEED,
@@ -85,6 +86,7 @@ enum damos_action {
 	DAMOS_PAGEOUT,
 	DAMOS_HUGEPAGE,
 	DAMOS_NOHUGEPAGE,
+	DAMOS_STAT,		/* Do nothing but only record the stat */
 };
 
 /**
@@ -96,9 +98,13 @@ enum damos_action {
  * @min_age_region:	Minimum age of target regions.
  * @max_age_region:	Maximum age of target regions.
  * @action:		&damo_action to be applied to the target regions.
+ * @stat_count:		Total number of regions that this scheme is applied.
+ * @stat_sz:		Total size of regions that this scheme is applied.
  * @list:		List head for siblings.
  *
- * Note that both the minimums and the maximums are inclusive.
+ * For each aggregation interval, DAMON applies @action to monitoring target
+ * regions fit in the condition and updates the statistics.  Note that both
+ * the minimums and the maximums are inclusive.
  */
 struct damos {
 	unsigned long min_sz_region;
@@ -108,6 +114,8 @@ struct damos {
 	unsigned int min_age_region;
 	unsigned int max_age_region;
 	enum damos_action action;
+	unsigned long stat_count;
+	unsigned long stat_sz;
 	struct list_head list;
 };
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 0ed97b21cbb6..2f6785737902 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -103,6 +103,8 @@ struct damos *damon_new_scheme(
 	scheme->min_age_region = min_age_region;
 	scheme->max_age_region = max_age_region;
 	scheme->action = action;
+	scheme->stat_count = 0;
+	scheme->stat_sz = 0;
 	INIT_LIST_HEAD(&scheme->list);
 
 	return scheme;
@@ -544,9 +546,12 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 			continue;
 		if (r->age < s->min_age_region || s->max_age_region < r->age)
 			continue;
+		s->stat_count++;
+		s->stat_sz += sz;
 		if (c->primitive.apply_scheme)
 			c->primitive.apply_scheme(c, t, r, s);
-		r->age = 0;
+		if (s->action != DAMOS_STAT)
+			r->age = 0;
 	}
 }
 
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index 78b7a04490c5..28d6abf27763 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -106,11 +106,11 @@ static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len)
 
 	damon_for_each_scheme(s, c) {
 		rc = scnprintf(&buf[written], len - written,
-				"%lu %lu %u %u %u %u %d\n",
+				"%lu %lu %u %u %u %u %d %lu %lu\n",
 				s->min_sz_region, s->max_sz_region,
 				s->min_nr_accesses, s->max_nr_accesses,
 				s->min_age_region, s->max_age_region,
-				s->action);
+				s->action, s->stat_count, s->stat_sz);
 		if (!rc)
 			return -ENOMEM;
 
@@ -159,6 +159,7 @@ static bool damos_action_valid(int action)
 	case DAMOS_PAGEOUT:
 	case DAMOS_HUGEPAGE:
 	case DAMOS_NOHUGEPAGE:
+	case DAMOS_STAT:
 		return true;
 	default:
 		return false;
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 3e1c74d36bab..953c145b4f08 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -705,6 +705,8 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
 	case DAMOS_NOHUGEPAGE:
 		madv_action = MADV_NOHUGEPAGE;
 		break;
+	case DAMOS_STAT:
+		return 0;
 	default:
 		pr_warn("Wrong action %d\n", scheme->action);
 		return -EINVAL;
-- 
cgit v1.2.3


From a28397beb55b68bd0f15c6778540e8ae1bc26d21 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:46:56 -0700
Subject: mm/damon: implement primitives for physical address space monitoring

This implements the monitoring primitives for the physical memory
address space.  Internally, it uses the PTE Accessed bit, similar to
that of the virtual address spaces monitoring primitives.  It supports
only user memory pages, as idle pages tracking does.  If the monitoring
target physical memory address range contains non-user memory pages,
access check of the pages will do nothing but simply treat the pages as
not accessed.

Link: https://lkml.kernel.org/r/20211012205711.29216-6-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rienjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h |  10 +++
 mm/damon/Kconfig      |   8 ++
 mm/damon/Makefile     |   1 +
 mm/damon/paddr.c      | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 243 insertions(+)
 create mode 100644 mm/damon/paddr.c

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index f301bb53381c..715dadd21f7c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -351,4 +351,14 @@ void damon_va_set_primitives(struct damon_ctx *ctx);
 
 #endif	/* CONFIG_DAMON_VADDR */
 
+#ifdef CONFIG_DAMON_PADDR
+
+/* Monitoring primitives for the physical memory address space */
+void damon_pa_prepare_access_checks(struct damon_ctx *ctx);
+unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
+bool damon_pa_target_valid(void *t);
+void damon_pa_set_primitives(struct damon_ctx *ctx);
+
+#endif	/* CONFIG_DAMON_PADDR */
+
 #endif	/* _DAMON_H */
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig
index ba8898c7eb8e..2a5923be631e 100644
--- a/mm/damon/Kconfig
+++ b/mm/damon/Kconfig
@@ -32,6 +32,14 @@ config DAMON_VADDR
 	  This builds the default data access monitoring primitives for DAMON
 	  that work for virtual address spaces.
 
+config DAMON_PADDR
+	bool "Data access monitoring primitives for the physical address space"
+	depends on DAMON && MMU
+	select PAGE_IDLE_FLAG
+	help
+	  This builds the default data access monitoring primitives for DAMON
+	  that works for the physical address space.
+
 config DAMON_VADDR_KUNIT_TEST
 	bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS
 	depends on DAMON_VADDR && KUNIT=y
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index 99b1bfe01ff5..8d9b0df79702 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -2,4 +2,5 @@
 
 obj-$(CONFIG_DAMON)		:= core.o
 obj-$(CONFIG_DAMON_VADDR)	+= prmtv-common.o vaddr.o
+obj-$(CONFIG_DAMON_PADDR)	+= prmtv-common.o paddr.o
 obj-$(CONFIG_DAMON_DBGFS)	+= dbgfs.o
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
new file mode 100644
index 000000000000..d7a2ecd09ed0
--- /dev/null
+++ b/mm/damon/paddr.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON Primitives for The Physical Address Space
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define pr_fmt(fmt) "damon-pa: " fmt
+
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
+#include <linux/pagemap.h>
+#include <linux/rmap.h>
+
+#include "prmtv-common.h"
+
+static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
+		unsigned long addr, void *arg)
+{
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+		.address = addr,
+	};
+
+	while (page_vma_mapped_walk(&pvmw)) {
+		addr = pvmw.address;
+		if (pvmw.pte)
+			damon_ptep_mkold(pvmw.pte, vma->vm_mm, addr);
+		else
+			damon_pmdp_mkold(pvmw.pmd, vma->vm_mm, addr);
+	}
+	return true;
+}
+
+static void damon_pa_mkold(unsigned long paddr)
+{
+	struct page *page = damon_get_page(PHYS_PFN(paddr));
+	struct rmap_walk_control rwc = {
+		.rmap_one = __damon_pa_mkold,
+		.anon_lock = page_lock_anon_vma_read,
+	};
+	bool need_lock;
+
+	if (!page)
+		return;
+
+	if (!page_mapped(page) || !page_rmapping(page)) {
+		set_page_idle(page);
+		goto out;
+	}
+
+	need_lock = !PageAnon(page) || PageKsm(page);
+	if (need_lock && !trylock_page(page))
+		goto out;
+
+	rmap_walk(page, &rwc);
+
+	if (need_lock)
+		unlock_page(page);
+
+out:
+	put_page(page);
+}
+
+static void __damon_pa_prepare_access_check(struct damon_ctx *ctx,
+					    struct damon_region *r)
+{
+	r->sampling_addr = damon_rand(r->ar.start, r->ar.end);
+
+	damon_pa_mkold(r->sampling_addr);
+}
+
+void damon_pa_prepare_access_checks(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	struct damon_region *r;
+
+	damon_for_each_target(t, ctx) {
+		damon_for_each_region(r, t)
+			__damon_pa_prepare_access_check(ctx, r);
+	}
+}
+
+struct damon_pa_access_chk_result {
+	unsigned long page_sz;
+	bool accessed;
+};
+
+static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma,
+		unsigned long addr, void *arg)
+{
+	struct damon_pa_access_chk_result *result = arg;
+	struct page_vma_mapped_walk pvmw = {
+		.page = page,
+		.vma = vma,
+		.address = addr,
+	};
+
+	result->accessed = false;
+	result->page_sz = PAGE_SIZE;
+	while (page_vma_mapped_walk(&pvmw)) {
+		addr = pvmw.address;
+		if (pvmw.pte) {
+			result->accessed = pte_young(*pvmw.pte) ||
+				!page_is_idle(page) ||
+				mmu_notifier_test_young(vma->vm_mm, addr);
+		} else {
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+			result->accessed = pmd_young(*pvmw.pmd) ||
+				!page_is_idle(page) ||
+				mmu_notifier_test_young(vma->vm_mm, addr);
+			result->page_sz = ((1UL) << HPAGE_PMD_SHIFT);
+#else
+			WARN_ON_ONCE(1);
+#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */
+		}
+		if (result->accessed) {
+			page_vma_mapped_walk_done(&pvmw);
+			break;
+		}
+	}
+
+	/* If accessed, stop walking */
+	return !result->accessed;
+}
+
+static bool damon_pa_young(unsigned long paddr, unsigned long *page_sz)
+{
+	struct page *page = damon_get_page(PHYS_PFN(paddr));
+	struct damon_pa_access_chk_result result = {
+		.page_sz = PAGE_SIZE,
+		.accessed = false,
+	};
+	struct rmap_walk_control rwc = {
+		.arg = &result,
+		.rmap_one = __damon_pa_young,
+		.anon_lock = page_lock_anon_vma_read,
+	};
+	bool need_lock;
+
+	if (!page)
+		return false;
+
+	if (!page_mapped(page) || !page_rmapping(page)) {
+		if (page_is_idle(page))
+			result.accessed = false;
+		else
+			result.accessed = true;
+		put_page(page);
+		goto out;
+	}
+
+	need_lock = !PageAnon(page) || PageKsm(page);
+	if (need_lock && !trylock_page(page)) {
+		put_page(page);
+		return NULL;
+	}
+
+	rmap_walk(page, &rwc);
+
+	if (need_lock)
+		unlock_page(page);
+	put_page(page);
+
+out:
+	*page_sz = result.page_sz;
+	return result.accessed;
+}
+
+static void __damon_pa_check_access(struct damon_ctx *ctx,
+				    struct damon_region *r)
+{
+	static unsigned long last_addr;
+	static unsigned long last_page_sz = PAGE_SIZE;
+	static bool last_accessed;
+
+	/* If the region is in the last checked page, reuse the result */
+	if (ALIGN_DOWN(last_addr, last_page_sz) ==
+				ALIGN_DOWN(r->sampling_addr, last_page_sz)) {
+		if (last_accessed)
+			r->nr_accesses++;
+		return;
+	}
+
+	last_accessed = damon_pa_young(r->sampling_addr, &last_page_sz);
+	if (last_accessed)
+		r->nr_accesses++;
+
+	last_addr = r->sampling_addr;
+}
+
+unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
+{
+	struct damon_target *t;
+	struct damon_region *r;
+	unsigned int max_nr_accesses = 0;
+
+	damon_for_each_target(t, ctx) {
+		damon_for_each_region(r, t) {
+			__damon_pa_check_access(ctx, r);
+			max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
+		}
+	}
+
+	return max_nr_accesses;
+}
+
+bool damon_pa_target_valid(void *t)
+{
+	return true;
+}
+
+void damon_pa_set_primitives(struct damon_ctx *ctx)
+{
+	ctx->primitive.init = NULL;
+	ctx->primitive.update = NULL;
+	ctx->primitive.prepare_access_checks = damon_pa_prepare_access_checks;
+	ctx->primitive.check_accesses = damon_pa_check_accesses;
+	ctx->primitive.reset_aggregated = NULL;
+	ctx->primitive.target_valid = damon_pa_target_valid;
+	ctx->primitive.cleanup = NULL;
+	ctx->primitive.apply_scheme = NULL;
+}
-- 
cgit v1.2.3


From 57223ac295845b1d72ec1bd02b5fab992b77a021 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:47:13 -0700
Subject: mm/damon/paddr: support the pageout scheme

Introduction
============

This patchset 1) makes the engine for general data access
pattern-oriented memory management (DAMOS) be more useful for production
environments, and 2) implements a static kernel module for lightweight
proactive reclamation using the engine.

Proactive Reclamation
---------------------

On general memory over-committed systems, proactively reclaiming cold
pages helps saving memory and reducing latency spikes that incurred by
the direct reclaim or the CPU consumption of kswapd, while incurring
only minimal performance degradation[2].

A Free Pages Reporting[8] based memory over-commit virtualization system
would be one more specific use case.  In the system, the guest VMs
reports their free memory to host, and the host reallocates the reported
memory to other guests.  As a result, the system's memory utilization
can be maximized.  However, the guests could be not so memory-frugal,
because some kernel subsystems and user-space applications are designed
to use as much memory as available.  Then, guests would report only
small amount of free memory to host, results in poor memory utilization.
Running the proactive reclamation in such guests could help mitigating
this problem.

Google has also implemented this idea and using it in their data center.
They further proposed upstreaming it in LSFMM'19, and "the general
consensus was that, while this sort of proactive reclaim would be useful
for a number of users, the cost of this particular solution was too high
to consider merging it upstream"[3].  The cost mainly comes from the
coldness tracking.  Roughly speaking, the implementation periodically
scans the 'Accessed' bit of each page.  For the reason, the overhead
linearly increases as the size of the memory and the scanning frequency
grows.  As a result, Google is known to dedicating one CPU for the work.
That's a reasonable option to someone like Google, but it wouldn't be so
to some others.

DAMON and DAMOS: An engine for data access pattern-oriented memory management
-----------------------------------------------------------------------------

DAMON[4] is a framework for general data access monitoring.  Its
adaptive monitoring overhead control feature minimizes its monitoring
overhead.  It also let the upper-bound of the overhead be configurable
by clients, regardless of the size of the monitoring target memory.
While monitoring 70 GiB memory of a production system every 5
milliseconds, it consumes less than 1% single CPU time.  For this, it
could sacrify some of the quality of the monitoring results.
Nevertheless, the lower-bound of the quality is configurable, and it
uses a best-effort algorithm for better quality.  Our test results[5]
show the quality is practical enough.  From the production system
monitoring, we were able to find a 4 KiB region in the 70 GiB memory
that shows highest access frequency.

We normally don't monitor the data access pattern just for fun but to
improve something like memory management.  Proactive reclamation is one
such usage.  For such general cases, DAMON provides a feature called
DAMon-based Operation Schemes (DAMOS)[6].  It makes DAMON an engine for
general data access pattern oriented memory management.  Using this,
clients can ask DAMON to find memory regions of specific data access
pattern and apply some memory management action (e.g., page out, move to
head of the LRU list, use huge page, ...).  We call the request
'scheme'.

Proactive Reclamation on top of DAMON/DAMOS
-------------------------------------------

Therefore, by using DAMON for the cold pages detection, the proactive
reclamation's monitoring overhead issue can be solved.  Actually, we
previously implemented a version of proactive reclamation using DAMOS
and achieved noticeable improvements with our evaluation setup[5].
Nevertheless, it more for a proof-of-concept, rather than production
uses.  It supports only virtual address spaces of processes, and require
additional tuning efforts for given workloads and the hardware.  For the
tuning, we introduced a simple auto-tuning user space tool[8].  Google
is also known to using a ML-based similar approach for their fleets[2].
But, making it just works with intuitive knobs in the kernel would be
helpful for general users.

To this end, this patchset improves DAMOS to be ready for such
production usages, and implements another version of the proactive
reclamation, namely DAMON_RECLAIM, on top of it.

DAMOS Improvements: Aggressiveness Control, Prioritization, and Watermarks
--------------------------------------------------------------------------

First of all, the current version of DAMOS supports only virtual address
spaces.  This patchset makes it supports the physical address space for
the page out action.

Next major problem of the current version of DAMOS is the lack of the
aggressiveness control, which can results in arbitrary overhead.  For
example, if huge memory regions having the data access pattern of
interest are found, applying the requested action to all of the regions
could incur significant overhead.  It can be controlled by tuning the
target data access pattern with manual or automated approaches[2,7].
But, some people would prefer the kernel to just work with only
intuitive tuning or default values.

For such cases, this patchset implements a safeguard, namely time/size
quota.  Using this, the clients can specify up to how much time can be
used for applying the action, and/or up to how much memory regions the
action can be applied within a user-specified time duration.  A followup
question is, to which memory regions should the action applied within
the limits? We implement a simple regions prioritization mechanism for
each action and make DAMOS to apply the action to high priority regions
first.  It also allows clients tune the prioritization mechanism to use
different weights for size, access frequency, and age of memory regions.
This means we could use not only LRU but also LFU or some fancy
algorithms like CAR[9] with lightweight overhead.

Though DAMON is lightweight, someone would want to remove even the cold
pages monitoring overhead when it is unnecessary.  Currently, it should
manually turned on and off by clients, but some clients would simply
want to turn it on and off based on some metrics like free memory ratio
or memory fragmentation.  For such cases, this patchset implements a
watermarks-based automatic activation feature.  It allows the clients
configure the metric of their interest, and three watermarks of the
metric.  If the metric is higher than the high watermark or lower than
the low watermark, the scheme is deactivated.  If the metric is lower
than the mid watermark but higher than the low watermark, the scheme is
activated.

DAMON-based Reclaim
-------------------

Using the improved version of DAMOS, this patchset implements a static
kernel module called 'damon_reclaim'.  It finds memory regions that
didn't accessed for specific time duration and page out.  Consuming too
much CPU for the paging out operations, or doing pageout too frequently
can be critical for systems configuring their swap devices with
software-defined in-memory block devices like zram/zswap or total number
of writes limited devices like SSDs, respectively.  To avoid the
problems, the time/size quotas can be configured.  Under the quotas, it
pages out memory regions that didn't accessed longer first.  Also, to
remove the monitoring overhead under peaceful situation, and to fall
back to the LRU-list based page granularity reclamation when it doesn't
make progress, the three watermarks based activation mechanism is used,
with the free memory ratio as the watermark metric.

For convenient configurations, it provides several module parameters.
Using these, sysadmins can enable/disable it, and tune its parameters
including the coldness identification time threshold, the time/size
quotas and the three watermarks.

Evaluation
==========

In short, DAMON_RECLAIM with 50ms/s time quota and regions
prioritization on v5.15-rc5 Linux kernel with ZRAM swap device achieves
38.58% memory saving with only 1.94% runtime overhead.  For this,
DAMON_RECLAIM consumes only 4.97% of single CPU time.

Setup
-----

We evaluate DAMON_RECLAIM to show how each of the DAMOS improvements
make effect.  For this, we measure DAMON_RECLAIM's CPU consumption,
entire system memory footprint, total number of major page faults, and
runtime of 24 realistic workloads in PARSEC3 and SPLASH-2X benchmark
suites on my QEMU/KVM based virtual machine.  The virtual machine runs
on an i3.metal AWS instance, has 130GiB memory, and runs a linux kernel
built on latest -mm tree[1] plus this patchset.  It also utilizes a 4
GiB ZRAM swap device.  We repeats the measurement 5 times and use
averages.

[1] https://github.com/hnaz/linux-mm/tree/v5.15-rc5-mmots-2021-10-13-19-55

Detailed Results
----------------

The results are summarized in the below table.

With coldness identification threshold of 5 seconds, DAMON_RECLAIM
without the time quota-based speed limit achieves 47.21% memory saving,
but incur 4.59% runtime slowdown to the workloads on average.  For this,
DAMON_RECLAIM consumes about 11.28% single CPU time.

Applying time quotas of 200ms/s, 50ms/s, and 10ms/s without the regions
prioritization reduces the slowdown to 4.89%, 2.65%, and 1.5%,
respectively.  Time quota of 200ms/s (20%) makes no real change compared
to the quota unapplied version, because the quota unapplied version
consumes only 11.28% CPU time.  DAMON_RECLAIM's CPU utilization also
similarly reduced: 11.24%, 5.51%, and 2.01% of single CPU time.  That
is, the overhead is proportional to the speed limit.  Nevertheless, it
also reduces the memory saving because it becomes less aggressive.  In
detail, the three variants show 48.76%, 37.83%, and 7.85% memory saving,
respectively.

Applying the regions prioritization (page out regions that not accessed
longer first within the time quota) further reduces the performance
degradation.  Runtime slowdowns and total number of major page faults
increase has been 4.89%/218,690% -> 4.39%/166,136% (200ms/s),
2.65%/111,886% -> 1.94%/59,053% (50ms/s), and 1.5%/34,973.40% ->
2.08%/8,781.75% (10ms/s).  The runtime under 10ms/s time quota has
increased with prioritization, but apparently that's under the margin of
error.

    time quota   prioritization  memory_saving  cpu_util  slowdown  pgmajfaults overhead
    N            N               47.21%         11.28%    4.59%     194,802%
    200ms/s      N               48.76%         11.24%    4.89%     218,690%
    50ms/s       N               37.83%         5.51%     2.65%     111,886%
    10ms/s       N               7.85%          2.01%     1.5%      34,793.40%
    200ms/s      Y               50.08%         10.38%    4.39%     166,136%
    50ms/s       Y               38.58%         4.97%     1.94%     59,053%
    10ms/s       Y               3.63%          1.73%     2.08%     8,781.75%

Baseline and Complete Git Trees
===============================

The patches are based on the latest -mm tree
(v5.15-rc5-mmots-2021-10-13-19-55).  You can also clone the complete git tree
from:

    $ git clone git://github.com/sjp38/linux -b damon_reclaim/patches/v1

The web is also available:
https://git.kernel.org/pub/scm/linux/kernel/git/sj/linux.git/tag/?h=damon_reclaim/patches/v1

Sequence Of Patches
===================

The first patch makes DAMOS support the physical address space for the
page out action.  Following five patches (patches 2-6) implement the
time/size quotas.  Next four patches (patches 7-10) implement the memory
regions prioritization within the limit.  Then, three following patches
(patches 11-13) implement the watermarks-based schemes activation.

Finally, the last two patches (patches 14-15) implement and document the
DAMON-based reclamation using the advanced DAMOS.

[1] https://www.kernel.org/doc/html/v5.15-rc1/vm/damon/index.html
[2] https://research.google/pubs/pub48551/
[3] https://lwn.net/Articles/787611/
[4] https://damonitor.github.io
[5] https://damonitor.github.io/doc/html/latest/vm/damon/eval.html
[6] https://lore.kernel.org/linux-mm/20211001125604.29660-1-sj@kernel.org/
[7] https://github.com/awslabs/damoos
[8] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html
[9] https://www.usenix.org/conference/fast-04/car-clock-adaptive-replacement

This patch (of 15):

This makes the DAMON primitives for physical address space support the
pageout action for DAMON-based Operation Schemes.  With this commit,
hence, users can easily implement system-level data access-aware
reclamations using DAMOS.

[sj@kernel.org: fix missing-prototype build warning]
  Link: https://lkml.kernel.org/r/20211025064220.13904-1-sj@kernel.org

Link: https://lkml.kernel.org/r/20211019150731.16699-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20211019150731.16699-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Marco Elver <elver@google.com>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Greg Thelen <gthelen@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: David Rientjes <rientjes@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h |  2 ++
 mm/damon/paddr.c      | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 715dadd21f7c..9a327bc787b5 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -357,6 +357,8 @@ void damon_va_set_primitives(struct damon_ctx *ctx);
 void damon_pa_prepare_access_checks(struct damon_ctx *ctx);
 unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
 bool damon_pa_target_valid(void *t);
+int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme);
 void damon_pa_set_primitives(struct damon_ctx *ctx);
 
 #endif	/* CONFIG_DAMON_PADDR */
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index d7a2ecd09ed0..957ada55de77 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -11,7 +11,9 @@
 #include <linux/page_idle.h>
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
+#include <linux/swap.h>
 
+#include "../internal.h"
 #include "prmtv-common.h"
 
 static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
@@ -211,6 +213,39 @@ bool damon_pa_target_valid(void *t)
 	return true;
 }
 
+int damon_pa_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme)
+{
+	unsigned long addr;
+	LIST_HEAD(page_list);
+
+	if (scheme->action != DAMOS_PAGEOUT)
+		return -EINVAL;
+
+	for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) {
+		struct page *page = damon_get_page(PHYS_PFN(addr));
+
+		if (!page)
+			continue;
+
+		ClearPageReferenced(page);
+		test_and_clear_page_young(page);
+		if (isolate_lru_page(page)) {
+			put_page(page);
+			continue;
+		}
+		if (PageUnevictable(page)) {
+			putback_lru_page(page);
+		} else {
+			list_add(&page->lru, &page_list);
+			put_page(page);
+		}
+	}
+	reclaim_pages(&page_list);
+	cond_resched();
+	return 0;
+}
+
 void damon_pa_set_primitives(struct damon_ctx *ctx)
 {
 	ctx->primitive.init = NULL;
@@ -220,5 +255,5 @@ void damon_pa_set_primitives(struct damon_ctx *ctx)
 	ctx->primitive.reset_aggregated = NULL;
 	ctx->primitive.target_valid = damon_pa_target_valid;
 	ctx->primitive.cleanup = NULL;
-	ctx->primitive.apply_scheme = NULL;
+	ctx->primitive.apply_scheme = damon_pa_apply_scheme;
 }
-- 
cgit v1.2.3


From 2b8a248d5873343aa16f6c5ede30517693995f13 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:47:16 -0700
Subject: mm/damon/schemes: implement size quota for schemes application speed
 control

There could be arbitrarily large memory regions fulfilling the target
data access pattern of a DAMON-based operation scheme.  In the case,
applying the action of the scheme could incur too high overhead.  To
provide an intuitive way for avoiding it, this implements a feature
called size quota.  If the quota is set, DAMON tries to apply the action
only up to the given amount of memory regions within a given time
window.

Link: https://lkml.kernel.org/r/20211019150731.16699-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 36 +++++++++++++++++++++++++++----
 mm/damon/core.c       | 60 ++++++++++++++++++++++++++++++++++++++++++++-------
 mm/damon/dbgfs.c      |  4 +++-
 3 files changed, 87 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 9a327bc787b5..3a1ce9d9921c 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -89,6 +89,26 @@ enum damos_action {
 	DAMOS_STAT,		/* Do nothing but only record the stat */
 };
 
+/**
+ * struct damos_quota - Controls the aggressiveness of the given scheme.
+ * @sz:			Maximum bytes of memory that the action can be applied.
+ * @reset_interval:	Charge reset interval in milliseconds.
+ *
+ * To avoid consuming too much CPU time or IO resources for applying the
+ * &struct damos->action to large memory, DAMON allows users to set a size
+ * quota.  The quota can be set by writing non-zero values to &sz.  If the size
+ * quota is set, DAMON tries to apply the action only up to &sz bytes within
+ * &reset_interval.
+ */
+struct damos_quota {
+	unsigned long sz;
+	unsigned long reset_interval;
+
+/* private: For charging the quota */
+	unsigned long charged_sz;
+	unsigned long charged_from;
+};
+
 /**
  * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
  * @min_sz_region:	Minimum size of target regions.
@@ -98,13 +118,20 @@ enum damos_action {
  * @min_age_region:	Minimum age of target regions.
  * @max_age_region:	Maximum age of target regions.
  * @action:		&damo_action to be applied to the target regions.
+ * @quota:		Control the aggressiveness of this scheme.
  * @stat_count:		Total number of regions that this scheme is applied.
  * @stat_sz:		Total size of regions that this scheme is applied.
  * @list:		List head for siblings.
  *
- * For each aggregation interval, DAMON applies @action to monitoring target
- * regions fit in the condition and updates the statistics.  Note that both
- * the minimums and the maximums are inclusive.
+ * For each aggregation interval, DAMON finds regions which fit in the
+ * condition (&min_sz_region, &max_sz_region, &min_nr_accesses,
+ * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to
+ * those.  To avoid consuming too much CPU time or IO resources for the
+ * &action, &quota is used.
+ *
+ * After applying the &action to each region, &stat_count and &stat_sz is
+ * updated to reflect the number of regions and total size of regions that the
+ * &action is applied.
  */
 struct damos {
 	unsigned long min_sz_region;
@@ -114,6 +141,7 @@ struct damos {
 	unsigned int min_age_region;
 	unsigned int max_age_region;
 	enum damos_action action;
+	struct damos_quota quota;
 	unsigned long stat_count;
 	unsigned long stat_sz;
 	struct list_head list;
@@ -310,7 +338,7 @@ struct damos *damon_new_scheme(
 		unsigned long min_sz_region, unsigned long max_sz_region,
 		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
 		unsigned int min_age_region, unsigned int max_age_region,
-		enum damos_action action);
+		enum damos_action action, struct damos_quota *quota);
 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 void damon_destroy_scheme(struct damos *s);
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 2f6785737902..cce14a0d5c72 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -89,7 +89,7 @@ struct damos *damon_new_scheme(
 		unsigned long min_sz_region, unsigned long max_sz_region,
 		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
 		unsigned int min_age_region, unsigned int max_age_region,
-		enum damos_action action)
+		enum damos_action action, struct damos_quota *quota)
 {
 	struct damos *scheme;
 
@@ -107,6 +107,11 @@ struct damos *damon_new_scheme(
 	scheme->stat_sz = 0;
 	INIT_LIST_HEAD(&scheme->list);
 
+	scheme->quota.sz = quota->sz;
+	scheme->quota.reset_interval = quota->reset_interval;
+	scheme->quota.charged_sz = 0;
+	scheme->quota.charged_from = 0;
+
 	return scheme;
 }
 
@@ -530,15 +535,25 @@ static void kdamond_reset_aggregated(struct damon_ctx *c)
 	}
 }
 
+static void damon_split_region_at(struct damon_ctx *ctx,
+		struct damon_target *t, struct damon_region *r,
+		unsigned long sz_r);
+
 static void damon_do_apply_schemes(struct damon_ctx *c,
 				   struct damon_target *t,
 				   struct damon_region *r)
 {
 	struct damos *s;
-	unsigned long sz;
 
 	damon_for_each_scheme(s, c) {
-		sz = r->ar.end - r->ar.start;
+		struct damos_quota *quota = &s->quota;
+		unsigned long sz = r->ar.end - r->ar.start;
+
+		/* Check the quota */
+		if (quota->sz && quota->charged_sz >= quota->sz)
+			continue;
+
+		/* Check the target regions condition */
 		if (sz < s->min_sz_region || s->max_sz_region < sz)
 			continue;
 		if (r->nr_accesses < s->min_nr_accesses ||
@@ -546,22 +561,51 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 			continue;
 		if (r->age < s->min_age_region || s->max_age_region < r->age)
 			continue;
-		s->stat_count++;
-		s->stat_sz += sz;
-		if (c->primitive.apply_scheme)
+
+		/* Apply the scheme */
+		if (c->primitive.apply_scheme) {
+			if (quota->sz && quota->charged_sz + sz > quota->sz) {
+				sz = ALIGN_DOWN(quota->sz - quota->charged_sz,
+						DAMON_MIN_REGION);
+				if (!sz)
+					goto update_stat;
+				damon_split_region_at(c, t, r, sz);
+			}
 			c->primitive.apply_scheme(c, t, r, s);
+			quota->charged_sz += sz;
+		}
 		if (s->action != DAMOS_STAT)
 			r->age = 0;
+
+update_stat:
+		s->stat_count++;
+		s->stat_sz += sz;
 	}
 }
 
 static void kdamond_apply_schemes(struct damon_ctx *c)
 {
 	struct damon_target *t;
-	struct damon_region *r;
+	struct damon_region *r, *next_r;
+	struct damos *s;
+
+	damon_for_each_scheme(s, c) {
+		struct damos_quota *quota = &s->quota;
+
+		if (!quota->sz)
+			continue;
+
+		/* New charge window starts */
+		if (time_after_eq(jiffies, quota->charged_from +
+					msecs_to_jiffies(
+						quota->reset_interval))) {
+			quota->charged_from = jiffies;
+			quota->charged_sz = 0;
+		}
+	}
 
 	damon_for_each_target(t, c) {
-		damon_for_each_region(r, t)
+		damon_for_each_region_safe(r, next_r, t)
 			damon_do_apply_schemes(c, t, r);
 	}
 }
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index c90988a20fa4..a04bd50cc4c4 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -188,6 +188,8 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
 
 	*nr_schemes = 0;
 	while (pos < len && *nr_schemes < max_nr_schemes) {
+		struct damos_quota quota = {};
+
 		ret = sscanf(&str[pos], "%lu %lu %u %u %u %u %u%n",
 				&min_sz, &max_sz, &min_nr_a, &max_nr_a,
 				&min_age, &max_age, &action, &parsed);
@@ -200,7 +202,7 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
 
 		pos += parsed;
 		scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
-				min_age, max_age, action);
+				min_age, max_age, action, &quota);
 		if (!scheme)
 			goto fail;
 
-- 
cgit v1.2.3


From 50585192bc2ef9309d32dabdbb5e735679f4f128 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:47:20 -0700
Subject: mm/damon/schemes: skip already charged targets and regions

If DAMOS has stopped applying action in the middle of a group of memory
regions due to its size quota, it starts the work again from the
beginning of the address space in the next charge window.  If there is a
huge memory region at the beginning of the address space and it fulfills
the scheme's target data access pattern always, the action will applied
to only the region.

This mitigates the case by skipping memory regions that charged in
current charge window at the beginning of next charge window.

Link: https://lkml.kernel.org/r/20211019150731.16699-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h |  5 +++++
 mm/damon/core.c       | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 3a1ce9d9921c..585d985768fd 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -107,6 +107,8 @@ struct damos_quota {
 /* private: For charging the quota */
 	unsigned long charged_sz;
 	unsigned long charged_from;
+	struct damon_target *charge_target_from;
+	unsigned long charge_addr_from;
 };
 
 /**
@@ -307,6 +309,9 @@ struct damon_ctx {
 #define damon_prev_region(r) \
 	(container_of(r->list.prev, struct damon_region, list))
 
+#define damon_last_region(t) \
+	(list_last_entry(&t->regions_list, struct damon_region, list))
+
 #define damon_for_each_region(r, t) \
 	list_for_each_entry(r, &t->regions_list, list)
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index cce14a0d5c72..693b75bc3450 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -111,6 +111,8 @@ struct damos *damon_new_scheme(
 	scheme->quota.reset_interval = quota->reset_interval;
 	scheme->quota.charged_sz = 0;
 	scheme->quota.charged_from = 0;
+	scheme->quota.charge_target_from = NULL;
+	scheme->quota.charge_addr_from = 0;
 
 	return scheme;
 }
@@ -553,6 +555,37 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 		if (quota->sz && quota->charged_sz >= quota->sz)
 			continue;
 
+		/* Skip previously charged regions */
+		if (quota->charge_target_from) {
+			if (t != quota->charge_target_from)
+				continue;
+			if (r == damon_last_region(t)) {
+				quota->charge_target_from = NULL;
+				quota->charge_addr_from = 0;
+				continue;
+			}
+			if (quota->charge_addr_from &&
+					r->ar.end <= quota->charge_addr_from)
+				continue;
+
+			if (quota->charge_addr_from && r->ar.start <
+					quota->charge_addr_from) {
+				sz = ALIGN_DOWN(quota->charge_addr_from -
+						r->ar.start, DAMON_MIN_REGION);
+				if (!sz) {
+					if (r->ar.end - r->ar.start <=
+							DAMON_MIN_REGION)
+						continue;
+					sz = DAMON_MIN_REGION;
+				}
+				damon_split_region_at(c, t, r, sz);
+				r = damon_next_region(r);
+				sz = r->ar.end - r->ar.start;
+			}
+			quota->charge_target_from = NULL;
+			quota->charge_addr_from = 0;
+		}
+
 		/* Check the target regions condition */
 		if (sz < s->min_sz_region || s->max_sz_region < sz)
 			continue;
@@ -573,6 +606,10 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 			}
 			c->primitive.apply_scheme(c, t, r, s);
 			quota->charged_sz += sz;
+			if (quota->sz && quota->charged_sz >= quota->sz) {
+				quota->charge_target_from = t;
+				quota->charge_addr_from = r->ar.end + 1;
+			}
 		}
 		if (s->action != DAMOS_STAT)
 			r->age = 0;
-- 
cgit v1.2.3


From 1cd2430300594a230dba9178ac9e286d868d9da2 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:47:23 -0700
Subject: mm/damon/schemes: implement time quota

The size quota feature of DAMOS is useful for IO resource-critical
systems, but not so intuitive for CPU time-critical systems.  Systems
using zram or zswap-like swap device would be examples.

To provide another intuitive ways for such systems, this implements
time-based quota for DAMON-based Operation Schemes.  If the quota is
set, DAMOS tries to use only up to the user-defined quota of CPU time
within a given time window.

Link: https://lkml.kernel.org/r/20211019150731.16699-5-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 25 ++++++++++++++++++++-----
 mm/damon/core.c       | 45 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 60 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 585d985768fd..1e7671bf3d23 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -91,20 +91,35 @@ enum damos_action {
 
 /**
  * struct damos_quota - Controls the aggressiveness of the given scheme.
+ * @ms:			Maximum milliseconds that the scheme can use.
  * @sz:			Maximum bytes of memory that the action can be applied.
  * @reset_interval:	Charge reset interval in milliseconds.
  *
  * To avoid consuming too much CPU time or IO resources for applying the
- * &struct damos->action to large memory, DAMON allows users to set a size
- * quota.  The quota can be set by writing non-zero values to &sz.  If the size
- * quota is set, DAMON tries to apply the action only up to &sz bytes within
- * &reset_interval.
+ * &struct damos->action to large memory, DAMON allows users to set time and/or
+ * size quotas.  The quotas can be set by writing non-zero values to &ms and
+ * &sz, respectively.  If the time quota is set, DAMON tries to use only up to
+ * &ms milliseconds within &reset_interval for applying the action.  If the
+ * size quota is set, DAMON tries to apply the action only up to &sz bytes
+ * within &reset_interval.
+ *
+ * Internally, the time quota is transformed to a size quota using estimated
+ * throughput of the scheme's action.  DAMON then compares it against &sz and
+ * uses smaller one as the effective quota.
  */
 struct damos_quota {
+	unsigned long ms;
 	unsigned long sz;
 	unsigned long reset_interval;
 
-/* private: For charging the quota */
+/* private: */
+	/* For throughput estimation */
+	unsigned long total_charged_sz;
+	unsigned long total_charged_ns;
+
+	unsigned long esz;	/* Effective size quota in bytes */
+
+	/* For charging the quota */
 	unsigned long charged_sz;
 	unsigned long charged_from;
 	struct damon_target *charge_target_from;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 693b75bc3450..d1da4bef96ed 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -107,8 +107,12 @@ struct damos *damon_new_scheme(
 	scheme->stat_sz = 0;
 	INIT_LIST_HEAD(&scheme->list);
 
+	scheme->quota.ms = quota->ms;
 	scheme->quota.sz = quota->sz;
 	scheme->quota.reset_interval = quota->reset_interval;
+	scheme->quota.total_charged_sz = 0;
+	scheme->quota.total_charged_ns = 0;
+	scheme->quota.esz = 0;
 	scheme->quota.charged_sz = 0;
 	scheme->quota.charged_from = 0;
 	scheme->quota.charge_target_from = NULL;
@@ -550,9 +554,10 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 	damon_for_each_scheme(s, c) {
 		struct damos_quota *quota = &s->quota;
 		unsigned long sz = r->ar.end - r->ar.start;
+		struct timespec64 begin, end;
 
 		/* Check the quota */
-		if (quota->sz && quota->charged_sz >= quota->sz)
+		if (quota->esz && quota->charged_sz >= quota->esz)
 			continue;
 
 		/* Skip previously charged regions */
@@ -597,16 +602,21 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 
 		/* Apply the scheme */
 		if (c->primitive.apply_scheme) {
-			if (quota->sz && quota->charged_sz + sz > quota->sz) {
-				sz = ALIGN_DOWN(quota->sz - quota->charged_sz,
+			if (quota->esz &&
+					quota->charged_sz + sz > quota->esz) {
+				sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
 						DAMON_MIN_REGION);
 				if (!sz)
 					goto update_stat;
 				damon_split_region_at(c, t, r, sz);
 			}
+			ktime_get_coarse_ts64(&begin);
 			c->primitive.apply_scheme(c, t, r, s);
+			ktime_get_coarse_ts64(&end);
+			quota->total_charged_ns += timespec64_to_ns(&end) -
+				timespec64_to_ns(&begin);
 			quota->charged_sz += sz;
-			if (quota->sz && quota->charged_sz >= quota->sz) {
+			if (quota->esz && quota->charged_sz >= quota->esz) {
 				quota->charge_target_from = t;
 				quota->charge_addr_from = r->ar.end + 1;
 			}
@@ -620,6 +630,29 @@ update_stat:
 	}
 }
 
+/* Shouldn't be called if quota->ms and quota->sz are zero */
+static void damos_set_effective_quota(struct damos_quota *quota)
+{
+	unsigned long throughput;
+	unsigned long esz;
+
+	if (!quota->ms) {
+		quota->esz = quota->sz;
+		return;
+	}
+
+	if (quota->total_charged_ns)
+		throughput = quota->total_charged_sz * 1000000 /
+			quota->total_charged_ns;
+	else
+		throughput = PAGE_SIZE * 1024;
+	esz = throughput * quota->ms;
+
+	if (quota->sz && quota->sz < esz)
+		esz = quota->sz;
+	quota->esz = esz;
+}
+
 static void kdamond_apply_schemes(struct damon_ctx *c)
 {
 	struct damon_target *t;
@@ -629,15 +662,17 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
 	damon_for_each_scheme(s, c) {
 		struct damos_quota *quota = &s->quota;
 
-		if (!quota->sz)
+		if (!quota->ms && !quota->sz)
 			continue;
 
 		/* New charge window starts */
 		if (time_after_eq(jiffies, quota->charged_from +
 					msecs_to_jiffies(
 						quota->reset_interval))) {
+			quota->total_charged_sz += quota->charged_sz;
 			quota->charged_from = jiffies;
 			quota->charged_sz = 0;
+			damos_set_effective_quota(quota);
 		}
 	}
 
-- 
cgit v1.2.3


From 38683e003153f7abfa612d7b7fe147efa4624af2 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:47:33 -0700
Subject: mm/damon/schemes: prioritize regions within the quotas

This makes DAMON apply schemes to regions having higher priority first,
if it cannot apply schemes to all regions due to the quotas.

The prioritization function should be implemented in the monitoring
primitives.  Those would commonly calculate the priority of the region
using attributes of regions, namely 'size', 'nr_accesses', and 'age'.
For example, some primitive would calculate the priority of each region
using a weighted sum of 'nr_accesses' and 'age' of the region.

The optimal weights would depend on give environments, so this makes
those customizable.  Nevertheless, the score calculation functions are
only encouraged to respect the weights, not mandated.

Link: https://lkml.kernel.org/r/20211019150731.16699-8-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 26 +++++++++++++++++++++
 mm/damon/core.c       | 62 +++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 81 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 1e7671bf3d23..5d47ad9e3911 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -14,6 +14,8 @@
 
 /* Minimal region size.  Every damon_region is aligned by this. */
 #define DAMON_MIN_REGION	PAGE_SIZE
+/* Max priority score for DAMON-based operation schemes */
+#define DAMOS_MAX_SCORE		(99)
 
 /**
  * struct damon_addr_range - Represents an address region of [@start, @end).
@@ -95,6 +97,10 @@ enum damos_action {
  * @sz:			Maximum bytes of memory that the action can be applied.
  * @reset_interval:	Charge reset interval in milliseconds.
  *
+ * @weight_sz:		Weight of the region's size for prioritization.
+ * @weight_nr_accesses:	Weight of the region's nr_accesses for prioritization.
+ * @weight_age:		Weight of the region's age for prioritization.
+ *
  * To avoid consuming too much CPU time or IO resources for applying the
  * &struct damos->action to large memory, DAMON allows users to set time and/or
  * size quotas.  The quotas can be set by writing non-zero values to &ms and
@@ -106,12 +112,22 @@ enum damos_action {
  * Internally, the time quota is transformed to a size quota using estimated
  * throughput of the scheme's action.  DAMON then compares it against &sz and
  * uses smaller one as the effective quota.
+ *
+ * For selecting regions within the quota, DAMON prioritizes current scheme's
+ * target memory regions using the &struct damon_primitive->get_scheme_score.
+ * You could customize the prioritization logic by setting &weight_sz,
+ * &weight_nr_accesses, and &weight_age, because monitoring primitives are
+ * encouraged to respect those.
  */
 struct damos_quota {
 	unsigned long ms;
 	unsigned long sz;
 	unsigned long reset_interval;
 
+	unsigned int weight_sz;
+	unsigned int weight_nr_accesses;
+	unsigned int weight_age;
+
 /* private: */
 	/* For throughput estimation */
 	unsigned long total_charged_sz;
@@ -124,6 +140,10 @@ struct damos_quota {
 	unsigned long charged_from;
 	struct damon_target *charge_target_from;
 	unsigned long charge_addr_from;
+
+	/* For prioritization */
+	unsigned long histogram[DAMOS_MAX_SCORE + 1];
+	unsigned int min_score;
 };
 
 /**
@@ -174,6 +194,7 @@ struct damon_ctx;
  * @prepare_access_checks:	Prepare next access check of target regions.
  * @check_accesses:		Check the accesses to target regions.
  * @reset_aggregated:		Reset aggregated accesses monitoring results.
+ * @get_scheme_score:		Get the score of a region for a scheme.
  * @apply_scheme:		Apply a DAMON-based operation scheme.
  * @target_valid:		Determine if the target is valid.
  * @cleanup:			Clean up the context.
@@ -200,6 +221,8 @@ struct damon_ctx;
  * of its update.  The value will be used for regions adjustment threshold.
  * @reset_aggregated should reset the access monitoring results that aggregated
  * by @check_accesses.
+ * @get_scheme_score should return the priority score of a region for a scheme
+ * as an integer in [0, &DAMOS_MAX_SCORE].
  * @apply_scheme is called from @kdamond when a region for user provided
  * DAMON-based operation scheme is found.  It should apply the scheme's action
  * to the region.  This is not used for &DAMON_ARBITRARY_TARGET case.
@@ -213,6 +236,9 @@ struct damon_primitive {
 	void (*prepare_access_checks)(struct damon_ctx *context);
 	unsigned int (*check_accesses)(struct damon_ctx *context);
 	void (*reset_aggregated)(struct damon_ctx *context);
+	int (*get_scheme_score)(struct damon_ctx *context,
+			struct damon_target *t, struct damon_region *r,
+			struct damos *scheme);
 	int (*apply_scheme)(struct damon_ctx *context, struct damon_target *t,
 			struct damon_region *r, struct damos *scheme);
 	bool (*target_valid)(void *target);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index d1da4bef96ed..fad25778e2ec 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -12,6 +12,7 @@
 #include <linux/kthread.h>
 #include <linux/random.h>
 #include <linux/slab.h>
+#include <linux/string.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/damon.h>
@@ -110,6 +111,9 @@ struct damos *damon_new_scheme(
 	scheme->quota.ms = quota->ms;
 	scheme->quota.sz = quota->sz;
 	scheme->quota.reset_interval = quota->reset_interval;
+	scheme->quota.weight_sz = quota->weight_sz;
+	scheme->quota.weight_nr_accesses = quota->weight_nr_accesses;
+	scheme->quota.weight_age = quota->weight_age;
 	scheme->quota.total_charged_sz = 0;
 	scheme->quota.total_charged_ns = 0;
 	scheme->quota.esz = 0;
@@ -545,6 +549,28 @@ static void damon_split_region_at(struct damon_ctx *ctx,
 		struct damon_target *t, struct damon_region *r,
 		unsigned long sz_r);
 
+static bool __damos_valid_target(struct damon_region *r, struct damos *s)
+{
+	unsigned long sz;
+
+	sz = r->ar.end - r->ar.start;
+	return s->min_sz_region <= sz && sz <= s->max_sz_region &&
+		s->min_nr_accesses <= r->nr_accesses &&
+		r->nr_accesses <= s->max_nr_accesses &&
+		s->min_age_region <= r->age && r->age <= s->max_age_region;
+}
+
+static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
+		struct damon_region *r, struct damos *s)
+{
+	bool ret = __damos_valid_target(r, s);
+
+	if (!ret || !s->quota.esz || !c->primitive.get_scheme_score)
+		return ret;
+
+	return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score;
+}
+
 static void damon_do_apply_schemes(struct damon_ctx *c,
 				   struct damon_target *t,
 				   struct damon_region *r)
@@ -591,13 +617,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 			quota->charge_addr_from = 0;
 		}
 
-		/* Check the target regions condition */
-		if (sz < s->min_sz_region || s->max_sz_region < sz)
-			continue;
-		if (r->nr_accesses < s->min_nr_accesses ||
-				s->max_nr_accesses < r->nr_accesses)
-			continue;
-		if (r->age < s->min_age_region || s->max_age_region < r->age)
+		if (!damos_valid_target(c, t, r, s))
 			continue;
 
 		/* Apply the scheme */
@@ -661,6 +681,8 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
 
 	damon_for_each_scheme(s, c) {
 		struct damos_quota *quota = &s->quota;
+		unsigned long cumulated_sz;
+		unsigned int score, max_score = 0;
 
 		if (!quota->ms && !quota->sz)
 			continue;
@@ -674,6 +696,32 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
 			quota->charged_sz = 0;
 			damos_set_effective_quota(quota);
 		}
+
+		if (!c->primitive.get_scheme_score)
+			continue;
+
+		/* Fill up the score histogram */
+		memset(quota->histogram, 0, sizeof(quota->histogram));
+		damon_for_each_target(t, c) {
+			damon_for_each_region(r, t) {
+				if (!__damos_valid_target(r, s))
+					continue;
+				score = c->primitive.get_scheme_score(
+						c, t, r, s);
+				quota->histogram[score] +=
+					r->ar.end - r->ar.start;
+				if (score > max_score)
+					max_score = score;
+			}
+		}
+
+		/* Set the min score limit */
+		for (cumulated_sz = 0, score = max_score; ; score--) {
+			cumulated_sz += quota->histogram[score];
+			if (cumulated_sz >= quota->esz || !score)
+				break;
+		}
+		quota->min_score = score;
 	}
 
 	damon_for_each_target(t, c) {
-- 
cgit v1.2.3


From 198f0f4c58b9f481e4e51c8c70a6ab9852bbab7f Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:47:37 -0700
Subject: mm/damon/vaddr,paddr: support pageout prioritization

This makes the default monitoring primitives for virtual address spaces
and the physical address sapce to support memory regions prioritization
for 'PAGEOUT' DAMOS action.  It calculates hotness of each region as
weighted sum of 'nr_accesses' and 'age' of the region and get the
priority score as reverse of the hotness, so that cold regions can be
paged out first.

Link: https://lkml.kernel.org/r/20211019150731.16699-9-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h   |  4 ++++
 mm/damon/paddr.c        | 14 ++++++++++++++
 mm/damon/prmtv-common.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/damon/prmtv-common.h |  3 +++
 mm/damon/vaddr.c        | 15 +++++++++++++++
 5 files changed, 82 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 5d47ad9e3911..1217566a0ebc 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -421,6 +421,8 @@ bool damon_va_target_valid(void *t);
 void damon_va_cleanup(struct damon_ctx *ctx);
 int damon_va_apply_scheme(struct damon_ctx *context, struct damon_target *t,
 		struct damon_region *r, struct damos *scheme);
+int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme);
 void damon_va_set_primitives(struct damon_ctx *ctx);
 
 #endif	/* CONFIG_DAMON_VADDR */
@@ -433,6 +435,8 @@ unsigned int damon_pa_check_accesses(struct damon_ctx *ctx);
 bool damon_pa_target_valid(void *t);
 int damon_pa_apply_scheme(struct damon_ctx *context, struct damon_target *t,
 		struct damon_region *r, struct damos *scheme);
+int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme);
 void damon_pa_set_primitives(struct damon_ctx *ctx);
 
 #endif	/* CONFIG_DAMON_PADDR */
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 957ada55de77..a496d6f203d6 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -246,6 +246,19 @@ int damon_pa_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
 	return 0;
 }
 
+int damon_pa_scheme_score(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme)
+{
+	switch (scheme->action) {
+	case DAMOS_PAGEOUT:
+		return damon_pageout_score(context, r, scheme);
+	default:
+		break;
+	}
+
+	return DAMOS_MAX_SCORE;
+}
+
 void damon_pa_set_primitives(struct damon_ctx *ctx)
 {
 	ctx->primitive.init = NULL;
@@ -256,4 +269,5 @@ void damon_pa_set_primitives(struct damon_ctx *ctx)
 	ctx->primitive.target_valid = damon_pa_target_valid;
 	ctx->primitive.cleanup = NULL;
 	ctx->primitive.apply_scheme = damon_pa_apply_scheme;
+	ctx->primitive.get_scheme_score = damon_pa_scheme_score;
 }
diff --git a/mm/damon/prmtv-common.c b/mm/damon/prmtv-common.c
index 7e62ee54fb54..92a04f5831d6 100644
--- a/mm/damon/prmtv-common.c
+++ b/mm/damon/prmtv-common.c
@@ -85,3 +85,49 @@ void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr)
 	put_page(page);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 }
+
+#define DAMON_MAX_SUBSCORE	(100)
+#define DAMON_MAX_AGE_IN_LOG	(32)
+
+int damon_pageout_score(struct damon_ctx *c, struct damon_region *r,
+			struct damos *s)
+{
+	unsigned int max_nr_accesses;
+	int freq_subscore;
+	unsigned int age_in_sec;
+	int age_in_log, age_subscore;
+	unsigned int freq_weight = s->quota.weight_nr_accesses;
+	unsigned int age_weight = s->quota.weight_age;
+	int hotness;
+
+	max_nr_accesses = c->aggr_interval / c->sample_interval;
+	freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses;
+
+	age_in_sec = (unsigned long)r->age * c->aggr_interval / 1000000;
+	for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
+			age_in_log++, age_in_sec >>= 1)
+		;
+
+	/* If frequency is 0, higher age means it's colder */
+	if (freq_subscore == 0)
+		age_in_log *= -1;
+
+	/*
+	 * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
+	 * Scale it to be in [0, 100] and set it as age subscore.
+	 */
+	age_in_log += DAMON_MAX_AGE_IN_LOG;
+	age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
+		DAMON_MAX_AGE_IN_LOG / 2;
+
+	hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
+	if (freq_weight + age_weight)
+		hotness /= freq_weight + age_weight;
+	/*
+	 * Transform it to fit in [0, DAMOS_MAX_SCORE]
+	 */
+	hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;
+
+	/* Return coldness of the region */
+	return DAMOS_MAX_SCORE - hotness;
+}
diff --git a/mm/damon/prmtv-common.h b/mm/damon/prmtv-common.h
index 7093d19e5d42..61f27037603e 100644
--- a/mm/damon/prmtv-common.h
+++ b/mm/damon/prmtv-common.h
@@ -15,3 +15,6 @@ struct page *damon_get_page(unsigned long pfn);
 
 void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr);
 void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr);
+
+int damon_pageout_score(struct damon_ctx *c, struct damon_region *r,
+			struct damos *s);
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 758501b8d97d..675cd8c7df9b 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -633,6 +633,20 @@ int damon_va_apply_scheme(struct damon_ctx *ctx, struct damon_target *t,
 	return damos_madvise(t, r, madv_action);
 }
 
+int damon_va_scheme_score(struct damon_ctx *context, struct damon_target *t,
+		struct damon_region *r, struct damos *scheme)
+{
+
+	switch (scheme->action) {
+	case DAMOS_PAGEOUT:
+		return damon_pageout_score(context, r, scheme);
+	default:
+		break;
+	}
+
+	return DAMOS_MAX_SCORE;
+}
+
 void damon_va_set_primitives(struct damon_ctx *ctx)
 {
 	ctx->primitive.init = damon_va_init;
@@ -643,6 +657,7 @@ void damon_va_set_primitives(struct damon_ctx *ctx)
 	ctx->primitive.target_valid = damon_va_target_valid;
 	ctx->primitive.cleanup = NULL;
 	ctx->primitive.apply_scheme = damon_va_apply_scheme;
+	ctx->primitive.get_scheme_score = damon_va_scheme_score;
 }
 
 #include "vaddr-test.h"
-- 
cgit v1.2.3


From ee801b7dd7822a82fd7663048ad649545fac6df3 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 5 Nov 2021 13:47:47 -0700
Subject: mm/damon/schemes: activate schemes based on a watermarks mechanism

DAMON-based operation schemes need to be manually turned on and off.  In
some use cases, however, the condition for turning a scheme on and off
would depend on the system's situation.  For example, schemes for
proactive pages reclamation would need to be turned on when some memory
pressure is detected, and turned off when the system has enough free
memory.

For easier control of schemes activation based on the system situation,
this introduces a watermarks-based mechanism.  The client can describe
the watermark metric (e.g., amount of free memory in the system),
watermark check interval, and three watermarks, namely high, mid, and
low.  If the scheme is deactivated, it only gets the metric and compare
that to the three watermarks for every check interval.  If the metric is
higher than the high watermark, the scheme is deactivated.  If the
metric is between the mid watermark and the low watermark, the scheme is
activated.  If the metric is lower than the low watermark, the scheme is
deactivated again.  This is to allow users fall back to traditional
page-granularity mechanisms.

Link: https://lkml.kernel.org/r/20211019150731.16699-12-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Amit Shah <amit@kernel.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: David Woodhouse <dwmw@amazon.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Leonard Foerster <foersleo@amazon.de>
Cc: Marco Elver <elver@google.com>
Cc: Markus Boehme <markubo@amazon.de>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 52 ++++++++++++++++++++++++++-
 mm/damon/core.c       | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/damon/dbgfs.c      |  5 ++-
 3 files changed, 151 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 1217566a0ebc..c93325efddd7 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -146,6 +146,45 @@ struct damos_quota {
 	unsigned int min_score;
 };
 
+/**
+ * enum damos_wmark_metric - Represents the watermark metric.
+ *
+ * @DAMOS_WMARK_NONE:		Ignore the watermarks of the given scheme.
+ * @DAMOS_WMARK_FREE_MEM_RATE:	Free memory rate of the system in [0,1000].
+ */
+enum damos_wmark_metric {
+	DAMOS_WMARK_NONE,
+	DAMOS_WMARK_FREE_MEM_RATE,
+};
+
+/**
+ * struct damos_watermarks - Controls when a given scheme should be activated.
+ * @metric:	Metric for the watermarks.
+ * @interval:	Watermarks check time interval in microseconds.
+ * @high:	High watermark.
+ * @mid:	Middle watermark.
+ * @low:	Low watermark.
+ *
+ * If &metric is &DAMOS_WMARK_NONE, the scheme is always active.  Being active
+ * means DAMON does monitoring and applying the action of the scheme to
+ * appropriate memory regions.  Else, DAMON checks &metric of the system for at
+ * least every &interval microseconds and works as below.
+ *
+ * If &metric is higher than &high, the scheme is inactivated.  If &metric is
+ * between &mid and &low, the scheme is activated.  If &metric is lower than
+ * &low, the scheme is inactivated.
+ */
+struct damos_watermarks {
+	enum damos_wmark_metric metric;
+	unsigned long interval;
+	unsigned long high;
+	unsigned long mid;
+	unsigned long low;
+
+/* private: */
+	bool activated;
+};
+
 /**
  * struct damos - Represents a Data Access Monitoring-based Operation Scheme.
  * @min_sz_region:	Minimum size of target regions.
@@ -156,6 +195,7 @@ struct damos_quota {
  * @max_age_region:	Maximum age of target regions.
  * @action:		&damo_action to be applied to the target regions.
  * @quota:		Control the aggressiveness of this scheme.
+ * @wmarks:		Watermarks for automated (in)activation of this scheme.
  * @stat_count:		Total number of regions that this scheme is applied.
  * @stat_sz:		Total size of regions that this scheme is applied.
  * @list:		List head for siblings.
@@ -166,6 +206,14 @@ struct damos_quota {
  * those.  To avoid consuming too much CPU time or IO resources for the
  * &action, &quota is used.
  *
+ * To do the work only when needed, schemes can be activated for specific
+ * system situations using &wmarks.  If all schemes that registered to the
+ * monitoring context are inactive, DAMON stops monitoring either, and just
+ * repeatedly checks the watermarks.
+ *
+ * If all schemes that registered to a &struct damon_ctx are inactive, DAMON
+ * stops monitoring and just repeatedly checks the watermarks.
+ *
  * After applying the &action to each region, &stat_count and &stat_sz is
  * updated to reflect the number of regions and total size of regions that the
  * &action is applied.
@@ -179,6 +227,7 @@ struct damos {
 	unsigned int max_age_region;
 	enum damos_action action;
 	struct damos_quota quota;
+	struct damos_watermarks wmarks;
 	unsigned long stat_count;
 	unsigned long stat_sz;
 	struct list_head list;
@@ -384,7 +433,8 @@ struct damos *damon_new_scheme(
 		unsigned long min_sz_region, unsigned long max_sz_region,
 		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
 		unsigned int min_age_region, unsigned int max_age_region,
-		enum damos_action action, struct damos_quota *quota);
+		enum damos_action action, struct damos_quota *quota,
+		struct damos_watermarks *wmarks);
 void damon_add_scheme(struct damon_ctx *ctx, struct damos *s);
 void damon_destroy_scheme(struct damos *s);
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index fad25778e2ec..6993c60ae31c 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -10,6 +10,7 @@
 #include <linux/damon.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
+#include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -90,7 +91,8 @@ struct damos *damon_new_scheme(
 		unsigned long min_sz_region, unsigned long max_sz_region,
 		unsigned int min_nr_accesses, unsigned int max_nr_accesses,
 		unsigned int min_age_region, unsigned int max_age_region,
-		enum damos_action action, struct damos_quota *quota)
+		enum damos_action action, struct damos_quota *quota,
+		struct damos_watermarks *wmarks)
 {
 	struct damos *scheme;
 
@@ -122,6 +124,13 @@ struct damos *damon_new_scheme(
 	scheme->quota.charge_target_from = NULL;
 	scheme->quota.charge_addr_from = 0;
 
+	scheme->wmarks.metric = wmarks->metric;
+	scheme->wmarks.interval = wmarks->interval;
+	scheme->wmarks.high = wmarks->high;
+	scheme->wmarks.mid = wmarks->mid;
+	scheme->wmarks.low = wmarks->low;
+	scheme->wmarks.activated = true;
+
 	return scheme;
 }
 
@@ -582,6 +591,9 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 		unsigned long sz = r->ar.end - r->ar.start;
 		struct timespec64 begin, end;
 
+		if (!s->wmarks.activated)
+			continue;
+
 		/* Check the quota */
 		if (quota->esz && quota->charged_sz >= quota->esz)
 			continue;
@@ -684,6 +696,9 @@ static void kdamond_apply_schemes(struct damon_ctx *c)
 		unsigned long cumulated_sz;
 		unsigned int score, max_score = 0;
 
+		if (!s->wmarks.activated)
+			continue;
+
 		if (!quota->ms && !quota->sz)
 			continue;
 
@@ -924,6 +939,83 @@ static bool kdamond_need_stop(struct damon_ctx *ctx)
 	return true;
 }
 
+static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric)
+{
+	struct sysinfo i;
+
+	switch (metric) {
+	case DAMOS_WMARK_FREE_MEM_RATE:
+		si_meminfo(&i);
+		return i.freeram * 1000 / i.totalram;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Returns zero if the scheme is active.  Else, returns time to wait for next
+ * watermark check in micro-seconds.
+ */
+static unsigned long damos_wmark_wait_us(struct damos *scheme)
+{
+	unsigned long metric;
+
+	if (scheme->wmarks.metric == DAMOS_WMARK_NONE)
+		return 0;
+
+	metric = damos_wmark_metric_value(scheme->wmarks.metric);
+	/* higher than high watermark or lower than low watermark */
+	if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) {
+		if (scheme->wmarks.activated)
+			pr_debug("inactivate a scheme (%d) for %s wmark\n",
+					scheme->action,
+					metric > scheme->wmarks.high ?
+					"high" : "low");
+		scheme->wmarks.activated = false;
+		return scheme->wmarks.interval;
+	}
+
+	/* inactive and higher than middle watermark */
+	if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) &&
+			!scheme->wmarks.activated)
+		return scheme->wmarks.interval;
+
+	if (!scheme->wmarks.activated)
+		pr_debug("activate a scheme (%d)\n", scheme->action);
+	scheme->wmarks.activated = true;
+	return 0;
+}
+
+static void kdamond_usleep(unsigned long usecs)
+{
+	if (usecs > 100 * 1000)
+		schedule_timeout_interruptible(usecs_to_jiffies(usecs));
+	else
+		usleep_range(usecs, usecs + 1);
+}
+
+/* Returns negative error code if it's not activated but should return */
+static int kdamond_wait_activation(struct damon_ctx *ctx)
+{
+	struct damos *s;
+	unsigned long wait_time;
+	unsigned long min_wait_time = 0;
+
+	while (!kdamond_need_stop(ctx)) {
+		damon_for_each_scheme(s, ctx) {
+			wait_time = damos_wmark_wait_us(s);
+			if (!min_wait_time || wait_time < min_wait_time)
+				min_wait_time = wait_time;
+		}
+		if (!min_wait_time)
+			return 0;
+
+		kdamond_usleep(min_wait_time);
+	}
+	return -EBUSY;
+}
+
 static void set_kdamond_stop(struct damon_ctx *ctx)
 {
 	mutex_lock(&ctx->kdamond_lock);
@@ -952,6 +1044,9 @@ static int kdamond_fn(void *data)
 	sz_limit = damon_region_sz_limit(ctx);
 
 	while (!kdamond_need_stop(ctx)) {
+		if (kdamond_wait_activation(ctx))
+			continue;
+
 		if (ctx->primitive.prepare_access_checks)
 			ctx->primitive.prepare_access_checks(ctx);
 		if (ctx->callback.after_sampling &&
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index 20c4feb8b918..9f13060d1058 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -195,6 +195,9 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
 	*nr_schemes = 0;
 	while (pos < len && *nr_schemes < max_nr_schemes) {
 		struct damos_quota quota = {};
+		struct damos_watermarks wmarks = {
+			.metric = DAMOS_WMARK_NONE,
+		};
 
 		ret = sscanf(&str[pos],
 				"%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u%n",
@@ -212,7 +215,7 @@ static struct damos **str_to_schemes(const char *str, ssize_t len,
 
 		pos += parsed;
 		scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a,
-				min_age, max_age, action, &quota);
+				min_age, max_age, action, &quota, &wmarks);
 		if (!scheme)
 			goto fail;
 
-- 
cgit v1.2.3


From b5ca3e83ddb05342b1b30700b999cb9b107511f6 Mon Sep 17 00:00:00 2001
From: Xin Hao <xhao@linux.alibaba.com>
Date: Fri, 5 Nov 2021 13:48:07 -0700
Subject: mm/damon/dbgfs: add adaptive_targets list check before enable
 monitor_on

When the ctx->adaptive_targets list is empty, I did some test on
monitor_on interface like this.

    # cat /sys/kernel/debug/damon/target_ids
    #
    # echo on > /sys/kernel/debug/damon/monitor_on
    # damon: kdamond (5390) starts

Though the ctx->adaptive_targets list is empty, but the kthread_run
still be called, and the kdamond.x thread still be created, this is
meaningless.

So there adds a judgment in 'dbgfs_monitor_on_write', if the
ctx->adaptive_targets list is empty, return -EINVAL.

Link: https://lkml.kernel.org/r/0a60a6e8ec9d71989e0848a4dc3311996ca3b5d4.1634720326.git.xhao@linux.alibaba.com
Signed-off-by: Xin Hao <xhao@linux.alibaba.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h |  1 +
 mm/damon/core.c       |  5 +++++
 mm/damon/dbgfs.c      | 15 ++++++++++++---
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index c93325efddd7..fa7f32614b65 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -440,6 +440,7 @@ void damon_destroy_scheme(struct damos *s);
 
 struct damon_target *damon_new_target(unsigned long id);
 void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
+bool damon_targets_empty(struct damon_ctx *ctx);
 void damon_free_target(struct damon_target *t);
 void damon_destroy_target(struct damon_target *t);
 unsigned int damon_nr_regions(struct damon_target *t);
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 6993c60ae31c..46a6afea3030 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -180,6 +180,11 @@ void damon_add_target(struct damon_ctx *ctx, struct damon_target *t)
 	list_add_tail(&t->list, &ctx->adaptive_targets);
 }
 
+bool damon_targets_empty(struct damon_ctx *ctx)
+{
+	return list_empty(&ctx->adaptive_targets);
+}
+
 static void damon_del_target(struct damon_target *t)
 {
 	list_del(&t->list);
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index 6828e463348b..befb27a29aab 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -878,12 +878,21 @@ static ssize_t dbgfs_monitor_on_write(struct file *file,
 		return -EINVAL;
 	}
 
-	if (!strncmp(kbuf, "on", count))
+	if (!strncmp(kbuf, "on", count)) {
+		int i;
+
+		for (i = 0; i < dbgfs_nr_ctxs; i++) {
+			if (damon_targets_empty(dbgfs_ctxs[i])) {
+				kfree(kbuf);
+				return -EINVAL;
+			}
+		}
 		ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs);
-	else if (!strncmp(kbuf, "off", count))
+	} else if (!strncmp(kbuf, "off", count)) {
 		ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs);
-	else
+	} else {
 		ret = -EINVAL;
+	}
 
 	if (!ret)
 		ret = count;
-- 
cgit v1.2.3


From 0f91d13366a402420bf98eaaf393db03946c13e0 Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Fri, 5 Nov 2021 13:48:22 -0700
Subject: mm/damon: simplify stop mechanism

A kernel thread can exit gracefully with kthread_stop().  So we don't
need a new flag 'kdamond_stop'.  And to make sure the task struct is not
freed when accessing it, get reference to it before termination.

Link: https://lkml.kernel.org/r/20211027130517.4404-1-changbin.du@gmail.com
Signed-off-by: Changbin Du <changbin.du@gmail.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h |  1 -
 mm/damon/core.c       | 51 +++++++++++++++------------------------------------
 2 files changed, 15 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index fa7f32614b65..321de9d72360 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -381,7 +381,6 @@ struct damon_ctx {
 
 /* public: */
 	struct task_struct *kdamond;
-	bool kdamond_stop;
 	struct mutex kdamond_lock;
 
 	struct damon_primitive primitive;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 46a6afea3030..f37c17b53814 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -390,17 +390,6 @@ static unsigned long damon_region_sz_limit(struct damon_ctx *ctx)
 	return sz;
 }
 
-static bool damon_kdamond_running(struct damon_ctx *ctx)
-{
-	bool running;
-
-	mutex_lock(&ctx->kdamond_lock);
-	running = ctx->kdamond != NULL;
-	mutex_unlock(&ctx->kdamond_lock);
-
-	return running;
-}
-
 static int kdamond_fn(void *data);
 
 /*
@@ -418,7 +407,6 @@ static int __damon_start(struct damon_ctx *ctx)
 	mutex_lock(&ctx->kdamond_lock);
 	if (!ctx->kdamond) {
 		err = 0;
-		ctx->kdamond_stop = false;
 		ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
 				nr_running_ctxs);
 		if (IS_ERR(ctx->kdamond)) {
@@ -474,13 +462,15 @@ int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
  */
 static int __damon_stop(struct damon_ctx *ctx)
 {
+	struct task_struct *tsk;
+
 	mutex_lock(&ctx->kdamond_lock);
-	if (ctx->kdamond) {
-		ctx->kdamond_stop = true;
+	tsk = ctx->kdamond;
+	if (tsk) {
+		get_task_struct(tsk);
 		mutex_unlock(&ctx->kdamond_lock);
-		while (damon_kdamond_running(ctx))
-			usleep_range(ctx->sample_interval,
-					ctx->sample_interval * 2);
+		kthread_stop(tsk);
+		put_task_struct(tsk);
 		return 0;
 	}
 	mutex_unlock(&ctx->kdamond_lock);
@@ -925,12 +915,8 @@ static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
 static bool kdamond_need_stop(struct damon_ctx *ctx)
 {
 	struct damon_target *t;
-	bool stop;
 
-	mutex_lock(&ctx->kdamond_lock);
-	stop = ctx->kdamond_stop;
-	mutex_unlock(&ctx->kdamond_lock);
-	if (stop)
+	if (kthread_should_stop())
 		return true;
 
 	if (!ctx->primitive.target_valid)
@@ -1021,13 +1007,6 @@ static int kdamond_wait_activation(struct damon_ctx *ctx)
 	return -EBUSY;
 }
 
-static void set_kdamond_stop(struct damon_ctx *ctx)
-{
-	mutex_lock(&ctx->kdamond_lock);
-	ctx->kdamond_stop = true;
-	mutex_unlock(&ctx->kdamond_lock);
-}
-
 /*
  * The monitoring daemon that runs as a kernel thread
  */
@@ -1038,17 +1017,18 @@ static int kdamond_fn(void *data)
 	struct damon_region *r, *next;
 	unsigned int max_nr_accesses = 0;
 	unsigned long sz_limit = 0;
+	bool done = false;
 
 	pr_debug("kdamond (%d) starts\n", current->pid);
 
 	if (ctx->primitive.init)
 		ctx->primitive.init(ctx);
 	if (ctx->callback.before_start && ctx->callback.before_start(ctx))
-		set_kdamond_stop(ctx);
+		done = true;
 
 	sz_limit = damon_region_sz_limit(ctx);
 
-	while (!kdamond_need_stop(ctx)) {
+	while (!kdamond_need_stop(ctx) && !done) {
 		if (kdamond_wait_activation(ctx))
 			continue;
 
@@ -1056,7 +1036,7 @@ static int kdamond_fn(void *data)
 			ctx->primitive.prepare_access_checks(ctx);
 		if (ctx->callback.after_sampling &&
 				ctx->callback.after_sampling(ctx))
-			set_kdamond_stop(ctx);
+			done = true;
 
 		usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
 
@@ -1069,7 +1049,7 @@ static int kdamond_fn(void *data)
 					sz_limit);
 			if (ctx->callback.after_aggregation &&
 					ctx->callback.after_aggregation(ctx))
-				set_kdamond_stop(ctx);
+				done = true;
 			kdamond_apply_schemes(ctx);
 			kdamond_reset_aggregated(ctx);
 			kdamond_split_regions(ctx);
@@ -1088,9 +1068,8 @@ static int kdamond_fn(void *data)
 			damon_destroy_region(r, t);
 	}
 
-	if (ctx->callback.before_terminate &&
-			ctx->callback.before_terminate(ctx))
-		set_kdamond_stop(ctx);
+	if (ctx->callback.before_terminate)
+		ctx->callback.before_terminate(ctx);
 	if (ctx->primitive.cleanup)
 		ctx->primitive.cleanup(ctx);
 
-- 
cgit v1.2.3


From 658f9ae761b5965893727dd4edcdad56e5a439bb Mon Sep 17 00:00:00 2001
From: Changbin Du <changbin.du@gmail.com>
Date: Fri, 5 Nov 2021 13:48:27 -0700
Subject: mm/damon: remove return value from before_terminate callback

Since the return value of 'before_terminate' callback is never used, we
make it have no return value.

Link: https://lkml.kernel.org/r/20211029005023.8895-1-changbin.du@gmail.com
Signed-off-by: Changbin Du <changbin.du@gmail.com>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/damon.h | 2 +-
 mm/damon/dbgfs.c      | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 321de9d72360..b4d4be3cc987 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -322,7 +322,7 @@ struct damon_callback {
 	int (*before_start)(struct damon_ctx *context);
 	int (*after_sampling)(struct damon_ctx *context);
 	int (*after_aggregation)(struct damon_ctx *context);
-	int (*before_terminate)(struct damon_ctx *context);
+	void (*before_terminate)(struct damon_ctx *context);
 };
 
 /**
diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c
index befb27a29aab..eccc14b34901 100644
--- a/mm/damon/dbgfs.c
+++ b/mm/damon/dbgfs.c
@@ -645,18 +645,17 @@ static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx)
 		debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]);
 }
 
-static int dbgfs_before_terminate(struct damon_ctx *ctx)
+static void dbgfs_before_terminate(struct damon_ctx *ctx)
 {
 	struct damon_target *t, *next;
 
 	if (!targetid_is_pid(ctx))
-		return 0;
+		return;
 
 	damon_for_each_target_safe(t, next, ctx) {
 		put_pid((struct pid *)t->id);
 		damon_destroy_target(t);
 	}
-	return 0;
 }
 
 static struct damon_ctx *dbgfs_new_ctx(void)
-- 
cgit v1.2.3


From e1c9788cb39777e81ebfbf31ae80b4ec14eb6f6d Mon Sep 17 00:00:00 2001
From: Kotresh HR <khiremat@redhat.com>
Date: Mon, 27 Sep 2021 19:22:27 +0530
Subject: ceph: don't rely on error_string to validate blocklisted session.

The "error_string" in the metadata of MClientSession is being
parsed by kclient to validate whether the session is blocklisted.
The "error_string" is for humans and shouldn't be relied on it.
Hence added the flag to MClientsession to indicate the session
is blocklisted.

[ jlayton: minor formatting cleanup ]

URL: https://tracker.ceph.com/issues/47450
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c         | 21 +++++++++++++++++++--
 include/linux/ceph/ceph_fs.h |  2 ++
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 52d1b538c656..522790c64db4 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3361,9 +3361,14 @@ static void handle_session(struct ceph_mds_session *session,
 
 	if (msg_version >= 3) {
 		u32 len;
-		/* version >= 2, metadata */
-		if (__decode_session_metadata(&p, end, &blocklisted) < 0)
+		/* version >= 2 and < 5, decode metadata, skip otherwise
+		 * as it's handled via flags.
+		 */
+		if (msg_version >= 5)
+			ceph_decode_skip_map(&p, end, string, string, bad);
+		else if (__decode_session_metadata(&p, end, &blocklisted) < 0)
 			goto bad;
+
 		/* version >= 3, feature bits */
 		ceph_decode_32_safe(&p, end, len, bad);
 		if (len) {
@@ -3372,6 +3377,18 @@ static void handle_session(struct ceph_mds_session *session,
 		}
 	}
 
+	if (msg_version >= 5) {
+		u32 flags;
+		/* version >= 4, struct_v, struct_cv, len, metric_spec */
+	        ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
+		/* version >= 5, flags   */
+                ceph_decode_32_safe(&p, end, flags, bad);
+		if (flags & CEPH_SESSION_BLOCKLISTED) {
+		        pr_warn("mds%d session blocklisted\n", session->s_mds);
+			blocklisted = true;
+		}
+	}
+
 	mutex_lock(&mdsc->mutex);
 	if (op == CEPH_SESSION_CLOSE) {
 		ceph_get_mds_session(session);
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index bc2699feddbe..7ad6c3d0db7d 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -302,6 +302,8 @@ enum {
 	CEPH_SESSION_REQUEST_FLUSH_MDLOG,
 };
 
+#define CEPH_SESSION_BLOCKLISTED	(1 << 0)  /* session blocklisted */
+
 extern const char *ceph_session_op_name(int op);
 
 struct ceph_mds_session_head {
-- 
cgit v1.2.3


From aca39d9e86f3edeaac5d2c467f5fd31e0b0df606 Mon Sep 17 00:00:00 2001
From: Luís Henriques <lhenriques@suse.de>
Date: Thu, 4 Nov 2021 12:31:46 +0000
Subject: libceph, ceph: move ceph_osdc_copy_from() into cephfs code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch moves ceph_osdc_copy_from() function out of libceph code into
cephfs.  There are no other users for this function, and there is the need
(in another patch) to access internal ceph_osd_request struct members.

Signed-off-by: Luís Henriques <lhenriques@suse.de>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/file.c                  | 74 +++++++++++++++++++++++++++++++++++------
 include/linux/ceph/osd_client.h | 19 +++++------
 net/ceph/osd_client.c           | 60 +++++----------------------------
 3 files changed, 80 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 6005b430f6f7..6c77f203e7b5 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -2211,6 +2211,54 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
 	return 0;
 }
 
+static struct ceph_osd_request *
+ceph_alloc_copyfrom_request(struct ceph_osd_client *osdc,
+			    u64 src_snapid,
+			    struct ceph_object_id *src_oid,
+			    struct ceph_object_locator *src_oloc,
+			    struct ceph_object_id *dst_oid,
+			    struct ceph_object_locator *dst_oloc,
+			    u32 truncate_seq, u64 truncate_size)
+{
+	struct ceph_osd_request *req;
+	int ret;
+	u32 src_fadvise_flags =
+		CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+		CEPH_OSD_OP_FLAG_FADVISE_NOCACHE;
+	u32 dst_fadvise_flags =
+		CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+		CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
+
+	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+	if (!req)
+		return ERR_PTR(-ENOMEM);
+
+	req->r_flags = CEPH_OSD_FLAG_WRITE;
+
+	ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+	ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+
+	ret = osd_req_op_copy_from_init(req, src_snapid, 0,
+					src_oid, src_oloc,
+					src_fadvise_flags,
+					dst_fadvise_flags,
+					truncate_seq,
+					truncate_size,
+					CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+	if (ret)
+		goto out;
+
+	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	return req;
+
+out:
+	ceph_osdc_put_request(req);
+	return ERR_PTR(ret);
+}
+
 static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off,
 				    struct ceph_inode_info *dst_ci, u64 *dst_off,
 				    struct ceph_fs_client *fsc,
@@ -2218,6 +2266,8 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
 {
 	struct ceph_object_locator src_oloc, dst_oloc;
 	struct ceph_object_id src_oid, dst_oid;
+	struct ceph_osd_client *osdc;
+	struct ceph_osd_request *req;
 	size_t bytes = 0;
 	u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
 	u32 src_objlen, dst_objlen;
@@ -2228,6 +2278,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
 	src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
 	dst_oloc.pool = dst_ci->i_layout.pool_id;
 	dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
+	osdc = &fsc->client->osdc;
 
 	while (len >= object_size) {
 		ceph_calc_file_object_mapping(&src_ci->i_layout, *src_off,
@@ -2243,17 +2294,18 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
 		ceph_oid_printf(&dst_oid, "%llx.%08llx",
 				dst_ci->i_vino.ino, dst_objnum);
 		/* Do an object remote copy */
-		ret = ceph_osdc_copy_from(&fsc->client->osdc,
-					  src_ci->i_vino.snap, 0,
-					  &src_oid, &src_oloc,
-					  CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-					  CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
-					  &dst_oid, &dst_oloc,
-					  CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-					  CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
-					  dst_ci->i_truncate_seq,
-					  dst_ci->i_truncate_size,
-					  CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+		req = ceph_alloc_copyfrom_request(osdc, src_ci->i_vino.snap,
+						  &src_oid, &src_oloc,
+						  &dst_oid, &dst_oloc,
+						  dst_ci->i_truncate_seq,
+						  dst_ci->i_truncate_size);
+		if (IS_ERR(req))
+			ret = PTR_ERR(req);
+		else {
+			ceph_osdc_start_request(osdc, req, false);
+			ret = ceph_osdc_wait_request(osdc, req);
+			ceph_osdc_put_request(req);
+		}
 		if (ret) {
 			if (ret == -EOPNOTSUPP) {
 				fsc->have_copy_from2 = false;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 83fa08a06507..3431011f364d 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -475,6 +475,14 @@ extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
 				       u64 expected_object_size,
 				       u64 expected_write_size,
 				       u32 flags);
+extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+				     u64 src_snapid, u64 src_version,
+				     struct ceph_object_id *src_oid,
+				     struct ceph_object_locator *src_oloc,
+				     u32 src_fadvise_flags,
+				     u32 dst_fadvise_flags,
+				     u32 truncate_seq, u64 truncate_size,
+				     u8 copy_from_flags);
 
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
 					       struct ceph_snap_context *snapc,
@@ -515,17 +523,6 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
 		   struct page *req_page, size_t req_len,
 		   struct page **resp_pages, size_t *resp_len);
 
-int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
-			u64 src_snapid, u64 src_version,
-			struct ceph_object_id *src_oid,
-			struct ceph_object_locator *src_oloc,
-			u32 src_fadvise_flags,
-			struct ceph_object_id *dst_oid,
-			struct ceph_object_locator *dst_oloc,
-			u32 dst_fadvise_flags,
-			u32 truncate_seq, u64 truncate_size,
-			u8 copy_from_flags);
-
 /* watch/notify */
 struct ceph_osd_linger_request *
 ceph_osdc_watch(struct ceph_osd_client *osdc,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index ff8624a7c964..1c5815530e0d 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5310,14 +5310,14 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
 	ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 }
 
-static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
-				     u64 src_snapid, u64 src_version,
-				     struct ceph_object_id *src_oid,
-				     struct ceph_object_locator *src_oloc,
-				     u32 src_fadvise_flags,
-				     u32 dst_fadvise_flags,
-				     u32 truncate_seq, u64 truncate_size,
-				     u8 copy_from_flags)
+int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+			      u64 src_snapid, u64 src_version,
+			      struct ceph_object_id *src_oid,
+			      struct ceph_object_locator *src_oloc,
+			      u32 src_fadvise_flags,
+			      u32 dst_fadvise_flags,
+			      u32 truncate_seq, u64 truncate_size,
+			      u8 copy_from_flags)
 {
 	struct ceph_osd_req_op *op;
 	struct page **pages;
@@ -5346,49 +5346,7 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
 				 op->indata_len, 0, false, true);
 	return 0;
 }
-
-int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
-			u64 src_snapid, u64 src_version,
-			struct ceph_object_id *src_oid,
-			struct ceph_object_locator *src_oloc,
-			u32 src_fadvise_flags,
-			struct ceph_object_id *dst_oid,
-			struct ceph_object_locator *dst_oloc,
-			u32 dst_fadvise_flags,
-			u32 truncate_seq, u64 truncate_size,
-			u8 copy_from_flags)
-{
-	struct ceph_osd_request *req;
-	int ret;
-
-	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
-	if (!req)
-		return -ENOMEM;
-
-	req->r_flags = CEPH_OSD_FLAG_WRITE;
-
-	ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
-	ceph_oid_copy(&req->r_t.base_oid, dst_oid);
-
-	ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
-					src_oloc, src_fadvise_flags,
-					dst_fadvise_flags, truncate_seq,
-					truncate_size, copy_from_flags);
-	if (ret)
-		goto out;
-
-	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
-	if (ret)
-		goto out;
-
-	ceph_osdc_start_request(osdc, req, false);
-	ret = ceph_osdc_wait_request(osdc, req);
-
-out:
-	ceph_osdc_put_request(req);
-	return ret;
-}
-EXPORT_SYMBOL(ceph_osdc_copy_from);
+EXPORT_SYMBOL(osd_req_op_copy_from_init);
 
 int __init ceph_osdc_setup(void)
 {
-- 
cgit v1.2.3


From c6975d7cab5b903aadbc0f78f9af4fae1bd23a50 Mon Sep 17 00:00:00 2001
From: Qian Cai <quic_qiancai@quicinc.com>
Date: Fri, 5 Nov 2021 11:05:09 -0400
Subject: arm64: Track no early_pgtable_alloc() for kmemleak

After switched page size from 64KB to 4KB on several arm64 servers here,
kmemleak starts to run out of early memory pool due to a huge number of
those early_pgtable_alloc() calls:

  kmemleak_alloc_phys()
  memblock_alloc_range_nid()
  memblock_phys_alloc_range()
  early_pgtable_alloc()
  init_pmd()
  alloc_init_pud()
  __create_pgd_mapping()
  __map_memblock()
  paging_init()
  setup_arch()
  start_kernel()

Increased the default value of DEBUG_KMEMLEAK_MEM_POOL_SIZE by 4 times
won't be enough for a server with 200GB+ memory. There isn't much
interesting to check memory leaks for those early page tables and those
early memory mappings should not reference to other memory. Hence, no
kmemleak false positives, and we can safely skip tracking those early
allocations from kmemleak like we did in the commit fed84c785270
("mm/memblock.c: skip kmemleak for kasan_init()") without needing to
introduce complications to automatically scale the value depends on the
runtime memory size etc. After the patch, the default value of
DEBUG_KMEMLEAK_MEM_POOL_SIZE becomes sufficient again.

Signed-off-by: Qian Cai <quic_qiancai@quicinc.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20211105150509.7826-1-quic_qiancai@quicinc.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm/mm/kasan_init.c   | 2 +-
 arch/arm64/mm/kasan_init.c | 5 +++--
 arch/arm64/mm/mmu.c        | 3 ++-
 include/linux/memblock.h   | 2 +-
 mm/memblock.c              | 9 ++++++---
 5 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c
index 9c348042a724..4508aba8a58b 100644
--- a/arch/arm/mm/kasan_init.c
+++ b/arch/arm/mm/kasan_init.c
@@ -32,7 +32,7 @@ pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
 static __init void *kasan_alloc_block(size_t size)
 {
 	return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
-				      MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE);
+				      MEMBLOCK_ALLOC_NOLEAKTRACE, NUMA_NO_NODE);
 }
 
 static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 61b52a92b8b6..995ac7540e6a 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -36,7 +36,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
 {
 	void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
 					      __pa(MAX_DMA_ADDRESS),
-					      MEMBLOCK_ALLOC_KASAN, node);
+					      MEMBLOCK_ALLOC_NOLEAKTRACE, node);
 	if (!p)
 		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
 		      __func__, PAGE_SIZE, PAGE_SIZE, node,
@@ -49,7 +49,8 @@ static phys_addr_t __init kasan_alloc_raw_page(int node)
 {
 	void *p = memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE,
 						__pa(MAX_DMA_ADDRESS),
-						MEMBLOCK_ALLOC_KASAN, node);
+						MEMBLOCK_ALLOC_NOLEAKTRACE,
+						node);
 	if (!p)
 		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
 		      __func__, PAGE_SIZE, PAGE_SIZE, node,
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index fd85b51b9d50..4b83b7a9fafb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -96,7 +96,8 @@ static phys_addr_t __init early_pgtable_alloc(int shift)
 	phys_addr_t phys;
 	void *ptr;
 
-	phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+	phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0,
+					 MEMBLOCK_ALLOC_NOLEAKTRACE);
 	if (!phys)
 		panic("Failed to allocate page table page\n");
 
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 34de69b3b8ba..efc896155dae 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -372,7 +372,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 /* Flags for memblock allocation APIs */
 #define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
 #define MEMBLOCK_ALLOC_ACCESSIBLE	0
-#define MEMBLOCK_ALLOC_KASAN		1
+#define MEMBLOCK_ALLOC_NOLEAKTRACE	1
 
 /* We are using top down, so it is safe to use 0 here */
 #define MEMBLOCK_LOW_LIMIT 0
diff --git a/mm/memblock.c b/mm/memblock.c
index 184dcd2e5d99..fdc93a97bac2 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -287,7 +287,7 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
 {
 	/* pump up @end */
 	if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
-	    end == MEMBLOCK_ALLOC_KASAN)
+	    end == MEMBLOCK_ALLOC_NOLEAKTRACE)
 		end = memblock.current_limit;
 
 	/* avoid allocating the first page */
@@ -1379,8 +1379,11 @@ again:
 	return 0;
 
 done:
-	/* Skip kmemleak for kasan_init() due to high volume. */
-	if (end != MEMBLOCK_ALLOC_KASAN)
+	/*
+	 * Skip kmemleak for those places like kasan_init() and
+	 * early_pgtable_alloc() due to high volume.
+	 */
+	if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
 		/*
 		 * The min_count is set to 0 so that memblock allocated
 		 * blocks are never reported as leaks. This is because many
-- 
cgit v1.2.3


From dfd5bb23ad75bdabde89ac3166705a450bf16acb Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Wed, 5 May 2021 14:00:06 +0200
Subject: PCI: Export pci_dev_lock()

Commit e3a9b1212b9d ("PCI: Export pci_dev_trylock() and pci_dev_unlock()")
already exported pci_dev_trylock()/pci_dev_unlock() however in some
circumstances such as during error recovery it makes sense to block
waiting to get full access to the device so also export pci_dev_lock().

Link: https://lore.kernel.org/all/20210928181014.GA713179@bhelgaas/
Acked-by: Pierre Morel <pmorel@linux.ibm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 drivers/pci/pci.c   | 3 ++-
 include/linux/pci.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index da75c422ba85..193bfcda9c26 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -5074,12 +5074,13 @@ static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
 	return pci_parent_bus_reset(dev, probe);
 }
 
-static void pci_dev_lock(struct pci_dev *dev)
+void pci_dev_lock(struct pci_dev *dev)
 {
 	pci_cfg_access_lock(dev);
 	/* block PM suspend, driver probe, etc. */
 	device_lock(&dev->dev);
 }
+EXPORT_SYMBOL_GPL(pci_dev_lock);
 
 /* Return 1 on successful lock, 0 on contention */
 int pci_dev_trylock(struct pci_dev *dev)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b4dbcc86b3f1..d307b071b65e 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1664,6 +1664,7 @@ void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
 
+void pci_dev_lock(struct pci_dev *dev);
 int pci_dev_trylock(struct pci_dev *dev);
 void pci_dev_unlock(struct pci_dev *dev);
 
-- 
cgit v1.2.3


From 40a34121ac1dc52ed9cd34a8f4e48e32517a52fd Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 3 Nov 2021 13:47:32 -0700
Subject: bpf, sockmap: Use stricter sk state checks in sk_lookup_assign

In order to fix an issue with sockets in TCP sockmap redirect cases we plan
to allow CLOSE state sockets to exist in the sockmap. However, the check in
bpf_sk_lookup_assign() currently only invalidates sockets in the
TCP_ESTABLISHED case relying on the checks on sockmap insert to ensure we
never SOCK_CLOSE state sockets in the map.

To prepare for this change we flip the logic in bpf_sk_lookup_assign() to
explicitly test for the accepted cases. Namely, a tcp socket in TCP_LISTEN
or a udp socket in TCP_CLOSE state. This also makes the code more resilent
to future changes.

Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/20211103204736.248403-2-john.fastabend@gmail.com
---
 include/linux/skmsg.h | 12 ++++++++++++
 net/core/filter.c     |  6 ++++--
 net/core/sock_map.c   |  6 ------
 3 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index b4256847c707..584d94be9c8b 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -507,6 +507,18 @@ static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
 	return !!psock->saved_data_ready;
 }
 
+static inline bool sk_is_tcp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP;
+}
+
+static inline bool sk_is_udp(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_DGRAM &&
+	       sk->sk_protocol == IPPROTO_UDP;
+}
+
 #if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 
 #define BPF_F_STRPARSER	(1UL << 1)
diff --git a/net/core/filter.c b/net/core/filter.c
index 8e8d3b49c297..a68418268e92 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -10423,8 +10423,10 @@ BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
 		return -EINVAL;
 	if (unlikely(sk && sk_is_refcounted(sk)))
 		return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
-	if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
-		return -ESOCKTNOSUPPORT; /* reject connected sockets */
+	if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
+		return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */
+	if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
+		return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */
 
 	/* Check if socket is suitable for packet L3/L4 protocol */
 	if (sk && sk->sk_protocol != ctx->protocol)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index e252b8ec2b85..f39ef79ced67 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -511,12 +511,6 @@ static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
 	       ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
 }
 
-static bool sk_is_tcp(const struct sock *sk)
-{
-	return sk->sk_type == SOCK_STREAM &&
-	       sk->sk_protocol == IPPROTO_TCP;
-}
-
 static bool sock_map_redirect_allowed(const struct sock *sk)
 {
 	if (sk_is_tcp(sk))
-- 
cgit v1.2.3


From cf30f6a5f0c60ec98a637b836bef6915f602c6ab Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Fri, 11 Sep 2020 16:49:00 -0700
Subject: lib: zstd: Add kernel-specific API

This patch:
- Moves `include/linux/zstd.h` -> `include/linux/zstd_lib.h`
- Updates modified zstd headers to yearless copyright
- Adds a new API in `include/linux/zstd.h` that is functionally
  equivalent to the in-use subset of the current API. Functions are
  renamed to avoid symbol collisions with zstd, to make it clear it is
  not the upstream zstd API, and to follow the kernel style guide.
- Updates all callers to use the new API.

There are no functional changes in this patch. Since there are no
functional change, I felt it was okay to update all the callers in a
single patch. Once the API is approved, the callers are mechanically
changed.

This patch is preparing for the 3rd patch in this series, which updates
zstd to version 1.4.10. Since the upstream zstd API is no longer exposed
to callers, the update can happen transparently.

Signed-off-by: Nick Terrell <terrelln@fb.com>
Tested By: Paul Jones <paul@pauljones.id.au>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # LLVM/Clang v13.0.0 on x86-64
Tested-by: Jean-Denis Girard <jd.girard@sysnux.pf>
---
 crypto/zstd.c              |   28 +-
 fs/btrfs/zstd.c            |   68 +--
 fs/f2fs/compress.c         |   56 +-
 fs/f2fs/super.c            |    2 +-
 fs/pstore/platform.c       |    2 +-
 fs/squashfs/zstd_wrapper.c |   16 +-
 include/linux/zstd.h       | 1243 ++++++++++----------------------------------
 include/linux/zstd_lib.h   | 1157 +++++++++++++++++++++++++++++++++++++++++
 lib/decompress_unzstd.c    |   42 +-
 lib/zstd/compress.c        |  123 +++--
 lib/zstd/decompress.c      |  112 ++--
 11 files changed, 1691 insertions(+), 1158 deletions(-)
 create mode 100644 include/linux/zstd_lib.h

(limited to 'include/linux')

diff --git a/crypto/zstd.c b/crypto/zstd.c
index 1a3309f066f7..154a969c83a8 100644
--- a/crypto/zstd.c
+++ b/crypto/zstd.c
@@ -18,22 +18,22 @@
 #define ZSTD_DEF_LEVEL	3
 
 struct zstd_ctx {
-	ZSTD_CCtx *cctx;
-	ZSTD_DCtx *dctx;
+	zstd_cctx *cctx;
+	zstd_dctx *dctx;
 	void *cwksp;
 	void *dwksp;
 };
 
-static ZSTD_parameters zstd_params(void)
+static zstd_parameters zstd_params(void)
 {
-	return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
+	return zstd_get_params(ZSTD_DEF_LEVEL, 0);
 }
 
 static int zstd_comp_init(struct zstd_ctx *ctx)
 {
 	int ret = 0;
-	const ZSTD_parameters params = zstd_params();
-	const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
+	const zstd_parameters params = zstd_params();
+	const size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
 
 	ctx->cwksp = vzalloc(wksp_size);
 	if (!ctx->cwksp) {
@@ -41,7 +41,7 @@ static int zstd_comp_init(struct zstd_ctx *ctx)
 		goto out;
 	}
 
-	ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
+	ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size);
 	if (!ctx->cctx) {
 		ret = -EINVAL;
 		goto out_free;
@@ -56,7 +56,7 @@ out_free:
 static int zstd_decomp_init(struct zstd_ctx *ctx)
 {
 	int ret = 0;
-	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+	const size_t wksp_size = zstd_dctx_workspace_bound();
 
 	ctx->dwksp = vzalloc(wksp_size);
 	if (!ctx->dwksp) {
@@ -64,7 +64,7 @@ static int zstd_decomp_init(struct zstd_ctx *ctx)
 		goto out;
 	}
 
-	ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
+	ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size);
 	if (!ctx->dctx) {
 		ret = -EINVAL;
 		goto out_free;
@@ -152,10 +152,10 @@ static int __zstd_compress(const u8 *src, unsigned int slen,
 {
 	size_t out_len;
 	struct zstd_ctx *zctx = ctx;
-	const ZSTD_parameters params = zstd_params();
+	const zstd_parameters params = zstd_params();
 
-	out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
-	if (ZSTD_isError(out_len))
+	out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, &params);
+	if (zstd_is_error(out_len))
 		return -EINVAL;
 	*dlen = out_len;
 	return 0;
@@ -182,8 +182,8 @@ static int __zstd_decompress(const u8 *src, unsigned int slen,
 	size_t out_len;
 	struct zstd_ctx *zctx = ctx;
 
-	out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
-	if (ZSTD_isError(out_len))
+	out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen);
+	if (zstd_is_error(out_len))
 		return -EINVAL;
 	*dlen = out_len;
 	return 0;
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index f06b68040352..fc42dd0badd7 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -28,10 +28,10 @@
 /* 307s to avoid pathologically clashing with transaction commit */
 #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
 
-static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level,
+static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
 						 size_t src_len)
 {
-	ZSTD_parameters params = ZSTD_getParams(level, src_len, 0);
+	zstd_parameters params = zstd_get_params(level, src_len);
 
 	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
 		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
@@ -48,8 +48,8 @@ struct workspace {
 	unsigned long last_used; /* jiffies */
 	struct list_head list;
 	struct list_head lru_list;
-	ZSTD_inBuffer in_buf;
-	ZSTD_outBuffer out_buf;
+	zstd_in_buffer in_buf;
+	zstd_out_buffer out_buf;
 };
 
 /*
@@ -155,12 +155,12 @@ static void zstd_calc_ws_mem_sizes(void)
 	unsigned int level;
 
 	for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
-		ZSTD_parameters params =
+		zstd_parameters params =
 			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
 		size_t level_size =
 			max_t(size_t,
-			      ZSTD_CStreamWorkspaceBound(params.cParams),
-			      ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
+			      zstd_cstream_workspace_bound(&params.cParams),
+			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
 
 		max_size = max_t(size_t, max_size, level_size);
 		zstd_ws_mem_sizes[level - 1] = max_size;
@@ -371,7 +371,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
 		unsigned long *total_in, unsigned long *total_out)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
-	ZSTD_CStream *stream;
+	zstd_cstream *stream;
 	int ret = 0;
 	int nr_pages = 0;
 	struct page *in_page = NULL;  /* The current page to read */
@@ -381,7 +381,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
 	unsigned long len = *total_out;
 	const unsigned long nr_dest_pages = *out_pages;
 	unsigned long max_out = nr_dest_pages * PAGE_SIZE;
-	ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
+	zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
 							   len);
 
 	*out_pages = 0;
@@ -389,10 +389,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
 	*total_in = 0;
 
 	/* Initialize the stream */
-	stream = ZSTD_initCStream(params, len, workspace->mem,
+	stream = zstd_init_cstream(&params, len, workspace->mem,
 			workspace->size);
 	if (!stream) {
-		pr_warn("BTRFS: ZSTD_initCStream failed\n");
+		pr_warn("BTRFS: zstd_init_cstream failed\n");
 		ret = -EIO;
 		goto out;
 	}
@@ -418,11 +418,11 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
 	while (1) {
 		size_t ret2;
 
-		ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
+		ret2 = zstd_compress_stream(stream, &workspace->out_buf,
 				&workspace->in_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_compress_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto out;
 		}
@@ -487,10 +487,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
 	while (1) {
 		size_t ret2;
 
-		ret2 = ZSTD_endStream(stream, &workspace->out_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_endStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		ret2 = zstd_end_stream(stream, &workspace->out_buf);
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_end_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto out;
 		}
@@ -548,17 +548,17 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	struct page **pages_in = cb->compressed_pages;
 	size_t srclen = cb->compressed_len;
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	int ret = 0;
 	unsigned long page_in_index = 0;
 	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
 	unsigned long buf_start;
 	unsigned long total_out = 0;
 
-	stream = ZSTD_initDStream(
+	stream = zstd_init_dstream(
 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
 	if (!stream) {
-		pr_debug("BTRFS: ZSTD_initDStream failed\n");
+		pr_debug("BTRFS: zstd_init_dstream failed\n");
 		ret = -EIO;
 		goto done;
 	}
@@ -574,11 +574,11 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 	while (1) {
 		size_t ret2;
 
-		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
 				&workspace->in_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto done;
 		}
@@ -624,16 +624,16 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
 		size_t destlen)
 {
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	int ret = 0;
 	size_t ret2;
 	unsigned long total_out = 0;
 	unsigned long pg_offset = 0;
 
-	stream = ZSTD_initDStream(
+	stream = zstd_init_dstream(
 			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
 	if (!stream) {
-		pr_warn("BTRFS: ZSTD_initDStream failed\n");
+		pr_warn("BTRFS: zstd_init_dstream failed\n");
 		ret = -EIO;
 		goto finish;
 	}
@@ -657,15 +657,15 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
 
 		/* Check if the frame is over and we still need more input */
 		if (ret2 == 0) {
-			pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
+			pr_debug("BTRFS: zstd_decompress_stream ended early\n");
 			ret = -EIO;
 			goto finish;
 		}
-		ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+		ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
 				&workspace->in_buf);
-		if (ZSTD_isError(ret2)) {
-			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
-					ZSTD_getErrorCode(ret2));
+		if (zstd_is_error(ret2)) {
+			pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
+					zstd_get_error_code(ret2));
 			ret = -EIO;
 			goto finish;
 		}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 20a083dc9042..b8d70deddfb7 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -336,8 +336,8 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = {
 
 static int zstd_init_compress_ctx(struct compress_ctx *cc)
 {
-	ZSTD_parameters params;
-	ZSTD_CStream *stream;
+	zstd_parameters params;
+	zstd_cstream *stream;
 	void *workspace;
 	unsigned int workspace_size;
 	unsigned char level = F2FS_I(cc->inode)->i_compress_flag >>
@@ -346,17 +346,17 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
 	if (!level)
 		level = F2FS_ZSTD_DEFAULT_CLEVEL;
 
-	params = ZSTD_getParams(level, cc->rlen, 0);
-	workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams);
+	params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen);
+	workspace_size = zstd_cstream_workspace_bound(&params.cParams);
 
 	workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
 					workspace_size, GFP_NOFS);
 	if (!workspace)
 		return -ENOMEM;
 
-	stream = ZSTD_initCStream(params, 0, workspace, workspace_size);
+	stream = zstd_init_cstream(&params, 0, workspace, workspace_size);
 	if (!stream) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n",
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n",
 				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
 				__func__);
 		kvfree(workspace);
@@ -379,9 +379,9 @@ static void zstd_destroy_compress_ctx(struct compress_ctx *cc)
 
 static int zstd_compress_pages(struct compress_ctx *cc)
 {
-	ZSTD_CStream *stream = cc->private2;
-	ZSTD_inBuffer inbuf;
-	ZSTD_outBuffer outbuf;
+	zstd_cstream *stream = cc->private2;
+	zstd_in_buffer inbuf;
+	zstd_out_buffer outbuf;
 	int src_size = cc->rlen;
 	int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE;
 	int ret;
@@ -394,19 +394,19 @@ static int zstd_compress_pages(struct compress_ctx *cc)
 	outbuf.dst = cc->cbuf->cdata;
 	outbuf.size = dst_size;
 
-	ret = ZSTD_compressStream(stream, &outbuf, &inbuf);
-	if (ZSTD_isError(ret)) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+	ret = zstd_compress_stream(stream, &outbuf, &inbuf);
+	if (zstd_is_error(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n",
 				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
-				__func__, ZSTD_getErrorCode(ret));
+				__func__, zstd_get_error_code(ret));
 		return -EIO;
 	}
 
-	ret = ZSTD_endStream(stream, &outbuf);
-	if (ZSTD_isError(ret)) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n",
+	ret = zstd_end_stream(stream, &outbuf);
+	if (zstd_is_error(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n",
 				KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
-				__func__, ZSTD_getErrorCode(ret));
+				__func__, zstd_get_error_code(ret));
 		return -EIO;
 	}
 
@@ -423,22 +423,22 @@ static int zstd_compress_pages(struct compress_ctx *cc)
 
 static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
 {
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	void *workspace;
 	unsigned int workspace_size;
 	unsigned int max_window_size =
 			MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size);
 
-	workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);
+	workspace_size = zstd_dstream_workspace_bound(max_window_size);
 
 	workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
 					workspace_size, GFP_NOFS);
 	if (!workspace)
 		return -ENOMEM;
 
-	stream = ZSTD_initDStream(max_window_size, workspace, workspace_size);
+	stream = zstd_init_dstream(max_window_size, workspace, workspace_size);
 	if (!stream) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n",
 				KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
 				__func__);
 		kvfree(workspace);
@@ -460,9 +460,9 @@ static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic)
 
 static int zstd_decompress_pages(struct decompress_io_ctx *dic)
 {
-	ZSTD_DStream *stream = dic->private2;
-	ZSTD_inBuffer inbuf;
-	ZSTD_outBuffer outbuf;
+	zstd_dstream *stream = dic->private2;
+	zstd_in_buffer inbuf;
+	zstd_out_buffer outbuf;
 	int ret;
 
 	inbuf.pos = 0;
@@ -473,11 +473,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic)
 	outbuf.dst = dic->rbuf;
 	outbuf.size = dic->rlen;
 
-	ret = ZSTD_decompressStream(stream, &outbuf, &inbuf);
-	if (ZSTD_isError(ret)) {
-		printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+	ret = zstd_decompress_stream(stream, &outbuf, &inbuf);
+	if (zstd_is_error(ret)) {
+		printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n",
 				KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
-				__func__, ZSTD_getErrorCode(ret));
+				__func__, zstd_get_error_code(ret));
 		return -EIO;
 	}
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index cf049a042482..78b64bf5e0f7 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -592,7 +592,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
 	if (kstrtouint(str + 1, 10, &level))
 		return -EINVAL;
 
-	if (!level || level > ZSTD_maxCLevel()) {
+	if (!level || level > zstd_max_clevel()) {
 		f2fs_info(sbi, "invalid zstd compress level: %d", level);
 		return -EINVAL;
 	}
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index b9614db48b1d..f243cb5e6a4f 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -218,7 +218,7 @@ static int zbufsize_842(size_t size)
 #if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
 static int zbufsize_zstd(size_t size)
 {
-	return ZSTD_compressBound(size);
+	return zstd_compress_bound(size);
 }
 #endif
 
diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c
index 0015cf8b5582..c40445dbf38c 100644
--- a/fs/squashfs/zstd_wrapper.c
+++ b/fs/squashfs/zstd_wrapper.c
@@ -34,7 +34,7 @@ static void *zstd_init(struct squashfs_sb_info *msblk, void *buff)
 		goto failed;
 	wksp->window_size = max_t(size_t,
 			msblk->block_size, SQUASHFS_METADATA_SIZE);
-	wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size);
+	wksp->mem_size = zstd_dstream_workspace_bound(wksp->window_size);
 	wksp->mem = vmalloc(wksp->mem_size);
 	if (wksp->mem == NULL)
 		goto failed;
@@ -63,15 +63,15 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
 	struct squashfs_page_actor *output)
 {
 	struct workspace *wksp = strm;
-	ZSTD_DStream *stream;
+	zstd_dstream *stream;
 	size_t total_out = 0;
 	int error = 0;
-	ZSTD_inBuffer in_buf = { NULL, 0, 0 };
-	ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+	zstd_in_buffer in_buf = { NULL, 0, 0 };
+	zstd_out_buffer out_buf = { NULL, 0, 0 };
 	struct bvec_iter_all iter_all = {};
 	struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
 
-	stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size);
+	stream = zstd_init_dstream(wksp->window_size, wksp->mem, wksp->mem_size);
 
 	if (!stream) {
 		ERROR("Failed to initialize zstd decompressor\n");
@@ -116,14 +116,14 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
 		}
 
 		total_out -= out_buf.pos;
-		zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf);
+		zstd_err = zstd_decompress_stream(stream, &out_buf, &in_buf);
 		total_out += out_buf.pos; /* add the additional data produced */
 		if (zstd_err == 0)
 			break;
 
-		if (ZSTD_isError(zstd_err)) {
+		if (zstd_is_error(zstd_err)) {
 			ERROR("zstd decompression error: %d\n",
-					(int)ZSTD_getErrorCode(zstd_err));
+					(int)zstd_get_error_code(zstd_err));
 			error = -EIO;
 			break;
 		}
diff --git a/include/linux/zstd.h b/include/linux/zstd.h
index e87f78c9b19c..9fbc7729b0a0 100644
--- a/include/linux/zstd.h
+++ b/include/linux/zstd.h
@@ -1,138 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
  */
 
-#ifndef ZSTD_H
-#define ZSTD_H
+#ifndef LINUX_ZSTD_H
+#define LINUX_ZSTD_H
 
-/* ======   Dependency   ======*/
-#include <linux/types.h>   /* size_t */
+/**
+ * This is a kernel-style API that wraps the upstream zstd API, which cannot be
+ * used directly because the symbols aren't exported. It exposes the minimal
+ * functionality which is currently required by users of zstd in the kernel.
+ * Expose extra functions from lib/zstd/zstd.h as needed.
+ */
 
+/* ======   Dependency   ====== */
+#include <linux/types.h>
+#include <linux/zstd_lib.h>
 
-/*-*****************************************************************************
- * Introduction
+/* ======   Helper Functions   ====== */
+/**
+ * zstd_compress_bound() - maximum compressed size in worst case scenario
+ * @src_size: The size of the data to compress.
  *
- * zstd, short for Zstandard, is a fast lossless compression algorithm,
- * targeting real-time compression scenarios at zlib-level and better
- * compression ratios. The zstd compression library provides in-memory
- * compression and decompression functions. The library supports compression
- * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled
- * ultra, should be used with caution, as they require more memory.
- * Compression can be done in:
- *  - a single step, reusing a context (described as Explicit memory management)
- *  - unbounded multiple steps (described as Streaming compression)
- * The compression ratio achievable on small data can be highly improved using
- * compression with a dictionary in:
- *  - a single step (described as Simple dictionary API)
- *  - a single step, reusing a dictionary (described as Fast dictionary API)
- ******************************************************************************/
-
-/*======  Helper functions  ======*/
+ * Return:    The maximum compressed size in the worst case scenario.
+ */
+size_t zstd_compress_bound(size_t src_size);
 
 /**
- * enum ZSTD_ErrorCode - zstd error codes
+ * zstd_is_error() - tells if a size_t function result is an error code
+ * @code:  The function result to check for error.
  *
- * Functions that return size_t can be checked for errors using ZSTD_isError()
- * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode().
+ * Return: Non-zero iff the code is an error.
+ */
+unsigned int zstd_is_error(size_t code);
+
+/**
+ * enum zstd_error_code - zstd error codes
  */
-typedef enum {
-	ZSTD_error_no_error,
-	ZSTD_error_GENERIC,
-	ZSTD_error_prefix_unknown,
-	ZSTD_error_version_unsupported,
-	ZSTD_error_parameter_unknown,
-	ZSTD_error_frameParameter_unsupported,
-	ZSTD_error_frameParameter_unsupportedBy32bits,
-	ZSTD_error_frameParameter_windowTooLarge,
-	ZSTD_error_compressionParameter_unsupported,
-	ZSTD_error_init_missing,
-	ZSTD_error_memory_allocation,
-	ZSTD_error_stage_wrong,
-	ZSTD_error_dstSize_tooSmall,
-	ZSTD_error_srcSize_wrong,
-	ZSTD_error_corruption_detected,
-	ZSTD_error_checksum_wrong,
-	ZSTD_error_tableLog_tooLarge,
-	ZSTD_error_maxSymbolValue_tooLarge,
-	ZSTD_error_maxSymbolValue_tooSmall,
-	ZSTD_error_dictionary_corrupted,
-	ZSTD_error_dictionary_wrong,
-	ZSTD_error_dictionaryCreation_failed,
-	ZSTD_error_maxCode
-} ZSTD_ErrorCode;
+typedef ZSTD_ErrorCode zstd_error_code;
 
 /**
- * ZSTD_maxCLevel() - maximum compression level available
+ * zstd_get_error_code() - translates an error function result to an error code
+ * @code:  The function result for which zstd_is_error(code) is true.
  *
- * Return: Maximum compression level available.
+ * Return: A unique error code for this error.
  */
-int ZSTD_maxCLevel(void);
+zstd_error_code zstd_get_error_code(size_t code);
+
 /**
- * ZSTD_compressBound() - maximum compressed size in worst case scenario
- * @srcSize: The size of the data to compress.
+ * zstd_get_error_name() - translates an error function result to a string
+ * @code:  The function result for which zstd_is_error(code) is true.
  *
- * Return:   The maximum compressed size in the worst case scenario.
+ * Return: An error string corresponding to the error code.
  */
-size_t ZSTD_compressBound(size_t srcSize);
+const char *zstd_get_error_name(size_t code);
+
 /**
- * ZSTD_isError() - tells if a size_t function result is an error code
- * @code:  The function result to check for error.
+ * zstd_min_clevel() - minimum allowed compression level
  *
- * Return: Non-zero iff the code is an error.
+ * Return: The minimum allowed compression level.
  */
-static __attribute__((unused)) unsigned int ZSTD_isError(size_t code)
-{
-	return code > (size_t)-ZSTD_error_maxCode;
-}
+int zstd_min_clevel(void);
+
 /**
- * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode
- * @functionResult: The result of a function for which ZSTD_isError() is true.
+ * zstd_max_clevel() - maximum allowed compression level
  *
- * Return:          The ZSTD_ErrorCode corresponding to the functionResult or 0
- *                  if the functionResult isn't an error.
+ * Return: The maximum allowed compression level.
  */
-static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(
-	size_t functionResult)
-{
-	if (!ZSTD_isError(functionResult))
-		return (ZSTD_ErrorCode)0;
-	return (ZSTD_ErrorCode)(0 - functionResult);
-}
+int zstd_max_clevel(void);
+
+/* ======   Parameter Selection   ====== */
 
 /**
- * enum ZSTD_strategy - zstd compression search strategy
+ * enum zstd_strategy - zstd compression search strategy
  *
- * From faster to stronger.
+ * From faster to stronger. See zstd_lib.h.
  */
-typedef enum {
-	ZSTD_fast,
-	ZSTD_dfast,
-	ZSTD_greedy,
-	ZSTD_lazy,
-	ZSTD_lazy2,
-	ZSTD_btlazy2,
-	ZSTD_btopt,
-	ZSTD_btopt2
-} ZSTD_strategy;
+typedef ZSTD_strategy zstd_strategy;
 
 /**
- * struct ZSTD_compressionParameters - zstd compression parameters
+ * struct zstd_compression_parameters - zstd compression parameters
  * @windowLog:    Log of the largest match distance. Larger means more
  *                compression, and more memory needed during decompression.
- * @chainLog:     Fully searched segment. Larger means more compression, slower,
- *                and more memory (useless for fast).
+ * @chainLog:     Fully searched segment. Larger means more compression,
+ *                slower, and more memory (useless for fast).
  * @hashLog:      Dispatch table. Larger means more compression,
  *                slower, and more memory.
  * @searchLog:    Number of searches. Larger means more compression and slower.
@@ -141,1017 +99,342 @@ typedef enum {
  * @targetLength: Acceptable match size for optimal parser (only). Larger means
  *                more compression, and slower.
  * @strategy:     The zstd compression strategy.
+ *
+ * See zstd_lib.h.
  */
-typedef struct {
-	unsigned int windowLog;
-	unsigned int chainLog;
-	unsigned int hashLog;
-	unsigned int searchLog;
-	unsigned int searchLength;
-	unsigned int targetLength;
-	ZSTD_strategy strategy;
-} ZSTD_compressionParameters;
+typedef ZSTD_compressionParameters zstd_compression_parameters;
 
 /**
- * struct ZSTD_frameParameters - zstd frame parameters
- * @contentSizeFlag: Controls whether content size will be present in the frame
- *                   header (when known).
- * @checksumFlag:    Controls whether a 32-bit checksum is generated at the end
- *                   of the frame for error detection.
- * @noDictIDFlag:    Controls whether dictID will be saved into the frame header
- *                   when using dictionary compression.
+ * struct zstd_frame_parameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the
+ *                   frame header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the
+ *                   end of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame
+ *                   header when using dictionary compression.
  *
- * The default value is all fields set to 0.
+ * The default value is all fields set to 0. See zstd_lib.h.
  */
-typedef struct {
-	unsigned int contentSizeFlag;
-	unsigned int checksumFlag;
-	unsigned int noDictIDFlag;
-} ZSTD_frameParameters;
+typedef ZSTD_frameParameters zstd_frame_parameters;
 
 /**
- * struct ZSTD_parameters - zstd parameters
+ * struct zstd_parameters - zstd parameters
  * @cParams: The compression parameters.
  * @fParams: The frame parameters.
  */
-typedef struct {
-	ZSTD_compressionParameters cParams;
-	ZSTD_frameParameters fParams;
-} ZSTD_parameters;
+typedef ZSTD_parameters zstd_parameters;
 
 /**
- * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+ * zstd_get_params() - returns zstd_parameters for selected level
+ * @level:              The compression level
+ * @estimated_src_size: The estimated source size to compress or 0
+ *                      if unknown.
  *
- * Return:            The selected ZSTD_compressionParameters.
+ * Return:              The selected zstd_parameters.
  */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
+zstd_parameters zstd_get_params(int level,
+	unsigned long long estimated_src_size);
 
-/**
- * ZSTD_getParams() - returns ZSTD_parameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
- *
- * The same as ZSTD_getCParams() except also selects the default frame
- * parameters (all zero).
- *
- * Return:            The selected ZSTD_parameters.
- */
-ZSTD_parameters ZSTD_getParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
+/* ======   Single-pass Compression   ====== */
 
-/*-*************************************
- * Explicit memory management
- **************************************/
+typedef ZSTD_CCtx zstd_cctx;
 
 /**
- * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx
- * @cParams: The compression parameters to be used for compression.
+ * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
+ * @parameters: The compression parameters to be used.
  *
  * If multiple compression parameters might be used, the caller must call
- * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum
+ * zstd_cctx_workspace_bound() for each set of parameters and use the maximum
  * size.
  *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCCtx().
+ * Return:      A lower bound on the size of the workspace that is passed to
+ *              zstd_init_cctx().
  */
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams);
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
 
 /**
- * struct ZSTD_CCtx - the zstd compression context
- *
- * When compressing many times it is recommended to allocate a context just once
- * and reuse it for each successive compression operation.
- */
-typedef struct ZSTD_CCtx_s ZSTD_CCtx;
-/**
- * ZSTD_initCCtx() - initialize a zstd compression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
- *
- * Return:         A compression context emplaced into workspace.
- */
-ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_compressCCtx() - compress src into dst
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, ZSTD_parameters params);
-
-/**
- * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx
- *
- * Return: A lower bound on the size of the workspace that is passed to
- *         ZSTD_initDCtx().
- */
-size_t ZSTD_DCtxWorkspaceBound(void);
-
-/**
- * struct ZSTD_DCtx - the zstd decompression context
- *
- * When decompressing many times it is recommended to allocate a context just
- * once and reuse it for each successive decompression operation.
- */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-/**
- * ZSTD_initDCtx() - initialize a zstd decompression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
- *
- * Return:         A decompression context emplaced into workspace.
- */
-ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_decompressDCtx() - decompress zstd compressed src into dst
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-
-/*-************************
- * Simple dictionary API
- **************************/
-
-/**
- * ZSTD_compress_usingDict() - compress src into dst using a dictionary
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @dict:        The dictionary to use for compression.
- * @dictSize:    The size of the dictionary.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Compression using a predefined dictionary. The same dictionary must be used
- * during decompression.
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const void *dict, size_t dictSize,
-	ZSTD_parameters params);
-
-/**
- * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @dict:        The dictionary to use for decompression. The same dictionary
- *               must've been used to compress the data.
- * @dictSize:    The size of the dictionary.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const void *dict, size_t dictSize);
-
-/*-**************************
- * Fast dictionary API
- ***************************/
-
-/**
- * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict
- * @cParams: The compression parameters to be used for compression.
+ * zstd_init_cctx() - initialize a zstd compression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
+ *                  determine how large the workspace must be.
  *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCDict().
- */
-size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams);
-
-/**
- * struct ZSTD_CDict - a digested dictionary to be used for compression
+ * Return:          A zstd compression context or NULL on error.
  */
-typedef struct ZSTD_CDict_s ZSTD_CDict;
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_initCDict() - initialize a digested dictionary for compression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_CDict so it must outlive the returned ZSTD_CDict.
- * @dictSize:      The size of the dictionary.
- * @params:        The parameters to use for compression. See ZSTD_getParams().
- * @workspace:     The workspace. It must outlive the returned ZSTD_CDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_CDictWorkspaceBound(params.cParams).
+ * zstd_compress_cctx() - compress src into dst with the initialized parameters
+ * @cctx:         The context. Must have been initialized with zstd_init_cctx().
+ * @dst:          The buffer to compress src into.
+ * @dst_capacity: The size of the destination buffer. May be any size, but
+ *                ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:          The data to compress.
+ * @src_size:     The size of the data to compress.
+ * @parameters:   The compression parameters to be used.
  *
- * When compressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_CDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_CDict.
- *
- * Return:         The digested dictionary emplaced into workspace.
+ * Return:        The compressed size or an error, which can be checked using
+ *                zstd_is_error().
  */
-ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
-	ZSTD_parameters params, void *workspace, size_t workspaceSize);
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size, const zstd_parameters *parameters);
 
-/**
- * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
- *               cParams are the compression parameters used to initialize the
- *               cdict.
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @cdict:       The digested dictionary to use for compression.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Compression using a digested dictionary. The same dictionary must be used
- * during decompression.
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const ZSTD_CDict *cdict);
+/* ======   Single-pass Decompression   ====== */
 
+typedef ZSTD_DCtx zstd_dctx;
 
 /**
- * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
+ * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
  *
- * Return:  A lower bound on the size of the workspace that is passed to
- *          ZSTD_initDDict().
- */
-size_t ZSTD_DDictWorkspaceBound(void);
-
-/**
- * struct ZSTD_DDict - a digested dictionary to be used for decompression
+ * Return: A lower bound on the size of the workspace that is passed to
+ *         zstd_init_dctx().
  */
-typedef struct ZSTD_DDict_s ZSTD_DDict;
+size_t zstd_dctx_workspace_bound(void);
 
 /**
- * ZSTD_initDDict() - initialize a digested dictionary for decompression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
- * @dictSize:      The size of the dictionary.
- * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_DDictWorkspaceBound().
- *
- * When decompressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_DDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_DDict.
+ * zstd_init_dctx() - initialize a zstd decompression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
+ *                  determine how large the workspace must be.
  *
- * Return:         The digested dictionary emplaced into workspace.
+ * Return:          A zstd decompression context or NULL on error.
  */
-ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize,
-	void *workspace, size_t workspaceSize);
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @ddict:       The digested dictionary to use for decompression. The same
- *               dictionary must've been used to compress the data.
+ * zstd_decompress_dctx() - decompress zstd compressed src into dst
+ * @dctx:         The decompression context.
+ * @dst:          The buffer to decompress src into.
+ * @dst_capacity: The size of the destination buffer. Must be at least as large
+ *                as the decompressed size. If the caller cannot upper bound the
+ *                decompressed size, then it's better to use the streaming API.
+ * @src:          The zstd compressed data to decompress. Multiple concatenated
+ *                frames and skippable frames are allowed.
+ * @src_size:     The exact size of the data to decompress.
  *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
+ * Return:        The decompressed size or an error, which can be checked using
+ *                zstd_is_error().
  */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst,
-	size_t dstCapacity, const void *src, size_t srcSize,
-	const ZSTD_DDict *ddict);
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size);
 
-
-/*-**************************
- * Streaming
- ***************************/
+/* ======   Streaming Buffers   ====== */
 
 /**
- * struct ZSTD_inBuffer - input buffer for streaming
+ * struct zstd_in_buffer - input buffer for streaming
  * @src:  Start of the input buffer.
  * @size: Size of the input buffer.
  * @pos:  Position where reading stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-typedef struct ZSTD_inBuffer_s {
-	const void *src;
-	size_t size;
-	size_t pos;
-} ZSTD_inBuffer;
+typedef ZSTD_inBuffer zstd_in_buffer;
 
 /**
- * struct ZSTD_outBuffer - output buffer for streaming
+ * struct zstd_out_buffer - output buffer for streaming
  * @dst:  Start of the output buffer.
  * @size: Size of the output buffer.
  * @pos:  Position where writing stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-typedef struct ZSTD_outBuffer_s {
-	void *dst;
-	size_t size;
-	size_t pos;
-} ZSTD_outBuffer;
+typedef ZSTD_outBuffer zstd_out_buffer;
 
+/* ======   Streaming Compression   ====== */
 
-
-/*-*****************************************************************************
- * Streaming compression - HowTo
- *
- * A ZSTD_CStream object is required to track streaming operation.
- * Use ZSTD_initCStream() to initialize a ZSTD_CStream object.
- * ZSTD_CStream objects can be reused multiple times on consecutive compression
- * operations. It is recommended to re-use ZSTD_CStream in situations where many
- * streaming operations will be achieved consecutively. Use one separate
- * ZSTD_CStream per thread for parallel execution.
- *
- * Use ZSTD_compressStream() repetitively to consume input stream.
- * The function will automatically update both `pos` fields.
- * Note that it may not consume the entire input, in which case `pos < size`,
- * and it's up to the caller to present again remaining data.
- * It returns a hint for the preferred number of bytes to use as an input for
- * the next function call.
- *
- * At any moment, it's possible to flush whatever data remains within internal
- * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might
- * still be some content left within the internal buffer if `output->size` is
- * too small. It returns the number of bytes left in the internal buffer and
- * must be called until it returns 0.
- *
- * ZSTD_endStream() instructs to finish a frame. It will perform a flush and
- * write frame epilogue. The epilogue is required for decoders to consider a
- * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush
- * the full content if `output->size` is too small. In which case, call again
- * ZSTD_endStream() to complete the flush. It returns the number of bytes left
- * in the internal buffer and must be called until it returns 0.
- ******************************************************************************/
+typedef ZSTD_CStream zstd_cstream;
 
 /**
- * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream
- * @cParams: The compression parameters to be used for compression.
+ * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
+ * @cparams: The compression parameters to be used for compression.
  *
  * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCStream() and ZSTD_initCStream_usingCDict().
- */
-size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams);
-
-/**
- * struct ZSTD_CStream - the zstd streaming compression context
+ *           zstd_init_cstream().
  */
-typedef struct ZSTD_CStream_s ZSTD_CStream;
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
 
-/*===== ZSTD_CStream management functions =====*/
 /**
- * ZSTD_initCStream() - initialize a zstd streaming compression context
- * @params:         The zstd compression parameters.
- * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must
- *                  pass the source size (zero means empty source). Otherwise,
- *                  the caller may optionally pass the source size, or zero if
- *                  unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace.
- *                  Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine
- *                  how large the workspace must be.
+ * zstd_init_cstream() - initialize a zstd streaming compression context
+ * @parameters        The zstd parameters to use for compression.
+ * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller
+ *                    must pass the source size (zero means empty source).
+ *                    Otherwise, the caller may optionally pass the source
+ *                    size, or zero if unknown.
+ * @workspace:        The workspace to emplace the context into. It must outlive
+ *                    the returned context.
+ * @workspace_size:   The size of workspace.
+ *                    Use zstd_cstream_workspace_bound(params->cparams) to
+ *                    determine how large the workspace must be.
  *
- * Return:          The zstd streaming compression context.
+ * Return:            The zstd streaming compression context or NULL on error.
  */
-ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params,
-	unsigned long long pledgedSrcSize, void *workspace,
-	size_t workspaceSize);
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+	unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_initCStream_usingCDict() - initialize a streaming compression context
- * @cdict:          The digested dictionary to use for compression.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace. Call ZSTD_CStreamWorkspaceBound()
- *                  with the cParams used to initialize the cdict to determine
- *                  how large the workspace must be.
- *
- * Return:          The zstd streaming compression context.
- */
-ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict,
-	unsigned long long pledgedSrcSize, void *workspace,
-	size_t workspaceSize);
-
-/*===== Streaming compression functions =====*/
-/**
- * ZSTD_resetCStream() - reset the context using parameters from creation
- * @zcs:            The zstd streaming compression context to reset.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
+ * zstd_reset_cstream() - reset the context using parameters from creation
+ * @cstream:          The zstd streaming compression context to reset.
+ * @pledged_src_size: Optionally the source size, or zero if unknown.
  *
  * Resets the context using the parameters from creation. Skips dictionary
- * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame
+ * loading, since it can be reused. If `pledged_src_size` is non-zero the frame
  * content size is always written into the frame header.
  *
- * Return:          Zero or an error, which can be checked using ZSTD_isError().
+ * Return:            Zero or an error, which can be checked using
+ *                    zstd_is_error().
  */
-size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize);
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+	unsigned long long pledged_src_size);
+
 /**
- * ZSTD_compressStream() - streaming compress some of input into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- * @input:  Source buffer. `input->pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input->pos < input->size`, and it's up to the caller to present
- *          remaining data again.
+ * zstd_compress_stream() - streaming compress some of input into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
+ * @input:   Source buffer. `input->pos` is updated to indicate how much data
+ *           was read. Note that it may not consume the entire input, in which
+ *           case `input->pos < input->size`, and it's up to the caller to
+ *           present remaining data again.
  *
  * The `input` and `output` buffers may be any size. Guaranteed to make some
  * forward progress if `input` and `output` are not empty.
  *
- * Return:  A hint for the number of bytes to use as the input for the next
- *          function call or an error, which can be checked using
- *          ZSTD_isError().
+ * Return:   A hint for the number of bytes to use as the input for the next
+ *           function call or an error, which can be checked using
+ *           zstd_is_error().
  */
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
+
 /**
- * ZSTD_flushStream() - flush internal buffers into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
+ * zstd_flush_stream() - flush internal buffers into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
  *
- * ZSTD_flushStream() must be called until it returns 0, meaning all the data
- * has been flushed. Since ZSTD_flushStream() causes a block to be ended,
+ * zstd_flush_stream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since zstd_flush_stream() causes a block to be ended,
  * calling it too often will degrade the compression ratio.
  *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
-/**
- * ZSTD_endStream() - flush internal buffers into output and end the frame
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- *
- * ZSTD_endStream() must be called until it returns 0, meaning all the data has
- * been flushed and the frame epilogue has been written.
- *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
 /**
- * ZSTD_CStreamInSize() - recommended size for the input buffer
- *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_CStreamInSize(void);
-/**
- * ZSTD_CStreamOutSize() - recommended size for the output buffer
+ * zstd_end_stream() - flush internal buffers into output and end the frame
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
  *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete compressed block.
+ * zstd_end_stream() must be called until it returns 0, meaning all the data has
+ * been flushed and the frame epilogue has been written.
  *
- * Return: The recommended size for the output buffer.
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_CStreamOutSize(void);
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
+/* ======   Streaming Decompression   ====== */
 
-
-/*-*****************************************************************************
- * Streaming decompression - HowTo
- *
- * A ZSTD_DStream object is required to track streaming operations.
- * Use ZSTD_initDStream() to initialize a ZSTD_DStream object.
- * ZSTD_DStream objects can be re-used multiple times.
- *
- * Use ZSTD_decompressStream() repetitively to consume your input.
- * The function will update both `pos` fields.
- * If `input->pos < input->size`, some input has not been consumed.
- * It's up to the caller to present again remaining data.
- * If `output->pos < output->size`, decoder has flushed everything it could.
- * Returns 0 iff a frame is completely decoded and fully flushed.
- * Otherwise it returns a suggested next input size that will never load more
- * than the current frame.
- ******************************************************************************/
+typedef ZSTD_DStream zstd_dstream;
 
 /**
- * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream
- * @maxWindowSize: The maximum window size allowed for compressed frames.
+ * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
+ * @max_window_size: The maximum window size allowed for compressed frames.
  *
- * Return:         A lower bound on the size of the workspace that is passed to
- *                 ZSTD_initDStream() and ZSTD_initDStream_usingDDict().
+ * Return:           A lower bound on the size of the workspace that is passed
+ *                   to zstd_init_dstream().
  */
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize);
+size_t zstd_dstream_workspace_bound(size_t max_window_size);
 
 /**
- * struct ZSTD_DStream - the zstd streaming decompression context
- */
-typedef struct ZSTD_DStream_s ZSTD_DStream;
-/*===== ZSTD_DStream management functions =====*/
-/**
- * ZSTD_initDStream() - initialize a zstd streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
- *
- * Return:         The zstd streaming decompression context.
- */
-ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace,
-	size_t workspaceSize);
-/**
- * ZSTD_initDStream_usingDDict() - initialize streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @ddict:         The digested dictionary to use for decompression.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
+ * zstd_init_dstream() - initialize a zstd streaming decompression context
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ * @workspace:       The workspace to emplace the context into. It must outlive
+ *                   the returned context.
+ * @workspaceSize:   The size of workspace.
+ *                   Use zstd_dstream_workspace_bound(max_window_size) to
+ *                   determine how large the workspace must be.
  *
- * Return:         The zstd streaming decompression context.
+ * Return:           The zstd streaming decompression context.
  */
-ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize,
-	const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize);
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+	size_t workspace_size);
 
-/*===== Streaming decompression functions =====*/
 /**
- * ZSTD_resetDStream() - reset the context using parameters from creation
- * @zds:   The zstd streaming decompression context to reset.
+ * zstd_reset_dstream() - reset the context using parameters from creation
+ * @dstream: The zstd streaming decompression context to reset.
  *
  * Resets the context using the parameters from creation. Skips dictionary
  * loading, since it can be reused.
  *
- * Return: Zero or an error, which can be checked using ZSTD_isError().
+ * Return:   Zero or an error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_resetDStream(ZSTD_DStream *zds);
+size_t zstd_reset_dstream(zstd_dstream *dstream);
+
 /**
- * ZSTD_decompressStream() - streaming decompress some of input into output
- * @zds:    The zstd streaming decompression context.
- * @output: Destination buffer. `output.pos` is updated to indicate how much
- *          decompressed data was written.
- * @input:  Source buffer. `input.pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input.pos < input.size`, and it's up to the caller to present
- *          remaining data again.
+ * zstd_decompress_stream() - streaming decompress some of input into output
+ * @dstream: The zstd streaming decompression context.
+ * @output:  Destination buffer. `output.pos` is updated to indicate how much
+ *           decompressed data was written.
+ * @input:   Source buffer. `input.pos` is updated to indicate how much data was
+ *           read. Note that it may not consume the entire input, in which case
+ *           `input.pos < input.size`, and it's up to the caller to present
+ *           remaining data again.
  *
  * The `input` and `output` buffers may be any size. Guaranteed to make some
  * forward progress if `input` and `output` are not empty.
- * ZSTD_decompressStream() will not consume the last byte of the frame until
+ * zstd_decompress_stream() will not consume the last byte of the frame until
  * the entire frame is flushed.
  *
- * Return:  Returns 0 iff a frame is completely decoded and fully flushed.
- *          Otherwise returns a hint for the number of bytes to use as the input
- *          for the next function call or an error, which can be checked using
- *          ZSTD_isError(). The size hint will never load more than the frame.
- */
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
-
-/**
- * ZSTD_DStreamInSize() - recommended size for the input buffer
- *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_DStreamInSize(void);
-/**
- * ZSTD_DStreamOutSize() - recommended size for the output buffer
- *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete decompressed block.
- *
- * Return: The recommended size for the output buffer.
- */
-size_t ZSTD_DStreamOutSize(void);
-
-
-/* --- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
-#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
-
-#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
-#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
-
-#define ZSTD_WINDOWLOG_MAX_32  27
-#define ZSTD_WINDOWLOG_MAX_64  27
-#define ZSTD_WINDOWLOG_MAX \
-	((unsigned int)(sizeof(size_t) == 4 \
-		? ZSTD_WINDOWLOG_MAX_32 \
-		: ZSTD_WINDOWLOG_MAX_64))
-#define ZSTD_WINDOWLOG_MIN 10
-#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
-#define ZSTD_HASHLOG_MIN        6
-#define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CHAINLOG_MIN      ZSTD_HASHLOG_MIN
-#define ZSTD_HASHLOG3_MAX      17
-#define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN      1
-/* only for ZSTD_fast, other strategies are limited to 6 */
-#define ZSTD_SEARCHLENGTH_MAX   7
-/* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_SEARCHLENGTH_MIN   3
-#define ZSTD_TARGETLENGTH_MIN   4
-#define ZSTD_TARGETLENGTH_MAX 999
-
-/* for static allocation */
-#define ZSTD_FRAMEHEADERSIZE_MAX 18
-#define ZSTD_FRAMEHEADERSIZE_MIN  6
-#define ZSTD_frameHeaderSize_prefix 5
-#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
-#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
-/* magic number + skippable frame length */
-#define ZSTD_skippableHeaderSize 8
-
-
-/*-*************************************
- * Compressed size functions
- **************************************/
-
-/**
- * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
- * @src:     Source buffer. It should point to the start of a zstd encoded frame
- *           or a skippable frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           size of the frame.
- *
- * Return:   The compressed size of the frame pointed to by `src` or an error,
- *           which can be check with ZSTD_isError().
- *           Suitable to pass to ZSTD_decompress() or similar functions.
- */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Decompressed size functions
- **************************************/
-/**
- * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
- * @src:     It should point to the start of a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The frame content size stored in the frame header if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
- *           frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input.
- */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
-
-/**
- * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames
- * @src:     It should point to the start of a series of zstd encoded and/or
- *           skippable frames.
- * @srcSize: The exact size of the series of frames.
- *
- * If any zstd encoded frame in the series doesn't have the frame content size
- * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always
- * set when using ZSTD_compress(). The decompressed size can be very large.
- * If the source is untrusted, the decompressed size could be wrong or
- * intentionally modified. Always ensure the result fits within the
- * application's authorized limits. ZSTD_findDecompressedSize() handles multiple
- * frames, and so it must traverse the input to read each frame header. This is
- * efficient as most of the data is skipped, however it does mean that all frame
- * data must be present and valid.
- *
- * Return:   Decompressed size of all the data contained in the frames if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown.
- *           `ZSTD_CONTENTSIZE_ERROR` if an error occurred.
- */
-unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Advanced compression functions
- **************************************/
-/**
- * ZSTD_checkCParams() - ensure parameter values remain within authorized range
- * @cParams: The zstd compression parameters.
- *
- * Return:   Zero or an error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams);
-
-/**
- * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize
- * @srcSize:  Optionally the estimated source size, or zero if unknown.
- * @dictSize: Optionally the estimated dictionary size, or zero if unknown.
- *
- * Return:    The optimized parameters.
- */
-ZSTD_compressionParameters ZSTD_adjustCParams(
-	ZSTD_compressionParameters cParams, unsigned long long srcSize,
-	size_t dictSize);
-
-/*--- Advanced decompression functions ---*/
-
-/**
- * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame
- * @buffer: The source buffer to check.
- * @size:   The size of the source buffer, must be at least 4 bytes.
- *
- * Return: True iff the buffer starts with a zstd or skippable frame identifier.
- */
-unsigned int ZSTD_isFrame(const void *buffer, size_t size);
-
-/**
- * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary
- * @dict:     The dictionary buffer.
- * @dictSize: The size of the dictionary buffer.
- *
- * Return:    The dictionary id stored within the dictionary or 0 if the
- *            dictionary is not a zstd dictionary. If it returns 0 the
- *            dictionary can still be loaded as a content-only dictionary.
+ * Return:   Returns 0 iff a frame is completely decoded and fully flushed.
+ *           Otherwise returns a hint for the number of bytes to use as the
+ *           input for the next function call or an error, which can be checked
+ *           using zstd_is_error(). The size hint will never load more than the
+ *           frame.
  */
-unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize);
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input);
 
-/**
- * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict
- * @ddict: The ddict to find the id of.
- *
- * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not
- *         a zstd dictionary. If it returns 0 `ddict` will be loaded as a
- *         content-only dictionary.
- */
-unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict);
+/* ======   Frame Inspection Functions ====== */
 
 /**
- * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame
- * @src:     Source buffer. It must be a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
+ * zstd_find_frame_compressed_size() - returns the size of a compressed frame
+ * @src:      Source buffer. It should point to the start of a zstd encoded
+ *            frame or a skippable frame.
+ * @src_size: The size of the source buffer. It must be at least as large as the
+ *            size of the frame.
  *
- * Return:   The dictionary id required to decompress the frame stored within
- *           `src` or 0 if the dictionary id could not be decoded. It can return
- *           0 if the frame does not require a dictionary, the dictionary id
- *           wasn't stored in the frame, `src` is not a zstd frame, or `srcSize`
- *           is too small.
+ * Return:    The compressed size of the frame pointed to by `src` or an error,
+ *            which can be check with zstd_is_error().
+ *            Suitable to pass to ZSTD_decompress() or similar functions.
  */
-unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize);
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
 
 /**
- * struct ZSTD_frameParams - zstd frame parameters stored in the frame header
+ * struct zstd_frame_params - zstd frame parameters stored in the frame header
  * @frameContentSize: The frame content size, or 0 if not present.
  * @windowSize:       The window size, or 0 if the frame is a skippable frame.
  * @dictID:           The dictionary id, or 0 if not present.
  * @checksumFlag:     Whether a checksum was used.
  */
-typedef struct {
-	unsigned long long frameContentSize;
-	unsigned int windowSize;
-	unsigned int dictID;
-	unsigned int checksumFlag;
-} ZSTD_frameParams;
+typedef ZSTD_frameParams zstd_frame_header;
 
 /**
- * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame
- * @fparamsPtr: On success the frame parameters are written here.
- * @src:        The source buffer. It must point to a zstd or skippable frame.
- * @srcSize:    The size of the source buffer. `ZSTD_frameHeaderSize_max` is
- *              always large enough to succeed.
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
+ * @params:   On success the frame parameters are written here.
+ * @src:      The source buffer. It must point to a zstd or skippable frame.
+ * @src_size: The size of the source buffer.
  *
- * Return:      0 on success. If more data is required it returns how many bytes
- *              must be provided to make forward progress. Otherwise it returns
- *              an error, which can be checked using ZSTD_isError().
+ * Return:    0 on success. If more data is required it returns how many bytes
+ *            must be provided to make forward progress. Otherwise it returns
+ *            an error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src,
-	size_t srcSize);
-
-/*-*****************************************************************************
- * Buffer-less and synchronous inner streaming functions
- *
- * This is an advanced API, giving full control over buffer management, for
- * users which need direct control over memory.
- * But it's also a complex one, with many restrictions (documented below).
- * Prefer using normal streaming API for an easier experience
- ******************************************************************************/
-
-/*-*****************************************************************************
- * Buffer-less streaming compression (synchronous mode)
- *
- * A ZSTD_CCtx object is required to track streaming operations.
- * Use ZSTD_initCCtx() to initialize a context.
- * ZSTD_CCtx object can be re-used multiple times within successive compression
- * operations.
- *
- * Start by initializing a context.
- * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary
- * compression,
- * or ZSTD_compressBegin_advanced(), for finer parameter control.
- * It's also possible to duplicate a reference context which has already been
- * initialized, using ZSTD_copyCCtx()
- *
- * Then, consume your input using ZSTD_compressContinue().
- * There are some important considerations to keep in mind when using this
- * advanced function :
- * - ZSTD_compressContinue() has no internal buffer. It uses externally provided
- *   buffer only.
- * - Interface is synchronous : input is consumed entirely and produce 1+
- *   (or more) compressed blocks.
- * - Caller must ensure there is enough space in `dst` to store compressed data
- *   under worst case scenario. Worst case evaluation is provided by
- *   ZSTD_compressBound().
- *   ZSTD_compressContinue() doesn't guarantee recover after a failed
- *   compression.
- * - ZSTD_compressContinue() presumes prior input ***is still accessible and
- *   unmodified*** (up to maximum distance size, see WindowLog).
- *   It remembers all previous contiguous blocks, plus one separated memory
- *   segment (which can itself consists of multiple contiguous blocks)
- * - ZSTD_compressContinue() detects that prior input has been overwritten when
- *   `src` buffer overlaps. In which case, it will "discard" the relevant memory
- *   section from its history.
- *
- * Finish a frame with ZSTD_compressEnd(), which will write the last block(s)
- * and optional checksum. It's possible to use srcSize==0, in which case, it
- * will write a final empty block to end the frame. Without last block mark,
- * frames will be considered unfinished (corrupted) by decoders.
- *
- * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new
- * frame.
- ******************************************************************************/
-
-/*=====   Buffer-less streaming compression functions  =====*/
-size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel);
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict,
-	size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict,
-	size_t dictSize, ZSTD_parameters params,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-
-
-
-/*-*****************************************************************************
- * Buffer-less streaming decompression (synchronous mode)
- *
- * A ZSTD_DCtx object is required to track streaming operations.
- * Use ZSTD_initDCtx() to initialize a context.
- * A ZSTD_DCtx object can be re-used multiple times.
- *
- * First typical operation is to retrieve frame parameters, using
- * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide
- * important information to correctly decode the frame, such as the minimum
- * rolling buffer size to allocate to decompress data (`windowSize`), and the
- * dictionary ID used.
- * Note: content size is optional, it may not be present. 0 means unknown.
- * Note that these values could be wrong, either because of data malformation,
- * or because an attacker is spoofing deliberate false information. As a
- * consequence, check that values remain within valid application range,
- * especially `windowSize`, before allocation. Each application can set its own
- * limit, depending on local restrictions. For extended interoperability, it is
- * recommended to support at least 8 MB.
- * Frame parameters are extracted from the beginning of the compressed frame.
- * Data fragment must be large enough to ensure successful decoding, typically
- * `ZSTD_frameHeaderSize_max` bytes.
- * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled.
- *        >0: `srcSize` is too small, provide at least this many bytes.
- *        errorCode, which can be tested using ZSTD_isError().
- *
- * Start decompression, with ZSTD_decompressBegin() or
- * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared
- * context, using ZSTD_copyDCtx().
- *
- * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue()
- * alternatively.
- * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize'
- * to ZSTD_decompressContinue().
- * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will
- * fail.
- *
- * The result of ZSTD_decompressContinue() is the number of bytes regenerated
- * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an
- * error; it just means ZSTD_decompressContinue() has decoded some metadata
- * item. It can also be an error code, which can be tested with ZSTD_isError().
- *
- * ZSTD_decompressContinue() needs previous data blocks during decompression, up
- * to `windowSize`. They should preferably be located contiguously, prior to
- * current block. Alternatively, a round buffer of sufficient size is also
- * possible. Sufficient size is determined by frame parameters.
- * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't
- * follow each other, make sure that either the compressor breaks contiguity at
- * the same place, or that previous contiguous segment is large enough to
- * properly handle maximum back-reference.
- *
- * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
- * Context can then be reset to start a new decompression.
- *
- * Note: it's possible to know if next input to present is a header or a block,
- * using ZSTD_nextInputType(). This information is not required to properly
- * decode a frame.
- *
- * == Special case: skippable frames ==
- *
- * Skippable frames allow integration of user-defined data into a flow of
- * concatenated frames. Skippable frames will be ignored (skipped) by a
- * decompressor. The format of skippable frames is as follows:
- * a) Skippable frame ID - 4 Bytes, Little endian format, any value from
- *    0x184D2A50 to 0x184D2A5F
- * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
- * c) Frame Content - any content (User Data) of length equal to Frame Size
- * For skippable frames ZSTD_decompressContinue() always returns 0.
- * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0
- * what means that a frame is skippable.
- * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might
- *       actually be a zstd encoded frame with no content. For purposes of
- *       decompression, it is valid in both cases to skip the frame using
- *       ZSTD_findFrameCompressedSize() to find its size in bytes.
- * It also returns frame size as fparamsPtr->frameContentSize.
- ******************************************************************************/
-
-/*=====   Buffer-less streaming decompression functions  =====*/
-size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
-	size_t dictSize);
-void   ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx);
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-typedef enum {
-	ZSTDnit_frameHeader,
-	ZSTDnit_blockHeader,
-	ZSTDnit_block,
-	ZSTDnit_lastBlock,
-	ZSTDnit_checksum,
-	ZSTDnit_skippableFrame
-} ZSTD_nextInputType_e;
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx);
-
-/*-*****************************************************************************
- * Block functions
- *
- * Block functions produce and decode raw zstd blocks, without frame metadata.
- * Frame metadata cost is typically ~18 bytes, which can be non-negligible for
- * very small blocks (< 100 bytes). User will have to take in charge required
- * information to regenerate data, such as compressed and content sizes.
- *
- * A few rules to respect:
- * - Compressing and decompressing require a context structure
- *   + Use ZSTD_initCCtx() and ZSTD_initDCtx()
- * - It is necessary to init context before starting
- *   + compression : ZSTD_compressBegin()
- *   + decompression : ZSTD_decompressBegin()
- *   + variants _usingDict() are also allowed
- *   + copyCCtx() and copyDCtx() work too
- * - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
- *   + If you need to compress more, cut data into multiple blocks
- *   + Consider using the regular ZSTD_compress() instead, as frame metadata
- *     costs become negligible when source size is large.
- * - When a block is considered not compressible enough, ZSTD_compressBlock()
- *   result will be zero. In which case, nothing is produced into `dst`.
- *   + User must test for such outcome and deal directly with uncompressed data
- *   + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!!
- *   + In case of multiple successive blocks, decoder must be informed of
- *     uncompressed block existence to follow proper history. Use
- *     ZSTD_insertBlock() in such a case.
- ******************************************************************************/
-
-/* Define for static allocation */
-#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)
-/*=====   Raw zstd block functions  =====*/
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx);
-size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart,
-	size_t blockSize);
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
+	size_t src_size);
 
-#endif  /* ZSTD_H */
+#endif  /* LINUX_ZSTD_H */
diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
new file mode 100644
index 000000000000..13151c34f725
--- /dev/null
+++ b/include/linux/zstd_lib.h
@@ -0,0 +1,1157 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd.
+ * An additional grant of patent rights can be found in the PATENTS file in the
+ * same directory.
+ *
+ * This program is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU General Public License version 2 as published by the
+ * Free Software Foundation. This program is dual-licensed; you may select
+ * either version 2 of the GNU General Public License ("GPL") or BSD license
+ * ("BSD").
+ */
+
+#ifndef ZSTD_H
+#define ZSTD_H
+
+/* ======   Dependency   ======*/
+#include <linux/types.h>   /* size_t */
+
+
+/*-*****************************************************************************
+ * Introduction
+ *
+ * zstd, short for Zstandard, is a fast lossless compression algorithm,
+ * targeting real-time compression scenarios at zlib-level and better
+ * compression ratios. The zstd compression library provides in-memory
+ * compression and decompression functions. The library supports compression
+ * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled
+ * ultra, should be used with caution, as they require more memory.
+ * Compression can be done in:
+ *  - a single step, reusing a context (described as Explicit memory management)
+ *  - unbounded multiple steps (described as Streaming compression)
+ * The compression ratio achievable on small data can be highly improved using
+ * compression with a dictionary in:
+ *  - a single step (described as Simple dictionary API)
+ *  - a single step, reusing a dictionary (described as Fast dictionary API)
+ ******************************************************************************/
+
+/*======  Helper functions  ======*/
+
+/**
+ * enum ZSTD_ErrorCode - zstd error codes
+ *
+ * Functions that return size_t can be checked for errors using ZSTD_isError()
+ * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode().
+ */
+typedef enum {
+	ZSTD_error_no_error,
+	ZSTD_error_GENERIC,
+	ZSTD_error_prefix_unknown,
+	ZSTD_error_version_unsupported,
+	ZSTD_error_parameter_unknown,
+	ZSTD_error_frameParameter_unsupported,
+	ZSTD_error_frameParameter_unsupportedBy32bits,
+	ZSTD_error_frameParameter_windowTooLarge,
+	ZSTD_error_compressionParameter_unsupported,
+	ZSTD_error_init_missing,
+	ZSTD_error_memory_allocation,
+	ZSTD_error_stage_wrong,
+	ZSTD_error_dstSize_tooSmall,
+	ZSTD_error_srcSize_wrong,
+	ZSTD_error_corruption_detected,
+	ZSTD_error_checksum_wrong,
+	ZSTD_error_tableLog_tooLarge,
+	ZSTD_error_maxSymbolValue_tooLarge,
+	ZSTD_error_maxSymbolValue_tooSmall,
+	ZSTD_error_dictionary_corrupted,
+	ZSTD_error_dictionary_wrong,
+	ZSTD_error_dictionaryCreation_failed,
+	ZSTD_error_maxCode
+} ZSTD_ErrorCode;
+
+/**
+ * ZSTD_maxCLevel() - maximum compression level available
+ *
+ * Return: Maximum compression level available.
+ */
+int ZSTD_maxCLevel(void);
+/**
+ * ZSTD_compressBound() - maximum compressed size in worst case scenario
+ * @srcSize: The size of the data to compress.
+ *
+ * Return:   The maximum compressed size in the worst case scenario.
+ */
+size_t ZSTD_compressBound(size_t srcSize);
+/**
+ * ZSTD_isError() - tells if a size_t function result is an error code
+ * @code:  The function result to check for error.
+ *
+ * Return: Non-zero iff the code is an error.
+ */
+static __attribute__((unused)) unsigned int ZSTD_isError(size_t code)
+{
+	return code > (size_t)-ZSTD_error_maxCode;
+}
+/**
+ * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode
+ * @functionResult: The result of a function for which ZSTD_isError() is true.
+ *
+ * Return:          The ZSTD_ErrorCode corresponding to the functionResult or 0
+ *                  if the functionResult isn't an error.
+ */
+static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(
+	size_t functionResult)
+{
+	if (!ZSTD_isError(functionResult))
+		return (ZSTD_ErrorCode)0;
+	return (ZSTD_ErrorCode)(0 - functionResult);
+}
+
+/**
+ * enum ZSTD_strategy - zstd compression search strategy
+ *
+ * From faster to stronger.
+ */
+typedef enum {
+	ZSTD_fast,
+	ZSTD_dfast,
+	ZSTD_greedy,
+	ZSTD_lazy,
+	ZSTD_lazy2,
+	ZSTD_btlazy2,
+	ZSTD_btopt,
+	ZSTD_btopt2
+} ZSTD_strategy;
+
+/**
+ * struct ZSTD_compressionParameters - zstd compression parameters
+ * @windowLog:    Log of the largest match distance. Larger means more
+ *                compression, and more memory needed during decompression.
+ * @chainLog:     Fully searched segment. Larger means more compression, slower,
+ *                and more memory (useless for fast).
+ * @hashLog:      Dispatch table. Larger means more compression,
+ *                slower, and more memory.
+ * @searchLog:    Number of searches. Larger means more compression and slower.
+ * @searchLength: Match length searched. Larger means faster decompression,
+ *                sometimes less compression.
+ * @targetLength: Acceptable match size for optimal parser (only). Larger means
+ *                more compression, and slower.
+ * @strategy:     The zstd compression strategy.
+ */
+typedef struct {
+	unsigned int windowLog;
+	unsigned int chainLog;
+	unsigned int hashLog;
+	unsigned int searchLog;
+	unsigned int searchLength;
+	unsigned int targetLength;
+	ZSTD_strategy strategy;
+} ZSTD_compressionParameters;
+
+/**
+ * struct ZSTD_frameParameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the frame
+ *                   header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the end
+ *                   of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame header
+ *                   when using dictionary compression.
+ *
+ * The default value is all fields set to 0.
+ */
+typedef struct {
+	unsigned int contentSizeFlag;
+	unsigned int checksumFlag;
+	unsigned int noDictIDFlag;
+} ZSTD_frameParameters;
+
+/**
+ * struct ZSTD_parameters - zstd parameters
+ * @cParams: The compression parameters.
+ * @fParams: The frame parameters.
+ */
+typedef struct {
+	ZSTD_compressionParameters cParams;
+	ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+/**
+ * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level
+ * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
+ * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
+ * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+ *
+ * Return:            The selected ZSTD_compressionParameters.
+ */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
+	unsigned long long estimatedSrcSize, size_t dictSize);
+
+/**
+ * ZSTD_getParams() - returns ZSTD_parameters for selected level
+ * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
+ * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
+ * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+ *
+ * The same as ZSTD_getCParams() except also selects the default frame
+ * parameters (all zero).
+ *
+ * Return:            The selected ZSTD_parameters.
+ */
+ZSTD_parameters ZSTD_getParams(int compressionLevel,
+	unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*-*************************************
+ * Explicit memory management
+ **************************************/
+
+/**
+ * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx
+ * @cParams: The compression parameters to be used for compression.
+ *
+ * If multiple compression parameters might be used, the caller must call
+ * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum
+ * size.
+ *
+ * Return:   A lower bound on the size of the workspace that is passed to
+ *           ZSTD_initCCtx().
+ */
+size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams);
+
+/**
+ * struct ZSTD_CCtx - the zstd compression context
+ *
+ * When compressing many times it is recommended to allocate a context just once
+ * and reuse it for each successive compression operation.
+ */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+/**
+ * ZSTD_initCCtx() - initialize a zstd compression context
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to
+ *                 determine how large the workspace must be.
+ *
+ * Return:         A compression context emplaced into workspace.
+ */
+ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_compressCCtx() - compress src into dst
+ * @ctx:         The context. Must have been initialized with a workspace at
+ *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
+ * @dst:         The buffer to compress src into.
+ * @dstCapacity: The size of the destination buffer. May be any size, but
+ *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:         The data to compress.
+ * @srcSize:     The size of the data to compress.
+ * @params:      The parameters to use for compression. See ZSTD_getParams().
+ *
+ * Return:       The compressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, ZSTD_parameters params);
+
+/**
+ * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx
+ *
+ * Return: A lower bound on the size of the workspace that is passed to
+ *         ZSTD_initDCtx().
+ */
+size_t ZSTD_DCtxWorkspaceBound(void);
+
+/**
+ * struct ZSTD_DCtx - the zstd decompression context
+ *
+ * When decompressing many times it is recommended to allocate a context just
+ * once and reuse it for each successive decompression operation.
+ */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+/**
+ * ZSTD_initDCtx() - initialize a zstd decompression context
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to
+ *                 determine how large the workspace must be.
+ *
+ * Return:         A decompression context emplaced into workspace.
+ */
+ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_decompressDCtx() - decompress zstd compressed src into dst
+ * @ctx:         The decompression context.
+ * @dst:         The buffer to decompress src into.
+ * @dstCapacity: The size of the destination buffer. Must be at least as large
+ *               as the decompressed size. If the caller cannot upper bound the
+ *               decompressed size, then it's better to use the streaming API.
+ * @src:         The zstd compressed data to decompress. Multiple concatenated
+ *               frames and skippable frames are allowed.
+ * @srcSize:     The exact size of the data to decompress.
+ *
+ * Return:       The decompressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+
+/*-************************
+ * Simple dictionary API
+ **************************/
+
+/**
+ * ZSTD_compress_usingDict() - compress src into dst using a dictionary
+ * @ctx:         The context. Must have been initialized with a workspace at
+ *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
+ * @dst:         The buffer to compress src into.
+ * @dstCapacity: The size of the destination buffer. May be any size, but
+ *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:         The data to compress.
+ * @srcSize:     The size of the data to compress.
+ * @dict:        The dictionary to use for compression.
+ * @dictSize:    The size of the dictionary.
+ * @params:      The parameters to use for compression. See ZSTD_getParams().
+ *
+ * Compression using a predefined dictionary. The same dictionary must be used
+ * during decompression.
+ *
+ * Return:       The compressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, const void *dict, size_t dictSize,
+	ZSTD_parameters params);
+
+/**
+ * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary
+ * @ctx:         The decompression context.
+ * @dst:         The buffer to decompress src into.
+ * @dstCapacity: The size of the destination buffer. Must be at least as large
+ *               as the decompressed size. If the caller cannot upper bound the
+ *               decompressed size, then it's better to use the streaming API.
+ * @src:         The zstd compressed data to decompress. Multiple concatenated
+ *               frames and skippable frames are allowed.
+ * @srcSize:     The exact size of the data to decompress.
+ * @dict:        The dictionary to use for decompression. The same dictionary
+ *               must've been used to compress the data.
+ * @dictSize:    The size of the dictionary.
+ *
+ * Return:       The decompressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, const void *dict, size_t dictSize);
+
+/*-**************************
+ * Fast dictionary API
+ ***************************/
+
+/**
+ * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict
+ * @cParams: The compression parameters to be used for compression.
+ *
+ * Return:   A lower bound on the size of the workspace that is passed to
+ *           ZSTD_initCDict().
+ */
+size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams);
+
+/**
+ * struct ZSTD_CDict - a digested dictionary to be used for compression
+ */
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/**
+ * ZSTD_initCDict() - initialize a digested dictionary for compression
+ * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
+ *                 ZSTD_CDict so it must outlive the returned ZSTD_CDict.
+ * @dictSize:      The size of the dictionary.
+ * @params:        The parameters to use for compression. See ZSTD_getParams().
+ * @workspace:     The workspace. It must outlive the returned ZSTD_CDict.
+ * @workspaceSize: The workspace size. Must be at least
+ *                 ZSTD_CDictWorkspaceBound(params.cParams).
+ *
+ * When compressing multiple messages / blocks with the same dictionary it is
+ * recommended to load it just once. The ZSTD_CDict merely references the
+ * dictBuffer, so it must outlive the returned ZSTD_CDict.
+ *
+ * Return:         The digested dictionary emplaced into workspace.
+ */
+ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
+	ZSTD_parameters params, void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
+ * @ctx:         The context. Must have been initialized with a workspace at
+ *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
+ *               cParams are the compression parameters used to initialize the
+ *               cdict.
+ * @dst:         The buffer to compress src into.
+ * @dstCapacity: The size of the destination buffer. May be any size, but
+ *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:         The data to compress.
+ * @srcSize:     The size of the data to compress.
+ * @cdict:       The digested dictionary to use for compression.
+ * @params:      The parameters to use for compression. See ZSTD_getParams().
+ *
+ * Compression using a digested dictionary. The same dictionary must be used
+ * during decompression.
+ *
+ * Return:       The compressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize, const ZSTD_CDict *cdict);
+
+
+/**
+ * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
+ *
+ * Return:  A lower bound on the size of the workspace that is passed to
+ *          ZSTD_initDDict().
+ */
+size_t ZSTD_DDictWorkspaceBound(void);
+
+/**
+ * struct ZSTD_DDict - a digested dictionary to be used for decompression
+ */
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/**
+ * ZSTD_initDDict() - initialize a digested dictionary for decompression
+ * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
+ *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
+ * @dictSize:      The size of the dictionary.
+ * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
+ * @workspaceSize: The workspace size. Must be at least
+ *                 ZSTD_DDictWorkspaceBound().
+ *
+ * When decompressing multiple messages / blocks with the same dictionary it is
+ * recommended to load it just once. The ZSTD_DDict merely references the
+ * dictBuffer, so it must outlive the returned ZSTD_DDict.
+ *
+ * Return:         The digested dictionary emplaced into workspace.
+ */
+ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize,
+	void *workspace, size_t workspaceSize);
+
+/**
+ * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict
+ * @ctx:         The decompression context.
+ * @dst:         The buffer to decompress src into.
+ * @dstCapacity: The size of the destination buffer. Must be at least as large
+ *               as the decompressed size. If the caller cannot upper bound the
+ *               decompressed size, then it's better to use the streaming API.
+ * @src:         The zstd compressed data to decompress. Multiple concatenated
+ *               frames and skippable frames are allowed.
+ * @srcSize:     The exact size of the data to decompress.
+ * @ddict:       The digested dictionary to use for decompression. The same
+ *               dictionary must've been used to compress the data.
+ *
+ * Return:       The decompressed size or an error, which can be checked using
+ *               ZSTD_isError().
+ */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst,
+	size_t dstCapacity, const void *src, size_t srcSize,
+	const ZSTD_DDict *ddict);
+
+
+/*-**************************
+ * Streaming
+ ***************************/
+
+/**
+ * struct ZSTD_inBuffer - input buffer for streaming
+ * @src:  Start of the input buffer.
+ * @size: Size of the input buffer.
+ * @pos:  Position where reading stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ */
+typedef struct ZSTD_inBuffer_s {
+	const void *src;
+	size_t size;
+	size_t pos;
+} ZSTD_inBuffer;
+
+/**
+ * struct ZSTD_outBuffer - output buffer for streaming
+ * @dst:  Start of the output buffer.
+ * @size: Size of the output buffer.
+ * @pos:  Position where writing stopped. Will be updated.
+ *        Necessarily 0 <= pos <= size.
+ */
+typedef struct ZSTD_outBuffer_s {
+	void *dst;
+	size_t size;
+	size_t pos;
+} ZSTD_outBuffer;
+
+
+
+/*-*****************************************************************************
+ * Streaming compression - HowTo
+ *
+ * A ZSTD_CStream object is required to track streaming operation.
+ * Use ZSTD_initCStream() to initialize a ZSTD_CStream object.
+ * ZSTD_CStream objects can be reused multiple times on consecutive compression
+ * operations. It is recommended to re-use ZSTD_CStream in situations where many
+ * streaming operations will be achieved consecutively. Use one separate
+ * ZSTD_CStream per thread for parallel execution.
+ *
+ * Use ZSTD_compressStream() repetitively to consume input stream.
+ * The function will automatically update both `pos` fields.
+ * Note that it may not consume the entire input, in which case `pos < size`,
+ * and it's up to the caller to present again remaining data.
+ * It returns a hint for the preferred number of bytes to use as an input for
+ * the next function call.
+ *
+ * At any moment, it's possible to flush whatever data remains within internal
+ * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might
+ * still be some content left within the internal buffer if `output->size` is
+ * too small. It returns the number of bytes left in the internal buffer and
+ * must be called until it returns 0.
+ *
+ * ZSTD_endStream() instructs to finish a frame. It will perform a flush and
+ * write frame epilogue. The epilogue is required for decoders to consider a
+ * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush
+ * the full content if `output->size` is too small. In which case, call again
+ * ZSTD_endStream() to complete the flush. It returns the number of bytes left
+ * in the internal buffer and must be called until it returns 0.
+ ******************************************************************************/
+
+/**
+ * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream
+ * @cParams: The compression parameters to be used for compression.
+ *
+ * Return:   A lower bound on the size of the workspace that is passed to
+ *           ZSTD_initCStream() and ZSTD_initCStream_usingCDict().
+ */
+size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams);
+
+/**
+ * struct ZSTD_CStream - the zstd streaming compression context
+ */
+typedef struct ZSTD_CStream_s ZSTD_CStream;
+
+/*===== ZSTD_CStream management functions =====*/
+/**
+ * ZSTD_initCStream() - initialize a zstd streaming compression context
+ * @params:         The zstd compression parameters.
+ * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must
+ *                  pass the source size (zero means empty source). Otherwise,
+ *                  the caller may optionally pass the source size, or zero if
+ *                  unknown.
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspaceSize:  The size of workspace.
+ *                  Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine
+ *                  how large the workspace must be.
+ *
+ * Return:          The zstd streaming compression context.
+ */
+ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params,
+	unsigned long long pledgedSrcSize, void *workspace,
+	size_t workspaceSize);
+
+/**
+ * ZSTD_initCStream_usingCDict() - initialize a streaming compression context
+ * @cdict:          The digested dictionary to use for compression.
+ * @pledgedSrcSize: Optionally the source size, or zero if unknown.
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspaceSize:  The size of workspace. Call ZSTD_CStreamWorkspaceBound()
+ *                  with the cParams used to initialize the cdict to determine
+ *                  how large the workspace must be.
+ *
+ * Return:          The zstd streaming compression context.
+ */
+ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict,
+	unsigned long long pledgedSrcSize, void *workspace,
+	size_t workspaceSize);
+
+/*===== Streaming compression functions =====*/
+/**
+ * ZSTD_resetCStream() - reset the context using parameters from creation
+ * @zcs:            The zstd streaming compression context to reset.
+ * @pledgedSrcSize: Optionally the source size, or zero if unknown.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame
+ * content size is always written into the frame header.
+ *
+ * Return:          Zero or an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize);
+/**
+ * ZSTD_compressStream() - streaming compress some of input into output
+ * @zcs:    The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ *          compressed data was written.
+ * @input:  Source buffer. `input->pos` is updated to indicate how much data was
+ *          read. Note that it may not consume the entire input, in which case
+ *          `input->pos < input->size`, and it's up to the caller to present
+ *          remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ *
+ * Return:  A hint for the number of bytes to use as the input for the next
+ *          function call or an error, which can be checked using
+ *          ZSTD_isError().
+ */
+size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output,
+	ZSTD_inBuffer *input);
+/**
+ * ZSTD_flushStream() - flush internal buffers into output
+ * @zcs:    The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ *          compressed data was written.
+ *
+ * ZSTD_flushStream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since ZSTD_flushStream() causes a block to be ended,
+ * calling it too often will degrade the compression ratio.
+ *
+ * Return:  The number of bytes still present within internal buffers or an
+ *          error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+/**
+ * ZSTD_endStream() - flush internal buffers into output and end the frame
+ * @zcs:    The zstd streaming compression context.
+ * @output: Destination buffer. `output->pos` is updated to indicate how much
+ *          compressed data was written.
+ *
+ * ZSTD_endStream() must be called until it returns 0, meaning all the data has
+ * been flushed and the frame epilogue has been written.
+ *
+ * Return:  The number of bytes still present within internal buffers or an
+ *          error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+
+/**
+ * ZSTD_CStreamInSize() - recommended size for the input buffer
+ *
+ * Return: The recommended size for the input buffer.
+ */
+size_t ZSTD_CStreamInSize(void);
+/**
+ * ZSTD_CStreamOutSize() - recommended size for the output buffer
+ *
+ * When the output buffer is at least this large, it is guaranteed to be large
+ * enough to flush at least one complete compressed block.
+ *
+ * Return: The recommended size for the output buffer.
+ */
+size_t ZSTD_CStreamOutSize(void);
+
+
+
+/*-*****************************************************************************
+ * Streaming decompression - HowTo
+ *
+ * A ZSTD_DStream object is required to track streaming operations.
+ * Use ZSTD_initDStream() to initialize a ZSTD_DStream object.
+ * ZSTD_DStream objects can be re-used multiple times.
+ *
+ * Use ZSTD_decompressStream() repetitively to consume your input.
+ * The function will update both `pos` fields.
+ * If `input->pos < input->size`, some input has not been consumed.
+ * It's up to the caller to present again remaining data.
+ * If `output->pos < output->size`, decoder has flushed everything it could.
+ * Returns 0 iff a frame is completely decoded and fully flushed.
+ * Otherwise it returns a suggested next input size that will never load more
+ * than the current frame.
+ ******************************************************************************/
+
+/**
+ * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream
+ * @maxWindowSize: The maximum window size allowed for compressed frames.
+ *
+ * Return:         A lower bound on the size of the workspace that is passed to
+ *                 ZSTD_initDStream() and ZSTD_initDStream_usingDDict().
+ */
+size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize);
+
+/**
+ * struct ZSTD_DStream - the zstd streaming decompression context
+ */
+typedef struct ZSTD_DStream_s ZSTD_DStream;
+/*===== ZSTD_DStream management functions =====*/
+/**
+ * ZSTD_initDStream() - initialize a zstd streaming decompression context
+ * @maxWindowSize: The maximum window size allowed for compressed frames.
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace.
+ *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
+ *                 how large the workspace must be.
+ *
+ * Return:         The zstd streaming decompression context.
+ */
+ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace,
+	size_t workspaceSize);
+/**
+ * ZSTD_initDStream_usingDDict() - initialize streaming decompression context
+ * @maxWindowSize: The maximum window size allowed for compressed frames.
+ * @ddict:         The digested dictionary to use for decompression.
+ * @workspace:     The workspace to emplace the context into. It must outlive
+ *                 the returned context.
+ * @workspaceSize: The size of workspace.
+ *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
+ *                 how large the workspace must be.
+ *
+ * Return:         The zstd streaming decompression context.
+ */
+ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize,
+	const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize);
+
+/*===== Streaming decompression functions =====*/
+/**
+ * ZSTD_resetDStream() - reset the context using parameters from creation
+ * @zds:   The zstd streaming decompression context to reset.
+ *
+ * Resets the context using the parameters from creation. Skips dictionary
+ * loading, since it can be reused.
+ *
+ * Return: Zero or an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_resetDStream(ZSTD_DStream *zds);
+/**
+ * ZSTD_decompressStream() - streaming decompress some of input into output
+ * @zds:    The zstd streaming decompression context.
+ * @output: Destination buffer. `output.pos` is updated to indicate how much
+ *          decompressed data was written.
+ * @input:  Source buffer. `input.pos` is updated to indicate how much data was
+ *          read. Note that it may not consume the entire input, in which case
+ *          `input.pos < input.size`, and it's up to the caller to present
+ *          remaining data again.
+ *
+ * The `input` and `output` buffers may be any size. Guaranteed to make some
+ * forward progress if `input` and `output` are not empty.
+ * ZSTD_decompressStream() will not consume the last byte of the frame until
+ * the entire frame is flushed.
+ *
+ * Return:  Returns 0 iff a frame is completely decoded and fully flushed.
+ *          Otherwise returns a hint for the number of bytes to use as the input
+ *          for the next function call or an error, which can be checked using
+ *          ZSTD_isError(). The size hint will never load more than the frame.
+ */
+size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output,
+	ZSTD_inBuffer *input);
+
+/**
+ * ZSTD_DStreamInSize() - recommended size for the input buffer
+ *
+ * Return: The recommended size for the input buffer.
+ */
+size_t ZSTD_DStreamInSize(void);
+/**
+ * ZSTD_DStreamOutSize() - recommended size for the output buffer
+ *
+ * When the output buffer is at least this large, it is guaranteed to be large
+ * enough to flush at least one complete decompressed block.
+ *
+ * Return: The recommended size for the output buffer.
+ */
+size_t ZSTD_DStreamOutSize(void);
+
+
+/* --- Constants ---*/
+#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
+#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
+
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
+#define ZSTD_WINDOWLOG_MAX_32  27
+#define ZSTD_WINDOWLOG_MAX_64  27
+#define ZSTD_WINDOWLOG_MAX \
+	((unsigned int)(sizeof(size_t) == 4 \
+		? ZSTD_WINDOWLOG_MAX_32 \
+		: ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN 10
+#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
+#define ZSTD_HASHLOG_MIN        6
+#define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
+#define ZSTD_CHAINLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_HASHLOG3_MAX      17
+#define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN      1
+/* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_SEARCHLENGTH_MAX   7
+/* only for ZSTD_btopt, other strategies are limited to 4 */
+#define ZSTD_SEARCHLENGTH_MIN   3
+#define ZSTD_TARGETLENGTH_MIN   4
+#define ZSTD_TARGETLENGTH_MAX 999
+
+/* for static allocation */
+#define ZSTD_FRAMEHEADERSIZE_MAX 18
+#define ZSTD_FRAMEHEADERSIZE_MIN  6
+#define ZSTD_frameHeaderSize_prefix 5
+#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
+#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
+/* magic number + skippable frame length */
+#define ZSTD_skippableHeaderSize 8
+
+
+/*-*************************************
+ * Compressed size functions
+ **************************************/
+
+/**
+ * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
+ * @src:     Source buffer. It should point to the start of a zstd encoded frame
+ *           or a skippable frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           size of the frame.
+ *
+ * Return:   The compressed size of the frame pointed to by `src` or an error,
+ *           which can be check with ZSTD_isError().
+ *           Suitable to pass to ZSTD_decompress() or similar functions.
+ */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
+
+/*-*************************************
+ * Decompressed size functions
+ **************************************/
+/**
+ * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
+ * @src:     It should point to the start of a zstd encoded frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
+ *
+ * Return:   The frame content size stored in the frame header if known.
+ *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
+ *           frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input.
+ */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
+/**
+ * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames
+ * @src:     It should point to the start of a series of zstd encoded and/or
+ *           skippable frames.
+ * @srcSize: The exact size of the series of frames.
+ *
+ * If any zstd encoded frame in the series doesn't have the frame content size
+ * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always
+ * set when using ZSTD_compress(). The decompressed size can be very large.
+ * If the source is untrusted, the decompressed size could be wrong or
+ * intentionally modified. Always ensure the result fits within the
+ * application's authorized limits. ZSTD_findDecompressedSize() handles multiple
+ * frames, and so it must traverse the input to read each frame header. This is
+ * efficient as most of the data is skipped, however it does mean that all frame
+ * data must be present and valid.
+ *
+ * Return:   Decompressed size of all the data contained in the frames if known.
+ *           `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown.
+ *           `ZSTD_CONTENTSIZE_ERROR` if an error occurred.
+ */
+unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize);
+
+/*-*************************************
+ * Advanced compression functions
+ **************************************/
+/**
+ * ZSTD_checkCParams() - ensure parameter values remain within authorized range
+ * @cParams: The zstd compression parameters.
+ *
+ * Return:   Zero or an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams);
+
+/**
+ * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize
+ * @srcSize:  Optionally the estimated source size, or zero if unknown.
+ * @dictSize: Optionally the estimated dictionary size, or zero if unknown.
+ *
+ * Return:    The optimized parameters.
+ */
+ZSTD_compressionParameters ZSTD_adjustCParams(
+	ZSTD_compressionParameters cParams, unsigned long long srcSize,
+	size_t dictSize);
+
+/*--- Advanced decompression functions ---*/
+
+/**
+ * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame
+ * @buffer: The source buffer to check.
+ * @size:   The size of the source buffer, must be at least 4 bytes.
+ *
+ * Return: True iff the buffer starts with a zstd or skippable frame identifier.
+ */
+unsigned int ZSTD_isFrame(const void *buffer, size_t size);
+
+/**
+ * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary
+ * @dict:     The dictionary buffer.
+ * @dictSize: The size of the dictionary buffer.
+ *
+ * Return:    The dictionary id stored within the dictionary or 0 if the
+ *            dictionary is not a zstd dictionary. If it returns 0 the
+ *            dictionary can still be loaded as a content-only dictionary.
+ */
+unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize);
+
+/**
+ * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict
+ * @ddict: The ddict to find the id of.
+ *
+ * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not
+ *         a zstd dictionary. If it returns 0 `ddict` will be loaded as a
+ *         content-only dictionary.
+ */
+unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict);
+
+/**
+ * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame
+ * @src:     Source buffer. It must be a zstd encoded frame.
+ * @srcSize: The size of the source buffer. It must be at least as large as the
+ *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
+ *
+ * Return:   The dictionary id required to decompress the frame stored within
+ *           `src` or 0 if the dictionary id could not be decoded. It can return
+ *           0 if the frame does not require a dictionary, the dictionary id
+ *           wasn't stored in the frame, `src` is not a zstd frame, or `srcSize`
+ *           is too small.
+ */
+unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize);
+
+/**
+ * struct ZSTD_frameParams - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or 0 if not present.
+ * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @dictID:           The dictionary id, or 0 if not present.
+ * @checksumFlag:     Whether a checksum was used.
+ */
+typedef struct {
+	unsigned long long frameContentSize;
+	unsigned int windowSize;
+	unsigned int dictID;
+	unsigned int checksumFlag;
+} ZSTD_frameParams;
+
+/**
+ * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame
+ * @fparamsPtr: On success the frame parameters are written here.
+ * @src:        The source buffer. It must point to a zstd or skippable frame.
+ * @srcSize:    The size of the source buffer. `ZSTD_frameHeaderSize_max` is
+ *              always large enough to succeed.
+ *
+ * Return:      0 on success. If more data is required it returns how many bytes
+ *              must be provided to make forward progress. Otherwise it returns
+ *              an error, which can be checked using ZSTD_isError().
+ */
+size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src,
+	size_t srcSize);
+
+/*-*****************************************************************************
+ * Buffer-less and synchronous inner streaming functions
+ *
+ * This is an advanced API, giving full control over buffer management, for
+ * users which need direct control over memory.
+ * But it's also a complex one, with many restrictions (documented below).
+ * Prefer using normal streaming API for an easier experience
+ ******************************************************************************/
+
+/*-*****************************************************************************
+ * Buffer-less streaming compression (synchronous mode)
+ *
+ * A ZSTD_CCtx object is required to track streaming operations.
+ * Use ZSTD_initCCtx() to initialize a context.
+ * ZSTD_CCtx object can be re-used multiple times within successive compression
+ * operations.
+ *
+ * Start by initializing a context.
+ * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary
+ * compression,
+ * or ZSTD_compressBegin_advanced(), for finer parameter control.
+ * It's also possible to duplicate a reference context which has already been
+ * initialized, using ZSTD_copyCCtx()
+ *
+ * Then, consume your input using ZSTD_compressContinue().
+ * There are some important considerations to keep in mind when using this
+ * advanced function :
+ * - ZSTD_compressContinue() has no internal buffer. It uses externally provided
+ *   buffer only.
+ * - Interface is synchronous : input is consumed entirely and produce 1+
+ *   (or more) compressed blocks.
+ * - Caller must ensure there is enough space in `dst` to store compressed data
+ *   under worst case scenario. Worst case evaluation is provided by
+ *   ZSTD_compressBound().
+ *   ZSTD_compressContinue() doesn't guarantee recover after a failed
+ *   compression.
+ * - ZSTD_compressContinue() presumes prior input ***is still accessible and
+ *   unmodified*** (up to maximum distance size, see WindowLog).
+ *   It remembers all previous contiguous blocks, plus one separated memory
+ *   segment (which can itself consists of multiple contiguous blocks)
+ * - ZSTD_compressContinue() detects that prior input has been overwritten when
+ *   `src` buffer overlaps. In which case, it will "discard" the relevant memory
+ *   section from its history.
+ *
+ * Finish a frame with ZSTD_compressEnd(), which will write the last block(s)
+ * and optional checksum. It's possible to use srcSize==0, in which case, it
+ * will write a final empty block to end the frame. Without last block mark,
+ * frames will be considered unfinished (corrupted) by decoders.
+ *
+ * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new
+ * frame.
+ ******************************************************************************/
+
+/*=====   Buffer-less streaming compression functions  =====*/
+size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel);
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict,
+	size_t dictSize, int compressionLevel);
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict,
+	size_t dictSize, ZSTD_parameters params,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict,
+	unsigned long long pledgedSrcSize);
+size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+
+
+
+/*-*****************************************************************************
+ * Buffer-less streaming decompression (synchronous mode)
+ *
+ * A ZSTD_DCtx object is required to track streaming operations.
+ * Use ZSTD_initDCtx() to initialize a context.
+ * A ZSTD_DCtx object can be re-used multiple times.
+ *
+ * First typical operation is to retrieve frame parameters, using
+ * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide
+ * important information to correctly decode the frame, such as the minimum
+ * rolling buffer size to allocate to decompress data (`windowSize`), and the
+ * dictionary ID used.
+ * Note: content size is optional, it may not be present. 0 means unknown.
+ * Note that these values could be wrong, either because of data malformation,
+ * or because an attacker is spoofing deliberate false information. As a
+ * consequence, check that values remain within valid application range,
+ * especially `windowSize`, before allocation. Each application can set its own
+ * limit, depending on local restrictions. For extended interoperability, it is
+ * recommended to support at least 8 MB.
+ * Frame parameters are extracted from the beginning of the compressed frame.
+ * Data fragment must be large enough to ensure successful decoding, typically
+ * `ZSTD_frameHeaderSize_max` bytes.
+ * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled.
+ *        >0: `srcSize` is too small, provide at least this many bytes.
+ *        errorCode, which can be tested using ZSTD_isError().
+ *
+ * Start decompression, with ZSTD_decompressBegin() or
+ * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared
+ * context, using ZSTD_copyDCtx().
+ *
+ * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue()
+ * alternatively.
+ * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize'
+ * to ZSTD_decompressContinue().
+ * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will
+ * fail.
+ *
+ * The result of ZSTD_decompressContinue() is the number of bytes regenerated
+ * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an
+ * error; it just means ZSTD_decompressContinue() has decoded some metadata
+ * item. It can also be an error code, which can be tested with ZSTD_isError().
+ *
+ * ZSTD_decompressContinue() needs previous data blocks during decompression, up
+ * to `windowSize`. They should preferably be located contiguously, prior to
+ * current block. Alternatively, a round buffer of sufficient size is also
+ * possible. Sufficient size is determined by frame parameters.
+ * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't
+ * follow each other, make sure that either the compressor breaks contiguity at
+ * the same place, or that previous contiguous segment is large enough to
+ * properly handle maximum back-reference.
+ *
+ * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+ * Context can then be reset to start a new decompression.
+ *
+ * Note: it's possible to know if next input to present is a header or a block,
+ * using ZSTD_nextInputType(). This information is not required to properly
+ * decode a frame.
+ *
+ * == Special case: skippable frames ==
+ *
+ * Skippable frames allow integration of user-defined data into a flow of
+ * concatenated frames. Skippable frames will be ignored (skipped) by a
+ * decompressor. The format of skippable frames is as follows:
+ * a) Skippable frame ID - 4 Bytes, Little endian format, any value from
+ *    0x184D2A50 to 0x184D2A5F
+ * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+ * c) Frame Content - any content (User Data) of length equal to Frame Size
+ * For skippable frames ZSTD_decompressContinue() always returns 0.
+ * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0
+ * what means that a frame is skippable.
+ * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might
+ *       actually be a zstd encoded frame with no content. For purposes of
+ *       decompression, it is valid in both cases to skip the frame using
+ *       ZSTD_findFrameCompressedSize() to find its size in bytes.
+ * It also returns frame size as fparamsPtr->frameContentSize.
+ ******************************************************************************/
+
+/*=====   Buffer-less streaming decompression functions  =====*/
+size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx);
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
+	size_t dictSize);
+void   ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx);
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx);
+size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+typedef enum {
+	ZSTDnit_frameHeader,
+	ZSTDnit_blockHeader,
+	ZSTDnit_block,
+	ZSTDnit_lastBlock,
+	ZSTDnit_checksum,
+	ZSTDnit_skippableFrame
+} ZSTD_nextInputType_e;
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx);
+
+/*-*****************************************************************************
+ * Block functions
+ *
+ * Block functions produce and decode raw zstd blocks, without frame metadata.
+ * Frame metadata cost is typically ~18 bytes, which can be non-negligible for
+ * very small blocks (< 100 bytes). User will have to take in charge required
+ * information to regenerate data, such as compressed and content sizes.
+ *
+ * A few rules to respect:
+ * - Compressing and decompressing require a context structure
+ *   + Use ZSTD_initCCtx() and ZSTD_initDCtx()
+ * - It is necessary to init context before starting
+ *   + compression : ZSTD_compressBegin()
+ *   + decompression : ZSTD_decompressBegin()
+ *   + variants _usingDict() are also allowed
+ *   + copyCCtx() and copyDCtx() work too
+ * - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
+ *   + If you need to compress more, cut data into multiple blocks
+ *   + Consider using the regular ZSTD_compress() instead, as frame metadata
+ *     costs become negligible when source size is large.
+ * - When a block is considered not compressible enough, ZSTD_compressBlock()
+ *   result will be zero. In which case, nothing is produced into `dst`.
+ *   + User must test for such outcome and deal directly with uncompressed data
+ *   + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!!
+ *   + In case of multiple successive blocks, decoder must be informed of
+ *     uncompressed block existence to follow proper history. Use
+ *     ZSTD_insertBlock() in such a case.
+ ******************************************************************************/
+
+/* Define for static allocation */
+#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)
+/*=====   Raw zstd block functions  =====*/
+size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx);
+size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
+	const void *src, size_t srcSize);
+size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart,
+	size_t blockSize);
+
+#endif  /* ZSTD_H */
diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
index 6b629ab31c1e..c076d6f05064 100644
--- a/lib/decompress_unzstd.c
+++ b/lib/decompress_unzstd.c
@@ -91,11 +91,15 @@
 
 static int INIT handle_zstd_error(size_t ret, void (*error)(char *x))
 {
-	const int err = ZSTD_getErrorCode(ret);
+	const zstd_error_code err = zstd_get_error_code(ret);
 
-	if (!ZSTD_isError(ret))
+	if (!zstd_is_error(ret))
 		return 0;
 
+	/*
+	 * zstd_get_error_name() cannot be used because error takes a char *
+	 * not a const char *
+	 */
 	switch (err) {
 	case ZSTD_error_memory_allocation:
 		error("ZSTD decompressor ran out of memory");
@@ -124,28 +128,28 @@ static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf,
 				  long out_len, long *in_pos,
 				  void (*error)(char *x))
 {
-	const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+	const size_t wksp_size = zstd_dctx_workspace_bound();
 	void *wksp = large_malloc(wksp_size);
-	ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size);
+	zstd_dctx *dctx = zstd_init_dctx(wksp, wksp_size);
 	int err;
 	size_t ret;
 
 	if (dctx == NULL) {
-		error("Out of memory while allocating ZSTD_DCtx");
+		error("Out of memory while allocating zstd_dctx");
 		err = -1;
 		goto out;
 	}
 	/*
 	 * Find out how large the frame actually is, there may be junk at
-	 * the end of the frame that ZSTD_decompressDCtx() can't handle.
+	 * the end of the frame that zstd_decompress_dctx() can't handle.
 	 */
-	ret = ZSTD_findFrameCompressedSize(in_buf, in_len);
+	ret = zstd_find_frame_compressed_size(in_buf, in_len);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
 	in_len = (long)ret;
 
-	ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len);
+	ret = zstd_decompress_dctx(dctx, out_buf, out_len, in_buf, in_len);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
@@ -167,14 +171,14 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 			 long *in_pos,
 			 void (*error)(char *x))
 {
-	ZSTD_inBuffer in;
-	ZSTD_outBuffer out;
-	ZSTD_frameParams params;
+	zstd_in_buffer in;
+	zstd_out_buffer out;
+	zstd_frame_header header;
 	void *in_allocated = NULL;
 	void *out_allocated = NULL;
 	void *wksp = NULL;
 	size_t wksp_size;
-	ZSTD_DStream *dstream;
+	zstd_dstream *dstream;
 	int err;
 	size_t ret;
 
@@ -238,13 +242,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 	out.size = out_len;
 
 	/*
-	 * We need to know the window size to allocate the ZSTD_DStream.
+	 * We need to know the window size to allocate the zstd_dstream.
 	 * Since we are streaming, we need to allocate a buffer for the sliding
 	 * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX
 	 * (8 MB), so it is important to use the actual value so as not to
 	 * waste memory when it is smaller.
 	 */
-	ret = ZSTD_getFrameParams(&params, in.src, in.size);
+	ret = zstd_get_frame_header(&header, in.src, in.size);
 	err = handle_zstd_error(ret, error);
 	if (err)
 		goto out;
@@ -253,19 +257,19 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 		err = -1;
 		goto out;
 	}
-	if (params.windowSize > ZSTD_WINDOWSIZE_MAX) {
+	if (header.windowSize > ZSTD_WINDOWSIZE_MAX) {
 		error("ZSTD-compressed data has too large a window size");
 		err = -1;
 		goto out;
 	}
 
 	/*
-	 * Allocate the ZSTD_DStream now that we know how much memory is
+	 * Allocate the zstd_dstream now that we know how much memory is
 	 * required.
 	 */
-	wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize);
+	wksp_size = zstd_dstream_workspace_bound(header.windowSize);
 	wksp = large_malloc(wksp_size);
-	dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size);
+	dstream = zstd_init_dstream(header.windowSize, wksp, wksp_size);
 	if (dstream == NULL) {
 		error("Out of memory while allocating ZSTD_DStream");
 		err = -1;
@@ -298,7 +302,7 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 			in.size = in_len;
 		}
 		/* Returns zero when the frame is complete. */
-		ret = ZSTD_decompressStream(dstream, &out, &in);
+		ret = zstd_decompress_stream(dstream, &out, &in);
 		err = handle_zstd_error(ret, error);
 		if (err)
 			goto out;
diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c
index b080264ed3ad..57aaa64306a0 100644
--- a/lib/zstd/compress.c
+++ b/lib/zstd/compress.c
@@ -3443,43 +3443,92 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize,
 	return params;
 }
 
-EXPORT_SYMBOL(ZSTD_maxCLevel);
-EXPORT_SYMBOL(ZSTD_compressBound);
-
-EXPORT_SYMBOL(ZSTD_CCtxWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCCtx);
-EXPORT_SYMBOL(ZSTD_compressCCtx);
-EXPORT_SYMBOL(ZSTD_compress_usingDict);
-
-EXPORT_SYMBOL(ZSTD_CDictWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCDict);
-EXPORT_SYMBOL(ZSTD_compress_usingCDict);
-
-EXPORT_SYMBOL(ZSTD_CStreamWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCStream);
-EXPORT_SYMBOL(ZSTD_initCStream_usingCDict);
-EXPORT_SYMBOL(ZSTD_resetCStream);
-EXPORT_SYMBOL(ZSTD_compressStream);
-EXPORT_SYMBOL(ZSTD_flushStream);
-EXPORT_SYMBOL(ZSTD_endStream);
-EXPORT_SYMBOL(ZSTD_CStreamInSize);
-EXPORT_SYMBOL(ZSTD_CStreamOutSize);
-
-EXPORT_SYMBOL(ZSTD_getCParams);
-EXPORT_SYMBOL(ZSTD_getParams);
-EXPORT_SYMBOL(ZSTD_checkCParams);
-EXPORT_SYMBOL(ZSTD_adjustCParams);
-
-EXPORT_SYMBOL(ZSTD_compressBegin);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingDict);
-EXPORT_SYMBOL(ZSTD_compressBegin_advanced);
-EXPORT_SYMBOL(ZSTD_copyCCtx);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingCDict);
-EXPORT_SYMBOL(ZSTD_compressContinue);
-EXPORT_SYMBOL(ZSTD_compressEnd);
-
-EXPORT_SYMBOL(ZSTD_getBlockSizeMax);
-EXPORT_SYMBOL(ZSTD_compressBlock);
+size_t zstd_compress_bound(size_t src_size)
+{
+	return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+int zstd_min_clevel(void)
+{
+	/*
+	 * zstd-1.3.1 doesn't implement ZSTD_minCLevel().
+	 * Return 0 (default level).
+	 */
+	return 0;
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+	return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+zstd_parameters zstd_get_params(int level,
+	unsigned long long estimated_src_size)
+{
+	return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+	return ZSTD_CCtxWorkspaceBound(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+	return ZSTD_initCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+	return ZSTD_compressCCtx(cctx, dst, dst_capacity, src, src_size, *parameters);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+	return ZSTD_CStreamWorkspaceBound(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+	unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+	return ZSTD_initCStream(*parameters, pledged_src_size, workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+	unsigned long long pledged_src_size)
+{
+	return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
+{
+	return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+	return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+	return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
index 66cd487a326a..02e92c2cbf4f 100644
--- a/lib/zstd/decompress.c
+++ b/lib/zstd/decompress.c
@@ -2490,42 +2490,82 @@ size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inB
 	}
 }
 
-EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDCtx);
-EXPORT_SYMBOL(ZSTD_decompressDCtx);
-EXPORT_SYMBOL(ZSTD_decompress_usingDict);
-
-EXPORT_SYMBOL(ZSTD_DDictWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDDict);
-EXPORT_SYMBOL(ZSTD_decompress_usingDDict);
-
-EXPORT_SYMBOL(ZSTD_DStreamWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDStream);
-EXPORT_SYMBOL(ZSTD_initDStream_usingDDict);
-EXPORT_SYMBOL(ZSTD_resetDStream);
-EXPORT_SYMBOL(ZSTD_decompressStream);
-EXPORT_SYMBOL(ZSTD_DStreamInSize);
-EXPORT_SYMBOL(ZSTD_DStreamOutSize);
-
-EXPORT_SYMBOL(ZSTD_findFrameCompressedSize);
-EXPORT_SYMBOL(ZSTD_getFrameContentSize);
-EXPORT_SYMBOL(ZSTD_findDecompressedSize);
-
-EXPORT_SYMBOL(ZSTD_isFrame);
-EXPORT_SYMBOL(ZSTD_getDictID_fromDict);
-EXPORT_SYMBOL(ZSTD_getDictID_fromDDict);
-EXPORT_SYMBOL(ZSTD_getDictID_fromFrame);
-
-EXPORT_SYMBOL(ZSTD_getFrameParams);
-EXPORT_SYMBOL(ZSTD_decompressBegin);
-EXPORT_SYMBOL(ZSTD_decompressBegin_usingDict);
-EXPORT_SYMBOL(ZSTD_copyDCtx);
-EXPORT_SYMBOL(ZSTD_nextSrcSizeToDecompress);
-EXPORT_SYMBOL(ZSTD_decompressContinue);
-EXPORT_SYMBOL(ZSTD_nextInputType);
-
-EXPORT_SYMBOL(ZSTD_decompressBlock);
-EXPORT_SYMBOL(ZSTD_insertBlock);
+unsigned int zstd_is_error(size_t code)
+{
+	return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+	return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+	/* Real implementation in zstd-1.4.6. */
+	return "GENERIC";
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+size_t zstd_dctx_workspace_bound(void)
+{
+	return ZSTD_DCtxWorkspaceBound();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+	return ZSTD_initDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size)
+{
+	return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+	return ZSTD_DStreamWorkspaceBound(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+	size_t workspace_size)
+{
+	return ZSTD_initDStream(max_window_size, workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+	return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
+{
+	return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+	return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+	size_t src_size)
+{
+	return ZSTD_getFrameParams(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("Zstd Decompressor");
-- 
cgit v1.2.3


From e0c1b49f5b674cca7b10549c53b3791d0bbc90a8 Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Fri, 11 Sep 2020 16:37:08 -0700
Subject: lib: zstd: Upgrade to latest upstream zstd version 1.4.10

Upgrade to the latest upstream zstd version 1.4.10.

This patch is 100% generated from upstream zstd commit 20821a46f412 [0].

This patch is very large because it is transitioning from the custom
kernel zstd to using upstream directly. The new zstd follows upstreams
file structure which is different. Future update patches will be much
smaller because they will only contain the changes from one upstream
zstd release.

As an aid for review I've created a commit [1] that shows the diff
between upstream zstd as-is (which doesn't compile), and the zstd
code imported in this patch. The verion of zstd in this patch is
generated from upstream with changes applied by automation to replace
upstreams libc dependencies, remove unnecessary portability macros,
replace `/**` comments with `/*` comments, and use the kernel's xxhash
instead of bundling it.

The benefits of this patch are as follows:
1. Using upstream directly with automated script to generate kernel
   code. This allows us to update the kernel every upstream release, so
   the kernel gets the latest bug fixes and performance improvements,
   and doesn't get 3 years out of date again. The automation and the
   translated code are tested every upstream commit to ensure it
   continues to work.
2. Upgrades from a custom zstd based on 1.3.1 to 1.4.10, getting 3 years
   of performance improvements and bug fixes. On x86_64 I've measured
   15% faster BtrFS and SquashFS decompression+read speeds, 35% faster
   kernel decompression, and 30% faster ZRAM decompression+read speeds.
3. Zstd-1.4.10 supports negative compression levels, which allow zstd to
   match or subsume lzo's performance.
4. Maintains the same kernel-specific wrapper API, so no callers have to
   be modified with zstd version updates.

One concern that was brought up was stack usage. Upstream zstd had
already removed most of its heavy stack usage functions, but I just
removed the last functions that allocate arrays on the stack. I've
measured the high water mark for both compression and decompression
before and after this patch. Decompression is approximately neutral,
using about 1.2KB of stack space. Compression levels up to 3 regressed
from 1.4KB -> 1.6KB, and higher compression levels regressed from 1.5KB
-> 2KB. We've added unit tests upstream to prevent further regression.
I believe that this is a reasonable increase, and if it does end up
causing problems, this commit can be cleanly reverted, because it only
touches zstd.

I chose the bulk update instead of replaying upstream commits because
there have been ~3500 upstream commits since the 1.3.1 release, zstd
wasn't ready to be used in the kernel as-is before a month ago, and not
all upstream zstd commits build. The bulk update preserves bisectablity
because bugs can be bisected to the zstd version update. At that point
the update can be reverted, and we can work with upstream to find and
fix the bug.

Note that upstream zstd release 1.4.10 doesn't exist yet. I have cut a
staging branch at 20821a46f412 [0] and will apply any changes requested
to the staging branch. Once we're ready to merge this update I will cut
a zstd release at the commit we merge, so we have a known zstd release
in the kernel.

The implementation of the kernel API is contained in
zstd_compress_module.c and zstd_decompress_module.c.

[0] https://github.com/facebook/zstd/commit/20821a46f4122f9abd7c7b245d28162dde8129c9
[1] https://github.com/terrelln/linux/commit/e0fa481d0e3df26918da0a13749740a1f6777574

Signed-off-by: Nick Terrell <terrelln@fb.com>
Tested By: Paul Jones <paul@pauljones.id.au>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # LLVM/Clang v13.0.0 on x86-64
Tested-by: Jean-Denis Girard <jd.girard@sysnux.pf>
---
 include/linux/zstd.h                           |   13 +-
 include/linux/zstd_errors.h                    |   77 +
 include/linux/zstd_lib.h                       | 3367 +++++++++++-----
 lib/zstd/Makefile                              |   46 +-
 lib/zstd/bitstream.h                           |  380 --
 lib/zstd/common/bitstream.h                    |  437 ++
 lib/zstd/common/compiler.h                     |  170 +
 lib/zstd/common/cpu.h                          |  194 +
 lib/zstd/common/debug.c                        |   24 +
 lib/zstd/common/debug.h                        |  101 +
 lib/zstd/common/entropy_common.c               |  357 ++
 lib/zstd/common/error_private.c                |   56 +
 lib/zstd/common/error_private.h                |   66 +
 lib/zstd/common/fse.h                          |  710 ++++
 lib/zstd/common/fse_decompress.c               |  390 ++
 lib/zstd/common/huf.h                          |  356 ++
 lib/zstd/common/mem.h                          |  259 ++
 lib/zstd/common/zstd_common.c                  |   83 +
 lib/zstd/common/zstd_deps.h                    |  125 +
 lib/zstd/common/zstd_internal.h                |  450 +++
 lib/zstd/compress.c                            | 3534 ----------------
 lib/zstd/compress/fse_compress.c               |  625 +++
 lib/zstd/compress/hist.c                       |  165 +
 lib/zstd/compress/hist.h                       |   75 +
 lib/zstd/compress/huf_compress.c               |  905 +++++
 lib/zstd/compress/zstd_compress.c              | 5109 ++++++++++++++++++++++++
 lib/zstd/compress/zstd_compress_internal.h     | 1188 ++++++
 lib/zstd/compress/zstd_compress_literals.c     |  158 +
 lib/zstd/compress/zstd_compress_literals.h     |   29 +
 lib/zstd/compress/zstd_compress_sequences.c    |  439 ++
 lib/zstd/compress/zstd_compress_sequences.h    |   54 +
 lib/zstd/compress/zstd_compress_superblock.c   |  850 ++++
 lib/zstd/compress/zstd_compress_superblock.h   |   32 +
 lib/zstd/compress/zstd_cwksp.h                 |  482 +++
 lib/zstd/compress/zstd_double_fast.c           |  519 +++
 lib/zstd/compress/zstd_double_fast.h           |   32 +
 lib/zstd/compress/zstd_fast.c                  |  496 +++
 lib/zstd/compress/zstd_fast.h                  |   31 +
 lib/zstd/compress/zstd_lazy.c                  | 1414 +++++++
 lib/zstd/compress/zstd_lazy.h                  |   81 +
 lib/zstd/compress/zstd_ldm.c                   |  686 ++++
 lib/zstd/compress/zstd_ldm.h                   |  110 +
 lib/zstd/compress/zstd_ldm_geartab.h           |  103 +
 lib/zstd/compress/zstd_opt.c                   | 1346 +++++++
 lib/zstd/compress/zstd_opt.h                   |   50 +
 lib/zstd/decompress.c                          | 2571 ------------
 lib/zstd/decompress/huf_decompress.c           | 1206 ++++++
 lib/zstd/decompress/zstd_ddict.c               |  241 ++
 lib/zstd/decompress/zstd_ddict.h               |   44 +
 lib/zstd/decompress/zstd_decompress.c          | 2085 ++++++++++
 lib/zstd/decompress/zstd_decompress_block.c    | 1540 +++++++
 lib/zstd/decompress/zstd_decompress_block.h    |   62 +
 lib/zstd/decompress/zstd_decompress_internal.h |  202 +
 lib/zstd/decompress_sources.h                  |   15 +-
 lib/zstd/entropy_common.c                      |  243 --
 lib/zstd/error_private.h                       |   53 -
 lib/zstd/fse.h                                 |  575 ---
 lib/zstd/fse_compress.c                        |  795 ----
 lib/zstd/fse_decompress.c                      |  325 --
 lib/zstd/huf.h                                 |  212 -
 lib/zstd/huf_compress.c                        |  773 ----
 lib/zstd/huf_decompress.c                      |  960 -----
 lib/zstd/mem.h                                 |  151 -
 lib/zstd/zstd_common.c                         |   75 -
 lib/zstd/zstd_compress_module.c                |  160 +
 lib/zstd/zstd_decompress_module.c              |  105 +
 lib/zstd/zstd_internal.h                       |  273 --
 lib/zstd/zstd_opt.h                            | 1014 -----
 68 files changed, 26861 insertions(+), 12993 deletions(-)
 create mode 100644 include/linux/zstd_errors.h
 delete mode 100644 lib/zstd/bitstream.h
 create mode 100644 lib/zstd/common/bitstream.h
 create mode 100644 lib/zstd/common/compiler.h
 create mode 100644 lib/zstd/common/cpu.h
 create mode 100644 lib/zstd/common/debug.c
 create mode 100644 lib/zstd/common/debug.h
 create mode 100644 lib/zstd/common/entropy_common.c
 create mode 100644 lib/zstd/common/error_private.c
 create mode 100644 lib/zstd/common/error_private.h
 create mode 100644 lib/zstd/common/fse.h
 create mode 100644 lib/zstd/common/fse_decompress.c
 create mode 100644 lib/zstd/common/huf.h
 create mode 100644 lib/zstd/common/mem.h
 create mode 100644 lib/zstd/common/zstd_common.c
 create mode 100644 lib/zstd/common/zstd_deps.h
 create mode 100644 lib/zstd/common/zstd_internal.h
 delete mode 100644 lib/zstd/compress.c
 create mode 100644 lib/zstd/compress/fse_compress.c
 create mode 100644 lib/zstd/compress/hist.c
 create mode 100644 lib/zstd/compress/hist.h
 create mode 100644 lib/zstd/compress/huf_compress.c
 create mode 100644 lib/zstd/compress/zstd_compress.c
 create mode 100644 lib/zstd/compress/zstd_compress_internal.h
 create mode 100644 lib/zstd/compress/zstd_compress_literals.c
 create mode 100644 lib/zstd/compress/zstd_compress_literals.h
 create mode 100644 lib/zstd/compress/zstd_compress_sequences.c
 create mode 100644 lib/zstd/compress/zstd_compress_sequences.h
 create mode 100644 lib/zstd/compress/zstd_compress_superblock.c
 create mode 100644 lib/zstd/compress/zstd_compress_superblock.h
 create mode 100644 lib/zstd/compress/zstd_cwksp.h
 create mode 100644 lib/zstd/compress/zstd_double_fast.c
 create mode 100644 lib/zstd/compress/zstd_double_fast.h
 create mode 100644 lib/zstd/compress/zstd_fast.c
 create mode 100644 lib/zstd/compress/zstd_fast.h
 create mode 100644 lib/zstd/compress/zstd_lazy.c
 create mode 100644 lib/zstd/compress/zstd_lazy.h
 create mode 100644 lib/zstd/compress/zstd_ldm.c
 create mode 100644 lib/zstd/compress/zstd_ldm.h
 create mode 100644 lib/zstd/compress/zstd_ldm_geartab.h
 create mode 100644 lib/zstd/compress/zstd_opt.c
 create mode 100644 lib/zstd/compress/zstd_opt.h
 delete mode 100644 lib/zstd/decompress.c
 create mode 100644 lib/zstd/decompress/huf_decompress.c
 create mode 100644 lib/zstd/decompress/zstd_ddict.c
 create mode 100644 lib/zstd/decompress/zstd_ddict.h
 create mode 100644 lib/zstd/decompress/zstd_decompress.c
 create mode 100644 lib/zstd/decompress/zstd_decompress_block.c
 create mode 100644 lib/zstd/decompress/zstd_decompress_block.h
 create mode 100644 lib/zstd/decompress/zstd_decompress_internal.h
 delete mode 100644 lib/zstd/entropy_common.c
 delete mode 100644 lib/zstd/error_private.h
 delete mode 100644 lib/zstd/fse.h
 delete mode 100644 lib/zstd/fse_compress.c
 delete mode 100644 lib/zstd/fse_decompress.c
 delete mode 100644 lib/zstd/huf.h
 delete mode 100644 lib/zstd/huf_compress.c
 delete mode 100644 lib/zstd/huf_decompress.c
 delete mode 100644 lib/zstd/mem.h
 delete mode 100644 lib/zstd/zstd_common.c
 create mode 100644 lib/zstd/zstd_compress_module.c
 create mode 100644 lib/zstd/zstd_decompress_module.c
 delete mode 100644 lib/zstd/zstd_internal.h
 delete mode 100644 lib/zstd/zstd_opt.h

(limited to 'include/linux')

diff --git a/include/linux/zstd.h b/include/linux/zstd.h
index 9fbc7729b0a0..113408eef6ec 100644
--- a/include/linux/zstd.h
+++ b/include/linux/zstd.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
  * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
@@ -22,6 +22,7 @@
 
 /* ======   Dependency   ====== */
 #include <linux/types.h>
+#include <linux/zstd_errors.h>
 #include <linux/zstd_lib.h>
 
 /* ======   Helper Functions   ====== */
@@ -417,12 +418,18 @@ size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
 
 /**
  * struct zstd_frame_params - zstd frame parameters stored in the frame header
- * @frameContentSize: The frame content size, or 0 if not present.
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ *                    present.
  * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax:     The maximum block size.
+ * @frameType:        The frame type (zstd or skippable)
+ * @headerSize:       The size of the frame header.
  * @dictID:           The dictionary id, or 0 if not present.
  * @checksumFlag:     Whether a checksum was used.
+ *
+ * See zstd_lib.h.
  */
-typedef ZSTD_frameParams zstd_frame_header;
+typedef ZSTD_frameHeader zstd_frame_header;
 
 /**
  * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h
new file mode 100644
index 000000000000..58b6dd45a969
--- /dev/null
+++ b/include/linux/zstd_errors.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+
+/*===== dependency =====*/
+#include <linux/types.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#define ZSTDERRORLIB_VISIBILITY 
+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+
+/*-*********************************************
+ *  Error codes list
+ *-*********************************************
+ *  Error codes _values_ are pinned down since v1.3.1 only.
+ *  Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ *  Only values < 100 are considered stable.
+ *
+ *  note 1 : this API shall be used with static linking only.
+ *           dynamic linking is not yet officially supported.
+ *  note 2 : Prefer relying on the enum than on its value whenever possible
+ *           This is the only supported way to use the error list < v1.3.1
+ *  note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+typedef enum {
+  ZSTD_error_no_error = 0,
+  ZSTD_error_GENERIC  = 1,
+  ZSTD_error_prefix_unknown                = 10,
+  ZSTD_error_version_unsupported           = 12,
+  ZSTD_error_frameParameter_unsupported    = 14,
+  ZSTD_error_frameParameter_windowTooLarge = 16,
+  ZSTD_error_corruption_detected = 20,
+  ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_dictionary_corrupted      = 30,
+  ZSTD_error_dictionary_wrong          = 32,
+  ZSTD_error_dictionaryCreation_failed = 34,
+  ZSTD_error_parameter_unsupported   = 40,
+  ZSTD_error_parameter_outOfBound    = 42,
+  ZSTD_error_tableLog_tooLarge       = 44,
+  ZSTD_error_maxSymbolValue_tooLarge = 46,
+  ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stage_wrong       = 60,
+  ZSTD_error_init_missing      = 62,
+  ZSTD_error_memory_allocation = 64,
+  ZSTD_error_workSpace_tooSmall= 66,
+  ZSTD_error_dstSize_tooSmall = 70,
+  ZSTD_error_srcSize_wrong    = 72,
+  ZSTD_error_dstBuffer_null   = 74,
+  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+  ZSTD_error_frameIndex_tooLarge = 100,
+  ZSTD_error_seekableIO          = 102,
+  ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_srcBuffer_wrong     = 105,
+  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+
+#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
index 13151c34f725..b8c7dbf98390 100644
--- a/include/linux/zstd_lib.h
+++ b/include/linux/zstd_lib.h
@@ -2,1156 +2,2431 @@
  * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
  */
 
-#ifndef ZSTD_H
-#define ZSTD_H
+#ifndef ZSTD_H_235446
+#define ZSTD_H_235446
 
 /* ======   Dependency   ======*/
+#include <linux/limits.h>   /* INT_MAX */
 #include <linux/types.h>   /* size_t */
 
 
-/*-*****************************************************************************
- * Introduction
- *
- * zstd, short for Zstandard, is a fast lossless compression algorithm,
- * targeting real-time compression scenarios at zlib-level and better
- * compression ratios. The zstd compression library provides in-memory
- * compression and decompression functions. The library supports compression
- * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled
- * ultra, should be used with caution, as they require more memory.
- * Compression can be done in:
- *  - a single step, reusing a context (described as Explicit memory management)
- *  - unbounded multiple steps (described as Streaming compression)
- * The compression ratio achievable on small data can be highly improved using
- * compression with a dictionary in:
- *  - a single step (described as Simple dictionary API)
- *  - a single step, reusing a dictionary (described as Fast dictionary API)
- ******************************************************************************/
+/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+#define ZSTDLIB_VISIBILITY 
+#define ZSTDLIB_API ZSTDLIB_VISIBILITY
+
+
+/* *****************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
+  real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression
+  functions.
+
+  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
+  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
+  caution, as they require more memory. The library also offers negative
+  compression levels, which extend the range of speed vs. ratio preferences.
+  The lower the level, the faster the speed (at the cost of compression).
+
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit context)
+    - unbounded multiple steps (described as Streaming compression)
+
+  The compression ratio achievable on small data can be highly improved using
+  a dictionary. Dictionary compression can be performed in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Bulk-processing
+      dictionary API)
+
+  Advanced experimental functions can be accessed using
+  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
+
+  Advanced experimental APIs should never be used with a dynamically-linked
+  library. They are not "stable"; their definitions or signatures may change in
+  the future. Only static linking is allowed.
+*******************************************************************************/
+
+/*------   Version   ------*/
+#define ZSTD_VERSION_MAJOR    1
+#define ZSTD_VERSION_MINOR    4
+#define ZSTD_VERSION_RELEASE  10
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+/*! ZSTD_versionNumber() :
+ *  Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);
+
+#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
+#define ZSTD_QUOTE(str) #str
+#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
+#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
+
+/*! ZSTD_versionString() :
+ *  Return runtime library version, like "1.4.5". Requires v1.3.0+. */
+ZSTDLIB_API const char* ZSTD_versionString(void);
+
+/* *************************************
+ *  Default constant
+ ***************************************/
+#ifndef ZSTD_CLEVEL_DEFAULT
+#  define ZSTD_CLEVEL_DEFAULT 3
+#endif
+
+/* *************************************
+ *  Constants
+ ***************************************/
+
+/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
+#define ZSTD_MAGICNUMBER            0xFD2FB528    /* valid since v0.8.0 */
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437    /* valid since v0.7.0 */
+#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50    /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
+#define ZSTD_MAGIC_SKIPPABLE_MASK   0xFFFFFFF0
+
+#define ZSTD_BLOCKSIZELOG_MAX  17
+#define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
+
+
+
+/* *************************************
+*  Simple API
+***************************************/
+/*! ZSTD_compress() :
+ *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ *  @return : compressed size written into `dst` (<= `dstCapacity),
+ *            or an error code if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);
+
+/*! ZSTD_decompress() :
+ *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ *  `dstCapacity` is an upper bound of originalSize to regenerate.
+ *  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
+
+/*! ZSTD_getFrameContentSize() : requires v1.3.0+
+ *  `src` should point to the start of a ZSTD encoded frame.
+ *  `srcSize` must be at least as large as the frame header.
+ *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ *  @return : - decompressed size of `src` frame content, if known
+ *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ *   note 1 : a 0 return value means the frame is valid but "empty".
+ *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *            Optionally, application can rely on some implicit limit,
+ *            as ZSTD_decompress() only needs an upper bound of decompressed size.
+ *            (For example, data could be necessarily cut into blocks <= 16 KB).
+ *   note 3 : decompressed size is always present when compression is completed using single-pass functions,
+ *            such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
+ *   note 4 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure return value fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 6 : This function replaces ZSTD_getDecompressedSize() */
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
+/*! ZSTD_getDecompressedSize() :
+ *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ *  Both functions work the same way, but ZSTD_getDecompressedSize() blends
+ *  "empty", "unknown" and "error" results to the same return value (0),
+ *  while ZSTD_getFrameContentSize() gives them separate return values.
+ * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
+ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_findFrameCompressedSize() :
+ * `src` should point to the start of a ZSTD frame or skippable frame.
+ * `srcSize` must be >= first frame size
+ * @return : the compressed size of the first frame starting at `src`,
+ *           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
+ *        or an error code if input is invalid */
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+
 
 /*======  Helper functions  ======*/
+#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
+ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+
+
+/* *************************************
+*  Explicit context
+***************************************/
+/*= Compression context
+ *  When compressing many times,
+ *  it is recommended to allocate a context just once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Note : re-using context is just a speed / resource optimization.
+ *         It doesn't change the compression ratio, which remains identical.
+ *  Note 2 : In multi-threaded environments,
+ *         use one different context per thread for parallel execution.
+ */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer */
+
+/*! ZSTD_compressCCtx() :
+ *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ *  Important : in order to behave similarly to `ZSTD_compress()`,
+ *  this function compresses at requested compression level,
+ *  __ignoring any other parameter__ .
+ *  If any advanced parameter was set using the advanced API,
+ *  they will all be reset. Only `compressionLevel` remains.
+ */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     int compressionLevel);
+
+/*= Decompression context
+ *  When decompressing many times,
+ *  it is recommended to allocate a context only once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution. */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer */
+
+/*! ZSTD_decompressDCtx() :
+ *  Same as ZSTD_decompress(),
+ *  requires an allocated ZSTD_DCtx.
+ *  Compatible with sticky parameters.
+ */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize);
 
-/**
- * enum ZSTD_ErrorCode - zstd error codes
+
+/* *************************************
+*  Advanced compression API
+***************************************/
+
+/* API design :
+ *   Parameters are pushed one by one into an existing context,
+ *   using ZSTD_CCtx_set*() functions.
+ *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
+ *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
+ *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ *
+ *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
  *
- * Functions that return size_t can be checked for errors using ZSTD_isError()
- * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode().
+ *   This API supercedes all other "advanced" API entry points in the experimental section.
+ *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
+ */
+
+
+/* Compression strategies, listed from fastest to strongest */
+typedef enum { ZSTD_fast=1,
+               ZSTD_dfast=2,
+               ZSTD_greedy=3,
+               ZSTD_lazy=4,
+               ZSTD_lazy2=5,
+               ZSTD_btlazy2=6,
+               ZSTD_btopt=7,
+               ZSTD_btultra=8,
+               ZSTD_btultra2=9
+               /* note : new strategies _might_ be added in the future.
+                         Only the order (from fast to strong) is guaranteed */
+} ZSTD_strategy;
+
+
+typedef enum {
+
+    /* compression parameters
+     * Note: When compressing with a ZSTD_CDict these parameters are superseded
+     * by the parameters used to construct the ZSTD_CDict.
+     * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+    ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
+                              * Note that exact compression parameters are dynamically determined,
+                              * depending on both compression level and srcSize (when known).
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
+                              * Note 1 : it's possible to pass a negative compression level.
+                              * Note 2 : setting a level does not automatically set all other compression parameters
+                              *   to default. Setting this will however eventually dynamically impact the compression
+                              *   parameters which have not been manually set. The manually set
+                              *   ones will 'stick'. */
+    /* Advanced compression parameters :
+     * It's possible to pin down compression parameters to some specific values.
+     * In which case, these values are no longer dynamically selected by the compressor */
+    ZSTD_c_windowLog=101,    /* Maximum allowed back-reference distance, expressed as power of 2.
+                              * This will set a memory budget for streaming decompression,
+                              * with larger values requiring more memory
+                              * and typically compressing more.
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "use default windowLog".
+                              * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
+                              *       requires explicitly allowing such size at streaming decompression stage. */
+    ZSTD_c_hashLog=102,      /* Size of the initial probe table, as a power of 2.
+                              * Resulting memory usage is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "use default hashLog". */
+    ZSTD_c_chainLog=103,     /* Size of the multi-probe search table, as a power of 2.
+                              * Resulting memory usage is (1 << (chainLog+2)).
+                              * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless for "fast" strategy.
+                              * It's still useful when using "dfast" strategy,
+                              * in which case it defines a secondary probe table.
+                              * Special: value 0 means "use default chainLog". */
+    ZSTD_c_searchLog=104,    /* Number of search attempts, as a power of 2.
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless for "fast" and "dFast" strategies.
+                              * Special: value 0 means "use default searchLog". */
+    ZSTD_c_minMatch=105,     /* Minimum size of searched matches.
+                              * Note that Zstandard can still find matches of smaller size,
+                              * it just tweaks its search algorithm to look for this size and larger.
+                              * Larger values increase compression and decompression speed, but decrease ratio.
+                              * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              *                    , for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "use default minMatchLength". */
+    ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
+                              * For strategies btopt, btultra & btultra2:
+                              *     Length of Match considered "good enough" to stop search.
+                              *     Larger values make compression stronger, and slower.
+                              * For strategy fast:
+                              *     Distance between match sampling.
+                              *     Larger values make compression faster, and weaker.
+                              * Special: value 0 means "use default targetLength". */
+    ZSTD_c_strategy=107,     /* See ZSTD_strategy enum definition.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "use default strategy". */
+
+    /* LDM mode parameters */
+    ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
+                                     * This parameter is designed to improve compression ratio
+                                     * for large inputs, by finding large matches at long distance.
+                                     * It increases memory usage and window size.
+                                     * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
+                                     * except when expressly set to a different value.
+                                     * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
+                                     * compression strategy >= ZSTD_btopt (== compression level 16+) */
+    ZSTD_c_ldmHashLog=161,   /* Size of the table for long distance matching, as a power of 2.
+                              * Larger values increase memory usage and compression ratio,
+                              * but decrease compression speed.
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+                              * default: windowlog - 7.
+                              * Special: value 0 means "automatically determine hashlog". */
+    ZSTD_c_ldmMinMatch=162,  /* Minimum match size for long distance matcher.
+                              * Larger/too small values usually decrease compression ratio.
+                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+                              * Special: value 0 means "use default value" (default: 64). */
+    ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
+                              * Larger values improve collision resolution but decrease compression speed.
+                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
+                              * Special: value 0 means "use default value" (default: 3). */
+    ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
+                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+                              * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+                              * Larger values improve compression speed.
+                              * Deviating far from default value will likely result in a compression ratio decrease.
+                              * Special: value 0 means "automatically determine hashRateLog". */
+
+    /* frame parameters */
+    ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
+                              * Content size must be known at the beginning of compression.
+                              * This is automatically the case when using ZSTD_compress2(),
+                              * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+    ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
+    ZSTD_c_dictIDFlag=202,   /* When applicable, dictionary's ID is written into frame header (default:1) */
+
+    /* multi-threading parameters */
+    /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
+     * Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
+     * In a situation where it's unknown if the linked library supports multi-threading or not,
+     * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
+     */
+    ZSTD_c_nbWorkers=400,    /* Select how many threads will be spawned to compress in parallel.
+                              * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
+                              * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
+                              * while compression is performed in parallel, within worker thread(s).
+                              * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
+                              *  in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
+                              * More workers improve speed, but also increase memory usage.
+                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned,
+                              * compression is performed inside Caller's thread, and all invocations are blocking */
+    ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
+                              * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * The minimum size is automatically and transparently enforced. */
+    ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
+                              * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
+                              * It helps preserve compression ratio, while each job is compressed in parallel.
+                              * This value is enforced only when nbWorkers >= 1.
+                              * Larger values increase compression ratio, but decrease speed.
+                              * Possible values range from 0 to 9 :
+                              * - 0 means "default" : value will be determined by the library, depending on strategy
+                              * - 1 means "no overlap"
+                              * - 9 means "full overlap", using a full window size.
+                              * Each intermediate rank increases/decreases load size by a factor 2 :
+                              * 9: full window;  8: w/2;  7: w/4;  6: w/8;  5:w/16;  4: w/32;  3:w/64;  2:w/128;  1:no overlap;  0:default
+                              * default value varies between 6 and 9, depending on strategy */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_c_rsyncable
+     * ZSTD_c_format
+     * ZSTD_c_forceMaxWindow
+     * ZSTD_c_forceAttachDict
+     * ZSTD_c_literalCompressionMode
+     * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
+     * ZSTD_c_enableDedicatedDictSearch
+     * ZSTD_c_stableInBuffer
+     * ZSTD_c_stableOutBuffer
+     * ZSTD_c_blockDelimiters
+     * ZSTD_c_validateSequences
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly;
+     *        also, the enums values themselves are unstable and can still change.
+     */
+     ZSTD_c_experimentalParam1=500,
+     ZSTD_c_experimentalParam2=10,
+     ZSTD_c_experimentalParam3=1000,
+     ZSTD_c_experimentalParam4=1001,
+     ZSTD_c_experimentalParam5=1002,
+     ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004,
+     ZSTD_c_experimentalParam8=1005,
+     ZSTD_c_experimentalParam9=1006,
+     ZSTD_c_experimentalParam10=1007,
+     ZSTD_c_experimentalParam11=1008,
+     ZSTD_c_experimentalParam12=1009
+} ZSTD_cParameter;
+
+typedef struct {
+    size_t error;
+    int lowerBound;
+    int upperBound;
+} ZSTD_bounds;
+
+/*! ZSTD_cParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - lower and upper bounds, both inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
+
+/*! ZSTD_CCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is generally only possible during frame initialization (before starting compression).
+ *  Exception : when using multi-threading mode (nbWorkers >= 1),
+ *              the following parameters can be updated _during_ compression (within same frame):
+ *              => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
+ *              new parameters will be active for next job only (after a flush()).
+ * @return : an error code (which can be tested using ZSTD_isError()).
  */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ *  Total input data size to be compressed as a single frame.
+ *  Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag.
+ *  This value will also be controlled at end of frame, and trigger an error if not respected.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
+ *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ *           ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
+ *  Note 2 : pledgedSrcSize is only valid once, for the next frame.
+ *           It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
+ *  Note 3 : Whenever all input data is provided and consumed in a single round,
+ *           for example with ZSTD_compress2(),
+ *           or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
+ *           this value is automatically overridden by srcSize instead.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
 typedef enum {
-	ZSTD_error_no_error,
-	ZSTD_error_GENERIC,
-	ZSTD_error_prefix_unknown,
-	ZSTD_error_version_unsupported,
-	ZSTD_error_parameter_unknown,
-	ZSTD_error_frameParameter_unsupported,
-	ZSTD_error_frameParameter_unsupportedBy32bits,
-	ZSTD_error_frameParameter_windowTooLarge,
-	ZSTD_error_compressionParameter_unsupported,
-	ZSTD_error_init_missing,
-	ZSTD_error_memory_allocation,
-	ZSTD_error_stage_wrong,
-	ZSTD_error_dstSize_tooSmall,
-	ZSTD_error_srcSize_wrong,
-	ZSTD_error_corruption_detected,
-	ZSTD_error_checksum_wrong,
-	ZSTD_error_tableLog_tooLarge,
-	ZSTD_error_maxSymbolValue_tooLarge,
-	ZSTD_error_maxSymbolValue_tooSmall,
-	ZSTD_error_dictionary_corrupted,
-	ZSTD_error_dictionary_wrong,
-	ZSTD_error_dictionaryCreation_failed,
-	ZSTD_error_maxCode
-} ZSTD_ErrorCode;
-
-/**
- * ZSTD_maxCLevel() - maximum compression level available
- *
- * Return: Maximum compression level available.
- */
-int ZSTD_maxCLevel(void);
-/**
- * ZSTD_compressBound() - maximum compressed size in worst case scenario
- * @srcSize: The size of the data to compress.
- *
- * Return:   The maximum compressed size in the worst case scenario.
- */
-size_t ZSTD_compressBound(size_t srcSize);
-/**
- * ZSTD_isError() - tells if a size_t function result is an error code
- * @code:  The function result to check for error.
- *
- * Return: Non-zero iff the code is an error.
- */
-static __attribute__((unused)) unsigned int ZSTD_isError(size_t code)
-{
-	return code > (size_t)-ZSTD_error_maxCode;
-}
-/**
- * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode
- * @functionResult: The result of a function for which ZSTD_isError() is true.
- *
- * Return:          The ZSTD_ErrorCode corresponding to the functionResult or 0
- *                  if the functionResult isn't an error.
- */
-static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(
-	size_t functionResult)
-{
-	if (!ZSTD_isError(functionResult))
-		return (ZSTD_ErrorCode)0;
-	return (ZSTD_ErrorCode)(0 - functionResult);
-}
-
-/**
- * enum ZSTD_strategy - zstd compression search strategy
- *
- * From faster to stronger.
+    ZSTD_reset_session_only = 1,
+    ZSTD_reset_parameters = 2,
+    ZSTD_reset_session_and_parameters = 3
+} ZSTD_ResetDirective;
+
+/*! ZSTD_CCtx_reset() :
+ *  There are 2 different things that can be reset, independently or jointly :
+ *  - The session : will stop compressing current frame, and make CCtx ready to start a new one.
+ *                  Useful after an error, or to interrupt any ongoing compression.
+ *                  Any internal data not yet flushed is cancelled.
+ *                  Compression parameters and dictionary remain unchanged.
+ *                  They will be used to compress next frame.
+ *                  Resetting session never fails.
+ *  - The parameters : changes all parameters back to "default".
+ *                  This removes any reference to any dictionary too.
+ *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
+ *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+ *  - Both : similar to resetting the session, followed by resetting parameters.
  */
+ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+
+/*! ZSTD_compress2() :
+ *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ *  ZSTD_compress2() always starts a new frame.
+ *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - The function is always blocking, returns when compression is completed.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ * @return : compressed size written into `dst` (<= `dstCapacity),
+ *           or an error code if it fails (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
+                                   void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+
+/* *************************************
+*  Advanced decompression API
+***************************************/
+
+/* The advanced API pushes parameters one by one into an existing DCtx context.
+ * Parameters are sticky, and remain valid for all following frames
+ * using the same DCtx context.
+ * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
+ *        Therefore, no new decompression function is necessary.
+ */
+
 typedef enum {
-	ZSTD_fast,
-	ZSTD_dfast,
-	ZSTD_greedy,
-	ZSTD_lazy,
-	ZSTD_lazy2,
-	ZSTD_btlazy2,
-	ZSTD_btopt,
-	ZSTD_btopt2
-} ZSTD_strategy;
 
-/**
- * struct ZSTD_compressionParameters - zstd compression parameters
- * @windowLog:    Log of the largest match distance. Larger means more
- *                compression, and more memory needed during decompression.
- * @chainLog:     Fully searched segment. Larger means more compression, slower,
- *                and more memory (useless for fast).
- * @hashLog:      Dispatch table. Larger means more compression,
- *                slower, and more memory.
- * @searchLog:    Number of searches. Larger means more compression and slower.
- * @searchLength: Match length searched. Larger means faster decompression,
- *                sometimes less compression.
- * @targetLength: Acceptable match size for optimal parser (only). Larger means
- *                more compression, and slower.
- * @strategy:     The zstd compression strategy.
+    ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
+                              * the streaming API will refuse to allocate memory buffer
+                              * in order to protect the host from unreasonable memory requirements.
+                              * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
+                              * Special: value 0 means "use default maximum windowLog". */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_d_format
+     * ZSTD_d_stableOutBuffer
+     * ZSTD_d_forceIgnoreChecksum
+     * ZSTD_d_refMultipleDDicts
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly
+     */
+     ZSTD_d_experimentalParam1=1000,
+     ZSTD_d_experimentalParam2=1001,
+     ZSTD_d_experimentalParam3=1002,
+     ZSTD_d_experimentalParam4=1003
+
+} ZSTD_dParameter;
+
+/*! ZSTD_dParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - both lower and upper bounds, inclusive
  */
-typedef struct {
-	unsigned int windowLog;
-	unsigned int chainLog;
-	unsigned int hashLog;
-	unsigned int searchLog;
-	unsigned int searchLength;
-	unsigned int targetLength;
-	ZSTD_strategy strategy;
-} ZSTD_compressionParameters;
+ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
+
+/*! ZSTD_DCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_dParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is only possible during frame initialization (before starting decompression).
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
+
+/*! ZSTD_DCtx_reset() :
+ *  Return a DCtx to clean state.
+ *  Session and parameters can be reset jointly or separately.
+ *  Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError()
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
+
+
+/* **************************
+*  Streaming
+****************************/
+
+typedef struct ZSTD_inBuffer_s {
+  const void* src;    /*< start of input buffer */
+  size_t size;        /*< size of input buffer */
+  size_t pos;         /*< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_inBuffer;
+
+typedef struct ZSTD_outBuffer_s {
+  void*  dst;         /*< start of output buffer */
+  size_t size;        /*< size of output buffer */
+  size_t pos;         /*< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_outBuffer;
+
 
-/**
- * struct ZSTD_frameParameters - zstd frame parameters
- * @contentSizeFlag: Controls whether content size will be present in the frame
- *                   header (when known).
- * @checksumFlag:    Controls whether a 32-bit checksum is generated at the end
- *                   of the frame for error detection.
- * @noDictIDFlag:    Controls whether dictID will be saved into the frame header
- *                   when using dictionary compression.
+
+/*-***********************************************************************
+*  Streaming compression - HowTo
+*
+*  A ZSTD_CStream object is required to track streaming operation.
+*  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+*  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*
+*  For parallel execution, use one separate ZSTD_CStream per thread.
+*
+*  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+*
+*  Parameters are sticky : when starting a new compression on the same context,
+*  it will re-use the same sticky parameters as previous compression session.
+*  When in doubt, it's recommended to fully initialize the context before usage.
+*  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+*  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+*  set more specific parameters, the pledged source size, or load a dictionary.
+*
+*  Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
+*  consume input stream. The function will automatically update both `pos`
+*  fields within `input` and `output`.
+*  Note that the function may not consume the entire input, for example, because
+*  the output buffer is already full, in which case `input.pos < input.size`.
+*  The caller must check if input has been entirely consumed.
+*  If not, the caller must make some room to receive more compressed data,
+*  and then present again remaining input data.
+*  note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+*        but doesn't guarantee maximal forward progress. This is especially relevant
+*        when compressing with multiple threads. The call won't block if it can
+*        consume some input, but if it can't it will wait for some, but not all,
+*        output to be flushed.
+* @return : provides a minimum amount of data remaining to be flushed from internal buffers
+*           or an error code, which can be tested using ZSTD_isError().
+*
+*  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
+*  using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
+*  Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
+*  In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
+*  operation.
+*  note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if internal buffers are entirely flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+*  Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  The epilogue is required for decoders to consider a frame completed.
+*  flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
+*  start a new frame.
+*  note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if frame fully completed and fully flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream;  /*< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  /* accept NULL pointer */
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+    ZSTD_e_flush=1,    /* flush any data provided so far,
+                        * it creates (at least) one new block, that can be decoded immediately on reception;
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
+    ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
+                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
+                        * After that point, any additional data starts a new frame.
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        : note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() :
+ *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ *  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ *  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ *  - endOp must be a valid directive
+ *  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ *  - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available,
+ *                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
+ *                                                  The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
+ *  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *            Before starting a new compression job, or changing compression parameters,
+ *            it is required to fully flush internal buffers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                                         ZSTD_outBuffer* output,
+                                         ZSTD_inBuffer* input,
+                                         ZSTD_EndDirective endOp);
+
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It's not rare that performance ends being spent more into the interface, rather than compression itself.
+ * In which cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the nb of roundtrips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /*< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
+
+
+/* *****************************************************************************
+ * This following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
+ * Advanced parameters and dictionary compression can only be used through the
+ * new API.
+ ******************************************************************************/
+
+/*!
+ * Equivalent to:
  *
- * The default value is all fields set to 0.
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ */
+ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+/*!
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
  */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+*  Streaming decompression - HowTo
+*
+*  A ZSTD_DStream object is required to track streaming operations.
+*  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+*  ZSTD_DStream objects can be re-used multiple times.
+*
+*  Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+*  Alternatively, use advanced API to set specific properties.
+*
+*  Use ZSTD_decompressStream() repetitively to consume your input.
+*  The function will update both `pos` fields.
+*  If `input.pos < input.size`, some input has not been consumed.
+*  It's up to the caller to present again remaining data.
+*  The function tries to flush all data decoded immediately, respecting output buffer size.
+*  If `output.pos < output.size`, decoder has flushed everything it could.
+*  But if `output.pos == output.size`, there might be some data left within internal buffers.,
+*  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+*  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed,
+*        or an error code, which can be tested using ZSTD_isError(),
+*        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+*                                the return value is a suggested next input size (just a hint for better latency)
+*                                that will never request more than the remaining frame size.
+* *******************************************************************************/
+
+typedef ZSTD_DCtx ZSTD_DStream;  /*< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
+/*===== ZSTD_DStream management functions =====*/
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer */
+
+/*===== Streaming decompression functions =====*/
+
+/* This function is redundant with the advanced API and equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, NULL);
+ */
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
+
+
+/* ************************
+*  Simple dictionary API
+***************************/
+/*! ZSTD_compress_usingDict() :
+ *  Compression at an explicit compression level using a Dictionary.
+ *  A dictionary can be any arbitrary data segment (also called a prefix),
+ *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+ *  Decompression using a known Dictionary.
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+/* *********************************
+ *  Bulk processing dictionary API
+ **********************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict().
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times.
+ *  Note : compression level is _decided at dictionary creation time_,
+ *     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const ZSTD_CDict* cdict);
+
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict()
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+
+/*! ZSTD_decompress_usingDDict() :
+ *  Decompression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_DDict* ddict);
+
+
+/* ******************************
+ *  Dictionary helper functions
+ *******************************/
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompressed the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/* *****************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset with the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ *           meaning "return to no-dictionary mode".
+ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ *  Note 2 : Loading a dictionary involves building tables.
+ *           It's also a CPU consuming operation, with non-negligible impact on latency.
+ *           Tables are dependent on compression parameters, and for this reason,
+ *           compression parameters can no longer be changed after loading a dictionary.
+ *  Note 3 :`dict` content will be copied internally.
+ *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ *           In such a case, dictionary buffer must outlive its users.
+ *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ *           to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compressed frames.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supersede any compression parameter previously set within CCtx.
+ *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
+ *  The dictionary will remain valid for future compressed frames using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Referencing a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compressed frame.
+ *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
+ *  Decompression will need same prefix to properly regenerate data.
+ *  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
+ *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+ *           Its content must remain unmodified during compression.
+ *  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+ *           ensure that the window size is large enough to contain the entire source.
+ *           See ZSTD_c_windowLog.
+ *  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU consuming operation, with non-negligible impact on latency.
+ *           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+ *  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
+ *           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                                 const void* prefix, size_t prefixSize);
+
+/*! ZSTD_DCtx_loadDictionary() :
+ *  Create an internal DDict from dict buffer,
+ *  to be used to decompress next frames.
+ *  The dictionary remains valid for all future frames, until explicitly invalidated.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : Loading a dictionary involves building tables,
+ *           which has a non-negligible impact on CPU usage and latency.
+ *           It's recommended to "load once, use many times", to amortize the cost
+ *  Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
+ *           Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
+ *  Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
+ *           how dictionary content is loaded and interpreted.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_refDDict() :
+ *  Reference a prepared dictionary, to be used to decompress next frames.
+ *  The dictionary remains active for decompression of future frames using same DCtx.
+ *
+ *  If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ *  will store the DDict references in a table, and the DDict used for decompression
+ *  will be determined at decompression time, as per the dict ID in the frame.
+ *  The memory for the table is allocated on the first call to refDDict, and can be
+ *  freed with ZSTD_freeDCtx().
+ *
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Special: referencing a NULL DDict means "return to no-dictionary mode".
+ *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+/*! ZSTD_DCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) to decompress next frame.
+ *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
+ *  and must use the same prefix as the one used during compression.
+ *  Prefix is **only used once**. Reference is discarded at end of frame.
+ *  End of frame is reached when ZSTD_decompressStream() returns 0.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
+ *  Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
+ *           Prefix buffer must remain unmodified up to the end of frame,
+ *           reached when ZSTD_decompressStream() returns 0.
+ *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
+ *           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
+ *  Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
+ *           A full dictionary is more costly, as it requires building tables.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
+                                 const void* prefix, size_t prefixSize);
+
+/* ===   Memory management   === */
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the _current_ memory usage of selected object.
+ *  Note that object memory usage can evolve (increase or decrease) over time. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+#endif  /* ZSTD_H_235446 */
+
+
+/* **************************************************************************************
+ *   ADVANCED AND EXPERIMENTAL FUNCTIONS
+ ****************************************************************************************
+ * The definitions in the following section are considered experimental.
+ * They are provided for advanced scenarios.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * Use them only in association with static linking.
+ * ***************************************************************************************/
+
+#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* **************************************************************************************
+ *   experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)   /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX   18   /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE    8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32    30
+#define ZSTD_WINDOWLOG_MAX_64    31
+#define ZSTD_WINDOWLOG_MAX     ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN       10
+#define ZSTD_HASHLOG_MAX       ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN          6
+#define ZSTD_CHAINLOG_MAX_32     29
+#define ZSTD_CHAINLOG_MAX_64     30
+#define ZSTD_CHAINLOG_MAX      ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN        ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX      (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN        1
+#define ZSTD_MINMATCH_MAX         7   /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN         3   /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX    ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN        ZSTD_fast
+#define ZSTD_STRATEGY_MAX        ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN       0
+#define ZSTD_OVERLAPLOG_MAX       9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27   /* by default, the streaming decoder will refuse any frame
+                                           * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+                                           * to preserve host's memory from unreasonable requirements.
+                                           * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+                                           * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX      ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN        4
+#define ZSTD_LDM_MINMATCH_MAX     4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN   1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX   8
+#define ZSTD_LDM_HASHRATELOG_MIN     0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN        0
+#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX           17
+
+
+/* ---  Advanced types  --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
 typedef struct {
-	unsigned int contentSizeFlag;
-	unsigned int checksumFlag;
-	unsigned int noDictIDFlag;
+    unsigned int offset;      /* The offset of the match. (NOT the same as the offset code)
+                               * If offset == 0 and matchLength == 0, this sequence represents the last
+                               * literals in the block of litLength size.
+                               */
+
+    unsigned int litLength;   /* Literal length of the sequence. */
+    unsigned int matchLength; /* Match length of the sequence. */
+
+                              /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
+                               * In this case, we will treat the sequence as a marker for a block boundary.
+                               */
+
+    unsigned int rep;         /* Represents which repeat offset is represented by the field 'offset'.
+                               * Ranges from [0, 3].
+                               *
+                               * Repeat offsets are essentially previous offsets from previous sequences sorted in
+                               * recency order. For more detail, see doc/zstd_compression_format.md
+                               *
+                               * If rep == 0, then 'offset' does not contain a repeat offset.
+                               * If rep > 0:
+                               *  If litLength != 0:
+                               *      rep == 1 --> offset == repeat_offset_1
+                               *      rep == 2 --> offset == repeat_offset_2
+                               *      rep == 3 --> offset == repeat_offset_3
+                               *  If litLength == 0:
+                               *      rep == 1 --> offset == repeat_offset_2
+                               *      rep == 2 --> offset == repeat_offset_3
+                               *      rep == 3 --> offset == repeat_offset_1 - 1
+                               *
+                               * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+                               * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+                               * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+                               * use this 'rep' field at all (as of now).
+                               */
+} ZSTD_Sequence;
+
+typedef struct {
+    unsigned windowLog;       /*< largest match distance : larger == more compression, more memory needed during decompression */
+    unsigned chainLog;        /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+    unsigned hashLog;         /*< dispatch table : larger == faster, more memory */
+    unsigned searchLog;       /*< nb of searches : larger == more compression, slower */
+    unsigned minMatch;        /*< match length searched : larger == faster decompression, sometimes less compression */
+    unsigned targetLength;    /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    ZSTD_strategy strategy;   /*< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+    int contentSizeFlag; /*< 1: content size will be in frame header (when known) */
+    int checksumFlag;    /*< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+    int noDictIDFlag;    /*< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
 } ZSTD_frameParameters;
 
-/**
- * struct ZSTD_parameters - zstd parameters
- * @cParams: The compression parameters.
- * @fParams: The frame parameters.
- */
 typedef struct {
-	ZSTD_compressionParameters cParams;
-	ZSTD_frameParameters fParams;
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
 } ZSTD_parameters;
 
-/**
- * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
- *
- * Return:            The selected ZSTD_compressionParameters.
+typedef enum {
+    ZSTD_dct_auto = 0,       /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+    ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+    ZSTD_dct_fullDict = 2    /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+    ZSTD_dlm_byCopy = 0,  /*< Copy dictionary content internally */
+    ZSTD_dlm_byRef = 1    /*< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+    ZSTD_f_zstd1 = 0,           /* zstd frame format, specified in zstd_compression_format.md (default) */
+    ZSTD_f_zstd1_magicless = 1  /* Variant of zstd frame format, without initial 4-bytes magic number.
+                                 * Useful to save 4 bytes per generated frame.
+                                 * Decoder cannot recognise automatically this format, requiring this instruction. */
+} ZSTD_format_e;
+
+typedef enum {
+    /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
+    ZSTD_d_validateChecksum = 0,
+    ZSTD_d_ignoreChecksum = 1
+} ZSTD_forceIgnoreChecksum_e;
+
+typedef enum {
+    /* Note: this enum controls ZSTD_d_refMultipleDDicts */
+    ZSTD_rmd_refSingleDDict = 0,
+    ZSTD_rmd_refMultipleDDicts = 1
+} ZSTD_refMultipleDDicts_e;
+
+typedef enum {
+    /* Note: this enum and the behavior it controls are effectively internal
+     * implementation details of the compressor. They are expected to continue
+     * to evolve and should be considered only in the context of extremely
+     * advanced performance tuning.
+     *
+     * Zstd currently supports the use of a CDict in three ways:
+     *
+     * - The contents of the CDict can be copied into the working context. This
+     *   means that the compression can search both the dictionary and input
+     *   while operating on a single set of internal tables. This makes
+     *   the compression faster per-byte of input. However, the initial copy of
+     *   the CDict's tables incurs a fixed cost at the beginning of the
+     *   compression. For small compressions (< 8 KB), that copy can dominate
+     *   the cost of the compression.
+     *
+     * - The CDict's tables can be used in-place. In this model, compression is
+     *   slower per input byte, because the compressor has to search two sets of
+     *   tables. However, this model incurs no start-up cost (as long as the
+     *   working context's tables can be reused). For small inputs, this can be
+     *   faster than copying the CDict's tables.
+     *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
+     * Zstd has a simple internal heuristic that selects which strategy to use
+     * at the beginning of a compression. However, if experimentation shows that
+     * Zstd is making poor choices, it is possible to override that choice with
+     * this enum.
+     */
+    ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+    ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
+    ZSTD_dictForceCopy     = 2, /* Always copy the dictionary. */
+    ZSTD_dictForceLoad     = 3  /* Always reload the dictionary */
+} ZSTD_dictAttachPref_e;
+
+typedef enum {
+  ZSTD_lcm_auto = 0,          /*< Automatically determine the compression mode based on the compression level.
+                               *   Negative compression levels will be uncompressed, and positive compression
+                               *   levels will be compressed. */
+  ZSTD_lcm_huffman = 1,       /*< Always attempt Huffman compression. Uncompressed literals will still be
+                               *   emitted if Huffman compression is not profitable. */
+  ZSTD_lcm_uncompressed = 2   /*< Always emit uncompressed literals. */
+} ZSTD_literalCompressionMode_e;
+
+
+/* *************************************
+*  Frame size functions
+***************************************/
+
+/*! ZSTD_findDecompressedSize() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - decompressed size of all data in all successive frames
+ *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 3 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure result fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ *            read each contained frame header.  This is fast as most of the data is skipped,
+ *            however it does mean that all frame data must be present and valid. */
+ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_decompressBound() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - upper-bound for the decompressed size of all data in all successive frames
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
+ *  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ *            in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ *  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
  */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
 
-/**
- * ZSTD_getParams() - returns ZSTD_parameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+/*! ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+typedef enum {
+  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_generateSequences() :
+ * Generate sequences using ZSTD_compress2, given a source buffer.
+ *
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
+ * simply acts as a block delimiter.
  *
- * The same as ZSTD_getCParams() except also selects the default frame
- * parameters (all zero).
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
  *
- * Return:            The selected ZSTD_parameters.
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
  */
-ZSTD_parameters ZSTD_getParams(int compressionLevel,
-	unsigned long long estimatedSrcSize, size_t dictSize);
 
-/*-*************************************
- * Explicit memory management
- **************************************/
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                                          size_t outSeqsSize, const void* src, size_t srcSize);
 
-/**
- * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx
- * @cParams: The compression parameters to be used for compression.
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into into the literals of the next sequence.
  *
- * If multiple compression parameters might be used, the caller must call
- * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum
- * size.
+ * As such, the final generated result has no explicit representation of block boundaries,
+ * and the final last literals segment is not represented in the sequences.
  *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCCtx().
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
+ * @return : number of sequences left after merging
+ */
+ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+
+/*! ZSTD_compressSequences() :
+ * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
+ * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+ * The entire source is compressed into a single frame.
+ *
+ * The compression behavior changes based on cctx params. In particular:
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
+ *    the block size derived from the cctx, and sequences may be split. This is the default setting.
+ *
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ *
+ *    If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ *    behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
+ *    specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
+ *
+ *    In addition to the two adjustable experimental params, there are other important cctx params.
+ *    - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ *    - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ *    - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ *      is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ *         and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size or a ZSTD error.
  */
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                  const void* src, size_t srcSize);
 
-/**
- * struct ZSTD_CCtx - the zstd compression context
+
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
  *
- * When compressing many times it is recommended to allocate a context just once
- * and reuse it for each successive compression operation.
- */
-typedef struct ZSTD_CCtx_s ZSTD_CCtx;
-/**
- * ZSTD_initCCtx() - initialize a zstd compression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
+ * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
  *
- * Return:         A compression context emplaced into workspace.
- */
-ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_compressCCtx() - compress src into dst
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
+ * Returns an error if destination buffer is not large enough, if the source size is not representable
+ * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
  *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
+ * @return : number of bytes written or a ZSTD error.
  */
-size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, ZSTD_parameters params);
-
-/**
- * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx
- *
- * Return: A lower bound on the size of the workspace that is passed to
- *         ZSTD_initDCtx().
+ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                            const void* src, size_t srcSize, unsigned magicVariant);
+
+
+/* *************************************
+*  Memory management
+***************************************/
+
+/*! ZSTD_estimate*() :
+ *  These functions make it possible to estimate memory usage
+ *  of a future {D,C}Ctx, before its creation.
+ *
+ *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ *  for any compression level up to selected one.
+ *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+ *         does not include space for a window buffer.
+ *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ *  The estimate will assume the input may be arbitrarily large,
+ *  which is the worst case.
+ *
+ *  When srcSize can be bound by a known and rather "small" value,
+ *  this fact can be used to provide a tighter estimation
+ *  because the CCtx compression context will need less memory.
+ *  This tighter estimation can be provided by more advanced functions
+ *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+ *
+ *  Note 2 : only single-threaded compression is supported.
+ *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimateCStreamSize() :
+ *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+ *  Note : CStream size estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, which additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ *  Initialize an object using a pre-allocated fixed-size buffer.
+ *  workspace: The memory area to emplace the object into.
+ *             Provided pointer *must be 8-bytes aligned*.
+ *             Buffer must outlive object.
+ *  workspaceSize: Use ZSTD_estimate*Size() to determine
+ *                 how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ *           or NULL if error (size too small, incorrect alignment, etc.)
+ *  Note : zstd will never resize nor malloc() when using a static buffer.
+ *         If the object requires more memory than available,
+ *         zstd will just error out (typically ZSTD_error_memory_allocation).
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace is allocated externally, it must be freed externally too.
+ *  Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ *           into its associated cParams.
+ *  Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ *                 ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ *  Limitation 2 : static cctx currently not compatible with multi-threading.
+ *  Limitation 3 : static dctx is incompatible with legacy support.
  */
-size_t ZSTD_DCtxWorkspaceBound(void);
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticCCtx() */
+
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticDCtx() */
+
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType,
+                                        ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType);
+
+
+/*! Custom memory allocation :
+ *  These prototypes make it possible to pass your own allocation/free functions.
+ *  ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ *  All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static
+__attribute__((__unused__))
+ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /*< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
+
+/* ! Thread pool :
+ * These prototypes make it possible to share a thread pool among multiple compression contexts.
+ * This can limit resources for applications with multiple threads where each one uses
+ * a threaded compression mode (via ZSTD_c_nbWorkers parameter).
+ * ZSTD_createThreadPool creates a new thread pool with a given number of threads.
+ * Note that the lifetime of such pool must exist while being used.
+ * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
+ * to use an internal thread pool).
+ * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
+ */
+typedef struct POOL_ctx_s ZSTD_threadPool;
+ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
+ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
+ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
 
-/**
- * struct ZSTD_DCtx - the zstd decompression context
- *
- * When decompressing many times it is recommended to allocate a context just
- * once and reuse it for each successive decompression operation.
+
+/*
+ * This API is temporary and is expected to change or disappear in the future!
  */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-/**
- * ZSTD_initDCtx() - initialize a zstd decompression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
- *
- * Return:         A decompression context emplaced into workspace.
- */
-ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_decompressDCtx() - decompress zstd compressed src into dst
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-
-/*-************************
- * Simple dictionary API
- **************************/
-
-/**
- * ZSTD_compress_usingDict() - compress src into dst using a dictionary
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @dict:        The dictionary to use for compression.
- * @dictSize:    The size of the dictionary.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Compression using a predefined dictionary. The same dictionary must be used
- * during decompression.
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const void *dict, size_t dictSize,
-	ZSTD_parameters params);
-
-/**
- * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @dict:        The dictionary to use for decompression. The same dictionary
- *               must've been used to compress the data.
- * @dictSize:    The size of the dictionary.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const void *dict, size_t dictSize);
-
-/*-**************************
- * Fast dictionary API
- ***************************/
-
-/**
- * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict
- * @cParams: The compression parameters to be used for compression.
- *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCDict().
- */
-size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams);
-
-/**
- * struct ZSTD_CDict - a digested dictionary to be used for compression
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+    const void* dict, size_t dictSize,
+    ZSTD_dictLoadMethod_e dictLoadMethod,
+    ZSTD_dictContentType_e dictContentType,
+    const ZSTD_CCtx_params* cctxParams,
+    ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
+    const void* dict, size_t dictSize,
+    ZSTD_dictLoadMethod_e dictLoadMethod,
+    ZSTD_dictContentType_e dictContentType,
+    ZSTD_customMem customMem);
+
+
+/* *************************************
+*  Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is just referenced, not duplicated.
+ *  As a consequence, `dictBuffer` **must** outlive CDict,
+ *  and its content must remain unmodified throughout the lifetime of CDict.
+ *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+ * `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_getParams() :
+ *  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+ *  All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_checkCParams() :
+ *  Ensure param values remain within authorized range.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustCParams() :
+ *  optimize params for a given `srcSize` and `dictSize`.
+ * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
+ * `dictSize` must be `0` when there is no dictionary.
+ *  cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
+ *  This function never fails (wide contract) */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+
+/*! ZSTD_compress_advanced() :
+ *  Note : this function is now DEPRECATED.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
+ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+                                          void* dst, size_t dstCapacity,
+                                    const void* src, size_t srcSize,
+                                    const void* dict,size_t dictSize,
+                                          ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ *  Note : this function is now REDUNDANT.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning in some future version */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_CDict* cdict,
+                                              ZSTD_frameParameters fParams);
+
+
+/*! ZSTD_CCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
+ *  It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_refPrefix_advanced() :
+ *  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/* ===   experimental parameters   === */
+/* these parameters can be used with ZSTD_setParameter()
+ * they are not guaranteed to remain supported in the future */
+
+ /* Enables rsyncable mode,
+  * which makes compressed files more rsync friendly
+  * by adding periodic synchronization points to the compressed data.
+  * The target average block size is ZSTD_c_jobSize / 2.
+  * It's possible to modify the job size to increase or decrease
+  * the granularity of the synchronization point.
+  * Once the jobSize is smaller than the window size,
+  * it will result in compression ratio degradation.
+  * NOTE 1: rsyncable mode only works when multithreading is enabled.
+  * NOTE 2: rsyncable performs poorly in combination with long range mode,
+  * since it will decrease the effectiveness of synchronization points,
+  * though mileage may vary.
+  * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
+  * If the selected compression level is already running significantly slower,
+  * the overall speed won't be significantly impacted.
+  */
+ #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
+
+/* Select a compression format.
+ * The value must be of type ZSTD_format_e.
+ * See ZSTD_format_e enum definition for details */
+#define ZSTD_c_format ZSTD_c_experimentalParam2
+
+/* Force back-reference distances to remain < windowSize,
+ * even when referencing into Dictionary content (default:0) */
+#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
+
+/* Controls whether the contents of a CDict
+ * are used in place, or copied into the working context.
+ * Accepts values from the ZSTD_dictAttachPref_e enum.
+ * See the comments on that enum for an explanation of the feature. */
+#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
+
+/* Controls how the literals are compressed (default is auto).
+ * The value must be of type ZSTD_literalCompressionMode_e.
+ * See ZSTD_literalCompressionMode_t enum definition for details.
  */
-typedef struct ZSTD_CDict_s ZSTD_CDict;
+#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if guess considerably underestimates */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
+/* Controls whether the new and experimental "dedicated dictionary search
+ * structure" can be used. This feature is still rough around the edges, be
+ * prepared for surprising behavior!
+ *
+ * How to use it:
+ *
+ * When using a CDict, whether to use this feature or not is controlled at
+ * CDict creation, and it must be set in a CCtxParams set passed into that
+ * construction (via ZSTD_createCDict_advanced2()). A compression will then
+ * use the feature or not based on how the CDict was constructed; the value of
+ * this param, set in the CCtx, will have no effect.
+ *
+ * However, when a dictionary buffer is passed into a CCtx, such as via
+ * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
+ * whether the CDict that is created internally can use the feature or not.
+ *
+ * What it does:
+ *
+ * Normally, the internal data structures of the CDict are analogous to what
+ * would be stored in a CCtx after compressing the contents of a dictionary.
+ * To an approximation, a compression using a dictionary can then use those
+ * data structures to simply continue what is effectively a streaming
+ * compression where the simulated compression of the dictionary left off.
+ * Which is to say, the search structures in the CDict are normally the same
+ * format as in the CCtx.
+ *
+ * It is possible to do better, since the CDict is not like a CCtx: the search
+ * structures are written once during CDict creation, and then are only read
+ * after that, while the search structures in the CCtx are both read and
+ * written as the compression goes along. This means we can choose a search
+ * structure for the dictionary that is read-optimized.
+ *
+ * This feature enables the use of that different structure.
+ *
+ * Note that some of the members of the ZSTD_compressionParameters struct have
+ * different semantics and constraints in the dedicated search structure. It is
+ * highly recommended that you simply set a compression level in the CCtxParams
+ * you pass into the CDict creation call, and avoid messing with the cParams
+ * directly.
+ *
+ * Effects:
+ *
+ * This will only have any effect when the selected ZSTD_strategy
+ * implementation supports this feature. Currently, that's limited to
+ * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
+ *
+ * Note that this means that the CDict tables can no longer be copied into the
+ * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
+ * useable. The dictionary can only be attached or reloaded.
+ *
+ * In general, you should expect compression to be faster--sometimes very much
+ * so--and CDict creation to be slightly slower. Eventually, we will probably
+ * make this mode the default.
+ */
+#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
+
+/* ZSTD_c_stableInBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the compressor, and
+ * compression will fail if it ever changes. This means the only flush
+ * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
+ * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
 
-/**
- * ZSTD_initCDict() - initialize a digested dictionary for compression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_CDict so it must outlive the returned ZSTD_CDict.
- * @dictSize:      The size of the dictionary.
- * @params:        The parameters to use for compression. See ZSTD_getParams().
- * @workspace:     The workspace. It must outlive the returned ZSTD_CDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_CDictWorkspaceBound(params.cParams).
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
  *
- * When compressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_CDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_CDict.
+ * Tells he compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always decompress directly into the output buffer, instead of decompressing
+ * into an internal buffer and copying to the output buffer.
  *
- * Return:         The digested dictionary emplaced into workspace.
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
  */
-ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
-	ZSTD_parameters params, void *workspace, size_t workspaceSize);
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
 
-/**
- * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
- *               cParams are the compression parameters used to initialize the
- *               cdict.
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @cdict:       The digested dictionary to use for compression.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
  *
- * Compression using a digested dictionary. The same dictionary must be used
- * during decompression.
+ * For use with sequence compression API: ZSTD_compressSequences().
  *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
  */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize, const ZSTD_CDict *cdict);
-
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
 
-/**
- * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
  *
- * Return:  A lower bound on the size of the workspace that is passed to
- *          ZSTD_initDDict().
  */
-size_t ZSTD_DDictWorkspaceBound(void);
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
 
-/**
- * struct ZSTD_DDict - a digested dictionary to be used for decompression
+/*! ZSTD_CCtx_getParameter() :
+ *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
  */
-typedef struct ZSTD_DDict_s ZSTD_DDict;
-
-/**
- * ZSTD_initDDict() - initialize a digested dictionary for decompression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
- * @dictSize:      The size of the dictionary.
- * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_DDictWorkspaceBound().
- *
- * When decompressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_DDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_DDict.
- *
- * Return:         The digested dictionary emplaced into workspace.
- */
-ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize,
-	void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @ddict:       The digested dictionary to use for decompression. The same
- *               dictionary must've been used to compress the data.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst,
-	size_t dstCapacity, const void *src, size_t srcSize,
-	const ZSTD_DDict *ddict);
-
-
-/*-**************************
- * Streaming
- ***************************/
-
-/**
- * struct ZSTD_inBuffer - input buffer for streaming
- * @src:  Start of the input buffer.
- * @size: Size of the input buffer.
- * @pos:  Position where reading stopped. Will be updated.
- *        Necessarily 0 <= pos <= size.
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+
+
+/*! ZSTD_CCtx_params :
+ *  Quick howto :
+ *  - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ *  - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ *                                     an existing ZSTD_CCtx_params structure.
+ *                                     This is similar to
+ *                                     ZSTD_CCtx_setParameter().
+ *  - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ *                                    an existing CCtx.
+ *                                    These parameters will be applied to
+ *                                    all subsequent frames.
+ *  - ZSTD_compressStream2() : Do compression using the CCtx.
+ *  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
+ *
+ *  This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
+ *  for static allocation of CCtx for single-threaded compression.
  */
-typedef struct ZSTD_inBuffer_s {
-	const void *src;
-	size_t size;
-	size_t pos;
-} ZSTD_inBuffer;
+ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
 
-/**
- * struct ZSTD_outBuffer - output buffer for streaming
- * @dst:  Start of the output buffer.
- * @size: Size of the output buffer.
- * @pos:  Position where writing stopped. Will be updated.
- *        Necessarily 0 <= pos <= size.
+/*! ZSTD_CCtxParams_reset() :
+ *  Reset params to default values.
  */
-typedef struct ZSTD_outBuffer_s {
-	void *dst;
-	size_t size;
-	size_t pos;
-} ZSTD_outBuffer;
-
-
+ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
 
-/*-*****************************************************************************
- * Streaming compression - HowTo
- *
- * A ZSTD_CStream object is required to track streaming operation.
- * Use ZSTD_initCStream() to initialize a ZSTD_CStream object.
- * ZSTD_CStream objects can be reused multiple times on consecutive compression
- * operations. It is recommended to re-use ZSTD_CStream in situations where many
- * streaming operations will be achieved consecutively. Use one separate
- * ZSTD_CStream per thread for parallel execution.
- *
- * Use ZSTD_compressStream() repetitively to consume input stream.
- * The function will automatically update both `pos` fields.
- * Note that it may not consume the entire input, in which case `pos < size`,
- * and it's up to the caller to present again remaining data.
- * It returns a hint for the preferred number of bytes to use as an input for
- * the next function call.
- *
- * At any moment, it's possible to flush whatever data remains within internal
- * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might
- * still be some content left within the internal buffer if `output->size` is
- * too small. It returns the number of bytes left in the internal buffer and
- * must be called until it returns 0.
- *
- * ZSTD_endStream() instructs to finish a frame. It will perform a flush and
- * write frame epilogue. The epilogue is required for decoders to consider a
- * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush
- * the full content if `output->size` is too small. In which case, call again
- * ZSTD_endStream() to complete the flush. It returns the number of bytes left
- * in the internal buffer and must be called until it returns 0.
- ******************************************************************************/
+/*! ZSTD_CCtxParams_init() :
+ *  Initializes the compression parameters of cctxParams according to
+ *  compression level. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
 
-/**
- * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream
- * @cParams: The compression parameters to be used for compression.
- *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCStream() and ZSTD_initCStream_usingCDict().
+/*! ZSTD_CCtxParams_init_advanced() :
+ *  Initializes the compression and frame parameters of cctxParams according to
+ *  params. All other parameters are reset to their default values.
  */
-size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+
+/*! ZSTD_CCtxParams_setParameter() :
+ *  Similar to ZSTD_CCtx_setParameter.
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  Parameters must be applied to a ZSTD_CCtx using
+ *  ZSTD_CCtx_setParametersUsingCCtxParams().
+ * @result : a code representing success or failure (which can be tested with
+ *           ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
 
-/**
- * struct ZSTD_CStream - the zstd streaming compression context
+/*! ZSTD_CCtxParams_getParameter() :
+ * Similar to ZSTD_CCtx_getParameter.
+ * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+
+/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  Apply a set of ZSTD_CCtx_params to the compression context.
+ *  This can be done even after compression is started,
+ *    if nbWorkers==0, this will have no impact until a new compression is started.
+ *    if nbWorkers>=1, new parameters will be picked up at next job,
+ *       with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ *  Same as ZSTD_compressStream2(),
+ *  but using only integral types as arguments.
+ *  This variant might be helpful for binders from dynamic languages
+ *  which have troubles handling structures containing memory pointers.
  */
-typedef struct ZSTD_CStream_s ZSTD_CStream;
+ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+/* *************************************
+*  Advanced decompression functions
+***************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict,
+ *  it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but references `dict` content instead of copying it into `dctx`.
+ *  This saves memory if `dict` remains around.,
+ *  However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but gives direct control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ *  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ *  Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
+ *  This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ *  This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ *  By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
 
-/*===== ZSTD_CStream management functions =====*/
-/**
- * ZSTD_initCStream() - initialize a zstd streaming compression context
- * @params:         The zstd compression parameters.
- * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must
- *                  pass the source size (zero means empty source). Otherwise,
- *                  the caller may optionally pass the source size, or zero if
- *                  unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace.
- *                  Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine
- *                  how large the workspace must be.
- *
- * Return:          The zstd streaming compression context.
- */
-ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params,
-	unsigned long long pledgedSrcSize, void *workspace,
-	size_t workspaceSize);
-
-/**
- * ZSTD_initCStream_usingCDict() - initialize a streaming compression context
- * @cdict:          The digested dictionary to use for compression.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace. Call ZSTD_CStreamWorkspaceBound()
- *                  with the cParams used to initialize the cdict to determine
- *                  how large the workspace must be.
- *
- * Return:          The zstd streaming compression context.
- */
-ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict,
-	unsigned long long pledgedSrcSize, void *workspace,
-	size_t workspaceSize);
+/*! ZSTD_DCtx_getParameter() :
+ *  Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
 
-/*===== Streaming compression functions =====*/
-/**
- * ZSTD_resetCStream() - reset the context using parameters from creation
- * @zcs:            The zstd streaming compression context to reset.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
- *
- * Resets the context using the parameters from creation. Skips dictionary
- * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame
- * content size is always written into the frame header.
- *
- * Return:          Zero or an error, which can be checked using ZSTD_isError().
+/* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
  */
-size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize);
-/**
- * ZSTD_compressStream() - streaming compress some of input into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- * @input:  Source buffer. `input->pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input->pos < input->size`, and it's up to the caller to present
- *          remaining data again.
- *
- * The `input` and `output` buffers may be any size. Guaranteed to make some
- * forward progress if `input` and `output` are not empty.
- *
- * Return:  A hint for the number of bytes to use as the input for the next
- *          function call or an error, which can be checked using
- *          ZSTD_isError().
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flags is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
  */
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
-/**
- * ZSTD_flushStream() - flush internal buffers into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- *
- * ZSTD_flushStream() must be called until it returns 0, meaning all the data
- * has been flushed. Since ZSTD_flushStream() causes a block to be ended,
- * calling it too often will degrade the compression ratio.
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+/* ZSTD_d_forceIgnoreChecksum
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
  *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
+ * Tells the decompressor to skip checksum validation during decompression, regardless
+ * of whether checksumming was specified during compression. This offers some
+ * slight performance benefits, and may be useful for debugging.
+ * Param has values of type ZSTD_forceIgnoreChecksum_e
  */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
-/**
- * ZSTD_endStream() - flush internal buffers into output and end the frame
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
+#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
+
+/* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
  *
- * ZSTD_endStream() must be called until it returns 0, meaning all the data has
- * been flushed and the frame epilogue has been written.
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
  *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
-
-/**
- * ZSTD_CStreamInSize() - recommended size for the input buffer
+ * Usage is simply calling ZSTD_refDDict() on multiple dict buffers.
  *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_CStreamInSize(void);
-/**
- * ZSTD_CStreamOutSize() - recommended size for the output buffer
+ * Param has values of byte ZSTD_refMultipleDDicts_e
  *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete compressed block.
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
  *
- * Return: The recommended size for the output buffer.
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
  */
-size_t ZSTD_CStreamOutSize(void);
-
+#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
 
 
-/*-*****************************************************************************
- * Streaming decompression - HowTo
- *
- * A ZSTD_DStream object is required to track streaming operations.
- * Use ZSTD_initDStream() to initialize a ZSTD_DStream object.
- * ZSTD_DStream objects can be re-used multiple times.
- *
- * Use ZSTD_decompressStream() repetitively to consume your input.
- * The function will update both `pos` fields.
- * If `input->pos < input->size`, some input has not been consumed.
- * It's up to the caller to present again remaining data.
- * If `output->pos < output->size`, decoder has flushed everything it could.
- * Returns 0 iff a frame is completely decoded and fully flushed.
- * Otherwise it returns a suggested next input size that will never load more
- * than the current frame.
- ******************************************************************************/
+/*! ZSTD_DCtx_setFormat() :
+ *  Instruct the decoder context about what kind of data to decode next.
+ *  This instruction is mandatory to decode data without a fully-formed header,
+ *  such ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
 
-/**
- * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- *
- * Return:         A lower bound on the size of the workspace that is passed to
- *                 ZSTD_initDStream() and ZSTD_initDStream_usingDDict().
+/*! ZSTD_decompressStream_simpleArgs() :
+ *  Same as ZSTD_decompressStream(),
+ *  but using only integral types as arguments.
+ *  This can be helpful for binders from dynamic languages
+ *  which have troubles handling structures containing memory pointers.
  */
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize);
+ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos);
+
+
+/* ******************************************************************
+*  Advanced streaming functions
+*  Warning : most of these functions are now redundant with the Advanced API.
+*  Once Advanced API reaches "stable" status,
+*  redundant functions will be deprecated, and then at some point removed.
+********************************************************************/
+
+/*=====   Advanced Streaming compression functions  =====*/
+
+/*! ZSTD_initCStream_srcSize() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                         int compressionLevel,
+                         unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingDict() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * Creates of an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
+ * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                           int compressionLevel);
+
+/*! ZSTD_initCStream_advanced() :
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize,
+                          ZSTD_parameters params,
+                          unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingCDict() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
+/*! ZSTD_initCStream_usingCDict_advanced() :
+ *   This function is DEPRECATED, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                               const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize);
+
+/*! ZSTD_resetCStream() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ *  start a new frame, using same parameters from previous frame.
+ *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ *  Note that zcs must be init at least once before using ZSTD_resetCStream().
+ *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
+ *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
+ *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError())
+ *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
 
-/**
- * struct ZSTD_DStream - the zstd streaming decompression context
+
+typedef struct {
+    unsigned long long ingested;   /* nb input bytes read and buffered */
+    unsigned long long consumed;   /* nb input bytes actually compressed */
+    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
+    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
+    unsigned currentJobID;         /* MT only : latest started job nb */
+    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression() :
+ * tells how much data has been ingested (read from input)
+ * consumed (input actually compressed) and produced (output) for current frame.
+ * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Aggregates progression inside active worker threads.
  */
-typedef struct ZSTD_DStream_s ZSTD_DStream;
-/*===== ZSTD_DStream management functions =====*/
-/**
- * ZSTD_initDStream() - initialize a zstd streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
- *
- * Return:         The zstd streaming decompression context.
- */
-ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace,
-	size_t workspaceSize);
-/**
- * ZSTD_initDStream_usingDDict() - initialize streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @ddict:         The digested dictionary to use for decompression.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
- *
- * Return:         The zstd streaming decompression context.
- */
-ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize,
-	const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+
+/*! ZSTD_toFlushNow() :
+ *  Tell how many bytes are ready to be flushed immediately.
+ *  Useful for multithreading scenarios (nbWorkers >= 1).
+ *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
+ *  and check its output buffer.
+ * @return : amount of data stored in oldest job and ready to be flushed immediately.
+ *  if @return == 0, it means either :
+ *  + there is no active job (could be checked with ZSTD_frameProgression()), or
+ *  + oldest job is still actively compressing data,
+ *    but everything it has produced has also been flushed so far,
+ *    therefore flush speed is limited by production speed of oldest job
+ *    irrespective of the speed of concurrent (and newer) jobs.
+ */
+ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
 
-/*===== Streaming decompression functions =====*/
-/**
- * ZSTD_resetDStream() - reset the context using parameters from creation
- * @zds:   The zstd streaming decompression context to reset.
+
+/*=====   Advanced Streaming decompression functions  =====*/
+
+/*!
+ * This function is deprecated, and is equivalent to:
  *
- * Resets the context using the parameters from creation. Skips dictionary
- * loading, since it can be reused.
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
  *
- * Return: Zero or an error, which can be checked using ZSTD_isError().
+ * note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
  */
-size_t ZSTD_resetDStream(ZSTD_DStream *zds);
-/**
- * ZSTD_decompressStream() - streaming decompress some of input into output
- * @zds:    The zstd streaming decompression context.
- * @output: Destination buffer. `output.pos` is updated to indicate how much
- *          decompressed data was written.
- * @input:  Source buffer. `input.pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input.pos < input.size`, and it's up to the caller to present
- *          remaining data again.
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
+/*!
+ * This function is deprecated, and is equivalent to:
  *
- * The `input` and `output` buffers may be any size. Guaranteed to make some
- * forward progress if `input` and `output` are not empty.
- * ZSTD_decompressStream() will not consume the last byte of the frame until
- * the entire frame is flushed.
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, ddict);
  *
- * Return:  Returns 0 iff a frame is completely decoded and fully flushed.
- *          Otherwise returns a hint for the number of bytes to use as the input
- *          for the next function call or an error, which can be checked using
- *          ZSTD_isError(). The size hint will never load more than the frame.
+ * note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
  */
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output,
-	ZSTD_inBuffer *input);
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
 
-/**
- * ZSTD_DStreamInSize() - recommended size for the input buffer
- *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_DStreamInSize(void);
-/**
- * ZSTD_DStreamOutSize() - recommended size for the output buffer
+/*!
+ * This function is deprecated, and is equivalent to:
  *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete decompressed block.
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
  *
- * Return: The recommended size for the output buffer.
+ * re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
  */
-size_t ZSTD_DStreamOutSize(void);
+ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
 
 
-/* --- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
-#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
+/* *******************************************************************
+*  Buffer-less and synchronous inner streaming functions
+*
+*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+*  But it's also a complex one, with several restrictions, documented below.
+*  Prefer normal streaming API for an easier experience.
+********************************************************************* */
 
-#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
-#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+/*
+  Buffer-less streaming compression (synchronous mode)
+
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recover after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+*/
 
-#define ZSTD_WINDOWLOG_MAX_32  27
-#define ZSTD_WINDOWLOG_MAX_64  27
-#define ZSTD_WINDOWLOG_MAX \
-	((unsigned int)(sizeof(size_t) == 4 \
-		? ZSTD_WINDOWLOG_MAX_32 \
-		: ZSTD_WINDOWLOG_MAX_64))
-#define ZSTD_WINDOWLOG_MIN 10
-#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
-#define ZSTD_HASHLOG_MIN        6
-#define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CHAINLOG_MIN      ZSTD_HASHLOG_MIN
-#define ZSTD_HASHLOG3_MAX      17
-#define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN      1
-/* only for ZSTD_fast, other strategies are limited to 6 */
-#define ZSTD_SEARCHLENGTH_MAX   7
-/* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_SEARCHLENGTH_MIN   3
-#define ZSTD_TARGETLENGTH_MIN   4
-#define ZSTD_TARGETLENGTH_MAX 999
-
-/* for static allocation */
-#define ZSTD_FRAMEHEADERSIZE_MAX 18
-#define ZSTD_FRAMEHEADERSIZE_MIN  6
-#define ZSTD_frameHeaderSize_prefix 5
-#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
-#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
-/* magic number + skippable frame length */
-#define ZSTD_skippableHeaderSize 8
-
-
-/*-*************************************
- * Compressed size functions
- **************************************/
-
-/**
- * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
- * @src:     Source buffer. It should point to the start of a zstd encoded frame
- *           or a skippable frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           size of the frame.
- *
- * Return:   The compressed size of the frame pointed to by `src` or an error,
- *           which can be check with ZSTD_isError().
- *           Suitable to pass to ZSTD_decompress() or similar functions.
- */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Decompressed size functions
- **************************************/
-/**
- * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
- * @src:     It should point to the start of a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The frame content size stored in the frame header if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
- *           frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input.
- */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
-
-/**
- * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames
- * @src:     It should point to the start of a series of zstd encoded and/or
- *           skippable frames.
- * @srcSize: The exact size of the series of frames.
- *
- * If any zstd encoded frame in the series doesn't have the frame content size
- * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always
- * set when using ZSTD_compress(). The decompressed size can be very large.
- * If the source is untrusted, the decompressed size could be wrong or
- * intentionally modified. Always ensure the result fits within the
- * application's authorized limits. ZSTD_findDecompressedSize() handles multiple
- * frames, and so it must traverse the input to read each frame header. This is
- * efficient as most of the data is skipped, however it does mean that all frame
- * data must be present and valid.
- *
- * Return:   Decompressed size of all the data contained in the frames if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown.
- *           `ZSTD_CONTENTSIZE_ERROR` if an error occurred.
- */
-unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Advanced compression functions
- **************************************/
-/**
- * ZSTD_checkCParams() - ensure parameter values remain within authorized range
- * @cParams: The zstd compression parameters.
- *
- * Return:   Zero or an error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams);
-
-/**
- * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize
- * @srcSize:  Optionally the estimated source size, or zero if unknown.
- * @dictSize: Optionally the estimated dictionary size, or zero if unknown.
- *
- * Return:    The optimized parameters.
- */
-ZSTD_compressionParameters ZSTD_adjustCParams(
-	ZSTD_compressionParameters cParams, unsigned long long srcSize,
-	size_t dictSize);
-
-/*--- Advanced decompression functions ---*/
-
-/**
- * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame
- * @buffer: The source buffer to check.
- * @size:   The size of the source buffer, must be at least 4 bytes.
- *
- * Return: True iff the buffer starts with a zstd or skippable frame identifier.
- */
-unsigned int ZSTD_isFrame(const void *buffer, size_t size);
-
-/**
- * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary
- * @dict:     The dictionary buffer.
- * @dictSize: The size of the dictionary buffer.
- *
- * Return:    The dictionary id stored within the dictionary or 0 if the
- *            dictionary is not a zstd dictionary. If it returns 0 the
- *            dictionary can still be loaded as a content-only dictionary.
- */
-unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize);
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
 
-/**
- * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict
- * @ddict: The ddict to find the id of.
- *
- * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not
- *         a zstd dictionary. If it returns 0 `ddict` will be loaded as a
- *         content-only dictionary.
- */
-unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict);
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
-/**
- * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame
- * @src:     Source buffer. It must be a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The dictionary id required to decompress the frame stored within
- *           `src` or 0 if the dictionary id could not be decoded. It can return
- *           0 if the frame does not require a dictionary, the dictionary id
- *           wasn't stored in the frame, `src` is not a zstd frame, or `srcSize`
- *           is too small.
- */
-unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize);
 
-/**
- * struct ZSTD_frameParams - zstd frame parameters stored in the frame header
- * @frameContentSize: The frame content size, or 0 if not present.
- * @windowSize:       The window size, or 0 if the frame is a skippable frame.
- * @dictID:           The dictionary id, or 0 if not present.
- * @checksumFlag:     Whether a checksum was used.
- */
+/*
+  Buffer-less streaming decompression (synchronous mode)
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+ `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().
+
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
+  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
+  As a consequence, check that values remain within valid application range.
+  For example, do not allocate memory blindly, check that `windowSize` is within expectation.
+  Each application can set its own limits, depending on local restrictions.
+  For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
+  There are multiple ways to guarantee this condition.
+
+  The most memory efficient way is to use a round buffer of sufficient size.
+  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+  which can @return an error code if required value is too large for current system (in 32-bits mode).
+  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
+  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+  which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
+  At which point, decoding can resume from the beginning of the buffer.
+  Note that already decoded data stored in the buffer should be flushed before being overwritten.
+
+  There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
+
+  Finally, if you control the compression process, you can also ignore all buffer size rules,
+  as long as the encoder and decoder progress in "lock-step",
+  aka use exactly the same buffer sizes, break contiguity at the same place, etc.
+
+  Once buffers are setup, start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+ @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by decompressor.
+  The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
+  For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
+*/
+
+/*=====   Buffer-less streaming decompression functions  =====*/
+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
 typedef struct {
-	unsigned long long frameContentSize;
-	unsigned int windowSize;
-	unsigned int dictID;
-	unsigned int checksumFlag;
-} ZSTD_frameParams;
-
-/**
- * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame
- * @fparamsPtr: On success the frame parameters are written here.
- * @src:        The source buffer. It must point to a zstd or skippable frame.
- * @srcSize:    The size of the source buffer. `ZSTD_frameHeaderSize_max` is
- *              always large enough to succeed.
- *
- * Return:      0 on success. If more data is required it returns how many bytes
- *              must be provided to make forward progress. Otherwise it returns
- *              an error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src,
-	size_t srcSize);
-
-/*-*****************************************************************************
- * Buffer-less and synchronous inner streaming functions
- *
- * This is an advanced API, giving full control over buffer management, for
- * users which need direct control over memory.
- * But it's also a complex one, with many restrictions (documented below).
- * Prefer using normal streaming API for an easier experience
- ******************************************************************************/
+    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
+    unsigned blockSizeMax;
+    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+    unsigned headerSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
+
+/*! ZSTD_getFrameHeader() :
+ *  decode Frame Header, or requires larger `srcSize`.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
+/*! ZSTD_getFrameHeader_advanced() :
+ *  same as ZSTD_getFrameHeader(),
+ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/* misc */
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+
+/* ============================ */
+/*       Block level API       */
+/* ============================ */
+
+/*!
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to init context before starting
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+        Doing so would mess up with statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
+*/
 
-/*-*****************************************************************************
- * Buffer-less streaming compression (synchronous mode)
- *
- * A ZSTD_CCtx object is required to track streaming operations.
- * Use ZSTD_initCCtx() to initialize a context.
- * ZSTD_CCtx object can be re-used multiple times within successive compression
- * operations.
- *
- * Start by initializing a context.
- * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary
- * compression,
- * or ZSTD_compressBegin_advanced(), for finer parameter control.
- * It's also possible to duplicate a reference context which has already been
- * initialized, using ZSTD_copyCCtx()
- *
- * Then, consume your input using ZSTD_compressContinue().
- * There are some important considerations to keep in mind when using this
- * advanced function :
- * - ZSTD_compressContinue() has no internal buffer. It uses externally provided
- *   buffer only.
- * - Interface is synchronous : input is consumed entirely and produce 1+
- *   (or more) compressed blocks.
- * - Caller must ensure there is enough space in `dst` to store compressed data
- *   under worst case scenario. Worst case evaluation is provided by
- *   ZSTD_compressBound().
- *   ZSTD_compressContinue() doesn't guarantee recover after a failed
- *   compression.
- * - ZSTD_compressContinue() presumes prior input ***is still accessible and
- *   unmodified*** (up to maximum distance size, see WindowLog).
- *   It remembers all previous contiguous blocks, plus one separated memory
- *   segment (which can itself consists of multiple contiguous blocks)
- * - ZSTD_compressContinue() detects that prior input has been overwritten when
- *   `src` buffer overlaps. In which case, it will "discard" the relevant memory
- *   section from its history.
- *
- * Finish a frame with ZSTD_compressEnd(), which will write the last block(s)
- * and optional checksum. It's possible to use srcSize==0, in which case, it
- * will write a final empty block to end the frame. Without last block mark,
- * frames will be considered unfinished (corrupted) by decoders.
- *
- * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new
- * frame.
- ******************************************************************************/
+/*=====   Raw zstd block functions  =====*/
+ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
 
-/*=====   Buffer-less streaming compression functions  =====*/
-size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel);
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict,
-	size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict,
-	size_t dictSize, ZSTD_parameters params,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict,
-	unsigned long long pledgedSrcSize);
-size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-
-
-
-/*-*****************************************************************************
- * Buffer-less streaming decompression (synchronous mode)
- *
- * A ZSTD_DCtx object is required to track streaming operations.
- * Use ZSTD_initDCtx() to initialize a context.
- * A ZSTD_DCtx object can be re-used multiple times.
- *
- * First typical operation is to retrieve frame parameters, using
- * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide
- * important information to correctly decode the frame, such as the minimum
- * rolling buffer size to allocate to decompress data (`windowSize`), and the
- * dictionary ID used.
- * Note: content size is optional, it may not be present. 0 means unknown.
- * Note that these values could be wrong, either because of data malformation,
- * or because an attacker is spoofing deliberate false information. As a
- * consequence, check that values remain within valid application range,
- * especially `windowSize`, before allocation. Each application can set its own
- * limit, depending on local restrictions. For extended interoperability, it is
- * recommended to support at least 8 MB.
- * Frame parameters are extracted from the beginning of the compressed frame.
- * Data fragment must be large enough to ensure successful decoding, typically
- * `ZSTD_frameHeaderSize_max` bytes.
- * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled.
- *        >0: `srcSize` is too small, provide at least this many bytes.
- *        errorCode, which can be tested using ZSTD_isError().
- *
- * Start decompression, with ZSTD_decompressBegin() or
- * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared
- * context, using ZSTD_copyDCtx().
- *
- * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue()
- * alternatively.
- * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize'
- * to ZSTD_decompressContinue().
- * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will
- * fail.
- *
- * The result of ZSTD_decompressContinue() is the number of bytes regenerated
- * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an
- * error; it just means ZSTD_decompressContinue() has decoded some metadata
- * item. It can also be an error code, which can be tested with ZSTD_isError().
- *
- * ZSTD_decompressContinue() needs previous data blocks during decompression, up
- * to `windowSize`. They should preferably be located contiguously, prior to
- * current block. Alternatively, a round buffer of sufficient size is also
- * possible. Sufficient size is determined by frame parameters.
- * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't
- * follow each other, make sure that either the compressor breaks contiguity at
- * the same place, or that previous contiguous segment is large enough to
- * properly handle maximum back-reference.
- *
- * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
- * Context can then be reset to start a new decompression.
- *
- * Note: it's possible to know if next input to present is a header or a block,
- * using ZSTD_nextInputType(). This information is not required to properly
- * decode a frame.
- *
- * == Special case: skippable frames ==
- *
- * Skippable frames allow integration of user-defined data into a flow of
- * concatenated frames. Skippable frames will be ignored (skipped) by a
- * decompressor. The format of skippable frames is as follows:
- * a) Skippable frame ID - 4 Bytes, Little endian format, any value from
- *    0x184D2A50 to 0x184D2A5F
- * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
- * c) Frame Content - any content (User Data) of length equal to Frame Size
- * For skippable frames ZSTD_decompressContinue() always returns 0.
- * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0
- * what means that a frame is skippable.
- * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might
- *       actually be a zstd encoded frame with no content. For purposes of
- *       decompression, it is valid in both cases to skip the frame using
- *       ZSTD_findFrameCompressedSize() to find its size in bytes.
- * It also returns frame size as fparamsPtr->frameContentSize.
- ******************************************************************************/
 
-/*=====   Buffer-less streaming decompression functions  =====*/
-size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
-	size_t dictSize);
-void   ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx);
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-typedef enum {
-	ZSTDnit_frameHeader,
-	ZSTDnit_blockHeader,
-	ZSTDnit_block,
-	ZSTDnit_lastBlock,
-	ZSTDnit_checksum,
-	ZSTDnit_skippableFrame
-} ZSTD_nextInputType_e;
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx);
-
-/*-*****************************************************************************
- * Block functions
- *
- * Block functions produce and decode raw zstd blocks, without frame metadata.
- * Frame metadata cost is typically ~18 bytes, which can be non-negligible for
- * very small blocks (< 100 bytes). User will have to take in charge required
- * information to regenerate data, such as compressed and content sizes.
- *
- * A few rules to respect:
- * - Compressing and decompressing require a context structure
- *   + Use ZSTD_initCCtx() and ZSTD_initDCtx()
- * - It is necessary to init context before starting
- *   + compression : ZSTD_compressBegin()
- *   + decompression : ZSTD_decompressBegin()
- *   + variants _usingDict() are also allowed
- *   + copyCCtx() and copyDCtx() work too
- * - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
- *   + If you need to compress more, cut data into multiple blocks
- *   + Consider using the regular ZSTD_compress() instead, as frame metadata
- *     costs become negligible when source size is large.
- * - When a block is considered not compressible enough, ZSTD_compressBlock()
- *   result will be zero. In which case, nothing is produced into `dst`.
- *   + User must test for such outcome and deal directly with uncompressed data
- *   + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!!
- *   + In case of multiple successive blocks, decoder must be informed of
- *     uncompressed block existence to follow proper history. Use
- *     ZSTD_insertBlock() in such a case.
- ******************************************************************************/
+#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
 
-/* Define for static allocation */
-#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)
-/*=====   Raw zstd block functions  =====*/
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx);
-size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-	const void *src, size_t srcSize);
-size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart,
-	size_t blockSize);
-
-#endif  /* ZSTD_H */
diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
index f5d778e7e5c7..65218ec5b8f2 100644
--- a/lib/zstd/Makefile
+++ b/lib/zstd/Makefile
@@ -1,10 +1,46 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
 obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
 obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
 
 ccflags-y += -O3
 
-zstd_compress-y := fse_compress.o huf_compress.o compress.o \
-		   entropy_common.o fse_decompress.o zstd_common.o
-zstd_decompress-y := huf_decompress.o decompress.o \
-		     entropy_common.o fse_decompress.o zstd_common.o
+zstd_compress-y := \
+		zstd_compress_module.o \
+		common/debug.o \
+		common/entropy_common.o \
+		common/error_private.o \
+		common/fse_decompress.o \
+		common/zstd_common.o \
+		compress/fse_compress.o \
+		compress/hist.o \
+		compress/huf_compress.o \
+		compress/zstd_compress.o \
+		compress/zstd_compress_literals.o \
+		compress/zstd_compress_sequences.o \
+		compress/zstd_compress_superblock.o \
+		compress/zstd_double_fast.o \
+		compress/zstd_fast.o \
+		compress/zstd_lazy.o \
+		compress/zstd_ldm.o \
+		compress/zstd_opt.o \
+
+zstd_decompress-y := \
+		zstd_decompress_module.o \
+		common/debug.o \
+		common/entropy_common.o \
+		common/error_private.o \
+		common/fse_decompress.o \
+		common/zstd_common.o \
+		decompress/huf_decompress.o \
+		decompress/zstd_ddict.o \
+		decompress/zstd_decompress.o \
+		decompress/zstd_decompress_block.o \
diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h
deleted file mode 100644
index 5d6343c1a909..000000000000
--- a/lib/zstd/bitstream.h
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * bitstream
- * Part of FSE library
- * header file (to include)
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-/*
-*  This API consists of small unitary functions, which must be inlined for best performance.
-*  Since link-time-optimization is not available for all compilers,
-*  these functions are defined into a .h to be included.
-*/
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include "error_private.h" /* error codes and messages */
-#include "mem.h"	   /* unaligned access routines */
-
-/*=========================================
-*  Target specific
-=========================================*/
-#define STREAM_ACCUMULATOR_MIN_32 25
-#define STREAM_ACCUMULATOR_MIN_64 57
-#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
-
-/*-******************************************
-*  bitStream encoding API (write forward)
-********************************************/
-/* bitStream can mix input from multiple sources.
-*  A critical property of these streams is that they encode and decode in **reverse** direction.
-*  So the first bit sequence you add will be the last to be read, like a LIFO stack.
-*/
-typedef struct {
-	size_t bitContainer;
-	int bitPos;
-	char *startPtr;
-	char *ptr;
-	char *endPtr;
-} BIT_CStream_t;
-
-ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity);
-ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
-ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC);
-ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC);
-
-/* Start with initCStream, providing the size of buffer to write into.
-*  bitStream will never write outside of this buffer.
-*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
-*
-*  bits are first added to a local register.
-*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
-*  Writing data into memory is an explicit operation, performed by the flushBits function.
-*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
-*  After a flushBits, a maximum of 7 bits might still be stored into local register.
-*
-*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
-*
-*  Last operation is to close the bitStream.
-*  The function returns the final size of CStream in bytes.
-*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
-*/
-
-/*-********************************************
-*  bitStream decoding API (read backward)
-**********************************************/
-typedef struct {
-	size_t bitContainer;
-	unsigned bitsConsumed;
-	const char *ptr;
-	const char *start;
-} BIT_DStream_t;
-
-typedef enum {
-	BIT_DStream_unfinished = 0,
-	BIT_DStream_endOfBuffer = 1,
-	BIT_DStream_completed = 2,
-	BIT_DStream_overflow = 3
-} BIT_DStream_status; /* result of BIT_reloadDStream() */
-/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize);
-ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits);
-ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD);
-ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD);
-
-/* Start by invoking BIT_initDStream().
-*  A chunk of the bitStream is then stored into a local register.
-*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-*  You can then retrieve bitFields stored into the local register, **in reverse order**.
-*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
-*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
-*  Otherwise, it can be less than that, so proceed accordingly.
-*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
-*/
-
-/*-****************************************
-*  unsafe API
-******************************************/
-ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
-/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
-
-ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC);
-/* unsafe version; does not check buffer overflow */
-
-ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-/*-**************************************************************
-*  Internal functions
-****************************************************************/
-ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); }
-
-/*=====    Local Constants   =====*/
-static const unsigned BIT_mask[] = {0,       1,       3,       7,	0xF,      0x1F,     0x3F,     0x7F,      0xFF,
-				    0x1FF,   0x3FF,   0x7FF,   0xFFF,    0x1FFF,   0x3FFF,   0x7FFF,   0xFFFF,    0x1FFFF,
-				    0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */
-
-/*-**************************************************************
-*  bitStream encoding
-****************************************************************/
-/*! BIT_initCStream() :
- *  `dstCapacity` must be > sizeof(void*)
- *  @return : 0 if success,
-			  otherwise an error code (can be tested using ERR_isError() ) */
-ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity)
-{
-	bitC->bitContainer = 0;
-	bitC->bitPos = 0;
-	bitC->startPtr = (char *)startPtr;
-	bitC->ptr = bitC->startPtr;
-	bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
-	if (dstCapacity <= sizeof(bitC->ptr))
-		return ERROR(dstSize_tooSmall);
-	return 0;
-}
-
-/*! BIT_addBits() :
-	can add up to 26 bits into `bitC`.
-	Does not check for register overflow ! */
-ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
-{
-	bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
-	bitC->bitPos += nbBits;
-}
-
-/*! BIT_addBitsFast() :
- *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
-ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
-{
-	bitC->bitContainer |= value << bitC->bitPos;
-	bitC->bitPos += nbBits;
-}
-
-/*! BIT_flushBitsFast() :
- *  unsafe version; does not check buffer overflow */
-ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC)
-{
-	size_t const nbBytes = bitC->bitPos >> 3;
-	ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
-	bitC->ptr += nbBytes;
-	bitC->bitPos &= 7;
-	bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_flushBits() :
- *  safe version; check for buffer overflow, and prevents it.
- *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
-ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC)
-{
-	size_t const nbBytes = bitC->bitPos >> 3;
-	ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
-	bitC->ptr += nbBytes;
-	if (bitC->ptr > bitC->endPtr)
-		bitC->ptr = bitC->endPtr;
-	bitC->bitPos &= 7;
-	bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_closeCStream() :
- *  @return : size of CStream, in bytes,
-			  or 0 if it could not fit into dstBuffer */
-ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC)
-{
-	BIT_addBitsFast(bitC, 1, 1); /* endMark */
-	BIT_flushBits(bitC);
-
-	if (bitC->ptr >= bitC->endPtr)
-		return 0; /* doesn't fit within authorized budget : cancel */
-
-	return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
-}
-
-/*-********************************************************
-* bitStream decoding
-**********************************************************/
-/*! BIT_initDStream() :
-*   Initialize a BIT_DStream_t.
-*   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
-*   `srcSize` must be the *exact* size of the bitStream, in bytes.
-*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize)
-{
-	if (srcSize < 1) {
-		memset(bitD, 0, sizeof(*bitD));
-		return ERROR(srcSize_wrong);
-	}
-
-	if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
-		bitD->start = (const char *)srcBuffer;
-		bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer);
-		bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
-		{
-			BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
-			bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
-			if (lastByte == 0)
-				return ERROR(GENERIC); /* endMark not present */
-		}
-	} else {
-		bitD->start = (const char *)srcBuffer;
-		bitD->ptr = bitD->start;
-		bitD->bitContainer = *(const BYTE *)(bitD->start);
-		switch (srcSize) {
-		case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16);
-			fallthrough;
-		case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24);
-			fallthrough;
-		case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32);
-			fallthrough;
-		case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24;
-			fallthrough;
-		case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16;
-			fallthrough;
-		case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8;
-			fallthrough;
-		default:;
-		}
-		{
-			BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
-			bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-			if (lastByte == 0)
-				return ERROR(GENERIC); /* endMark not present */
-		}
-		bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8;
-	}
-
-	return srcSize;
-}
-
-ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; }
-
-ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; }
-
-ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; }
-
-/*! BIT_lookBits() :
- *  Provides next n bits from local register.
- *  local register is not modified.
- *  On 32-bits, maxNbBits==24.
- *  On 64-bits, maxNbBits==56.
- *  @return : value extracted
- */
-ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits)
-{
-	U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
-	return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask);
-}
-
-/*! BIT_lookBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
-ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits)
-{
-	U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
-	return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask);
-}
-
-ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; }
-
-/*! BIT_readBits() :
- *  Read (consume) next n bits from local register and update.
- *  Pay attention to not read more than nbBits contained into local register.
- *  @return : extracted value.
- */
-ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits)
-{
-	size_t const value = BIT_lookBits(bitD, nbBits);
-	BIT_skipBits(bitD, nbBits);
-	return value;
-}
-
-/*! BIT_readBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
-ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits)
-{
-	size_t const value = BIT_lookBitsFast(bitD, nbBits);
-	BIT_skipBits(bitD, nbBits);
-	return value;
-}
-
-/*! BIT_reloadDStream() :
-*   Refill `bitD` from buffer previously set in BIT_initDStream() .
-*   This function is safe, it guarantees it will not read beyond src buffer.
-*   @return : status of `BIT_DStream_t` internal register.
-			  if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
-ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD)
-{
-	if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */
-		return BIT_DStream_overflow;
-
-	if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
-		bitD->ptr -= bitD->bitsConsumed >> 3;
-		bitD->bitsConsumed &= 7;
-		bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
-		return BIT_DStream_unfinished;
-	}
-	if (bitD->ptr == bitD->start) {
-		if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8)
-			return BIT_DStream_endOfBuffer;
-		return BIT_DStream_completed;
-	}
-	{
-		U32 nbBytes = bitD->bitsConsumed >> 3;
-		BIT_DStream_status result = BIT_DStream_unfinished;
-		if (bitD->ptr - nbBytes < bitD->start) {
-			nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
-			result = BIT_DStream_endOfBuffer;
-		}
-		bitD->ptr -= nbBytes;
-		bitD->bitsConsumed -= nbBytes * 8;
-		bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
-		return result;
-	}
-}
-
-/*! BIT_endOfDStream() :
-*   @return Tells if DStream has exactly reached its end (all bits consumed).
-*/
-ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream)
-{
-	return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8));
-}
-
-#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
new file mode 100644
index 000000000000..28248abe8612
--- /dev/null
+++ b/lib/zstd/common/bitstream.h
@@ -0,0 +1,437 @@
+/* ******************************************************************
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include "mem.h"            /* unaligned access routines */
+#include "compiler.h"       /* UNLIKELY() */
+#include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
+#include "error_private.h"  /* error codes and messages */
+
+
+/*=========================================
+*  Target specific
+=========================================*/
+
+#define STREAM_ACCUMULATOR_MIN_32  25
+#define STREAM_ACCUMULATOR_MIN_64  57
+#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+
+/*-******************************************
+*  bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
+typedef struct {
+    size_t bitContainer;
+    unsigned bitPos;
+    char*  startPtr;
+    char*  ptr;
+    char*  endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+*  bitStream will never write outside of this buffer.
+*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
+*
+*  bits are first added to a local register.
+*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+*  Writing data into memory is an explicit operation, performed by the flushBits function.
+*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+*  After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+*  Last operation is to close the bitStream.
+*  The function returns the final size of CStream in bytes.
+*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct {
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+    const char* limitPtr;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (U32 val)
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+        return __builtin_clz (val) ^ 31;
+#   else   /* Software version */
+        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
+                                                 11, 14, 16, 18, 22, 25,  3, 30,
+                                                  8, 12, 20, 28, 15, 17, 24,  7,
+                                                 19, 27, 23,  6, 26,  5,  4, 31 };
+        U32 v = val;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+#   endif
+    }
+}
+
+/*=====    Local Constants   =====*/
+static const unsigned BIT_mask[] = {
+    0,          1,         3,         7,         0xF,       0x1F,
+    0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
+    0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
+    0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
+    0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
+    0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
+#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
+
+/*-**************************************************************
+*  bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ *  `dstCapacity` must be > sizeof(size_t)
+ *  @return : 0 if success,
+ *            otherwise an error code (can be tested using ERR_isError()) */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+                                  void* startPtr, size_t dstCapacity)
+{
+    bitC->bitContainer = 0;
+    bitC->bitPos = 0;
+    bitC->startPtr = (char*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+    if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+/*! BIT_addBits() :
+ *  can add up to 31 bits into `bitC`.
+ *  Note : does not check for register overflow ! */
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+                            size_t value, unsigned nbBits)
+{
+    DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
+    assert(nbBits < BIT_MASK_SIZE);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() :
+ *  works only if `value` is _clean_,
+ *  meaning all high bits above nbBits are 0 */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+                                size_t value, unsigned nbBits)
+{
+    assert((value>>nbBits) == 0);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    bitC->bitContainer |= value << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ *  assumption : bitContainer has not overflowed
+ *  unsafe version; does not check buffer overflow */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+    size_t const nbBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_flushBits() :
+ *  assumption : bitContainer has not overflowed
+ *  safe version; check for buffer overflow, and prevents it.
+ *  note : does not signal buffer overflow.
+ *  overflow will be revealed later on using BIT_closeCStream() */
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+    size_t const nbBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_closeCStream() :
+ *  @return : size of CStream, in bytes,
+ *            or 0 if it could not fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
+    BIT_flushBits(bitC);
+    if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+    return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
+}
+
+
+/*-********************************************************
+*  bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+ *  Initialize a BIT_DStream_t.
+ * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+ * `srcSize` must be the *exact* size of the bitStream, in bytes.
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected
+ */
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    bitD->start = (const char*)srcBuffer;
+    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
+    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+    } else {
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+                ZSTD_FALLTHROUGH;
+
+        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+                ZSTD_FALLTHROUGH;
+
+        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+                ZSTD_FALLTHROUGH;
+
+        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+                ZSTD_FALLTHROUGH;
+
+        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+                ZSTD_FALLTHROUGH;
+
+        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+                ZSTD_FALLTHROUGH;
+
+        default: break;
+        }
+        {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+            if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
+        }
+        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+{
+    return bitContainer >> start;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+{
+    U32 const regMask = sizeof(bitContainer)*8 - 1;
+    /* if start > regMask, bitstream is corrupted, and result is undefined */
+    assert(nbBits < BIT_MASK_SIZE);
+    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+    assert(nbBits < BIT_MASK_SIZE);
+    return bitContainer & BIT_mask[nbBits];
+}
+
+/*! BIT_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ * @return : value extracted */
+MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
+{
+    /* arbitrate between double-shift and shift+mask */
+#if 1
+    /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
+     * bitstream is likely corrupted, and result is undefined */
+    return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
+#else
+    /* this code path is slower on my os-x laptop */
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
+#endif
+}
+
+/*! BIT_lookBitsFast() :
+ *  unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    assert(nbBits >= 1);
+    return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
+}
+
+MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*! BIT_readBits() :
+ *  Read (consume) next n bits from local register and update.
+ *  Pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value. */
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BIT_readBitsFast() :
+ *  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const value = BIT_lookBitsFast(bitD, nbBits);
+    assert(nbBits >= 1);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BIT_reloadDStreamFast() :
+ *  Similar to BIT_reloadDStream(), but with two differences:
+ *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
+ *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
+ *     point you must use BIT_reloadDStream() to reload.
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+{
+    if (UNLIKELY(bitD->ptr < bitD->limitPtr))
+        return BIT_DStream_overflow;
+    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+    bitD->ptr -= bitD->bitsConsumed >> 3;
+    bitD->bitsConsumed &= 7;
+    bitD->bitContainer = MEM_readLEST(bitD->ptr);
+    return BIT_DStream_unfinished;
+}
+
+/*! BIT_reloadDStream() :
+ *  Refill `bitD` from buffer previously set in BIT_initDStream() .
+ *  This function is safe, it guarantees it will not read beyond src buffer.
+ * @return : status of `BIT_DStream_t` internal register.
+ *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->limitPtr) {
+        return BIT_reloadDStreamFast(bitD);
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    /* start < ptr < limitPtr */
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream() :
+ * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
+ */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+
+#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
new file mode 100644
index 000000000000..a1a051e4bce6
--- /dev/null
+++ b/lib/zstd/common/compiler.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+/* force inlining */
+
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#  define INLINE_KEYWORD inline
+#else
+#  define INLINE_KEYWORD
+#endif
+
+#define FORCE_INLINE_ATTR __attribute__((always_inline))
+
+
+/*
+  On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
+  This explictly marks such functions as __cdecl so that the code will still compile
+  if a CC other than __cdecl has been made the default.
+*/
+#define WIN_CDECL
+
+/*
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/*
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compilers
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
+ * attribute.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+#  define HINT_INLINE static INLINE_KEYWORD
+#else
+#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#endif
+
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#define UNUSED_ATTR __attribute__((unused))
+
+/* force no inlining */
+#define FORCE_NOINLINE static __attribute__((__noinline__))
+
+
+/* target attribute */
+#ifndef __has_attribute
+  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
+#endif
+#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+  #if ((defined(__clang__) && __has_attribute(__target__)) \
+      || (defined(__GNUC__) \
+          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+      && (defined(__x86_64__) || defined(_M_X86)) \
+      && !defined(__BMI2__)
+  #  define DYNAMIC_BMI2 1
+  #else
+  #  define DYNAMIC_BMI2 0
+  #endif
+#endif
+
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro */
+#if ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#  define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#  define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#elif defined(__aarch64__)
+#  define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+#  define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+#else
+#  define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
+#  define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+#endif  /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s)  {            \
+    const char* const _ptr = (const char*)(p);  \
+    size_t const _size = (size_t)(s);     \
+    size_t _pos;                          \
+    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
+        PREFETCH_L2(_ptr + _pos);         \
+    }                                     \
+}
+
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#  else
+#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+#  endif
+#else
+#  define DONT_VECTORIZE
+#endif
+
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+
+/* disable warnings */
+
+/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
+
+
+/* compat. with non-clang compilers */
+#ifndef __has_builtin
+#  define __has_builtin(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_feature
+#  define __has_feature(x) 0
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define ZSTD_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+/* Only use C++ attributes in C++. Some compilers report support for C++
+ * attributes when compiling with C.
+ */
+#define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
+
+/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
+ * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * - Else: __attribute__((__fallthrough__))
+ */
+#define ZSTD_FALLTHROUGH fallthrough
+
+/* detects whether we are being compiled under msan */
+
+
+/* detects whether we are being compiled under asan */
+
+
+#endif /* ZSTD_COMPILER_H */
diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h
new file mode 100644
index 000000000000..0db7b42407ee
--- /dev/null
+++ b/lib/zstd/common/cpu.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/*
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include "mem.h"
+
+
+typedef struct {
+    U32 f1c;
+    U32 f1d;
+    U32 f7b;
+    U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+    U32 f1c = 0;
+    U32 f1d = 0;
+    U32 f7b = 0;
+    U32 f7c = 0;
+#if defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+    /* The following block like the normal cpuid branch below, but gcc
+     * reserves ebx for use of its pic register so we must specially
+     * handle the save and restore to avoid clobbering the register
+     */
+    U32 n;
+    __asm__(
+        "pushl %%ebx\n\t"
+        "cpuid\n\t"
+        "popl %%ebx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "popl %%ebx\n\t"
+          : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+          : "a"(1));
+    }
+    if (n >= 7) {
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "movl %%ebx, %%eax\n\t"
+          "popl %%ebx"
+          : "=a"(f7b), "=c"(f7c)
+          : "a"(7), "c"(0)
+          : "edx");
+    }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+    U32 n;
+    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+    }
+    if (n >= 7) {
+      U32 f7a;
+      __asm__("cpuid"
+              : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+              : "a"(7), "c"(0)
+              : "edx");
+    }
+#endif
+    {
+        ZSTD_cpuid_t cpuid;
+        cpuid.f1c = f1c;
+        cpuid.f1d = f1d;
+        cpuid.f7b = f7b;
+        cpuid.f7c = f7c;
+        return cpuid;
+    }
+}
+
+#define X(name, r, bit)                                                        \
+  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
+    return ((cpuid.r) & (1U << bit)) != 0;                                     \
+  }
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)
+  C(sse3, 0)
+  C(pclmuldq, 1)
+  C(dtes64, 2)
+  C(monitor, 3)
+  C(dscpl, 4)
+  C(vmx, 5)
+  C(smx, 6)
+  C(eist, 7)
+  C(tm2, 8)
+  C(ssse3, 9)
+  C(cnxtid, 10)
+  C(fma, 12)
+  C(cx16, 13)
+  C(xtpr, 14)
+  C(pdcm, 15)
+  C(pcid, 17)
+  C(dca, 18)
+  C(sse41, 19)
+  C(sse42, 20)
+  C(x2apic, 21)
+  C(movbe, 22)
+  C(popcnt, 23)
+  C(tscdeadline, 24)
+  C(aes, 25)
+  C(xsave, 26)
+  C(osxsave, 27)
+  C(avx, 28)
+  C(f16c, 29)
+  C(rdrand, 30)
+#undef C
+#define D(name, bit) X(name, f1d, bit)
+  D(fpu, 0)
+  D(vme, 1)
+  D(de, 2)
+  D(pse, 3)
+  D(tsc, 4)
+  D(msr, 5)
+  D(pae, 6)
+  D(mce, 7)
+  D(cx8, 8)
+  D(apic, 9)
+  D(sep, 11)
+  D(mtrr, 12)
+  D(pge, 13)
+  D(mca, 14)
+  D(cmov, 15)
+  D(pat, 16)
+  D(pse36, 17)
+  D(psn, 18)
+  D(clfsh, 19)
+  D(ds, 21)
+  D(acpi, 22)
+  D(mmx, 23)
+  D(fxsr, 24)
+  D(sse, 25)
+  D(sse2, 26)
+  D(ss, 27)
+  D(htt, 28)
+  D(tm, 29)
+  D(pbe, 31)
+#undef D
+
+/* cpuid(7): Extended Features. */
+#define B(name, bit) X(name, f7b, bit)
+  B(bmi1, 3)
+  B(hle, 4)
+  B(avx2, 5)
+  B(smep, 7)
+  B(bmi2, 8)
+  B(erms, 9)
+  B(invpcid, 10)
+  B(rtm, 11)
+  B(mpx, 14)
+  B(avx512f, 16)
+  B(avx512dq, 17)
+  B(rdseed, 18)
+  B(adx, 19)
+  B(smap, 20)
+  B(avx512ifma, 21)
+  B(pcommit, 22)
+  B(clflushopt, 23)
+  B(clwb, 24)
+  B(avx512pf, 26)
+  B(avx512er, 27)
+  B(avx512cd, 28)
+  B(sha, 29)
+  B(avx512bw, 30)
+  B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c, bit)
+  C(prefetchwt1, 0)
+  C(avx512vbmi, 1)
+#undef C
+
+#undef X
+
+#endif /* ZSTD_COMMON_CPU_H */
diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c
new file mode 100644
index 000000000000..bb863c9ea616
--- /dev/null
+++ b/lib/zstd/common/debug.c
@@ -0,0 +1,24 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * This module only hosts one global variable
+ * which can be used to dynamically influence the verbosity of traces,
+ * such as DEBUGLOG and RAWLOG
+ */
+
+#include "debug.h"
+
+int g_debuglevel = DEBUGLEVEL;
diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h
new file mode 100644
index 000000000000..6dd88d1fbd02
--- /dev/null
+++ b/lib/zstd/common/debug.h
@@ -0,0 +1,101 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * The purpose of this header is to enable debug functions.
+ * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
+ * and DEBUG_STATIC_ASSERT() for compile-time.
+ *
+ * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
+ *
+ * Level 1 enables assert() only.
+ * Starting level 2, traces can be generated and pushed to stderr.
+ * The higher the level, the more verbose the traces.
+ *
+ * It's possible to dynamically adjust level using variable g_debug_level,
+ * which is only declared if DEBUGLEVEL>=2,
+ * and is a global variable, not multi-thread protected (use with care)
+ */
+
+#ifndef DEBUG_H_12987983217
+#define DEBUG_H_12987983217
+
+
+
+/* static assert is triggered at compile time, leaving no runtime artefact.
+ * static assert only works with compile-time constants.
+ * Also, this variant can only be used inside a function. */
+#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
+
+
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number. */
+#ifndef DEBUGLEVEL
+#  define DEBUGLEVEL 0
+#endif
+
+
+/* recommended values for DEBUGLEVEL :
+ * 0 : release mode, no debug, all run-time checks disabled
+ * 1 : enables assert() only, no display
+ * 2 : reserved, for currently active debug path
+ * 3 : events once per object lifetime (CCtx, CDict, etc.)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (verbose)
+ * 7+: events at every position (*very* verbose)
+ *
+ * It's generally inconvenient to output traces > 5.
+ * In which case, it's possible to selectively trigger high verbosity levels
+ * by modifying g_debug_level.
+ */
+
+#if (DEBUGLEVEL>=1)
+#  define ZSTD_DEPS_NEED_ASSERT
+#  include "zstd_deps.h"
+#else
+#  ifndef assert   /* assert may be already defined, due to prior #include <assert.h> */
+#    define assert(condition) ((void)0)   /* disable assert (default) */
+#  endif
+#endif
+
+#if (DEBUGLEVEL>=2)
+#  define ZSTD_DEPS_NEED_IO
+#  include "zstd_deps.h"
+extern int g_debuglevel; /* the variable is only declared,
+                            it actually lives in debug.c,
+                            and is shared by the whole process.
+                            It's not thread-safe.
+                            It's useful when enabling very verbose levels
+                            on selective conditions (such as position in src) */
+
+#  define RAWLOG(l, ...) {                                       \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
+            }   }
+#  define DEBUGLOG(l, ...) {                                     \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+                    ZSTD_DEBUG_PRINT(" \n");                     \
+            }   }
+#else
+#  define RAWLOG(l, ...)      {}    /* disabled */
+#  define DEBUGLOG(l, ...)    {}    /* disabled */
+#endif
+
+
+
+#endif /* DEBUG_H_12987983217 */
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
new file mode 100644
index 000000000000..53b47a2b52ff
--- /dev/null
+++ b/lib/zstd/common/entropy_common.c
@@ -0,0 +1,357 @@
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* *************************************
+*  Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h"       /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
+#include "huf.h"
+
+
+/*===   Version   ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*===   Error Management   ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static U32 FSE_ctz(U32 val)
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return __builtin_ctz(val);
+#   else   /* Software version */
+        U32 count = 0;
+        while ((val & 1) == 0) {
+            val >>= 1;
+            ++count;
+        }
+        return count;
+#   endif
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                           const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    unsigned const maxSV1 = *maxSVPtr + 1;
+    int previous0 = 0;
+
+    if (hbSize < 8) {
+        /* This function only works when hbSize >= 8 */
+        char buffer[8] = {0};
+        ZSTD_memcpy(buffer, headerBuffer, hbSize);
+        {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+                                                    buffer, sizeof(buffer));
+            if (FSE_isError(countSize)) return countSize;
+            if (countSize > hbSize) return ERROR(corruption_detected);
+            return countSize;
+    }   }
+    assert(hbSize >= 8);
+
+    /* init */
+    ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    for (;;) {
+        if (previous0) {
+            /* Count the number of repeats. Each time the
+             * 2-bit repeat code is 0b11 there is another
+             * repeat.
+             * Avoid UB by setting the high bit to 1.
+             */
+            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            while (repeats >= 12) {
+                charnum += 3 * 12;
+                if (LIKELY(ip <= iend-7)) {
+                    ip += 3;
+                } else {
+                    bitCount -= (int)(8 * (iend - 7 - ip));
+                    bitCount &= 31;
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> bitCount;
+                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            }
+            charnum += 3 * repeats;
+            bitStream >>= 2 * repeats;
+            bitCount += 2 * repeats;
+
+            /* Add the final repeat which isn't 0b11. */
+            assert((bitStream & 3) < 3);
+            charnum += bitStream & 3;
+            bitCount += 2;
+
+            /* This is an error, but break and return an error
+             * at the end, because returning out of a loop makes
+             * it harder for the compiler to optimize.
+             */
+            if (charnum >= maxSV1) break;
+
+            /* We don't need to set the normalized count to 0
+             * because we already memset the whole buffer to 0.
+             */
+
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                assert((bitCount >> 3) <= 3); /* For first condition to work */
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> bitCount;
+        }
+        {
+            int const max = (2*threshold-1) - remaining;
+            int count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
+            } else {
+                count = bitStream & (2*threshold-1);
+                if (count >= threshold) count -= max;
+                bitCount += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            /* When it matters (small blocks), this is a
+             * predictable branch, because we don't use -1.
+             */
+            if (count >= 0) {
+                remaining -= count;
+            } else {
+                assert(count == -1);
+                remaining += count;
+            }
+            normalizedCounter[charnum++] = (short)count;
+            previous0 = !count;
+
+            assert(threshold > 1);
+            if (remaining < threshold) {
+                /* This branch can be folded into the
+                 * threshold update condition because we
+                 * know that threshold > 1.
+                 */
+                if (remaining <= 1) break;
+                nbBits = BIT_highbit32(remaining) + 1;
+                threshold = 1 << (nbBits - 1);
+            }
+            if (charnum >= maxSV1) break;
+
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> bitCount;
+    }   }
+    if (remaining != 1) return ERROR(corruption_detected);
+    /* Only possible when there are too many zeros. */
+    if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
+    if (bitCount > 32) return ERROR(corruption_detected);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    return ip-istart;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_readNCount_body_default(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+#endif
+
+size_t FSE_readNCount_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+size_t FSE_readNCount(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
+}
+
+
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer.
+    `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
+*/
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize)
+{
+    U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                   U32* nbSymbolsPtr, U32* tableLogPtr,
+                   const void* src, size_t srcSize,
+                   void* workSpace, size_t wkspSize,
+                   int bmi2)
+{
+    U32 weightTotal;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    /* ZSTD_memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128) {  /* special header */
+        oSize = iSize - 127;
+        iSize = ((oSize+1)/2);
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        if (oSize >= hwSize) return ERROR(corruption_detected);
+        ip += 1;
+        {   U32 n;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        /* max (hwSize-1) values decoded, as last one is implied */
+        oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+    weightTotal = 0;
+    {   U32 n; for (n=0; n<oSize; n++) {
+            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            rankStats[huffWeight[n]]++;
+            weightTotal += (1 << huffWeight[n]) >> 1;
+    }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+        *tableLogPtr = tableLog;
+        /* determine last weight */
+        {   U32 const total = 1 << tableLog;
+            U32 const rest = total - weightTotal;
+            U32 const verif = 1 << BIT_highbit32(rest);
+            U32 const lastWeight = BIT_highbit32(rest) + 1;
+            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+            huffWeight[oSize] = (BYTE)lastWeight;
+            rankStats[lastWeight]++;
+    }   }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    return iSize+1;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize,
+                     int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+}
diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c
new file mode 100644
index 000000000000..6d1135f8c373
--- /dev/null
+++ b/lib/zstd/common/error_private.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "error_private.h"
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+#ifdef ZSTD_STRIP_ERROR_STRINGS
+    (void)code;
+    return "Error strings stripped";
+#else
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(version_unsupported): return "Version not supported";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
+        /* following error codes are not stable and may be removed or changed in a future version */
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+    case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+#endif
+}
diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
new file mode 100644
index 000000000000..d14e686adf95
--- /dev/null
+++ b/lib/zstd/common/error_private.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+
+
+/* ****************************************
+*  Dependencies
+******************************************/
+#include "zstd_deps.h"    /* size_t */
+#include <linux/zstd_errors.h>  /* enum list */
+
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+#define ERR_STATIC static __attribute__((unused))
+
+
+/*-****************************************
+*  Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+#undef ERROR   /* already defined on Visual Studio */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+/* check and forward error code */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
+
+
+/*-****************************************
+*  Error Strings
+******************************************/
+
+const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    return ERR_getErrorString(ERR_getErrorCode(code));
+}
+
+
+#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
new file mode 100644
index 000000000000..0bb174c2c367
--- /dev/null
+++ b/lib/zstd/common/fse.h
@@ -0,0 +1,710 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef FSE_H
+#define FSE_H
+
+
+/*-*****************************************
+*  Dependencies
+******************************************/
+#include "zstd_deps.h"    /* size_t, ptrdiff_t */
+
+
+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------   Version   ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /*< library version number; to be used when checking dll version */
+
+
+/*-****************************************
+*  FSE simple functions
+******************************************/
+/*! FSE_compress() :
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
+    @return : size of compressed data (<= dstCapacity).
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= maxDstSize),
+              or an error code, which can be tested using FSE_isError() .
+
+    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                               const void* cSrc, size_t cSrcSize);
+
+
+/*-*****************************************
+*  Tool functions
+******************************************/
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+*  FSE advanced functions
+******************************************/
+/*! FSE_compress2() :
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    @return : size of compressed data
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+                     if FSE_isError(return), it's an error code.
+*/
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[] (see hist.h)
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*! FSE_optimalTableLog():
+    dynamically downsize 'tableLog' when conditions are met.
+    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+    @return : recommended tableLog (necessarily <= 'maxTableLog') */
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    useLowProbCount is a boolean parameter which trades off compressed size for
+    faster header decoding. When it is set to 1, the compressed data will be slightly
+    smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
+    faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
+    is a good default, since header deserialization makes a big speed difference.
+    Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
+    @return : tableLog,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
+                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
+
+/*! FSE_NCountWriteBound():
+    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+    Typically useful for allocation purpose. */
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+    Compactly save 'normalizedCounter' into 'buffer'.
+    @return : size of the compressed table,
+              or an errorCode, which can be tested using FSE_isError(). */
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                                 const short* normalizedCounter,
+                                 unsigned maxSymbolValue, unsigned tableLog);
+
+/*! Constructor and Destructor of FSE_CTable.
+    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
+
+/*! FSE_buildCTable():
+    Builds `ct`, which must be already allocated, using FSE_createCTable().
+    @return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_compress_usingCTable():
+    Compress `src` using `ct` into `dst` which must be already allocated.
+    @return : size of compressed data (<= `dstCapacity`),
+              or 0 if compressed data could not fit into `dst`,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrence of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_headerBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSE_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize);
+
+/*! FSE_readNCount_bmi2():
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
+ */
+FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize, int bmi2);
+
+/*! Constructor and Destructor of FSE_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+    Builds 'dt', which must be already allocated, using FSE_createDTable().
+    return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+#endif  /* FSE_H */
+
+#if !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY
+
+/* *** Dependency *** */
+#include "bitstream.h"
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
+
+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
+
+/* *****************************************
+ *  FSE advanced API
+ ***************************************** */
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/*< same as FSE_optimalTableLog(), which used `minus==2` */
+
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/*< build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
+ */
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
+#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+/*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
+/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
+
+typedef enum {
+   FSE_repeat_none,  /*< Cannot use the previous table */
+   FSE_repeat_check, /*< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
+
+/* *****************************************
+*  FSE symbol compression API
+*******************************************/
+/*!
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   Hence their body are included in next section.
+*/
+typedef struct {
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/*<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+    FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeByte(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = BIT_closeCStream(&bitStream);
+*/
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct {
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+typedef struct {
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+    const void* ptr = ct;
+    const U16* u16ptr = (const U16*) ptr;
+    const U32 tableLog = MEM_read16(ptr);
+    statePtr->value = (ptrdiff_t)1<<tableLog;
+    statePtr->stateTable = u16ptr+2;
+    statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
+    statePtr->stateLog = tableLog;
+}
+
+
+/*! FSE_initCState2() :
+*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
+*   uses the smallest state value possible, saving the cost of this symbol */
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+    FSE_initCState(statePtr, ct);
+    {   const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+        const U16* stateTable = (const U16*)(statePtr->stateTable);
+        U32 nbBitsOut  = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
+        statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
+        statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+    }
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
+{
+    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    const U16* const stateTable = (const U16*)(statePtr->stateTable);
+    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    BIT_addBits(bitC, statePtr->value, nbBitsOut);
+    statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{
+    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+    BIT_flushBits(bitC);
+}
+
+
+/* FSE_getMaxNbBits() :
+ * Approximate maximum cost of a symbol, in bits.
+ * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
+{
+    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
+}
+
+/* FSE_bitCost() :
+ * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
+{
+    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
+    U32 const threshold = (minNbBits+1) << 16;
+    assert(tableLog < 16);
+    assert(accuracyLog < 31-tableLog);  /* ensure enough room for renormalization double shift */
+    {   U32 const tableSize = 1 << tableLog;
+        U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
+        U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog;   /* linear interpolation (very approximate) */
+        U32 const bitMultiplier = 1 << accuracyLog;
+        assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
+        assert(normalizedDeltaFromThreshold <= bitMultiplier);
+        return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
+    }
+}
+
+
+/* ======    Decompression    ====== */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    return DInfo.symbol;
+}
+
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.newState + lowBits;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+    unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
+#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
+#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
+#endif
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
new file mode 100644
index 000000000000..2c8bbe3e4c14
--- /dev/null
+++ b/lib/zstd/common/fse_decompress.c
@@ -0,0 +1,390 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "debug.h"      /* assert */
+#include "bitstream.h"
+#include "compiler.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{
+    ZSTD_free(dt);
+}
+
+static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    U16* symbolNext = (U16*)workSpace;
+    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;
+
+    /* Sanity Checks */
+    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    {   FSE_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what emperically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].symbol = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSV1 = tableMask+1;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<maxSV1; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+
+    /* Init */
+    CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state1);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state2);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+typedef struct {
+    short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+    FSE_DTable dtable[1]; /* Dynamically sized */
+} FSE_DecompressWksp;
+
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+        void* dst, size_t dstCapacity,
+        const void* cSrc, size_t cSrcSize,
+        unsigned maxLog, void* workSpace, size_t wkspSize,
+        int bmi2)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+
+    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
+
+    /* normal FSE decoding mode */
+    {
+        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+        if (FSE_isError(NCountLength)) return NCountLength;
+        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+        assert(NCountLength <= cSrcSize);
+        ip += NCountLength;
+        cSrcSize -= NCountLength;
+    }
+
+    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
+    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
+
+    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+
+    {
+        const void* ptr = wksp->dtable;
+        const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+        const U32 fastMode = DTableH->fastMode;
+
+        /* select fast mode (static) */
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+}
+
+
+typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h
new file mode 100644
index 000000000000..88c5586646aa
--- /dev/null
+++ b/lib/zstd/common/huf.h
@@ -0,0 +1,356 @@
+/* ******************************************************************
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+/* *** Dependencies *** */
+#include "zstd_deps.h"    /* size_t */
+
+
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* ***  simple functions  *** */
+/* ========================== */
+
+/* HUF_compress() :
+ *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated.
+ *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/* HUF_decompress() :
+ *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ *  into already allocated buffer 'dst', of minimum size 'dstSize'.
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ *  Note : in contrast with FSE, HUF_decompress can regenerate
+ *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ *         because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ *           or an error code, which can be tested using HUF_isError()
+ */
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);
+
+
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /*< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /*< maximum compressed size (worst case) */
+
+/* Error Management */
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /*< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /*< provides error code string (useful for debugging) */
+
+
+/* ***   Advanced function   *** */
+
+/* HUF_compress2() :
+ *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               unsigned maxSymbolValue, unsigned tableLog);
+
+/* HUF_compress4X_wksp() :
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     unsigned maxSymbolValue, unsigned tableLog,
+                                     void* workSpace, size_t wkspSize);
+
+#endif   /* HUF_H_298734234 */
+
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of a dynamic library,
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ * *****************************************************************/
+#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY
+
+/* *** Dependencies *** */
+#include "mem.h"   /* U32 */
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+
+
+/* *** Constants *** */
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
+#  error "HUF_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of HUF's Compression Table */
+/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
+struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+};   /* typedef'd to HUF_CElt */
+typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
+    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUF_DTable;
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
+#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< decodes RLE and uncompressed */
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+#endif
+
+
+/* ****************************************
+ *  HUF detailed API
+ * ****************************************/
+
+/*! HUF_compress() does the following:
+ *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ *  2. (optional) refine tableLog using HUF_optimalTableLog()
+ *  3. build Huffman table from count using HUF_buildCTable()
+ *  4. save Huffman table to memory buffer using HUF_writeCTable()
+ *  5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ *  The following API allows targeting specific sub-functions for advanced tasks.
+ *  For example, it's possible to compress several blocks using the same 'CTable',
+ *  or to save and regenerate 'CTable' using external methods.
+ */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+
+typedef enum {
+   HUF_repeat_none,  /*< Cannot use the previous table */
+   HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
+ } HUF_repeat;
+/* HUF_compress4X_repeat() :
+ *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,    /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+/* HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
+ */
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_buildCTable_wksp (HUF_CElt* tree,
+                       const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                             void* workSpace, size_t wkspSize);
+
+/*! HUF_readStats() :
+ *  Read compact Huffman tree, saved by HUF_writeCTable().
+ * `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize);
+
+/*! HUF_readStats_wksp() :
+ * Same as HUF_readStats() but takes an external workspace which must be
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
+#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
+                          U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                          const void* src, size_t srcSize,
+                          void* workspace, size_t wkspSize,
+                          int bmi2);
+
+/* HUF_readCTable() :
+ *  Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
+
+/* HUF_getNbBits() :
+ *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
+ *  Note 1 : is not inlined, as HUF_CElt definition is private
+ *  Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
+
+/*
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
+ */
+
+/* HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+/*
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+
+/* ====================== */
+/* single stream variants */
+/* ====================== */
+
+size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/* HUF_compress1X_repeat() :
+ *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+#endif
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /*< automatic selection of sing or double symbol decoder, based on DTable */
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+
+#endif /* HUF_STATIC_LINKING_ONLY */
+
diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
new file mode 100644
index 000000000000..dcdd586a9fd9
--- /dev/null
+++ b/lib/zstd/common/mem.h
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <asm/unaligned.h>  /* get_unaligned, put_unaligned* */
+#include <linux/compiler.h>  /* inline */
+#include <linux/swab.h>  /* swab32, swab64 */
+#include <linux/types.h>  /* size_t, ptrdiff_t */
+#include "debug.h"  /* DEBUG_STATIC_ASSERT */
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#define MEM_STATIC static inline
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+typedef uint8_t  BYTE;
+typedef uint16_t U16;
+typedef int16_t  S16;
+typedef uint32_t U32;
+typedef int32_t  S32;
+typedef uint64_t U64;
+typedef int64_t  S64;
+
+/*-**************************************************************
+*  Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+/*-**************************************************************
+*  Memory I/O Implementation
+*****************************************************************/
+MEM_STATIC unsigned MEM_32bits(void)
+{
+    return sizeof(size_t) == 4;
+}
+
+MEM_STATIC unsigned MEM_64bits(void)
+{
+    return sizeof(size_t) == 8;
+}
+
+#if defined(__LITTLE_ENDIAN)
+#define MEM_LITTLE_ENDIAN 1
+#else
+#define MEM_LITTLE_ENDIAN 0
+#endif
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    return MEM_LITTLE_ENDIAN;
+}
+
+MEM_STATIC U16 MEM_read16(const void *memPtr)
+{
+    return get_unaligned((const U16 *)memPtr);
+}
+
+MEM_STATIC U32 MEM_read32(const void *memPtr)
+{
+    return get_unaligned((const U32 *)memPtr);
+}
+
+MEM_STATIC U64 MEM_read64(const void *memPtr)
+{
+    return get_unaligned((const U64 *)memPtr);
+}
+
+MEM_STATIC size_t MEM_readST(const void *memPtr)
+{
+    return get_unaligned((const size_t *)memPtr);
+}
+
+MEM_STATIC void MEM_write16(void *memPtr, U16 value)
+{
+    put_unaligned(value, (U16 *)memPtr);
+}
+
+MEM_STATIC void MEM_write32(void *memPtr, U32 value)
+{
+    put_unaligned(value, (U32 *)memPtr);
+}
+
+MEM_STATIC void MEM_write64(void *memPtr, U64 value)
+{
+    put_unaligned(value, (U64 *)memPtr);
+}
+
+/*=== Little endian r/w ===*/
+
+MEM_STATIC U16 MEM_readLE16(const void *memPtr)
+{
+    return get_unaligned_le16(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
+{
+    put_unaligned_le16(val, memPtr);
+}
+
+MEM_STATIC U32 MEM_readLE24(const void *memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16);
+}
+
+MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
+{
+	MEM_writeLE16(memPtr, (U16)val);
+	((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
+}
+
+MEM_STATIC U32 MEM_readLE32(const void *memPtr)
+{
+    return get_unaligned_le32(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
+{
+    put_unaligned_le32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readLE64(const void *memPtr)
+{
+    return get_unaligned_le64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64)
+{
+    put_unaligned_le64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readLEST(const void *memPtr)
+{
+	if (MEM_32bits())
+		return (size_t)MEM_readLE32(memPtr);
+	else
+		return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val)
+{
+	if (MEM_32bits())
+		MEM_writeLE32(memPtr, (U32)val);
+	else
+		MEM_writeLE64(memPtr, (U64)val);
+}
+
+/*=== Big endian r/w ===*/
+
+MEM_STATIC U32 MEM_readBE32(const void *memPtr)
+{
+    return get_unaligned_be32(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32)
+{
+    put_unaligned_be32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readBE64(const void *memPtr)
+{
+    return get_unaligned_be64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64)
+{
+    put_unaligned_be64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readBEST(const void *memPtr)
+{
+	if (MEM_32bits())
+		return (size_t)MEM_readBE32(memPtr);
+	else
+		return (size_t)MEM_readBE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val)
+{
+	if (MEM_32bits())
+		MEM_writeBE32(memPtr, (U32)val);
+	else
+		MEM_writeBE64(memPtr, (U64)val);
+}
+
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+    return swab32(in);
+}
+
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+    return swab64(in);
+}
+
+MEM_STATIC size_t MEM_swapST(size_t in)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_swap32((U32)in);
+    else
+        return (size_t)MEM_swap64((U64)in);
+}
+
+#endif /* MEM_H_MODULE */
diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
new file mode 100644
index 000000000000..3d7e35b309b5
--- /dev/null
+++ b/lib/zstd/common/zstd_common.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
+#include "error_private.h"
+#include "zstd_internal.h"
+
+
+/*-****************************************
+*  Version
+******************************************/
+unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
+
+const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
+
+
+/*-****************************************
+*  ZSTD Error Management
+******************************************/
+#undef ZSTD_isError   /* defined within zstd_internal.h */
+/*! ZSTD_isError() :
+ *  tells if a return value is an error code
+ *  symbol is required for external callers */
+unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTD_getErrorName() :
+ *  provides error code string from function result (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+/*! ZSTD_getError() :
+ *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
+ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+
+/*! ZSTD_getErrorString() :
+ *  provides error code string from enum */
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
+
+
+
+/*=**************************************************************
+*  Custom allocator
+****************************************************************/
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc)
+        return customMem.customAlloc(customMem.opaque, size);
+    return ZSTD_malloc(size);
+}
+
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc) {
+        /* calloc implemented as malloc+memset;
+         * not as efficient as calloc, but next best guess for custom malloc */
+        void* const ptr = customMem.customAlloc(customMem.opaque, size);
+        ZSTD_memset(ptr, 0, size);
+        return ptr;
+    }
+    return ZSTD_calloc(1, size);
+}
+
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
+{
+    if (ptr!=NULL) {
+        if (customMem.customFree)
+            customMem.customFree(customMem.opaque, ptr);
+        else
+            ZSTD_free(ptr);
+    }
+}
diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
new file mode 100644
index 000000000000..7a5bf44839c9
--- /dev/null
+++ b/lib/zstd/common/zstd_deps.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This file provides common libc dependencies that zstd requires.
+ * The purpose is to allow replacing this file with a custom implementation
+ * to compile zstd without libc support.
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+#include <linux/limits.h>
+#include <linux/stddef.h>
+
+#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n))
+#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n))
+#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n))
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/*
+ * Define malloc as always failing. That means the user must
+ * either use ZSTD_customMem or statically allocate memory.
+ * Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+#define ZSTD_DEPS_MALLOC
+
+#define ZSTD_malloc(s) ({ (void)(s); NULL; })
+#define ZSTD_free(p) ((void)(p))
+#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; })
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+#define ZSTD_DEPS_MATH64
+
+#include <linux/math64.h>
+
+static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
+  return div_u64(dividend, divisor);
+}
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 1, meaning
+ * it is disabled in production.
+ * Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+#define ZSTD_DEPS_ASSERT
+
+#include <linux/kernel.h>
+
+#define assert(x) WARN_ON((x))
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 2, meaning
+ * it is disabled in production.
+ * Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+#define ZSTD_DEPS_IO
+
+#include <linux/printk.h>
+
+#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/*
+ * Only requested when MSAN is enabled.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+#define ZSTD_DEPS_STDINT
+
+/*
+ * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
+ * is an unsigned long.
+ */
+typedef long intptr_t;
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
new file mode 100644
index 000000000000..fc6f3a9b40c0
--- /dev/null
+++ b/lib/zstd/common/zstd_internal.h
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/* this module contains definitions which must be identical
+ * across compression, decompression and dictBuilder.
+ * It also contains a few functions useful to at least 2 of them
+ * and which benefit from being inlined */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "compiler.h"
+#include "mem.h"
+#include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
+#include "error_private.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include <linux/zstd.h>
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#include <linux/xxhash.h>                /* XXH_reset, update, digest */
+#define ZSTD_TRACE 0
+
+
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
+#define ZSTD_isError ERR_isError   /* for inlining */
+#define FSE_isError  ERR_isError
+#define HUF_isError  ERR_isError
+
+
+/*-*************************************
+*  shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0);
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTD_OPT_NUM    (1<<12)
+
+#define ZSTD_REP_NUM      3                 /* number of repcodes */
+#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
+static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
+static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
+static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+
+#define ZSTD_FRAMEIDSIZE 4   /* magic number size */
+
+#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
+static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+
+#define ZSTD_FRAMECHECKSUMSIZE 4
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define HufLog 12
+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+
+#define Litbits  8
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML   52
+#define MaxLL   35
+#define DefaultMaxOff 28
+#define MaxOff  31
+#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog    9
+#define LLFSELog    9
+#define OffFSELog   8
+#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
+
+#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
+/* Each table cannot take more than #symbols * FSELog bits */
+#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
+
+static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 6, 7, 8, 9,10,11,12,
+    13,14,15,16
+};
+static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
+     4, 3, 2, 2, 2, 2, 2, 2,
+     2, 2, 2, 2, 2, 1, 1, 1,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     2, 3, 2, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1
+};
+#define LL_DEFAULTNORMLOG 6  /* for static allocation */
+static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 4, 5, 7, 8, 9,10,11,
+    12,13,14,15,16
+};
+static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
+     1, 4, 3, 2, 2, 2, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1,-1,-1,
+    -1,-1,-1,-1,-1
+};
+#define ML_DEFAULTNORMLOG 6  /* for static allocation */
+static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
+     1, 1, 1, 1, 1, 1, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1,-1
+};
+#define OF_DEFAULTNORMLOG 5  /* for static allocation */
+static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTD_copy8(void* dst, const void* src) {
+    ZSTD_memcpy(dst, src, 8);
+}
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) {
+    ZSTD_memcpy(dst, src, 16);
+}
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
+
+typedef enum {
+    ZSTD_no_overlap,
+    ZSTD_overlap_src_before_dst
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
+
+/*! ZSTD_wildcopy() :
+ *  Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
+MEM_STATIC FORCE_INLINE_ATTR
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
+{
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first COPY16() call because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities. Since it is almost certain to be short, only do
+         * one COPY16() in the first call. Then, do two calls per loop since
+         * at that point it is more likely to have a high trip count.
+         */
+#ifdef __aarch64__
+        do {
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#else
+        ZSTD_copy16(op, ip);
+        if (16 >= length) return;
+        op += 16;
+        ip += 16;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#endif
+    }
+}
+
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const length = MIN(dstCapacity, srcSize);
+    if (length > 0) {
+        ZSTD_memcpy(dst, src, length);
+    }
+    return length;
+}
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+/* Controls whether the input/output buffer is buffered or stable. */
+typedef enum {
+    ZSTD_bm_buffered = 0,  /* Buffer the input/output */
+    ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
+} ZSTD_bufferMode_e;
+
+
+/*-*******************************************
+*  Private declarations
+*********************************************/
+typedef struct seqDef_s {
+    U32 offset;         /* Offset code of the sequence */
+    U16 litLength;
+    U16 matchLength;
+} seqDef;
+
+typedef struct {
+    seqDef* sequencesStart;
+    seqDef* sequences;      /* ptr to end of sequences */
+    BYTE* litStart;
+    BYTE* lit;              /* ptr to end of literals */
+    BYTE* llCode;
+    BYTE* mlCode;
+    BYTE* ofCode;
+    size_t maxNbSeq;
+    size_t maxNbLit;
+
+    /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
+     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
+     * the existing value of the litLength or matchLength by 0x10000.
+     */
+    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
+    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */
+} seqStore_t;
+
+typedef struct {
+    U32 litLength;
+    U32 matchLength;
+} ZSTD_sequenceLength;
+
+/*
+ * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
+ * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+    ZSTD_sequenceLength seqLen;
+    seqLen.litLength = seq->litLength;
+    seqLen.matchLength = seq->matchLength + MINMATCH;
+    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+        if (seqStore->longLengthID == 1) {
+            seqLen.litLength += 0xFFFF;
+        }
+        if (seqStore->longLengthID == 2) {
+            seqLen.matchLength += 0xFFFF;
+        }
+    }
+    return seqLen;
+}
+
+/*
+ * Contains the compressed frame size and an upper-bound for the decompressed frame size.
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError().
+ *       similarly, before using `decompressedBound`, check for errors using:
+ *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+ */
+typedef struct {
+    size_t compressedSize;
+    unsigned long long decompressedBound;
+} ZSTD_frameSizeInfo;   /* decompress & legacy */
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+
+/* custom memory allocation functions */
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
+
+
+MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return __builtin_clz (val) ^ 31;
+#   else   /* Software version */
+        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+        U32 v = val;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
+#   endif
+    }
+}
+
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
+
+
+typedef struct {
+    blockType_e blockType;
+    U32 lastBlock;
+    U32 origSize;
+} blockProperties_t;   /* declared here for decompress and fullbench */
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr);
+
+/*! ZSTD_decodeSeqHeaders() :
+ *  decode sequence header from src */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                       const void* src, size_t srcSize);
+
+
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c
deleted file mode 100644
index 57aaa64306a0..000000000000
--- a/lib/zstd/compress.c
+++ /dev/null
@@ -1,3534 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "fse.h"
-#include "huf.h"
-#include "mem.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h> /* memset */
-
-/*-*************************************
-*  Constants
-***************************************/
-static const U32 g_searchStrength = 8; /* control skip over incompressible data */
-#define HASH_READ_SIZE 8
-typedef enum { ZSTDcs_created = 0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
-
-/*-*************************************
-*  Helper functions
-***************************************/
-size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
-
-/*-*************************************
-*  Sequence storage
-***************************************/
-static void ZSTD_resetSeqStore(seqStore_t *ssPtr)
-{
-	ssPtr->lit = ssPtr->litStart;
-	ssPtr->sequences = ssPtr->sequencesStart;
-	ssPtr->longLengthID = 0;
-}
-
-/*-*************************************
-*  Context memory management
-***************************************/
-struct ZSTD_CCtx_s {
-	const BYTE *nextSrc;  /* next block here to continue on curr prefix */
-	const BYTE *base;     /* All regular indexes relative to this position */
-	const BYTE *dictBase; /* extDict indexes relative to this position */
-	U32 dictLimit;	/* below that point, need extDict */
-	U32 lowLimit;	 /* below that point, no more data */
-	U32 nextToUpdate;     /* index from which to continue dictionary update */
-	U32 nextToUpdate3;    /* index from which to continue dictionary update */
-	U32 hashLog3;	 /* dispatch table : larger == faster, more memory */
-	U32 loadedDictEnd;    /* index of end of dictionary */
-	U32 forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
-	U32 forceRawDict;     /* Force loading dictionary in "content-only" mode (no header analysis) */
-	ZSTD_compressionStage_e stage;
-	U32 rep[ZSTD_REP_NUM];
-	U32 repToConfirm[ZSTD_REP_NUM];
-	U32 dictID;
-	ZSTD_parameters params;
-	void *workSpace;
-	size_t workSpaceSize;
-	size_t blockSize;
-	U64 frameContentSize;
-	struct xxh64_state xxhState;
-	ZSTD_customMem customMem;
-
-	seqStore_t seqStore; /* sequences storage ptrs */
-	U32 *hashTable;
-	U32 *hashTable3;
-	U32 *chainTable;
-	HUF_CElt *hufTable;
-	U32 flagStaticTables;
-	HUF_repeat flagStaticHufTable;
-	FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
-	FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
-	FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
-	unsigned tmpCounters[HUF_COMPRESS_WORKSPACE_SIZE_U32];
-};
-
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams)
-{
-	size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
-	U32 const divider = (cParams.searchLength == 3) ? 3 : 4;
-	size_t const maxNbSeq = blockSize / divider;
-	size_t const tokenSpace = blockSize + 11 * maxNbSeq;
-	size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
-	size_t const hSize = ((size_t)1) << cParams.hashLog;
-	U32 const hashLog3 = (cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
-	size_t const h3Size = ((size_t)1) << hashLog3;
-	size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-	size_t const optSpace =
-	    ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-	size_t const workspaceSize = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
-				     (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
-
-	return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_CCtx)) + ZSTD_ALIGN(workspaceSize);
-}
-
-static ZSTD_CCtx *ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_CCtx *cctx;
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-	cctx = (ZSTD_CCtx *)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
-	if (!cctx)
-		return NULL;
-	memset(cctx, 0, sizeof(ZSTD_CCtx));
-	cctx->customMem = customMem;
-	return cctx;
-}
-
-ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	ZSTD_CCtx *cctx = ZSTD_createCCtx_advanced(stackMem);
-	if (cctx) {
-		cctx->workSpace = ZSTD_stackAllocAll(cctx->customMem.opaque, &cctx->workSpaceSize);
-	}
-	return cctx;
-}
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx)
-{
-	if (cctx == NULL)
-		return 0; /* support free on NULL */
-	ZSTD_free(cctx->workSpace, cctx->customMem);
-	ZSTD_free(cctx, cctx->customMem);
-	return 0; /* reserved as a potential error code in the future */
-}
-
-const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx) /* hidden interface */ { return &(ctx->seqStore); }
-
-static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx *cctx) { return cctx->params; }
-
-/** ZSTD_checkParams() :
-	ensure param values remain within authorized range.
-	@return : 0, or an error code if one value is beyond authorized range */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
-{
-#define CLAMPCHECK(val, min, max)                                       \
-	{                                                               \
-		if ((val < min) | (val > max))                          \
-			return ERROR(compressionParameter_unsupported); \
-	}
-	CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
-	CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
-	CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
-	CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
-	CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
-	CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-	if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2)
-		return ERROR(compressionParameter_unsupported);
-	return 0;
-}
-
-/** ZSTD_cycleLog() :
- *  condition for correct operation : hashLog > 1 */
-static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
-{
-	U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
-	return hashLog - btScale;
-}
-
-/** ZSTD_adjustCParams() :
-	optimize `cPar` for a given input (`srcSize` and `dictSize`).
-	mostly downsizing to reduce memory consumption and initialization.
-	Both `srcSize` and `dictSize` are optional (use 0 if unknown),
-	but if both are 0, no optimization can be done.
-	Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
-ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
-{
-	if (srcSize + dictSize == 0)
-		return cPar; /* no size information available : no adjustment */
-
-	/* resize params, to use less memory when necessary */
-	{
-		U32 const minSrcSize = (srcSize == 0) ? 500 : 0;
-		U64 const rSize = srcSize + dictSize + minSrcSize;
-		if (rSize < ((U64)1 << ZSTD_WINDOWLOG_MAX)) {
-			U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
-			if (cPar.windowLog > srcLog)
-				cPar.windowLog = srcLog;
-		}
-	}
-	if (cPar.hashLog > cPar.windowLog)
-		cPar.hashLog = cPar.windowLog;
-	{
-		U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
-		if (cycleLog > cPar.windowLog)
-			cPar.chainLog -= (cycleLog - cPar.windowLog);
-	}
-
-	if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
-		cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
-
-	return cPar;
-}
-
-static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
-{
-	return (param1.cParams.hashLog == param2.cParams.hashLog) & (param1.cParams.chainLog == param2.cParams.chainLog) &
-	       (param1.cParams.strategy == param2.cParams.strategy) & ((param1.cParams.searchLength == 3) == (param2.cParams.searchLength == 3));
-}
-
-/*! ZSTD_continueCCtx() :
-	reuse CCtx without reset (note : requires no dictionary) */
-static size_t ZSTD_continueCCtx(ZSTD_CCtx *cctx, ZSTD_parameters params, U64 frameContentSize)
-{
-	U32 const end = (U32)(cctx->nextSrc - cctx->base);
-	cctx->params = params;
-	cctx->frameContentSize = frameContentSize;
-	cctx->lowLimit = end;
-	cctx->dictLimit = end;
-	cctx->nextToUpdate = end + 1;
-	cctx->stage = ZSTDcs_init;
-	cctx->dictID = 0;
-	cctx->loadedDictEnd = 0;
-	{
-		int i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			cctx->rep[i] = repStartValue[i];
-	}
-	cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
-	xxh64_reset(&cctx->xxhState, 0);
-	return 0;
-}
-
-typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
-
-/*! ZSTD_resetCCtx_advanced() :
-	note : `params` must be validated */
-static size_t ZSTD_resetCCtx_advanced(ZSTD_CCtx *zc, ZSTD_parameters params, U64 frameContentSize, ZSTD_compResetPolicy_e const crp)
-{
-	if (crp == ZSTDcrp_continue)
-		if (ZSTD_equivalentParams(params, zc->params)) {
-			zc->flagStaticTables = 0;
-			zc->flagStaticHufTable = HUF_repeat_none;
-			return ZSTD_continueCCtx(zc, params, frameContentSize);
-		}
-
-	{
-		size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
-		U32 const divider = (params.cParams.searchLength == 3) ? 3 : 4;
-		size_t const maxNbSeq = blockSize / divider;
-		size_t const tokenSpace = blockSize + 11 * maxNbSeq;
-		size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
-		size_t const hSize = ((size_t)1) << params.cParams.hashLog;
-		U32 const hashLog3 = (params.cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
-		size_t const h3Size = ((size_t)1) << hashLog3;
-		size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-		void *ptr;
-
-		/* Check if workSpace is large enough, alloc a new one if needed */
-		{
-			size_t const optSpace = ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) +
-						(ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-			size_t const neededSpace = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
-						   (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
-			if (zc->workSpaceSize < neededSpace) {
-				ZSTD_free(zc->workSpace, zc->customMem);
-				zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
-				if (zc->workSpace == NULL)
-					return ERROR(memory_allocation);
-				zc->workSpaceSize = neededSpace;
-			}
-		}
-
-		if (crp != ZSTDcrp_noMemset)
-			memset(zc->workSpace, 0, tableSpace); /* reset tables only */
-		xxh64_reset(&zc->xxhState, 0);
-		zc->hashLog3 = hashLog3;
-		zc->hashTable = (U32 *)(zc->workSpace);
-		zc->chainTable = zc->hashTable + hSize;
-		zc->hashTable3 = zc->chainTable + chainSize;
-		ptr = zc->hashTable3 + h3Size;
-		zc->hufTable = (HUF_CElt *)ptr;
-		zc->flagStaticTables = 0;
-		zc->flagStaticHufTable = HUF_repeat_none;
-		ptr = ((U32 *)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
-
-		zc->nextToUpdate = 1;
-		zc->nextSrc = NULL;
-		zc->base = NULL;
-		zc->dictBase = NULL;
-		zc->dictLimit = 0;
-		zc->lowLimit = 0;
-		zc->params = params;
-		zc->blockSize = blockSize;
-		zc->frameContentSize = frameContentSize;
-		{
-			int i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				zc->rep[i] = repStartValue[i];
-		}
-
-		if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
-			zc->seqStore.litFreq = (U32 *)ptr;
-			zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1 << Litbits);
-			zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL + 1);
-			zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML + 1);
-			ptr = zc->seqStore.offCodeFreq + (MaxOff + 1);
-			zc->seqStore.matchTable = (ZSTD_match_t *)ptr;
-			ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM + 1;
-			zc->seqStore.priceTable = (ZSTD_optimal_t *)ptr;
-			ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM + 1;
-			zc->seqStore.litLengthSum = 0;
-		}
-		zc->seqStore.sequencesStart = (seqDef *)ptr;
-		ptr = zc->seqStore.sequencesStart + maxNbSeq;
-		zc->seqStore.llCode = (BYTE *)ptr;
-		zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
-		zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
-		zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
-
-		zc->stage = ZSTDcs_init;
-		zc->dictID = 0;
-		zc->loadedDictEnd = 0;
-
-		return 0;
-	}
-}
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx)
-{
-	int i;
-	for (i = 0; i < ZSTD_REP_NUM; i++)
-		cctx->rep[i] = 0;
-}
-
-/*! ZSTD_copyCCtx() :
-*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
-*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
-*   @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx *dstCCtx, const ZSTD_CCtx *srcCCtx, unsigned long long pledgedSrcSize)
-{
-	if (srcCCtx->stage != ZSTDcs_init)
-		return ERROR(stage_wrong);
-
-	memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
-	{
-		ZSTD_parameters params = srcCCtx->params;
-		params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-		ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
-	}
-
-	/* copy tables */
-	{
-		size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
-		size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
-		size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
-		size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-		memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
-	}
-
-	/* copy dictionary offsets */
-	dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
-	dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3;
-	dstCCtx->nextSrc = srcCCtx->nextSrc;
-	dstCCtx->base = srcCCtx->base;
-	dstCCtx->dictBase = srcCCtx->dictBase;
-	dstCCtx->dictLimit = srcCCtx->dictLimit;
-	dstCCtx->lowLimit = srcCCtx->lowLimit;
-	dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd;
-	dstCCtx->dictID = srcCCtx->dictID;
-
-	/* copy entropy tables */
-	dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
-	dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
-	if (srcCCtx->flagStaticTables) {
-		memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
-		memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
-		memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
-	}
-	if (srcCCtx->flagStaticHufTable) {
-		memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256 * 4);
-	}
-
-	return 0;
-}
-
-/*! ZSTD_reduceTable() :
-*   reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable(U32 *const table, U32 const size, U32 const reducerValue)
-{
-	U32 u;
-	for (u = 0; u < size; u++) {
-		if (table[u] < reducerValue)
-			table[u] = 0;
-		else
-			table[u] -= reducerValue;
-	}
-}
-
-/*! ZSTD_reduceIndex() :
-*   rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex(ZSTD_CCtx *zc, const U32 reducerValue)
-{
-	{
-		U32 const hSize = 1 << zc->params.cParams.hashLog;
-		ZSTD_reduceTable(zc->hashTable, hSize, reducerValue);
-	}
-
-	{
-		U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
-		ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
-	}
-
-	{
-		U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
-		ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue);
-	}
-}
-
-/*-*******************************************************
-*  Block entropic compression
-*********************************************************/
-
-/* See doc/zstd_compression_format.md for detailed format description */
-
-size_t ZSTD_noCompressBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	if (srcSize + ZSTD_blockHeaderSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memcpy((BYTE *)dst + ZSTD_blockHeaderSize, src, srcSize);
-	ZSTD_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
-	return ZSTD_blockHeaderSize + srcSize;
-}
-
-static size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	BYTE *const ostart = (BYTE * const)dst;
-	U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
-
-	if (srcSize + flSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-
-	switch (flSize) {
-	case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_basic + (srcSize << 3)); break;
-	case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_basic + (1 << 2) + (srcSize << 4))); break;
-	default: /*note : should not be necessary : flSize is within {1,2,3} */
-	case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_basic + (3 << 2) + (srcSize << 4))); break;
-	}
-
-	memcpy(ostart + flSize, src, srcSize);
-	return srcSize + flSize;
-}
-
-static size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	BYTE *const ostart = (BYTE * const)dst;
-	U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
-
-	(void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
-
-	switch (flSize) {
-	case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_rle + (srcSize << 3)); break;
-	case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_rle + (1 << 2) + (srcSize << 4))); break;
-	default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
-	case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_rle + (3 << 2) + (srcSize << 4))); break;
-	}
-
-	ostart[flSize] = *(const BYTE *)src;
-	return flSize + 1;
-}
-
-static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
-
-static size_t ZSTD_compressLiterals(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const minGain = ZSTD_minGain(srcSize);
-	size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
-	BYTE *const ostart = (BYTE *)dst;
-	U32 singleStream = srcSize < 256;
-	symbolEncodingType_e hType = set_compressed;
-	size_t cLitSize;
-
-/* small ? don't even attempt compression (speed opt) */
-#define LITERAL_NOENTROPY 63
-	{
-		size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
-		if (srcSize <= minLitSize)
-			return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-	}
-
-	if (dstCapacity < lhSize + 1)
-		return ERROR(dstSize_tooSmall); /* not enough space for compression */
-	{
-		HUF_repeat repeat = zc->flagStaticHufTable;
-		int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
-		if (repeat == HUF_repeat_valid && lhSize == 3)
-			singleStream = 1;
-		cLitSize = singleStream ? HUF_compress1X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
-								sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
-					: HUF_compress4X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
-								sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
-		if (repeat != HUF_repeat_none) {
-			hType = set_repeat;
-		} /* reused the existing table */
-		else {
-			zc->flagStaticHufTable = HUF_repeat_check;
-		} /* now have a table to reuse */
-	}
-
-	if ((cLitSize == 0) | (cLitSize >= srcSize - minGain)) {
-		zc->flagStaticHufTable = HUF_repeat_none;
-		return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-	}
-	if (cLitSize == 1) {
-		zc->flagStaticHufTable = HUF_repeat_none;
-		return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
-	}
-
-	/* Build header */
-	switch (lhSize) {
-	case 3: /* 2 - 2 - 10 - 10 */
-	{
-		U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 14);
-		ZSTD_writeLE24(ostart, lhc);
-		break;
-	}
-	case 4: /* 2 - 2 - 14 - 14 */
-	{
-		U32 const lhc = hType + (2 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 18);
-		ZSTD_writeLE32(ostart, lhc);
-		break;
-	}
-	default: /* should not be necessary, lhSize is only {3,4,5} */
-	case 5:  /* 2 - 2 - 18 - 18 */
-	{
-		U32 const lhc = hType + (3 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 22);
-		ZSTD_writeLE32(ostart, lhc);
-		ostart[4] = (BYTE)(cLitSize >> 10);
-		break;
-	}
-	}
-	return lhSize + cLitSize;
-}
-
-static const BYTE LL_Code[64] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18,
-				 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
-				 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24};
-
-static const BYTE ML_Code[128] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
-				  26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38,
-				  38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
-				  40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42,
-				  42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42};
-
-void ZSTD_seqToCodes(const seqStore_t *seqStorePtr)
-{
-	BYTE const LL_deltaCode = 19;
-	BYTE const ML_deltaCode = 36;
-	const seqDef *const sequences = seqStorePtr->sequencesStart;
-	BYTE *const llCodeTable = seqStorePtr->llCode;
-	BYTE *const ofCodeTable = seqStorePtr->ofCode;
-	BYTE *const mlCodeTable = seqStorePtr->mlCode;
-	U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-	U32 u;
-	for (u = 0; u < nbSeq; u++) {
-		U32 const llv = sequences[u].litLength;
-		U32 const mlv = sequences[u].matchLength;
-		llCodeTable[u] = (llv > 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
-		ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
-		mlCodeTable[u] = (mlv > 127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
-	}
-	if (seqStorePtr->longLengthID == 1)
-		llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-	if (seqStorePtr->longLengthID == 2)
-		mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
-}
-
-ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity)
-{
-	const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-	const seqStore_t *seqStorePtr = &(zc->seqStore);
-	FSE_CTable *CTable_LitLength = zc->litlengthCTable;
-	FSE_CTable *CTable_OffsetBits = zc->offcodeCTable;
-	FSE_CTable *CTable_MatchLength = zc->matchlengthCTable;
-	U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
-	const seqDef *const sequences = seqStorePtr->sequencesStart;
-	const BYTE *const ofCodeTable = seqStorePtr->ofCode;
-	const BYTE *const llCodeTable = seqStorePtr->llCode;
-	const BYTE *const mlCodeTable = seqStorePtr->mlCode;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstCapacity;
-	BYTE *op = ostart;
-	size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-	BYTE *seqHead;
-
-	U32 *count;
-	S16 *norm;
-	U32 *workspace;
-	size_t workspaceSize = sizeof(zc->tmpCounters);
-	{
-		size_t spaceUsed32 = 0;
-		count = (U32 *)zc->tmpCounters + spaceUsed32;
-		spaceUsed32 += MaxSeq + 1;
-		norm = (S16 *)((U32 *)zc->tmpCounters + spaceUsed32);
-		spaceUsed32 += ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
-
-		workspace = (U32 *)zc->tmpCounters + spaceUsed32;
-		workspaceSize -= (spaceUsed32 << 2);
-	}
-
-	/* Compress literals */
-	{
-		const BYTE *const literals = seqStorePtr->litStart;
-		size_t const litSize = seqStorePtr->lit - literals;
-		size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
-		if (ZSTD_isError(cSize))
-			return cSize;
-		op += cSize;
-	}
-
-	/* Sequences Header */
-	if ((oend - op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */)
-		return ERROR(dstSize_tooSmall);
-	if (nbSeq < 0x7F)
-		*op++ = (BYTE)nbSeq;
-	else if (nbSeq < LONGNBSEQ)
-		op[0] = (BYTE)((nbSeq >> 8) + 0x80), op[1] = (BYTE)nbSeq, op += 2;
-	else
-		op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3;
-	if (nbSeq == 0)
-		return op - ostart;
-
-	/* seqHead : flags for FSE encoding type */
-	seqHead = op++;
-
-#define MIN_SEQ_FOR_DYNAMIC_FSE 64
-#define MAX_SEQ_FOR_STATIC_FSE 1000
-
-	/* convert length/distances into codes */
-	ZSTD_seqToCodes(seqStorePtr);
-
-	/* CTable for Literal Lengths */
-	{
-		U32 max = MaxLL;
-		size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
-		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-			*op++ = llCodeTable[0];
-			FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
-			LLtype = set_rle;
-		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-			LLtype = set_repeat;
-		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog - 1)))) {
-			FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, workspace, workspaceSize);
-			LLtype = set_basic;
-		} else {
-			size_t nbSeq_1 = nbSeq;
-			const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
-			if (count[llCodeTable[nbSeq - 1]] > 1) {
-				count[llCodeTable[nbSeq - 1]]--;
-				nbSeq_1--;
-			}
-			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-			{
-				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-				if (FSE_isError(NCountSize))
-					return NCountSize;
-				op += NCountSize;
-			}
-			FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, workspace, workspaceSize);
-			LLtype = set_compressed;
-		}
-	}
-
-	/* CTable for Offsets */
-	{
-		U32 max = MaxOff;
-		size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
-		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-			*op++ = ofCodeTable[0];
-			FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
-			Offtype = set_rle;
-		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-			Offtype = set_repeat;
-		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog - 1)))) {
-			FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, workspace, workspaceSize);
-			Offtype = set_basic;
-		} else {
-			size_t nbSeq_1 = nbSeq;
-			const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
-			if (count[ofCodeTable[nbSeq - 1]] > 1) {
-				count[ofCodeTable[nbSeq - 1]]--;
-				nbSeq_1--;
-			}
-			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-			{
-				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-				if (FSE_isError(NCountSize))
-					return NCountSize;
-				op += NCountSize;
-			}
-			FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, workspace, workspaceSize);
-			Offtype = set_compressed;
-		}
-	}
-
-	/* CTable for MatchLengths */
-	{
-		U32 max = MaxML;
-		size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
-		if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-			*op++ = *mlCodeTable;
-			FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
-			MLtype = set_rle;
-		} else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-			MLtype = set_repeat;
-		} else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog - 1)))) {
-			FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, workspace, workspaceSize);
-			MLtype = set_basic;
-		} else {
-			size_t nbSeq_1 = nbSeq;
-			const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
-			if (count[mlCodeTable[nbSeq - 1]] > 1) {
-				count[mlCodeTable[nbSeq - 1]]--;
-				nbSeq_1--;
-			}
-			FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-			{
-				size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-				if (FSE_isError(NCountSize))
-					return NCountSize;
-				op += NCountSize;
-			}
-			FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, workspace, workspaceSize);
-			MLtype = set_compressed;
-		}
-	}
-
-	*seqHead = (BYTE)((LLtype << 6) + (Offtype << 4) + (MLtype << 2));
-	zc->flagStaticTables = 0;
-
-	/* Encoding Sequences */
-	{
-		BIT_CStream_t blockStream;
-		FSE_CState_t stateMatchLength;
-		FSE_CState_t stateOffsetBits;
-		FSE_CState_t stateLitLength;
-
-		CHECK_E(BIT_initCStream(&blockStream, op, oend - op), dstSize_tooSmall); /* not enough space remaining */
-
-		/* first symbols */
-		FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq - 1]);
-		FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq - 1]);
-		FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq - 1]);
-		BIT_addBits(&blockStream, sequences[nbSeq - 1].litLength, LL_bits[llCodeTable[nbSeq - 1]]);
-		if (ZSTD_32bits())
-			BIT_flushBits(&blockStream);
-		BIT_addBits(&blockStream, sequences[nbSeq - 1].matchLength, ML_bits[mlCodeTable[nbSeq - 1]]);
-		if (ZSTD_32bits())
-			BIT_flushBits(&blockStream);
-		if (longOffsets) {
-			U32 const ofBits = ofCodeTable[nbSeq - 1];
-			int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
-			if (extraBits) {
-				BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, extraBits);
-				BIT_flushBits(&blockStream);
-			}
-			BIT_addBits(&blockStream, sequences[nbSeq - 1].offset >> extraBits, ofBits - extraBits);
-		} else {
-			BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, ofCodeTable[nbSeq - 1]);
-		}
-		BIT_flushBits(&blockStream);
-
-		{
-			size_t n;
-			for (n = nbSeq - 2; n < nbSeq; n--) { /* intentional underflow */
-				BYTE const llCode = llCodeTable[n];
-				BYTE const ofCode = ofCodeTable[n];
-				BYTE const mlCode = mlCodeTable[n];
-				U32 const llBits = LL_bits[llCode];
-				U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
-				U32 const mlBits = ML_bits[mlCode];
-				/* (7)*/							    /* (7)*/
-				FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */  /* 15 */
-				FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
-				if (ZSTD_32bits())
-					BIT_flushBits(&blockStream);				  /* (7)*/
-				FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
-				if (ZSTD_32bits() || (ofBits + mlBits + llBits >= 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-					BIT_flushBits(&blockStream); /* (7)*/
-				BIT_addBits(&blockStream, sequences[n].litLength, llBits);
-				if (ZSTD_32bits() && ((llBits + mlBits) > 24))
-					BIT_flushBits(&blockStream);
-				BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
-				if (ZSTD_32bits())
-					BIT_flushBits(&blockStream); /* (7)*/
-				if (longOffsets) {
-					int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
-					if (extraBits) {
-						BIT_addBits(&blockStream, sequences[n].offset, extraBits);
-						BIT_flushBits(&blockStream); /* (7)*/
-					}
-					BIT_addBits(&blockStream, sequences[n].offset >> extraBits, ofBits - extraBits); /* 31 */
-				} else {
-					BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
-				}
-				BIT_flushBits(&blockStream); /* (7)*/
-			}
-		}
-
-		FSE_flushCState(&blockStream, &stateMatchLength);
-		FSE_flushCState(&blockStream, &stateOffsetBits);
-		FSE_flushCState(&blockStream, &stateLitLength);
-
-		{
-			size_t const streamSize = BIT_closeCStream(&blockStream);
-			if (streamSize == 0)
-				return ERROR(dstSize_tooSmall); /* not enough space */
-			op += streamSize;
-		}
-	}
-	return op - ostart;
-}
-
-ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
-{
-	size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity);
-	size_t const minGain = ZSTD_minGain(srcSize);
-	size_t const maxCSize = srcSize - minGain;
-	/* If the srcSize <= dstCapacity, then there is enough space to write a
-	 * raw uncompressed block. Since we ran out of space, the block must not
-	 * be compressible, so fall back to a raw uncompressed block.
-	 */
-	int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
-	int i;
-
-	if (ZSTD_isError(cSize) && !uncompressibleError)
-		return cSize;
-	if (cSize >= maxCSize || uncompressibleError) {
-		zc->flagStaticHufTable = HUF_repeat_none;
-		return 0;
-	}
-	/* confirm repcodes */
-	for (i = 0; i < ZSTD_REP_NUM; i++)
-		zc->rep[i] = zc->repToConfirm[i];
-	return cSize;
-}
-
-/*! ZSTD_storeSeq() :
-	Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
-	`offsetCode` : distance to match, or 0 == repCode.
-	`matchCode` : matchLength - MINMATCH
-*/
-ZSTD_STATIC void ZSTD_storeSeq(seqStore_t *seqStorePtr, size_t litLength, const void *literals, U32 offsetCode, size_t matchCode)
-{
-	/* copy Literals */
-	ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
-	seqStorePtr->lit += litLength;
-
-	/* literal Length */
-	if (litLength > 0xFFFF) {
-		seqStorePtr->longLengthID = 1;
-		seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-	}
-	seqStorePtr->sequences[0].litLength = (U16)litLength;
-
-	/* match offset */
-	seqStorePtr->sequences[0].offset = offsetCode + 1;
-
-	/* match Length */
-	if (matchCode > 0xFFFF) {
-		seqStorePtr->longLengthID = 2;
-		seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-	}
-	seqStorePtr->sequences[0].matchLength = (U16)matchCode;
-
-	seqStorePtr->sequences++;
-}
-
-/*-*************************************
-*  Match length counter
-***************************************/
-static unsigned ZSTD_NbCommonBytes(register size_t val)
-{
-	if (ZSTD_isLittleEndian()) {
-		if (ZSTD_64bits()) {
-			return (__builtin_ctzll((U64)val) >> 3);
-		} else { /* 32 bits */
-			return (__builtin_ctz((U32)val) >> 3);
-		}
-	} else { /* Big Endian CPU */
-		if (ZSTD_64bits()) {
-			return (__builtin_clzll(val) >> 3);
-		} else { /* 32 bits */
-			return (__builtin_clz((U32)val) >> 3);
-		}
-	}
-}
-
-static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *const pInLimit)
-{
-	const BYTE *const pStart = pIn;
-	const BYTE *const pInLoopLimit = pInLimit - (sizeof(size_t) - 1);
-
-	while (pIn < pInLoopLimit) {
-		size_t const diff = ZSTD_readST(pMatch) ^ ZSTD_readST(pIn);
-		if (!diff) {
-			pIn += sizeof(size_t);
-			pMatch += sizeof(size_t);
-			continue;
-		}
-		pIn += ZSTD_NbCommonBytes(diff);
-		return (size_t)(pIn - pStart);
-	}
-	if (ZSTD_64bits())
-		if ((pIn < (pInLimit - 3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) {
-			pIn += 4;
-			pMatch += 4;
-		}
-	if ((pIn < (pInLimit - 1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) {
-		pIn += 2;
-		pMatch += 2;
-	}
-	if ((pIn < pInLimit) && (*pMatch == *pIn))
-		pIn++;
-	return (size_t)(pIn - pStart);
-}
-
-/** ZSTD_count_2segments() :
-*   can count match length with `ip` & `match` in 2 different segments.
-*   convention : on reaching mEnd, match count continue starting from iStart
-*/
-static size_t ZSTD_count_2segments(const BYTE *ip, const BYTE *match, const BYTE *iEnd, const BYTE *mEnd, const BYTE *iStart)
-{
-	const BYTE *const vEnd = MIN(ip + (mEnd - match), iEnd);
-	size_t const matchLength = ZSTD_count(ip, match, vEnd);
-	if (match + matchLength != mEnd)
-		return matchLength;
-	return matchLength + ZSTD_count(ip + matchLength, iStart, iEnd);
-}
-
-/*-*************************************
-*  Hashes
-***************************************/
-static const U32 prime3bytes = 506832829U;
-static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32 - 24)) * prime3bytes) >> (32 - h); }
-ZSTD_STATIC size_t ZSTD_hash3Ptr(const void *ptr, U32 h) { return ZSTD_hash3(ZSTD_readLE32(ptr), h); } /* only in zstd_opt.h */
-
-static const U32 prime4bytes = 2654435761U;
-static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32 - h); }
-static size_t ZSTD_hash4Ptr(const void *ptr, U32 h) { return ZSTD_hash4(ZSTD_read32(ptr), h); }
-
-static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64 - 40)) * prime5bytes) >> (64 - h)); }
-static size_t ZSTD_hash5Ptr(const void *p, U32 h) { return ZSTD_hash5(ZSTD_readLE64(p), h); }
-
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64 - 48)) * prime6bytes) >> (64 - h)); }
-static size_t ZSTD_hash6Ptr(const void *p, U32 h) { return ZSTD_hash6(ZSTD_readLE64(p), h); }
-
-static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64 - 56)) * prime7bytes) >> (64 - h)); }
-static size_t ZSTD_hash7Ptr(const void *p, U32 h) { return ZSTD_hash7(ZSTD_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u)*prime8bytes) >> (64 - h)); }
-static size_t ZSTD_hash8Ptr(const void *p, U32 h) { return ZSTD_hash8(ZSTD_readLE64(p), h); }
-
-static size_t ZSTD_hashPtr(const void *p, U32 hBits, U32 mls)
-{
-	switch (mls) {
-	// case 3: return ZSTD_hash3Ptr(p, hBits);
-	default:
-	case 4: return ZSTD_hash4Ptr(p, hBits);
-	case 5: return ZSTD_hash5Ptr(p, hBits);
-	case 6: return ZSTD_hash6Ptr(p, hBits);
-	case 7: return ZSTD_hash7Ptr(p, hBits);
-	case 8: return ZSTD_hash8Ptr(p, hBits);
-	}
-}
-
-/*-*************************************
-*  Fast Scan
-***************************************/
-static void ZSTD_fillHashTable(ZSTD_CCtx *zc, const void *end, const U32 mls)
-{
-	U32 *const hashTable = zc->hashTable;
-	U32 const hBits = zc->params.cParams.hashLog;
-	const BYTE *const base = zc->base;
-	const BYTE *ip = base + zc->nextToUpdate;
-	const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
-	const size_t fastHashFillStep = 3;
-
-	while (ip <= iend) {
-		hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
-		ip += fastHashFillStep;
-	}
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_fast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *const hashTable = cctx->hashTable;
-	U32 const hBits = cctx->params.cParams.hashLog;
-	seqStore_t *seqStorePtr = &(cctx->seqStore);
-	const BYTE *const base = cctx->base;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = cctx->dictLimit;
-	const BYTE *const lowest = base + lowestIndex;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - HASH_READ_SIZE;
-	U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
-	U32 offsetSaved = 0;
-
-	/* init */
-	ip += (ip == lowest);
-	{
-		U32 const maxRep = (U32)(ip - lowest);
-		if (offset_2 > maxRep)
-			offsetSaved = offset_2, offset_2 = 0;
-		if (offset_1 > maxRep)
-			offsetSaved = offset_1, offset_1 = 0;
-	}
-
-	/* Main Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
-		size_t mLength;
-		size_t const h = ZSTD_hashPtr(ip, hBits, mls);
-		U32 const curr = (U32)(ip - base);
-		U32 const matchIndex = hashTable[h];
-		const BYTE *match = base + matchIndex;
-		hashTable[h] = curr; /* update hash table */
-
-		if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) {
-			mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			U32 offset;
-			if ((matchIndex <= lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-			mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
-			offset = (U32)(ip - match);
-			while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
-				ip--;
-				match--;
-				mLength++;
-			} /* catch up */
-			offset_2 = offset_1;
-			offset_1 = offset;
-
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-		}
-
-		/* match found */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; /* here because curr+2 could be > iend-8 */
-			hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
-			/* check immediate repcode */
-			while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-				/* store sequence */
-				size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
-				{
-					U32 const tmpOff = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOff;
-				} /* swap offset_2 <=> offset_1 */
-				hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
-				ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
-				ip += rLength;
-				anchor = ip;
-				continue; /* faster when present ... (?) */
-			}
-		}
-	}
-
-	/* save reps for next block */
-	cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-	cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_fast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	const U32 mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *hashTable = ctx->hashTable;
-	const U32 hBits = ctx->params.cParams.hashLog;
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const base = ctx->base;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = ctx->lowLimit;
-	const BYTE *const dictStart = dictBase + lowestIndex;
-	const U32 dictLimit = ctx->dictLimit;
-	const BYTE *const lowPrefixPtr = base + dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-	/* Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because (ip+1) */
-		const size_t h = ZSTD_hashPtr(ip, hBits, mls);
-		const U32 matchIndex = hashTable[h];
-		const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
-		const BYTE *match = matchBase + matchIndex;
-		const U32 curr = (U32)(ip - base);
-		const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
-		const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
-		const BYTE *repMatch = repBase + repIndex;
-		size_t mLength;
-		hashTable[h] = curr; /* update hash table */
-
-		if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
-		    (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
-			const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-			mLength = ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			if ((matchIndex < lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-			{
-				const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
-				const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
-				U32 offset;
-				mLength = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
-				while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
-					ip--;
-					match--;
-					mLength++;
-				} /* catch up */
-				offset = curr - matchIndex;
-				offset_2 = offset_1;
-				offset_1 = offset;
-				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-			}
-		}
-
-		/* found a match : store it */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2;
-			hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
-			/* check immediate repcode */
-			while (ip <= ilimit) {
-				U32 const curr2 = (U32)(ip - base);
-				U32 const repIndex2 = curr2 - offset_2;
-				const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
-				if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
-				    && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
-					const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-					size_t repLength2 =
-					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
-					U32 tmpOffset = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
-					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
-					hashTable[ZSTD_hashPtr(ip, hBits, mls)] = curr2;
-					ip += repLength2;
-					anchor = ip;
-					continue;
-				}
-				break;
-			}
-		}
-	}
-
-	/* save reps for next block */
-	ctx->repToConfirm[0] = offset_1;
-	ctx->repToConfirm[1] = offset_2;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	U32 const mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-/*-*************************************
-*  Double Fast
-***************************************/
-static void ZSTD_fillDoubleHashTable(ZSTD_CCtx *cctx, const void *end, const U32 mls)
-{
-	U32 *const hashLarge = cctx->hashTable;
-	U32 const hBitsL = cctx->params.cParams.hashLog;
-	U32 *const hashSmall = cctx->chainTable;
-	U32 const hBitsS = cctx->params.cParams.chainLog;
-	const BYTE *const base = cctx->base;
-	const BYTE *ip = base + cctx->nextToUpdate;
-	const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
-	const size_t fastHashFillStep = 3;
-
-	while (ip <= iend) {
-		hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
-		hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
-		ip += fastHashFillStep;
-	}
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *const hashLong = cctx->hashTable;
-	const U32 hBitsL = cctx->params.cParams.hashLog;
-	U32 *const hashSmall = cctx->chainTable;
-	const U32 hBitsS = cctx->params.cParams.chainLog;
-	seqStore_t *seqStorePtr = &(cctx->seqStore);
-	const BYTE *const base = cctx->base;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = cctx->dictLimit;
-	const BYTE *const lowest = base + lowestIndex;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - HASH_READ_SIZE;
-	U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
-	U32 offsetSaved = 0;
-
-	/* init */
-	ip += (ip == lowest);
-	{
-		U32 const maxRep = (U32)(ip - lowest);
-		if (offset_2 > maxRep)
-			offsetSaved = offset_2, offset_2 = 0;
-		if (offset_1 > maxRep)
-			offsetSaved = offset_1, offset_1 = 0;
-	}
-
-	/* Main Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
-		size_t mLength;
-		size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
-		size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
-		U32 const curr = (U32)(ip - base);
-		U32 const matchIndexL = hashLong[h2];
-		U32 const matchIndexS = hashSmall[h];
-		const BYTE *matchLong = base + matchIndexL;
-		const BYTE *match = base + matchIndexS;
-		hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
-
-		if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { /* note : by construction, offset_1 <= curr */
-			mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			U32 offset;
-			if ((matchIndexL > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
-				mLength = ZSTD_count(ip + 8, matchLong + 8, iend) + 8;
-				offset = (U32)(ip - matchLong);
-				while (((ip > anchor) & (matchLong > lowest)) && (ip[-1] == matchLong[-1])) {
-					ip--;
-					matchLong--;
-					mLength++;
-				} /* catch up */
-			} else if ((matchIndexS > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
-				size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
-				U32 const matchIndex3 = hashLong[h3];
-				const BYTE *match3 = base + matchIndex3;
-				hashLong[h3] = curr + 1;
-				if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
-					mLength = ZSTD_count(ip + 9, match3 + 8, iend) + 8;
-					ip++;
-					offset = (U32)(ip - match3);
-					while (((ip > anchor) & (match3 > lowest)) && (ip[-1] == match3[-1])) {
-						ip--;
-						match3--;
-						mLength++;
-					} /* catch up */
-				} else {
-					mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
-					offset = (U32)(ip - match);
-					while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
-						ip--;
-						match--;
-						mLength++;
-					} /* catch up */
-				}
-			} else {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-
-			offset_2 = offset_1;
-			offset_1 = offset;
-
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-		}
-
-		/* match found */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] =
-			    curr + 2; /* here because curr+2 could be > iend-8 */
-			hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
-
-			/* check immediate repcode */
-			while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-				/* store sequence */
-				size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
-				{
-					U32 const tmpOff = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOff;
-				} /* swap offset_2 <=> offset_1 */
-				hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
-				hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
-				ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
-				ip += rLength;
-				anchor = ip;
-				continue; /* faster when present ... (?) */
-			}
-		}
-	}
-
-	/* save reps for next block */
-	cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-	cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	const U32 mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
-{
-	U32 *const hashLong = ctx->hashTable;
-	U32 const hBitsL = ctx->params.cParams.hashLog;
-	U32 *const hashSmall = ctx->chainTable;
-	U32 const hBitsS = ctx->params.cParams.chainLog;
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const base = ctx->base;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const U32 lowestIndex = ctx->lowLimit;
-	const BYTE *const dictStart = dictBase + lowestIndex;
-	const U32 dictLimit = ctx->dictLimit;
-	const BYTE *const lowPrefixPtr = base + dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-	/* Search Loop */
-	while (ip < ilimit) { /* < instead of <=, because (ip+1) */
-		const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
-		const U32 matchIndex = hashSmall[hSmall];
-		const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
-		const BYTE *match = matchBase + matchIndex;
-
-		const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
-		const U32 matchLongIndex = hashLong[hLong];
-		const BYTE *matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
-		const BYTE *matchLong = matchLongBase + matchLongIndex;
-
-		const U32 curr = (U32)(ip - base);
-		const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
-		const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
-		const BYTE *repMatch = repBase + repIndex;
-		size_t mLength;
-		hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
-
-		if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
-		    (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
-			const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-			mLength = ZSTD_count_2segments(ip + 1 + 4, repMatch + 4, iend, repMatchEnd, lowPrefixPtr) + 4;
-			ip++;
-			ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-		} else {
-			if ((matchLongIndex > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
-				const BYTE *matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
-				const BYTE *lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
-				U32 offset;
-				mLength = ZSTD_count_2segments(ip + 8, matchLong + 8, iend, matchEnd, lowPrefixPtr) + 8;
-				offset = curr - matchLongIndex;
-				while (((ip > anchor) & (matchLong > lowMatchPtr)) && (ip[-1] == matchLong[-1])) {
-					ip--;
-					matchLong--;
-					mLength++;
-				} /* catch up */
-				offset_2 = offset_1;
-				offset_1 = offset;
-				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-
-			} else if ((matchIndex > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
-				size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
-				U32 const matchIndex3 = hashLong[h3];
-				const BYTE *const match3Base = matchIndex3 < dictLimit ? dictBase : base;
-				const BYTE *match3 = match3Base + matchIndex3;
-				U32 offset;
-				hashLong[h3] = curr + 1;
-				if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
-					const BYTE *matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
-					const BYTE *lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
-					mLength = ZSTD_count_2segments(ip + 9, match3 + 8, iend, matchEnd, lowPrefixPtr) + 8;
-					ip++;
-					offset = curr + 1 - matchIndex3;
-					while (((ip > anchor) & (match3 > lowMatchPtr)) && (ip[-1] == match3[-1])) {
-						ip--;
-						match3--;
-						mLength++;
-					} /* catch up */
-				} else {
-					const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
-					const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
-					mLength = ZSTD_count_2segments(ip + 4, match + 4, iend, matchEnd, lowPrefixPtr) + 4;
-					offset = curr - matchIndex;
-					while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
-						ip--;
-						match--;
-						mLength++;
-					} /* catch up */
-				}
-				offset_2 = offset_1;
-				offset_1 = offset;
-				ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-
-			} else {
-				ip += ((ip - anchor) >> g_searchStrength) + 1;
-				continue;
-			}
-		}
-
-		/* found a match : store it */
-		ip += mLength;
-		anchor = ip;
-
-		if (ip <= ilimit) {
-			/* Fill Table */
-			hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = curr + 2;
-			hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = curr + 2;
-			hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
-			hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = (U32)(ip - 2 - base);
-			/* check immediate repcode */
-			while (ip <= ilimit) {
-				U32 const curr2 = (U32)(ip - base);
-				U32 const repIndex2 = curr2 - offset_2;
-				const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
-				if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
-				    && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
-					const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-					size_t const repLength2 =
-					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
-					U32 tmpOffset = offset_2;
-					offset_2 = offset_1;
-					offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
-					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
-					hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = curr2;
-					hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = curr2;
-					ip += repLength2;
-					anchor = ip;
-					continue;
-				}
-				break;
-			}
-		}
-	}
-
-	/* save reps for next block */
-	ctx->repToConfirm[0] = offset_1;
-	ctx->repToConfirm[1] = offset_2;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	U32 const mls = ctx->params.cParams.searchLength;
-	switch (mls) {
-	default: /* includes case 3 */
-	case 4: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
-	case 5: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
-	case 6: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
-	case 7: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
-	}
-}
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
-*   ip : assumed <= iend-8 .
-*   @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx *zc, const BYTE *const ip, const U32 mls, const BYTE *const iend, U32 nbCompares, U32 extDict)
-{
-	U32 *const hashTable = zc->hashTable;
-	U32 const hashLog = zc->params.cParams.hashLog;
-	size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
-	U32 *const bt = zc->chainTable;
-	U32 const btLog = zc->params.cParams.chainLog - 1;
-	U32 const btMask = (1 << btLog) - 1;
-	U32 matchIndex = hashTable[h];
-	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-	const BYTE *const base = zc->base;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *match;
-	const U32 curr = (U32)(ip - base);
-	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-	U32 *smallerPtr = bt + 2 * (curr & btMask);
-	U32 *largerPtr = smallerPtr + 1;
-	U32 dummy32; /* to be nullified at the end */
-	U32 const windowLow = zc->lowLimit;
-	U32 matchEndIdx = curr + 8;
-	size_t bestLength = 8;
-
-	hashTable[h] = curr; /* Update Hash Table */
-
-	while (nbCompares-- && (matchIndex > windowLow)) {
-		U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
-		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-
-		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-			match = base + matchIndex;
-			if (match[matchLength] == ip[matchLength])
-				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
-		} else {
-			match = dictBase + matchIndex;
-			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
-			if (matchIndex + matchLength >= dictLimit)
-				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-		}
-
-		if (matchLength > bestLength) {
-			bestLength = matchLength;
-			if (matchLength > matchEndIdx - matchIndex)
-				matchEndIdx = matchIndex + (U32)matchLength;
-		}
-
-		if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
-			break;		      /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
-
-		if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
-			/* match is smaller than curr */
-			*smallerPtr = matchIndex;	  /* update smaller idx */
-			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-			if (matchIndex <= btLow) {
-				smallerPtr = &dummy32;
-				break;
-			}			  /* beyond tree size, stop the search */
-			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-		} else {
-			/* match is larger than curr */
-			*largerPtr = matchIndex;
-			commonLengthLarger = matchLength;
-			if (matchIndex <= btLow) {
-				largerPtr = &dummy32;
-				break;
-			} /* beyond tree size, stop the search */
-			largerPtr = nextPtr;
-			matchIndex = nextPtr[0];
-		}
-	}
-
-	*smallerPtr = *largerPtr = 0;
-	if (bestLength > 384)
-		return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
-	if (matchEndIdx > curr + 8)
-		return matchEndIdx - curr - 8;
-	return 1;
-}
-
-static size_t ZSTD_insertBtAndFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, size_t *offsetPtr, U32 nbCompares, const U32 mls,
-					    U32 extDict)
-{
-	U32 *const hashTable = zc->hashTable;
-	U32 const hashLog = zc->params.cParams.hashLog;
-	size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
-	U32 *const bt = zc->chainTable;
-	U32 const btLog = zc->params.cParams.chainLog - 1;
-	U32 const btMask = (1 << btLog) - 1;
-	U32 matchIndex = hashTable[h];
-	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-	const BYTE *const base = zc->base;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const U32 curr = (U32)(ip - base);
-	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-	const U32 windowLow = zc->lowLimit;
-	U32 *smallerPtr = bt + 2 * (curr & btMask);
-	U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
-	U32 matchEndIdx = curr + 8;
-	U32 dummy32; /* to be nullified at the end */
-	size_t bestLength = 0;
-
-	hashTable[h] = curr; /* Update Hash Table */
-
-	while (nbCompares-- && (matchIndex > windowLow)) {
-		U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
-		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-		const BYTE *match;
-
-		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-			match = base + matchIndex;
-			if (match[matchLength] == ip[matchLength])
-				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
-		} else {
-			match = dictBase + matchIndex;
-			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
-			if (matchIndex + matchLength >= dictLimit)
-				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-		}
-
-		if (matchLength > bestLength) {
-			if (matchLength > matchEndIdx - matchIndex)
-				matchEndIdx = matchIndex + (U32)matchLength;
-			if ((4 * (int)(matchLength - bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)offsetPtr[0] + 1)))
-				bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
-			if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
-				break;		      /* drop, to guarantee consistency (miss a little bit of compression) */
-		}
-
-		if (match[matchLength] < ip[matchLength]) {
-			/* match is smaller than curr */
-			*smallerPtr = matchIndex;	  /* update smaller idx */
-			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-			if (matchIndex <= btLow) {
-				smallerPtr = &dummy32;
-				break;
-			}			  /* beyond tree size, stop the search */
-			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-		} else {
-			/* match is larger than curr */
-			*largerPtr = matchIndex;
-			commonLengthLarger = matchLength;
-			if (matchIndex <= btLow) {
-				largerPtr = &dummy32;
-				break;
-			} /* beyond tree size, stop the search */
-			largerPtr = nextPtr;
-			matchIndex = nextPtr[0];
-		}
-	}
-
-	*smallerPtr = *largerPtr = 0;
-
-	zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
-	return bestLength;
-}
-
-static void ZSTD_updateTree(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
-{
-	const BYTE *const base = zc->base;
-	const U32 target = (U32)(ip - base);
-	U32 idx = zc->nextToUpdate;
-
-	while (idx < target)
-		idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 0);
-}
-
-/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
-}
-
-static size_t ZSTD_BtFindBestMatch_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
-					     const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-	case 5: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-	case 7:
-	case 6: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-	}
-}
-
-static void ZSTD_updateTree_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
-{
-	const BYTE *const base = zc->base;
-	const U32 target = (U32)(ip - base);
-	U32 idx = zc->nextToUpdate;
-
-	while (idx < target)
-		idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 1);
-}
-
-/** Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-					   const U32 mls)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
-}
-
-static size_t ZSTD_BtFindBestMatch_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
-						     const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-						     const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-	case 5: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-	case 7:
-	case 6: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-	}
-}
-
-/* *********************************
-*  Hash Chain
-***********************************/
-#define NEXT_IN_CHAIN(d, mask) chainTable[(d)&mask]
-
-/* Update chains up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_CCtx *zc, const BYTE *ip, U32 mls)
-{
-	U32 *const hashTable = zc->hashTable;
-	const U32 hashLog = zc->params.cParams.hashLog;
-	U32 *const chainTable = zc->chainTable;
-	const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
-	const BYTE *const base = zc->base;
-	const U32 target = (U32)(ip - base);
-	U32 idx = zc->nextToUpdate;
-
-	while (idx < target) { /* catch up */
-		size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);
-		NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-		hashTable[h] = idx;
-		idx++;
-	}
-
-	zc->nextToUpdate = target;
-	return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-/* inlining is important to hardwire a hot branch (template emulation) */
-FORCE_INLINE
-size_t ZSTD_HcFindBestMatch_generic(ZSTD_CCtx *zc, /* Index table will be updated */
-				    const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls,
-				    const U32 extDict)
-{
-	U32 *const chainTable = zc->chainTable;
-	const U32 chainSize = (1 << zc->params.cParams.chainLog);
-	const U32 chainMask = chainSize - 1;
-	const BYTE *const base = zc->base;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const U32 lowLimit = zc->lowLimit;
-	const U32 curr = (U32)(ip - base);
-	const U32 minChain = curr > chainSize ? curr - chainSize : 0;
-	int nbAttempts = maxNbAttempts;
-	size_t ml = EQUAL_READ32 - 1;
-
-	/* HC4 match finder */
-	U32 matchIndex = ZSTD_insertAndFindFirstIndex(zc, ip, mls);
-
-	for (; (matchIndex > lowLimit) & (nbAttempts > 0); nbAttempts--) {
-		const BYTE *match;
-		size_t currMl = 0;
-		if ((!extDict) || matchIndex >= dictLimit) {
-			match = base + matchIndex;
-			if (match[ml] == ip[ml]) /* potentially better */
-				currMl = ZSTD_count(ip, match, iLimit);
-		} else {
-			match = dictBase + matchIndex;
-			if (ZSTD_read32(match) == ZSTD_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-				currMl = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
-		}
-
-		/* save best solution */
-		if (currMl > ml) {
-			ml = currMl;
-			*offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
-			if (ip + currMl == iLimit)
-				break; /* best possible, and avoid read overflow*/
-		}
-
-		if (matchIndex <= minChain)
-			break;
-		matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
-	}
-
-	return ml;
-}
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-						   const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
-	case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
-	case 7:
-	case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
-	}
-}
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-							   const U32 matchLengthSearch)
-{
-	switch (matchLengthSearch) {
-	default: /* includes case 3 */
-	case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
-	case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
-	case 7:
-	case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
-	}
-}
-
-/* *******************************
-*  Common parser - lazy strategy
-*********************************/
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base + ctx->dictLimit;
-
-	U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
-	U32 const mls = ctx->params.cParams.searchLength;
-
-	typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
-	searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset = 0;
-
-	/* init */
-	ip += (ip == base);
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	{
-		U32 const maxRep = (U32)(ip - base);
-		if (offset_2 > maxRep)
-			savedOffset = offset_2, offset_2 = 0;
-		if (offset_1 > maxRep)
-			savedOffset = offset_1, offset_1 = 0;
-	}
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		size_t matchLength = 0;
-		size_t offset = 0;
-		const BYTE *start = ip + 1;
-
-		/* check repCode */
-		if ((offset_1 > 0) & (ZSTD_read32(ip + 1) == ZSTD_read32(ip + 1 - offset_1))) {
-			/* repcode : we take it */
-			matchLength = ZSTD_count(ip + 1 + EQUAL_READ32, ip + 1 + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-			if (depth == 0)
-				goto _storeSequence;
-		}
-
-		/* first search (depth 0) */
-		{
-			size_t offsetFound = 99999999;
-			size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
-			if (ml2 > matchLength)
-				matchLength = ml2, start = ip, offset = offsetFound;
-		}
-
-		if (matchLength < EQUAL_READ32) {
-			ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
-			continue;
-		}
-
-		/* let's try to find a better solution */
-		if (depth >= 1)
-			while (ip < ilimit) {
-				ip++;
-				if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
-					size_t const mlRep = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-					int const gain2 = (int)(mlRep * 3);
-					int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
-					if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
-						matchLength = mlRep, offset = 0, start = ip;
-				}
-				{
-					size_t offset2 = 99999999;
-					size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-					int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-					int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
-					if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-						matchLength = ml2, offset = offset2, start = ip;
-						continue; /* search a better one */
-					}
-				}
-
-				/* let's find an even better one */
-				if ((depth == 2) && (ip < ilimit)) {
-					ip++;
-					if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
-						size_t const ml2 = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-						int const gain2 = (int)(ml2 * 4);
-						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
-						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
-							matchLength = ml2, offset = 0, start = ip;
-					}
-					{
-						size_t offset2 = 99999999;
-						size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-						int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
-						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-							matchLength = ml2, offset = offset2, start = ip;
-							continue;
-						}
-					}
-				}
-				break; /* nothing found : store previous solution */
-			}
-
-		/* NOTE:
-		 * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
-		 * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
-		 * overflows the pointer, which is undefined behavior.
-		 */
-		/* catch up */
-		if (offset) {
-			while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) &&
-			       (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */
-			{
-				start--;
-				matchLength++;
-			}
-			offset_2 = offset_1;
-			offset_1 = (U32)(offset - ZSTD_REP_MOVE);
-		}
-
-	/* store sequence */
-_storeSequence:
-		{
-			size_t const litLength = start - anchor;
-			ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
-			anchor = ip = start + matchLength;
-		}
-
-		/* check immediate repcode */
-		while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-			/* store sequence */
-			matchLength = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_2, iend) + EQUAL_READ32;
-			offset = offset_2;
-			offset_2 = offset_1;
-			offset_1 = (U32)offset; /* swap repcodes */
-			ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
-			ip += matchLength;
-			anchor = ip;
-			continue; /* faster when present ... (?) */
-		}
-	}
-
-	/* Save reps for next block */
-	ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
-	ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); }
-
-static void ZSTD_compressBlock_lazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); }
-
-static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); }
-
-static void ZSTD_compressBlock_greedy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); }
-
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base;
-	const U32 dictLimit = ctx->dictLimit;
-	const U32 lowestIndex = ctx->lowLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const dictStart = dictBase + ctx->lowLimit;
-
-	const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
-	const U32 mls = ctx->params.cParams.searchLength;
-
-	typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
-	searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
-
-	U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-	/* init */
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	ip += (ip == prefixStart);
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		size_t matchLength = 0;
-		size_t offset = 0;
-		const BYTE *start = ip + 1;
-		U32 curr = (U32)(ip - base);
-
-		/* check repCode */
-		{
-			const U32 repIndex = (U32)(curr + 1 - offset_1);
-			const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-			const BYTE *const repMatch = repBase + repIndex;
-			if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-				if (ZSTD_read32(ip + 1) == ZSTD_read32(repMatch)) {
-					/* repcode detected we should take it */
-					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-					matchLength =
-					    ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-					if (depth == 0)
-						goto _storeSequence;
-				}
-		}
-
-		/* first search (depth 0) */
-		{
-			size_t offsetFound = 99999999;
-			size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
-			if (ml2 > matchLength)
-				matchLength = ml2, start = ip, offset = offsetFound;
-		}
-
-		if (matchLength < EQUAL_READ32) {
-			ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
-			continue;
-		}
-
-		/* let's try to find a better solution */
-		if (depth >= 1)
-			while (ip < ilimit) {
-				ip++;
-				curr++;
-				/* check repCode */
-				if (offset) {
-					const U32 repIndex = (U32)(curr - offset_1);
-					const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-					const BYTE *const repMatch = repBase + repIndex;
-					if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-						if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-							/* repcode detected */
-							const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-							size_t const repLength =
-							    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) +
-							    EQUAL_READ32;
-							int const gain2 = (int)(repLength * 3);
-							int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
-							if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
-								matchLength = repLength, offset = 0, start = ip;
-						}
-				}
-
-				/* search match, depth 1 */
-				{
-					size_t offset2 = 99999999;
-					size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-					int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-					int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
-					if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-						matchLength = ml2, offset = offset2, start = ip;
-						continue; /* search a better one */
-					}
-				}
-
-				/* let's find an even better one */
-				if ((depth == 2) && (ip < ilimit)) {
-					ip++;
-					curr++;
-					/* check repCode */
-					if (offset) {
-						const U32 repIndex = (U32)(curr - offset_1);
-						const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-						const BYTE *const repMatch = repBase + repIndex;
-						if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-							if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-								/* repcode detected */
-								const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-								size_t repLength = ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend,
-													repEnd, prefixStart) +
-										   EQUAL_READ32;
-								int gain2 = (int)(repLength * 4);
-								int gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
-								if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
-									matchLength = repLength, offset = 0, start = ip;
-							}
-					}
-
-					/* search match, depth 2 */
-					{
-						size_t offset2 = 99999999;
-						size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-						int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-						int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
-						if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-							matchLength = ml2, offset = offset2, start = ip;
-							continue;
-						}
-					}
-				}
-				break; /* nothing found : store previous solution */
-			}
-
-		/* catch up */
-		if (offset) {
-			U32 const matchIndex = (U32)((start - base) - (offset - ZSTD_REP_MOVE));
-			const BYTE *match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
-			const BYTE *const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
-			while ((start > anchor) && (match > mStart) && (start[-1] == match[-1])) {
-				start--;
-				match--;
-				matchLength++;
-			} /* catch up */
-			offset_2 = offset_1;
-			offset_1 = (U32)(offset - ZSTD_REP_MOVE);
-		}
-
-	/* store sequence */
-	_storeSequence : {
-		size_t const litLength = start - anchor;
-		ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
-		anchor = ip = start + matchLength;
-	}
-
-		/* check immediate repcode */
-		while (ip <= ilimit) {
-			const U32 repIndex = (U32)((ip - base) - offset_2);
-			const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-			const BYTE *const repMatch = repBase + repIndex;
-			if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-				if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-					/* repcode detected we should take it */
-					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-					matchLength =
-					    ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-					offset = offset_2;
-					offset_2 = offset_1;
-					offset_1 = (U32)offset; /* swap offset history */
-					ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
-					ip += matchLength;
-					anchor = ip;
-					continue; /* faster when present ... (?) */
-				}
-			break;
-		}
-	}
-
-	/* Save reps for next block */
-	ctx->repToConfirm[0] = offset_1;
-	ctx->repToConfirm[1] = offset_2;
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); }
-
-static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
-}
-
-static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
-}
-
-static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-	ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
-}
-
-/* The optimal parser */
-#include "zstd_opt.h"
-
-static void ZSTD_compressBlock_btopt(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt2(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-	ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
-#else
-	(void)ctx;
-	(void)src;
-	(void)srcSize;
-	return;
-#endif
-}
-
-typedef void (*ZSTD_blockCompressor)(ZSTD_CCtx *ctx, const void *src, size_t srcSize);
-
-static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
-{
-	static const ZSTD_blockCompressor blockCompressor[2][8] = {
-	    {ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2,
-	     ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2},
-	    {ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,
-	     ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict}};
-
-	return blockCompressor[extDict][(U32)strat];
-}
-
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
-	const BYTE *const base = zc->base;
-	const BYTE *const istart = (const BYTE *)src;
-	const U32 curr = (U32)(istart - base);
-	if (srcSize < MIN_CBLOCK_SIZE + ZSTD_blockHeaderSize + 1)
-		return 0; /* don't even attempt compression below a certain srcSize */
-	ZSTD_resetSeqStore(&(zc->seqStore));
-	if (curr > zc->nextToUpdate + 384)
-		zc->nextToUpdate = curr - MIN(192, (U32)(curr - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */
-	blockCompressor(zc, src, srcSize);
-	return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
-}
-
-/*! ZSTD_compress_generic() :
-*   Compress a chunk of data into one or multiple blocks.
-*   All blocks will be terminated, all input will be consumed.
-*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
-*   Frame is supposed already started (header already produced)
-*   @return : compressed size, or an error code
-*/
-static size_t ZSTD_compress_generic(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 lastFrameChunk)
-{
-	size_t blockSize = cctx->blockSize;
-	size_t remaining = srcSize;
-	const BYTE *ip = (const BYTE *)src;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	U32 const maxDist = 1 << cctx->params.cParams.windowLog;
-
-	if (cctx->params.fParams.checksumFlag && srcSize)
-		xxh64_update(&cctx->xxhState, src, srcSize);
-
-	while (remaining) {
-		U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
-		size_t cSize;
-
-		if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
-			return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
-		if (remaining < blockSize)
-			blockSize = remaining;
-
-		/* preemptive overflow correction */
-		if (cctx->lowLimit > (3U << 29)) {
-			U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
-			U32 const curr = (U32)(ip - cctx->base);
-			U32 const newCurr = (curr & cycleMask) + (1 << cctx->params.cParams.windowLog);
-			U32 const correction = curr - newCurr;
-			ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
-			ZSTD_reduceIndex(cctx, correction);
-			cctx->base += correction;
-			cctx->dictBase += correction;
-			cctx->lowLimit -= correction;
-			cctx->dictLimit -= correction;
-			if (cctx->nextToUpdate < correction)
-				cctx->nextToUpdate = 0;
-			else
-				cctx->nextToUpdate -= correction;
-		}
-
-		if ((U32)(ip + blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
-			/* enforce maxDist */
-			U32 const newLowLimit = (U32)(ip + blockSize - cctx->base) - maxDist;
-			if (cctx->lowLimit < newLowLimit)
-				cctx->lowLimit = newLowLimit;
-			if (cctx->dictLimit < cctx->lowLimit)
-				cctx->dictLimit = cctx->lowLimit;
-		}
-
-		cSize = ZSTD_compressBlock_internal(cctx, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, ip, blockSize);
-		if (ZSTD_isError(cSize))
-			return cSize;
-
-		if (cSize == 0) { /* block is not compressible */
-			U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw) << 1) + (U32)(blockSize << 3);
-			if (blockSize + ZSTD_blockHeaderSize > dstCapacity)
-				return ERROR(dstSize_tooSmall);
-			ZSTD_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
-			memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
-			cSize = ZSTD_blockHeaderSize + blockSize;
-		} else {
-			U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed) << 1) + (U32)(cSize << 3);
-			ZSTD_writeLE24(op, cBlockHeader24);
-			cSize += ZSTD_blockHeaderSize;
-		}
-
-		remaining -= blockSize;
-		dstCapacity -= cSize;
-		ip += blockSize;
-		op += cSize;
-	}
-
-	if (lastFrameChunk && (op > ostart))
-		cctx->stage = ZSTDcs_ending;
-	return op - ostart;
-}
-
-static size_t ZSTD_writeFrameHeader(void *dst, size_t dstCapacity, ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
-{
-	BYTE *const op = (BYTE *)dst;
-	U32 const dictIDSizeCode = (dictID > 0) + (dictID >= 256) + (dictID >= 65536); /* 0-3 */
-	U32 const checksumFlag = params.fParams.checksumFlag > 0;
-	U32 const windowSize = 1U << params.cParams.windowLog;
-	U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
-	BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
-	U32 const fcsCode =
-	    params.fParams.contentSizeFlag ? (pledgedSrcSize >= 256) + (pledgedSrcSize >= 65536 + 256) + (pledgedSrcSize >= 0xFFFFFFFFU) : 0; /* 0-3 */
-	BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag << 2) + (singleSegment << 5) + (fcsCode << 6));
-	size_t pos;
-
-	if (dstCapacity < ZSTD_frameHeaderSize_max)
-		return ERROR(dstSize_tooSmall);
-
-	ZSTD_writeLE32(dst, ZSTD_MAGICNUMBER);
-	op[4] = frameHeaderDecriptionByte;
-	pos = 5;
-	if (!singleSegment)
-		op[pos++] = windowLogByte;
-	switch (dictIDSizeCode) {
-	default: /* impossible */
-	case 0: break;
-	case 1:
-		op[pos] = (BYTE)(dictID);
-		pos++;
-		break;
-	case 2:
-		ZSTD_writeLE16(op + pos, (U16)dictID);
-		pos += 2;
-		break;
-	case 3:
-		ZSTD_writeLE32(op + pos, dictID);
-		pos += 4;
-		break;
-	}
-	switch (fcsCode) {
-	default: /* impossible */
-	case 0:
-		if (singleSegment)
-			op[pos++] = (BYTE)(pledgedSrcSize);
-		break;
-	case 1:
-		ZSTD_writeLE16(op + pos, (U16)(pledgedSrcSize - 256));
-		pos += 2;
-		break;
-	case 2:
-		ZSTD_writeLE32(op + pos, (U32)(pledgedSrcSize));
-		pos += 4;
-		break;
-	case 3:
-		ZSTD_writeLE64(op + pos, (U64)(pledgedSrcSize));
-		pos += 8;
-		break;
-	}
-	return pos;
-}
-
-static size_t ZSTD_compressContinue_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 frame, U32 lastFrameChunk)
-{
-	const BYTE *const ip = (const BYTE *)src;
-	size_t fhSize = 0;
-
-	if (cctx->stage == ZSTDcs_created)
-		return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
-
-	if (frame && (cctx->stage == ZSTDcs_init)) {
-		fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
-		if (ZSTD_isError(fhSize))
-			return fhSize;
-		dstCapacity -= fhSize;
-		dst = (char *)dst + fhSize;
-		cctx->stage = ZSTDcs_ongoing;
-	}
-
-	/* Check if blocks follow each other */
-	if (src != cctx->nextSrc) {
-		/* not contiguous */
-		ptrdiff_t const delta = cctx->nextSrc - ip;
-		cctx->lowLimit = cctx->dictLimit;
-		cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
-		cctx->dictBase = cctx->base;
-		cctx->base -= delta;
-		cctx->nextToUpdate = cctx->dictLimit;
-		if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE)
-			cctx->lowLimit = cctx->dictLimit; /* too small extDict */
-	}
-
-	/* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
-	if ((ip + srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
-		ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
-		U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
-		cctx->lowLimit = lowLimitMax;
-	}
-
-	cctx->nextSrc = ip + srcSize;
-
-	if (srcSize) {
-		size_t const cSize = frame ? ZSTD_compress_generic(cctx, dst, dstCapacity, src, srcSize, lastFrameChunk)
-					   : ZSTD_compressBlock_internal(cctx, dst, dstCapacity, src, srcSize);
-		if (ZSTD_isError(cSize))
-			return cSize;
-		return cSize + fhSize;
-	} else
-		return fhSize;
-}
-
-size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
-}
-
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx) { return MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); }
-
-size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
-	if (srcSize > blockSizeMax)
-		return ERROR(srcSize_wrong);
-	return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
-}
-
-/*! ZSTD_loadDictionaryContent() :
- *  @return : 0, or an error code
- */
-static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx *zc, const void *src, size_t srcSize)
-{
-	const BYTE *const ip = (const BYTE *)src;
-	const BYTE *const iend = ip + srcSize;
-
-	/* input becomes curr prefix */
-	zc->lowLimit = zc->dictLimit;
-	zc->dictLimit = (U32)(zc->nextSrc - zc->base);
-	zc->dictBase = zc->base;
-	zc->base += ip - zc->nextSrc;
-	zc->nextToUpdate = zc->dictLimit;
-	zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
-
-	zc->nextSrc = iend;
-	if (srcSize <= HASH_READ_SIZE)
-		return 0;
-
-	switch (zc->params.cParams.strategy) {
-	case ZSTD_fast: ZSTD_fillHashTable(zc, iend, zc->params.cParams.searchLength); break;
-
-	case ZSTD_dfast: ZSTD_fillDoubleHashTable(zc, iend, zc->params.cParams.searchLength); break;
-
-	case ZSTD_greedy:
-	case ZSTD_lazy:
-	case ZSTD_lazy2:
-		if (srcSize >= HASH_READ_SIZE)
-			ZSTD_insertAndFindFirstIndex(zc, iend - HASH_READ_SIZE, zc->params.cParams.searchLength);
-		break;
-
-	case ZSTD_btlazy2:
-	case ZSTD_btopt:
-	case ZSTD_btopt2:
-		if (srcSize >= HASH_READ_SIZE)
-			ZSTD_updateTree(zc, iend - HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
-		break;
-
-	default:
-		return ERROR(GENERIC); /* strategy doesn't exist; impossible */
-	}
-
-	zc->nextToUpdate = (U32)(iend - zc->base);
-	return 0;
-}
-
-/* Dictionaries that assign zero probability to symbols that show up causes problems
-   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
-   that we may encounter during compression.
-   NOTE: This behavior is not standard and could be improved in the future. */
-static size_t ZSTD_checkDictNCount(short *normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
-{
-	U32 s;
-	if (dictMaxSymbolValue < maxSymbolValue)
-		return ERROR(dictionary_corrupted);
-	for (s = 0; s <= maxSymbolValue; ++s) {
-		if (normalizedCounter[s] == 0)
-			return ERROR(dictionary_corrupted);
-	}
-	return 0;
-}
-
-/* Dictionary format :
- * See :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
- */
-/*! ZSTD_loadZstdDictionary() :
- * @return : 0, or an error code
- *  assumptions : magic number supposed already checked
- *                dictSize supposed > 8
- */
-static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
-{
-	const BYTE *dictPtr = (const BYTE *)dict;
-	const BYTE *const dictEnd = dictPtr + dictSize;
-	short offcodeNCount[MaxOff + 1];
-	unsigned offcodeMaxValue = MaxOff;
-
-	dictPtr += 4; /* skip magic number */
-	cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : ZSTD_readLE32(dictPtr);
-	dictPtr += 4;
-
-	{
-		size_t const hufHeaderSize = HUF_readCTable_wksp(cctx->hufTable, 255, dictPtr, dictEnd - dictPtr, cctx->tmpCounters, sizeof(cctx->tmpCounters));
-		if (HUF_isError(hufHeaderSize))
-			return ERROR(dictionary_corrupted);
-		dictPtr += hufHeaderSize;
-	}
-
-	{
-		unsigned offcodeLog;
-		size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(offcodeHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (offcodeLog > OffFSELog)
-			return ERROR(dictionary_corrupted);
-		/* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
-		CHECK_E(FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-			dictionary_corrupted);
-		dictPtr += offcodeHeaderSize;
-	}
-
-	{
-		short matchlengthNCount[MaxML + 1];
-		unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-		size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(matchlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (matchlengthLog > MLFSELog)
-			return ERROR(dictionary_corrupted);
-		/* Every match length code must have non-zero probability */
-		CHECK_F(ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
-		CHECK_E(
-		    FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-		    dictionary_corrupted);
-		dictPtr += matchlengthHeaderSize;
-	}
-
-	{
-		short litlengthNCount[MaxLL + 1];
-		unsigned litlengthMaxValue = MaxLL, litlengthLog;
-		size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(litlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (litlengthLog > LLFSELog)
-			return ERROR(dictionary_corrupted);
-		/* Every literal length code must have non-zero probability */
-		CHECK_F(ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
-		CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-			dictionary_corrupted);
-		dictPtr += litlengthHeaderSize;
-	}
-
-	if (dictPtr + 12 > dictEnd)
-		return ERROR(dictionary_corrupted);
-	cctx->rep[0] = ZSTD_readLE32(dictPtr + 0);
-	cctx->rep[1] = ZSTD_readLE32(dictPtr + 4);
-	cctx->rep[2] = ZSTD_readLE32(dictPtr + 8);
-	dictPtr += 12;
-
-	{
-		size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
-		U32 offcodeMax = MaxOff;
-		if (dictContentSize <= ((U32)-1) - 128 KB) {
-			U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
-			offcodeMax = ZSTD_highbit32(maxOffset);		     /* Calculate minimum offset code required to represent maxOffset */
-		}
-		/* All offset values <= dictContentSize + 128 KB must be representable */
-		CHECK_F(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
-		/* All repCodes must be <= dictContentSize and != 0*/
-		{
-			U32 u;
-			for (u = 0; u < 3; u++) {
-				if (cctx->rep[u] == 0)
-					return ERROR(dictionary_corrupted);
-				if (cctx->rep[u] > dictContentSize)
-					return ERROR(dictionary_corrupted);
-			}
-		}
-
-		cctx->flagStaticTables = 1;
-		cctx->flagStaticHufTable = HUF_repeat_valid;
-		return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
-	}
-}
-
-/** ZSTD_compress_insertDictionary() :
-*   @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
-{
-	if ((dict == NULL) || (dictSize <= 8))
-		return 0;
-
-	/* dict as pure content */
-	if ((ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
-		return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
-
-	/* dict as zstd dictionary */
-	return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
-}
-
-/*! ZSTD_compressBegin_internal() :
-*   @return : 0, or an error code */
-static size_t ZSTD_compressBegin_internal(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize)
-{
-	ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
-	CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
-	return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
-}
-
-/*! ZSTD_compressBegin_advanced() :
-*   @return : 0, or an error code */
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-	/* compression parameters verification and optimization */
-	CHECK_F(ZSTD_checkCParams(params.cParams));
-	return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
-}
-
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, int compressionLevel)
-{
-	ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-	return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
-}
-
-size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel) { return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); }
-
-/*! ZSTD_writeEpilogue() :
-*   Ends a frame.
-*   @return : nb of bytes written into dst (or an error code) */
-static size_t ZSTD_writeEpilogue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	size_t fhSize = 0;
-
-	if (cctx->stage == ZSTDcs_created)
-		return ERROR(stage_wrong); /* init missing */
-
-	/* special case : empty frame */
-	if (cctx->stage == ZSTDcs_init) {
-		fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
-		if (ZSTD_isError(fhSize))
-			return fhSize;
-		dstCapacity -= fhSize;
-		op += fhSize;
-		cctx->stage = ZSTDcs_ongoing;
-	}
-
-	if (cctx->stage != ZSTDcs_ending) {
-		/* write one last empty block, make it the "last" block */
-		U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw) << 1) + 0;
-		if (dstCapacity < 4)
-			return ERROR(dstSize_tooSmall);
-		ZSTD_writeLE32(op, cBlockHeader24);
-		op += ZSTD_blockHeaderSize;
-		dstCapacity -= ZSTD_blockHeaderSize;
-	}
-
-	if (cctx->params.fParams.checksumFlag) {
-		U32 const checksum = (U32)xxh64_digest(&cctx->xxhState);
-		if (dstCapacity < 4)
-			return ERROR(dstSize_tooSmall);
-		ZSTD_writeLE32(op, checksum);
-		op += 4;
-	}
-
-	cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
-	return op - ostart;
-}
-
-size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t endResult;
-	size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
-	if (ZSTD_isError(cSize))
-		return cSize;
-	endResult = ZSTD_writeEpilogue(cctx, (char *)dst + cSize, dstCapacity - cSize);
-	if (ZSTD_isError(endResult))
-		return endResult;
-	return cSize + endResult;
-}
-
-static size_t ZSTD_compress_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-				     ZSTD_parameters params)
-{
-	CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
-	return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
-
-size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-			       ZSTD_parameters params)
-{
-	return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
-}
-
-size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, ZSTD_parameters params)
-{
-	return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
-}
-
-/* =====  Dictionary API  ===== */
-
-struct ZSTD_CDict_s {
-	void *dictBuffer;
-	const void *dictContent;
-	size_t dictContentSize;
-	ZSTD_CCtx *refContext;
-}; /* typedef'd tp ZSTD_CDict within "zstd.h" */
-
-size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams) { return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CDict)); }
-
-static ZSTD_CDict *ZSTD_createCDict_advanced(const void *dictBuffer, size_t dictSize, unsigned byReference, ZSTD_parameters params, ZSTD_customMem customMem)
-{
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	{
-		ZSTD_CDict *const cdict = (ZSTD_CDict *)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
-		ZSTD_CCtx *const cctx = ZSTD_createCCtx_advanced(customMem);
-
-		if (!cdict || !cctx) {
-			ZSTD_free(cdict, customMem);
-			ZSTD_freeCCtx(cctx);
-			return NULL;
-		}
-
-		if ((byReference) || (!dictBuffer) || (!dictSize)) {
-			cdict->dictBuffer = NULL;
-			cdict->dictContent = dictBuffer;
-		} else {
-			void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
-			if (!internalBuffer) {
-				ZSTD_free(cctx, customMem);
-				ZSTD_free(cdict, customMem);
-				return NULL;
-			}
-			memcpy(internalBuffer, dictBuffer, dictSize);
-			cdict->dictBuffer = internalBuffer;
-			cdict->dictContent = internalBuffer;
-		}
-
-		{
-			size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
-			if (ZSTD_isError(errorCode)) {
-				ZSTD_free(cdict->dictBuffer, customMem);
-				ZSTD_free(cdict, customMem);
-				ZSTD_freeCCtx(cctx);
-				return NULL;
-			}
-		}
-
-		cdict->refContext = cctx;
-		cdict->dictContentSize = dictSize;
-		return cdict;
-	}
-}
-
-ZSTD_CDict *ZSTD_initCDict(const void *dict, size_t dictSize, ZSTD_parameters params, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	return ZSTD_createCDict_advanced(dict, dictSize, 1, params, stackMem);
-}
-
-size_t ZSTD_freeCDict(ZSTD_CDict *cdict)
-{
-	if (cdict == NULL)
-		return 0; /* support free on NULL */
-	{
-		ZSTD_customMem const cMem = cdict->refContext->customMem;
-		ZSTD_freeCCtx(cdict->refContext);
-		ZSTD_free(cdict->dictBuffer, cMem);
-		ZSTD_free(cdict, cMem);
-		return 0;
-	}
-}
-
-static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict *cdict) { return ZSTD_getParamsFromCCtx(cdict->refContext); }
-
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize)
-{
-	if (cdict->dictContentSize)
-		CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
-	else {
-		ZSTD_parameters params = cdict->refContext->params;
-		params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-		CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
-	}
-	return 0;
-}
-
-/*! ZSTD_compress_usingCDict() :
-*   Compression using a digested Dictionary.
-*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-*   Note that compression level is decided during dictionary creation */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_CDict *cdict)
-{
-	CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
-
-	if (cdict->refContext->params.fParams.contentSizeFlag == 1) {
-		cctx->params.fParams.contentSizeFlag = 1;
-		cctx->frameContentSize = srcSize;
-	} else {
-		cctx->params.fParams.contentSizeFlag = 0;
-	}
-
-	return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
-
-/* ******************************************************************
-*  Streaming
-********************************************************************/
-
-typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
-
-struct ZSTD_CStream_s {
-	ZSTD_CCtx *cctx;
-	ZSTD_CDict *cdictLocal;
-	const ZSTD_CDict *cdict;
-	char *inBuff;
-	size_t inBuffSize;
-	size_t inToCompress;
-	size_t inBuffPos;
-	size_t inBuffTarget;
-	size_t blockSize;
-	char *outBuff;
-	size_t outBuffSize;
-	size_t outBuffContentSize;
-	size_t outBuffFlushedSize;
-	ZSTD_cStreamStage stage;
-	U32 checksum;
-	U32 frameEnded;
-	U64 pledgedSrcSize;
-	U64 inputProcessed;
-	ZSTD_parameters params;
-	ZSTD_customMem customMem;
-}; /* typedef'd to ZSTD_CStream within "zstd.h" */
-
-size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams)
-{
-	size_t const inBuffSize = (size_t)1 << cParams.windowLog;
-	size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, inBuffSize);
-	size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
-
-	return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
-}
-
-ZSTD_CStream *ZSTD_createCStream_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_CStream *zcs;
-
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	zcs = (ZSTD_CStream *)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
-	if (zcs == NULL)
-		return NULL;
-	memset(zcs, 0, sizeof(ZSTD_CStream));
-	memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
-	zcs->cctx = ZSTD_createCCtx_advanced(customMem);
-	if (zcs->cctx == NULL) {
-		ZSTD_freeCStream(zcs);
-		return NULL;
-	}
-	return zcs;
-}
-
-size_t ZSTD_freeCStream(ZSTD_CStream *zcs)
-{
-	if (zcs == NULL)
-		return 0; /* support free on NULL */
-	{
-		ZSTD_customMem const cMem = zcs->customMem;
-		ZSTD_freeCCtx(zcs->cctx);
-		zcs->cctx = NULL;
-		ZSTD_freeCDict(zcs->cdictLocal);
-		zcs->cdictLocal = NULL;
-		ZSTD_free(zcs->inBuff, cMem);
-		zcs->inBuff = NULL;
-		ZSTD_free(zcs->outBuff, cMem);
-		zcs->outBuff = NULL;
-		ZSTD_free(zcs, cMem);
-		return 0;
-	}
-}
-
-/*======   Initialization   ======*/
-
-size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */; }
-
-static size_t ZSTD_resetCStream_internal(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
-{
-	if (zcs->inBuffSize == 0)
-		return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
-
-	if (zcs->cdict)
-		CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
-	else
-		CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
-
-	zcs->inToCompress = 0;
-	zcs->inBuffPos = 0;
-	zcs->inBuffTarget = zcs->blockSize;
-	zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-	zcs->stage = zcss_load;
-	zcs->frameEnded = 0;
-	zcs->pledgedSrcSize = pledgedSrcSize;
-	zcs->inputProcessed = 0;
-	return 0; /* ready to go */
-}
-
-size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
-{
-
-	zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-
-	return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
-}
-
-static size_t ZSTD_initCStream_advanced(ZSTD_CStream *zcs, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-	/* allocate buffers */
-	{
-		size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
-		if (zcs->inBuffSize < neededInBuffSize) {
-			zcs->inBuffSize = neededInBuffSize;
-			ZSTD_free(zcs->inBuff, zcs->customMem);
-			zcs->inBuff = (char *)ZSTD_malloc(neededInBuffSize, zcs->customMem);
-			if (zcs->inBuff == NULL)
-				return ERROR(memory_allocation);
-		}
-		zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
-	}
-	if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize) + 1) {
-		zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize) + 1;
-		ZSTD_free(zcs->outBuff, zcs->customMem);
-		zcs->outBuff = (char *)ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
-		if (zcs->outBuff == NULL)
-			return ERROR(memory_allocation);
-	}
-
-	if (dict && dictSize >= 8) {
-		ZSTD_freeCDict(zcs->cdictLocal);
-		zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
-		if (zcs->cdictLocal == NULL)
-			return ERROR(memory_allocation);
-		zcs->cdict = zcs->cdictLocal;
-	} else
-		zcs->cdict = NULL;
-
-	zcs->checksum = params.fParams.checksumFlag > 0;
-	zcs->params = params;
-
-	return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
-}
-
-ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	ZSTD_CStream *const zcs = ZSTD_createCStream_advanced(stackMem);
-	if (zcs) {
-		size_t const code = ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
-		if (ZSTD_isError(code)) {
-			return NULL;
-		}
-	}
-	return zcs;
-}
-
-ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
-	ZSTD_CStream *const zcs = ZSTD_initCStream(params, pledgedSrcSize, workspace, workspaceSize);
-	if (zcs) {
-		zcs->cdict = cdict;
-		if (ZSTD_isError(ZSTD_resetCStream_internal(zcs, pledgedSrcSize))) {
-			return NULL;
-		}
-	}
-	return zcs;
-}
-
-/*======   Compression   ======*/
-
-typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
-
-ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const length = MIN(dstCapacity, srcSize);
-	memcpy(dst, src, length);
-	return length;
-}
-
-static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t *dstCapacityPtr, const void *src, size_t *srcSizePtr, ZSTD_flush_e const flush)
-{
-	U32 someMoreWork = 1;
-	const char *const istart = (const char *)src;
-	const char *const iend = istart + *srcSizePtr;
-	const char *ip = istart;
-	char *const ostart = (char *)dst;
-	char *const oend = ostart + *dstCapacityPtr;
-	char *op = ostart;
-
-	while (someMoreWork) {
-		switch (zcs->stage) {
-		case zcss_init:
-			return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
-
-		case zcss_load:
-			/* complete inBuffer */
-			{
-				size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
-				size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend - ip);
-				zcs->inBuffPos += loaded;
-				ip += loaded;
-				if ((zcs->inBuffPos == zcs->inToCompress) || (!flush && (toLoad != loaded))) {
-					someMoreWork = 0;
-					break; /* not enough input to get a full block : stop there, wait for more */
-				}
-			}
-			/* compress curr block (note : this stage cannot be stopped in the middle) */
-			{
-				void *cDst;
-				size_t cSize;
-				size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
-				size_t oSize = oend - op;
-				if (oSize >= ZSTD_compressBound(iSize))
-					cDst = op; /* compress directly into output buffer (avoid flush stage) */
-				else
-					cDst = zcs->outBuff, oSize = zcs->outBuffSize;
-				cSize = (flush == zsf_end) ? ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize)
-							   : ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
-				if (ZSTD_isError(cSize))
-					return cSize;
-				if (flush == zsf_end)
-					zcs->frameEnded = 1;
-				/* prepare next block */
-				zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
-				if (zcs->inBuffTarget > zcs->inBuffSize)
-					zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
-				zcs->inToCompress = zcs->inBuffPos;
-				if (cDst == op) {
-					op += cSize;
-					break;
-				} /* no need to flush */
-				zcs->outBuffContentSize = cSize;
-				zcs->outBuffFlushedSize = 0;
-				zcs->stage = zcss_flush; /* pass-through to flush stage */
-			}
-			fallthrough;
-
-		case zcss_flush: {
-			size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-			size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-			op += flushed;
-			zcs->outBuffFlushedSize += flushed;
-			if (toFlush != flushed) {
-				someMoreWork = 0;
-				break;
-			} /* dst too small to store flushed data : stop there */
-			zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-			zcs->stage = zcss_load;
-			break;
-		}
-
-		case zcss_final:
-			someMoreWork = 0; /* do nothing */
-			break;
-
-		default:
-			return ERROR(GENERIC); /* impossible */
-		}
-	}
-
-	*srcSizePtr = ip - istart;
-	*dstCapacityPtr = op - ostart;
-	zcs->inputProcessed += *srcSizePtr;
-	if (zcs->frameEnded)
-		return 0;
-	{
-		size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
-		if (hintInSize == 0)
-			hintInSize = zcs->blockSize;
-		return hintInSize;
-	}
-}
-
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
-{
-	size_t sizeRead = input->size - input->pos;
-	size_t sizeWritten = output->size - output->pos;
-	size_t const result =
-	    ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, (const char *)(input->src) + input->pos, &sizeRead, zsf_gather);
-	input->pos += sizeRead;
-	output->pos += sizeWritten;
-	return result;
-}
-
-/*======   Finalize   ======*/
-
-/*! ZSTD_flushStream() :
-*   @return : amount of data remaining to flush */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
-{
-	size_t srcSize = 0;
-	size_t sizeWritten = output->size - output->pos;
-	size_t const result = ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, &srcSize,
-							  &srcSize, /* use a valid src address instead of NULL */
-							  zsf_flush);
-	output->pos += sizeWritten;
-	if (ZSTD_isError(result))
-		return result;
-	return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
-}
-
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
-{
-	BYTE *const ostart = (BYTE *)(output->dst) + output->pos;
-	BYTE *const oend = (BYTE *)(output->dst) + output->size;
-	BYTE *op = ostart;
-
-	if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
-		return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
-
-	if (zcs->stage != zcss_final) {
-		/* flush whatever remains */
-		size_t srcSize = 0;
-		size_t sizeWritten = output->size - output->pos;
-		size_t const notEnded =
-		    ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
-		size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-		op += sizeWritten;
-		if (remainingToFlush) {
-			output->pos += sizeWritten;
-			return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
-		}
-		/* create epilogue */
-		zcs->stage = zcss_final;
-		zcs->outBuffContentSize = !notEnded ? 0 : ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL,
-									   0); /* write epilogue, including final empty block, into outBuff */
-	}
-
-	/* flush epilogue */
-	{
-		size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-		size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-		op += flushed;
-		zcs->outBuffFlushedSize += flushed;
-		output->pos += op - ostart;
-		if (toFlush == flushed)
-			zcs->stage = zcss_init; /* end reached */
-		return toFlush - flushed;
-	}
-}
-
-/*-=====  Pre-defined compression levels  =====-*/
-
-#define ZSTD_DEFAULT_CLEVEL 1
-#define ZSTD_MAX_CLEVEL 22
-int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
-
-static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL + 1] = {
-    {
-	/* "default" */
-	/* W,  C,  H,  S,  L, TL, strat */
-	{18, 12, 12, 1, 7, 16, ZSTD_fast},    /* level  0 - never used */
-	{19, 13, 14, 1, 7, 16, ZSTD_fast},    /* level  1 */
-	{19, 15, 16, 1, 6, 16, ZSTD_fast},    /* level  2 */
-	{20, 16, 17, 1, 5, 16, ZSTD_dfast},   /* level  3.*/
-	{20, 18, 18, 1, 5, 16, ZSTD_dfast},   /* level  4.*/
-	{20, 15, 18, 3, 5, 16, ZSTD_greedy},  /* level  5 */
-	{21, 16, 19, 2, 5, 16, ZSTD_lazy},    /* level  6 */
-	{21, 17, 20, 3, 5, 16, ZSTD_lazy},    /* level  7 */
-	{21, 18, 20, 3, 5, 16, ZSTD_lazy2},   /* level  8 */
-	{21, 20, 20, 3, 5, 16, ZSTD_lazy2},   /* level  9 */
-	{21, 19, 21, 4, 5, 16, ZSTD_lazy2},   /* level 10 */
-	{22, 20, 22, 4, 5, 16, ZSTD_lazy2},   /* level 11 */
-	{22, 20, 22, 5, 5, 16, ZSTD_lazy2},   /* level 12 */
-	{22, 21, 22, 5, 5, 16, ZSTD_lazy2},   /* level 13 */
-	{22, 21, 22, 6, 5, 16, ZSTD_lazy2},   /* level 14 */
-	{22, 21, 21, 5, 5, 16, ZSTD_btlazy2}, /* level 15 */
-	{23, 22, 22, 5, 5, 16, ZSTD_btlazy2}, /* level 16 */
-	{23, 21, 22, 4, 5, 24, ZSTD_btopt},   /* level 17 */
-	{23, 23, 22, 6, 5, 32, ZSTD_btopt},   /* level 18 */
-	{23, 23, 22, 6, 3, 48, ZSTD_btopt},   /* level 19 */
-	{25, 25, 23, 7, 3, 64, ZSTD_btopt2},  /* level 20 */
-	{26, 26, 23, 7, 3, 256, ZSTD_btopt2}, /* level 21 */
-	{27, 27, 25, 9, 3, 512, ZSTD_btopt2}, /* level 22 */
-    },
-    {
-	/* for srcSize <= 256 KB */
-	/* W,  C,  H,  S,  L,  T, strat */
-	{0, 0, 0, 0, 0, 0, ZSTD_fast},	 /* level  0 - not used */
-	{18, 13, 14, 1, 6, 8, ZSTD_fast},      /* level  1 */
-	{18, 14, 13, 1, 5, 8, ZSTD_dfast},     /* level  2 */
-	{18, 16, 15, 1, 5, 8, ZSTD_dfast},     /* level  3 */
-	{18, 15, 17, 1, 5, 8, ZSTD_greedy},    /* level  4.*/
-	{18, 16, 17, 4, 5, 8, ZSTD_greedy},    /* level  5.*/
-	{18, 16, 17, 3, 5, 8, ZSTD_lazy},      /* level  6.*/
-	{18, 17, 17, 4, 4, 8, ZSTD_lazy},      /* level  7 */
-	{18, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
-	{18, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
-	{18, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
-	{18, 18, 17, 6, 4, 8, ZSTD_lazy2},     /* level 11.*/
-	{18, 18, 17, 7, 4, 8, ZSTD_lazy2},     /* level 12.*/
-	{18, 19, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13 */
-	{18, 18, 18, 4, 4, 16, ZSTD_btopt},    /* level 14.*/
-	{18, 18, 18, 4, 3, 16, ZSTD_btopt},    /* level 15.*/
-	{18, 19, 18, 6, 3, 32, ZSTD_btopt},    /* level 16.*/
-	{18, 19, 18, 8, 3, 64, ZSTD_btopt},    /* level 17.*/
-	{18, 19, 18, 9, 3, 128, ZSTD_btopt},   /* level 18.*/
-	{18, 19, 18, 10, 3, 256, ZSTD_btopt},  /* level 19.*/
-	{18, 19, 18, 11, 3, 512, ZSTD_btopt2}, /* level 20.*/
-	{18, 19, 18, 12, 3, 512, ZSTD_btopt2}, /* level 21.*/
-	{18, 19, 18, 13, 3, 512, ZSTD_btopt2}, /* level 22.*/
-    },
-    {
-	/* for srcSize <= 128 KB */
-	/* W,  C,  H,  S,  L,  T, strat */
-	{17, 12, 12, 1, 7, 8, ZSTD_fast},      /* level  0 - not used */
-	{17, 12, 13, 1, 6, 8, ZSTD_fast},      /* level  1 */
-	{17, 13, 16, 1, 5, 8, ZSTD_fast},      /* level  2 */
-	{17, 16, 16, 2, 5, 8, ZSTD_dfast},     /* level  3 */
-	{17, 13, 15, 3, 4, 8, ZSTD_greedy},    /* level  4 */
-	{17, 15, 17, 4, 4, 8, ZSTD_greedy},    /* level  5 */
-	{17, 16, 17, 3, 4, 8, ZSTD_lazy},      /* level  6 */
-	{17, 15, 17, 4, 4, 8, ZSTD_lazy2},     /* level  7 */
-	{17, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
-	{17, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
-	{17, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
-	{17, 17, 17, 7, 4, 8, ZSTD_lazy2},     /* level 11 */
-	{17, 17, 17, 8, 4, 8, ZSTD_lazy2},     /* level 12 */
-	{17, 18, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13.*/
-	{17, 17, 17, 7, 3, 8, ZSTD_btopt},     /* level 14.*/
-	{17, 17, 17, 7, 3, 16, ZSTD_btopt},    /* level 15.*/
-	{17, 18, 17, 7, 3, 32, ZSTD_btopt},    /* level 16.*/
-	{17, 18, 17, 7, 3, 64, ZSTD_btopt},    /* level 17.*/
-	{17, 18, 17, 7, 3, 256, ZSTD_btopt},   /* level 18.*/
-	{17, 18, 17, 8, 3, 256, ZSTD_btopt},   /* level 19.*/
-	{17, 18, 17, 9, 3, 256, ZSTD_btopt2},  /* level 20.*/
-	{17, 18, 17, 10, 3, 256, ZSTD_btopt2}, /* level 21.*/
-	{17, 18, 17, 11, 3, 512, ZSTD_btopt2}, /* level 22.*/
-    },
-    {
-	/* for srcSize <= 16 KB */
-	/* W,  C,  H,  S,  L,  T, strat */
-	{14, 12, 12, 1, 7, 6, ZSTD_fast},      /* level  0 - not used */
-	{14, 14, 14, 1, 6, 6, ZSTD_fast},      /* level  1 */
-	{14, 14, 14, 1, 4, 6, ZSTD_fast},      /* level  2 */
-	{14, 14, 14, 1, 4, 6, ZSTD_dfast},     /* level  3.*/
-	{14, 14, 14, 4, 4, 6, ZSTD_greedy},    /* level  4.*/
-	{14, 14, 14, 3, 4, 6, ZSTD_lazy},      /* level  5.*/
-	{14, 14, 14, 4, 4, 6, ZSTD_lazy2},     /* level  6 */
-	{14, 14, 14, 5, 4, 6, ZSTD_lazy2},     /* level  7 */
-	{14, 14, 14, 6, 4, 6, ZSTD_lazy2},     /* level  8.*/
-	{14, 15, 14, 6, 4, 6, ZSTD_btlazy2},   /* level  9.*/
-	{14, 15, 14, 3, 3, 6, ZSTD_btopt},     /* level 10.*/
-	{14, 15, 14, 6, 3, 8, ZSTD_btopt},     /* level 11.*/
-	{14, 15, 14, 6, 3, 16, ZSTD_btopt},    /* level 12.*/
-	{14, 15, 14, 6, 3, 24, ZSTD_btopt},    /* level 13.*/
-	{14, 15, 15, 6, 3, 48, ZSTD_btopt},    /* level 14.*/
-	{14, 15, 15, 6, 3, 64, ZSTD_btopt},    /* level 15.*/
-	{14, 15, 15, 6, 3, 96, ZSTD_btopt},    /* level 16.*/
-	{14, 15, 15, 6, 3, 128, ZSTD_btopt},   /* level 17.*/
-	{14, 15, 15, 6, 3, 256, ZSTD_btopt},   /* level 18.*/
-	{14, 15, 15, 7, 3, 256, ZSTD_btopt},   /* level 19.*/
-	{14, 15, 15, 8, 3, 256, ZSTD_btopt2},  /* level 20.*/
-	{14, 15, 15, 9, 3, 256, ZSTD_btopt2},  /* level 21.*/
-	{14, 15, 15, 10, 3, 256, ZSTD_btopt2}, /* level 22.*/
-    },
-};
-
-/*! ZSTD_getCParams() :
-*   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
-*   Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-	ZSTD_compressionParameters cp;
-	size_t const addedSize = srcSize ? 0 : 500;
-	U64 const rSize = srcSize + dictSize ? srcSize + dictSize + addedSize : (U64)-1;
-	U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
-	if (compressionLevel <= 0)
-		compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
-	if (compressionLevel > ZSTD_MAX_CLEVEL)
-		compressionLevel = ZSTD_MAX_CLEVEL;
-	cp = ZSTD_defaultCParameters[tableID][compressionLevel];
-	if (ZSTD_32bits()) { /* auto-correction, for 32-bits mode */
-		if (cp.windowLog > ZSTD_WINDOWLOG_MAX)
-			cp.windowLog = ZSTD_WINDOWLOG_MAX;
-		if (cp.chainLog > ZSTD_CHAINLOG_MAX)
-			cp.chainLog = ZSTD_CHAINLOG_MAX;
-		if (cp.hashLog > ZSTD_HASHLOG_MAX)
-			cp.hashLog = ZSTD_HASHLOG_MAX;
-	}
-	cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
-	return cp;
-}
-
-/*! ZSTD_getParams() :
-*   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
-*   All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-	ZSTD_parameters params;
-	ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
-	memset(&params, 0, sizeof(params));
-	params.cParams = cParams;
-	return params;
-}
-
-size_t zstd_compress_bound(size_t src_size)
-{
-	return ZSTD_compressBound(src_size);
-}
-EXPORT_SYMBOL(zstd_compress_bound);
-
-int zstd_min_clevel(void)
-{
-	/*
-	 * zstd-1.3.1 doesn't implement ZSTD_minCLevel().
-	 * Return 0 (default level).
-	 */
-	return 0;
-}
-EXPORT_SYMBOL(zstd_min_clevel);
-
-int zstd_max_clevel(void)
-{
-	return ZSTD_maxCLevel();
-}
-EXPORT_SYMBOL(zstd_max_clevel);
-
-zstd_parameters zstd_get_params(int level,
-	unsigned long long estimated_src_size)
-{
-	return ZSTD_getParams(level, estimated_src_size, 0);
-}
-EXPORT_SYMBOL(zstd_get_params);
-
-size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
-{
-	return ZSTD_CCtxWorkspaceBound(*cparams);
-}
-EXPORT_SYMBOL(zstd_cctx_workspace_bound);
-
-zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
-{
-	return ZSTD_initCCtx(workspace, workspace_size);
-}
-EXPORT_SYMBOL(zstd_init_cctx);
-
-size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
-	const void *src, size_t src_size, const zstd_parameters *parameters)
-{
-	return ZSTD_compressCCtx(cctx, dst, dst_capacity, src, src_size, *parameters);
-}
-EXPORT_SYMBOL(zstd_compress_cctx);
-
-size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
-{
-	return ZSTD_CStreamWorkspaceBound(*cparams);
-}
-EXPORT_SYMBOL(zstd_cstream_workspace_bound);
-
-zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
-	unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
-{
-	return ZSTD_initCStream(*parameters, pledged_src_size, workspace, workspace_size);
-}
-EXPORT_SYMBOL(zstd_init_cstream);
-
-size_t zstd_reset_cstream(zstd_cstream *cstream,
-	unsigned long long pledged_src_size)
-{
-	return ZSTD_resetCStream(cstream, pledged_src_size);
-}
-EXPORT_SYMBOL(zstd_reset_cstream);
-
-size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
-	zstd_in_buffer *input)
-{
-	return ZSTD_compressStream(cstream, output, input);
-}
-EXPORT_SYMBOL(zstd_compress_stream);
-
-size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
-{
-	return ZSTD_flushStream(cstream, output);
-}
-EXPORT_SYMBOL(zstd_flush_stream);
-
-size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
-{
-	return ZSTD_endStream(cstream, output);
-}
-EXPORT_SYMBOL(zstd_end_stream);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
new file mode 100644
index 000000000000..436985b620e5
--- /dev/null
+++ b/lib/zstd/compress/fse_compress.c
@@ -0,0 +1,625 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy encoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "../common/compiler.h"
+#include "../common/mem.h"        /* U32, U16, etc. */
+#include "../common/debug.h"      /* assert, DEBUGLOG */
+#include "hist.h"       /* HIST_count_wksp */
+#include "../common/bitstream.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#include "../common/error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#define ZSTD_DEPS_NEED_MATH64
+#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct,
+                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                            void* workSpace, size_t wkspSize)
+{
+    U32 const tableSize = 1 << tableLog;
+    U32 const tableMask = tableSize - 1;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    U32 const step = FSE_TABLESTEP(tableSize);
+
+    U32* cumul = (U32*)workSpace;
+    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+
+    U32 highThreshold = tableSize-1;
+
+    if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
+    /* CTable header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+    assert(tableLog < 16);   /* required for threshold strategy to work */
+
+    /* For explanations on how to distribute symbol values over the table :
+     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+     #ifdef __clang_analyzer__
+     ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+     #endif
+
+    /* symbol start positions */
+    {   U32 u;
+        cumul[0] = 0;
+        for (u=1; u <= maxSymbolValue+1; u++) {
+            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
+                cumul[u] = cumul[u-1] + 1;
+                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
+            } else {
+                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+        }   }
+        cumul[maxSymbolValue+1] = tableSize+1;
+    }
+
+    /* Spread symbols */
+    {   U32 position = 0;
+        U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            int nbOccurrences;
+            int const freq = normalizedCounter[symbol];
+            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
+                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+                position = (position + step) & tableMask;
+                while (position > highThreshold)
+                    position = (position + step) & tableMask;   /* Low proba area */
+        }   }
+
+        assert(position==0);  /* Must have initialized all positions */
+    }
+
+    /* Build table */
+    {   U32 u; for (u=0; u<tableSize; u++) {
+        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
+    }   }
+
+    /* Build Symbol Transformation Table */
+    {   unsigned total = 0;
+        unsigned s;
+        for (s=0; s<=maxSymbolValue; s++) {
+            switch (normalizedCounter[s])
+            {
+            case  0:
+                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
+                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
+                break;
+
+            case -1:
+            case  1:
+                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                break;
+            default :
+                {
+                    U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
+                    U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                    symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                    total +=  normalizedCounter[s];
+    }   }   }   }
+
+#if 0  /* debug : symbol costs */
+    DEBUGLOG(5, "\n --- table statistics : ");
+    {   U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
+                symbol, normalizedCounter[symbol],
+                FSE_getMaxNbBits(symbolTT, symbol),
+                (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
+        }
+    }
+#endif
+
+    return 0;
+}
+
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+
+/*-**************************************************************
+*  FSE NCount encoding
+****************************************************************/
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
+}
+
+static size_t
+FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                         unsigned writeIsSafe)
+{
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream = 0;
+    int bitCount = 0;
+    unsigned symbol = 0;
+    unsigned const alphabetSize = maxSymbolValue + 1;
+    int previousIs0 = 0;
+
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
+        if (previousIs0) {
+            unsigned start = symbol;
+            while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
+            if (symbol == alphabetSize) break;   /* incorrect distribution */
+            while (symbol >= start+24) {
+                start+=24;
+                bitStream += 0xFFFFU << bitCount;
+                if ((!writeIsSafe) && (out > oend-2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE) bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (symbol >= start+3) {
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (symbol-start) << bitCount;
+            bitCount += 2;
+            if (bitCount>16) {
+                if ((!writeIsSafe) && (out > oend - 2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+        }   }
+        {   int count = normalizedCounter[symbol++];
+            int const max = (2*threshold-1) - remaining;
+            remaining -= count < 0 ? -count : count;
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold)
+                count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);
+            previousIs0  = (count==1);
+            if (remaining<1) return ERROR(GENERIC);
+            while (remaining<threshold) { nbBits--; threshold>>=1; }
+        }
+        if (bitCount>16) {
+            if ((!writeIsSafe) && (out > oend - 2))
+                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+    }   }
+
+    if (remaining != 1)
+        return ERROR(GENERIC);  /* incorrect normalized distribution */
+    assert(symbol <= alphabetSize);
+
+    /* flush remaining bitStream */
+    if ((!writeIsSafe) && (out > oend - 2))
+        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;
+
+    return (out-ostart);
+}
+
+
+size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+
+    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
+        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+
+    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
+}
+
+
+/*-**************************************************************
+*  FSE Compression Code
+****************************************************************/
+
+FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t size;
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
+    return (FSE_CTable*)ZSTD_malloc(size);
+}
+
+void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
+
+/* provides the minimum logSize to safely represent a distribution */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
+    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    return minBits;
+}
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+{
+    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+    U32 tableLog = maxTableLog;
+    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
+    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
+    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
+    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
+    return tableLog;
+}
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
+}
+
+/* Secondary normalization method.
+   To be used when primary method fails. */
+
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
+{
+    short const NOT_YET_ASSIGNED = -2;
+    U32 s;
+    U32 distributed = 0;
+    U32 ToDistribute;
+
+    /* Init */
+    U32 const lowThreshold = (U32)(total >> tableLog);
+    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (count[s] == 0) {
+            norm[s]=0;
+            continue;
+        }
+        if (count[s] <= lowThreshold) {
+            norm[s] = lowProbCount;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        if (count[s] <= lowOne) {
+            norm[s] = 1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+
+        norm[s]=NOT_YET_ASSIGNED;
+    }
+    ToDistribute = (1 << tableLog) - distributed;
+
+    if (ToDistribute == 0)
+        return 0;
+
+    if ((total / ToDistribute) > lowOne) {
+        /* risk of rounding to zero */
+        lowOne = (U32)((total * 3) / (ToDistribute * 2));
+        for (s=0; s<=maxSymbolValue; s++) {
+            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
+                norm[s] = 1;
+                distributed++;
+                total -= count[s];
+                continue;
+        }   }
+        ToDistribute = (1 << tableLog) - distributed;
+    }
+
+    if (distributed == maxSymbolValue+1) {
+        /* all values are pretty poor;
+           probably incompressible data (should have already been detected);
+           find max, then give all remaining points to max */
+        U32 maxV = 0, maxC = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
+        norm[maxV] += (short)ToDistribute;
+        return 0;
+    }
+
+    if (total == 0) {
+        /* all of the symbols were low enough for the lowOne or lowThreshold */
+        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
+            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
+        return 0;
+    }
+
+    {   U64 const vStepLog = 62 - tableLog;
+        U64 const mid = (1ULL << (vStepLog-1)) - 1;
+        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
+        U64 tmpTotal = mid;
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (norm[s]==NOT_YET_ASSIGNED) {
+                U64 const end = tmpTotal + (count[s] * rStep);
+                U32 const sStart = (U32)(tmpTotal >> vStepLog);
+                U32 const sEnd = (U32)(end >> vStepLog);
+                U32 const weight = sEnd - sStart;
+                if (weight < 1)
+                    return ERROR(GENERIC);
+                norm[s] = (short)weight;
+                tmpTotal = end;
+    }   }   }
+
+    return 0;
+}
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue, unsigned useLowProbCount)
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
+    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
+
+    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+        short const lowProbCount = useLowProbCount ? -1 : 1;
+        U64 const scale = 62 - tableLog;
+        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (count[s] == total) return 0;   /* rle special case */
+            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
+            if (count[s] <= lowThreshold) {
+                normalizedCounter[s] = lowProbCount;
+                stillToDistribute--;
+            } else {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8) {
+                    U64 restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
+                }
+                if (proba > largestP) { largestP=proba; largest=s; }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+        }   }
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
+            /* corner case, need another normalization method */
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
+            if (FSE_isError(errorCode)) return errorCode;
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        U32 s;
+        U32 nTotal = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
+        for (s=0; s<=maxSymbolValue; s++)
+            nTotal += abs(normalizedCounter[s]);
+        if (nTotal != (1U<<tableLog))
+            RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
+
+
+/* fake FSE_CTable, for raw (uncompressed) input */
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
+{
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
+
+    /* header */
+    tableU16[-2] = (U16) nbBits;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* Build table */
+    for (s=0; s<tableSize; s++)
+        tableU16[s] = (U16)(tableSize + s);
+
+    /* Build Symbol Transformation Table */
+    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
+        for (s=0; s<=maxSymbolValue; s++) {
+            symbolTT[s].deltaNbBits = deltaNbBits;
+            symbolTT[s].deltaFindState = s-1;
+    }   }
+
+    return 0;
+}
+
+/* fake FSE_CTable, for rle input (always same symbol) */
+size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
+{
+    void* ptr = ct;
+    U16* tableU16 = ( (U16*) ptr) + 2;
+    void* FSCTptr = (U32*)ptr + 2;
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
+
+    /* header */
+    tableU16[-2] = (U16) 0;
+    tableU16[-1] = (U16) symbolValue;
+
+    /* Build table */
+    tableU16[0] = 0;
+    tableU16[1] = 0;   /* just in case */
+
+    /* Build Symbol Transformation Table */
+    symbolTT[symbolValue].deltaNbBits = 0;
+    symbolTT[symbolValue].deltaFindState = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct, const unsigned fast)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip=iend;
+
+    BIT_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+    /* init */
+    if (srcSize <= 2) return 0;
+    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
+
+#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+    if (srcSize & 1) {
+        FSE_initCState2(&CState1, ct, *--ip);
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    } else {
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_initCState2(&CState1, ct, *--ip);
+    }
+
+    /* join to mod 4 */
+    srcSize -= 2;
+    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    while ( ip>istart ) {
+
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_FLUSHBITS(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
+            FSE_encodeSymbol(&bitC, &CState2, *--ip);
+            FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        }
+
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return BIT_closeCStream(&bitC);
+}
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct)
+{
+    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+
+    if (fast)
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+    else
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+}
+
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/zstd/compress/hist.c b/lib/zstd/compress/hist.c
new file mode 100644
index 000000000000..3ddc6dfb6894
--- /dev/null
+++ b/lib/zstd/compress/hist.c
@@ -0,0 +1,165 @@
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/mem.h"             /* U32, BYTE, etc. */
+#include "../common/debug.h"           /* assert, DEBUGLOG */
+#include "../common/error_private.h"   /* ERROR */
+#include "hist.h"
+
+
+/* --- Error management --- */
+unsigned HIST_isError(size_t code) { return ERR_isError(code); }
+
+/*-**************************************************************
+ *  Histogram functions
+ ****************************************************************/
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const end = ip + srcSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned largestCount=0;
+
+    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
+    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
+
+    while (ip<end) {
+        assert(*ip <= maxSymbolValue);
+        count[*ip++]++;
+    }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+
+    {   U32 s;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > largestCount) largestCount = count[s];
+    }
+
+    return largestCount;
+}
+
+typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
+
+/* HIST_count_parallel_wksp() :
+ * store histogram into 4 intermediate tables, recombined at the end.
+ * this design makes better use of OoO cpus,
+ * and is noticeably faster when some values are heavily repeated.
+ * But it needs some additional workspace for intermediate tables.
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
+ * @return : largest histogram frequency,
+ *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
+static size_t HIST_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
+                                const void* source, size_t sourceSize,
+                                HIST_checkInput_e check,
+                                U32* const workSpace)
+{
+    const BYTE* ip = (const BYTE*)source;
+    const BYTE* const iend = ip+sourceSize;
+    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
+    unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
+
+    /* safety checks */
+    assert(*maxSymbolValuePtr <= 255);
+    if (!sourceSize) {
+        ZSTD_memset(count, 0, countSize);
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
+
+    /* by stripes of 16 bytes */
+    {   U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15) {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;
+    }
+
+    /* finish last symbols */
+    while (ip<iend) Counting1[*ip++]++;
+
+    {   U32 s;
+        for (s=0; s<256; s++) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s] > max) max = Counting1[s];
+    }   }
+
+    {   unsigned maxSymbolValue = 255;
+        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
+        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
+        *maxSymbolValuePtr = maxSymbolValue;
+        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
+    }
+    return (size_t)max;
+}
+
+/* HIST_countFast_wksp() :
+ * Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                          const void* source, size_t sourceSize,
+                          void* workSpace, size_t workSpaceSize)
+{
+    if (sourceSize < 1500) /* heuristic threshold */
+        return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
+}
+
+/* HIST_count_wksp() :
+ * Same as HIST_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* source, size_t sourceSize,
+                       void* workSpace, size_t workSpaceSize)
+{
+    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+    if (*maxSymbolValuePtr < 255)
+        return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
+    *maxSymbolValuePtr = 255;
+    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
+}
+
diff --git a/lib/zstd/compress/hist.h b/lib/zstd/compress/hist.h
new file mode 100644
index 000000000000..fc1830abc9c6
--- /dev/null
+++ b/lib/zstd/compress/hist.h
@@ -0,0 +1,75 @@
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/zstd_deps.h"   /* size_t */
+
+
+/* --- simple histogram functions --- */
+
+/*! HIST_count():
+ *  Provides the precise count of each byte within a table 'count'.
+ * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
+ *  Updates *maxSymbolValuePtr with actual largest symbol value detected.
+ * @return : count of the most frequent symbol (which isn't identified).
+ *           or an error code, which can be tested using HIST_isError().
+ *           note : if return == srcSize, there is only one symbol.
+ */
+size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
+                  const void* src, size_t srcSize);
+
+unsigned HIST_isError(size_t code);  /*< tells if a return value is an error code */
+
+
+/* --- advanced histogram functions --- */
+
+#define HIST_WKSP_SIZE_U32 1024
+#define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
+/* HIST_count_wksp() :
+ *  Same as HIST_count(), but using an externally provided scratch buffer.
+ *  Benefit is this function will use very little stack space.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* src, size_t srcSize,
+                       void* workSpace, size_t workSpaceSize);
+
+/* HIST_countFast() :
+ *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
+ *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
+ */
+size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* src, size_t srcSize);
+
+/* HIST_countFast_wksp() :
+ *  Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize,
+                           void* workSpace, size_t workSpaceSize);
+
+/*! HIST_count_simple() :
+ *  Same as HIST_countFast(), this function is unsafe,
+ *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
+ *  It is also a bit slower for large inputs.
+ *  However, it does not need any additional memory (not even on stack).
+ * @return : count of the most frequent symbol.
+ *  Note this function doesn't produce any error (i.e. it must succeed).
+ */
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize);
diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
new file mode 100644
index 000000000000..f76a526bfa54
--- /dev/null
+++ b/lib/zstd/compress/huf_compress.c
@@ -0,0 +1,905 @@
+/* ******************************************************************
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
+#include "../common/compiler.h"
+#include "../common/bitstream.h"
+#include "hist.h"
+#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
+#include "../common/fse.h"        /* header compression */
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/error_private.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Utils
+****************************************************************/
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+}
+
+
+/* *******************************************************
+*  HUF : Huffman block compression
+*********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
+
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
+
+    /* init conditions */
+    if (wtSize <= 1) return 0;  /* Not compressible */
+
+    /* Scan input and build symbol stats */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
+        op += hSize;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
+
+
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
+
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+
+    /* convert to weight */
+    wksp->bitsToWeight[0] = 0;
+    for (n=1; n<huffLog+1; n++)
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
+
+    /* attempt weights compression by FSE */
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
+
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    for (n=0; n<maxSymbolValue; n+=2)
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
+    return ((maxSymbolValue+1)/2) + 1;
+}
+
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    HUF_WriteCTableWksp wksp;
+    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+}
+
+
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
+{
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+
+    /* get symbol weights */
+    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
+    *hasZeroWeights = (rankVal[0] > 0);
+
+    /* check result */
+    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
+
+    /* Prepare base value per rank */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 curr = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = curr;
+    }   }
+
+    /* fill nbBits */
+    {   U32 n; for (n=0; n<nbSymbols; n++) {
+            const U32 w = huffWeight[n];
+            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
+    }   }
+
+    /* fill val */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        /* determine stating value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
+        {   U16 min = 0;
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        /* assign value within rank, symbol order */
+        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+    }
+
+    *maxSymbolValuePtr = nbSymbols - 1;
+    return readSize;
+}
+
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
+{
+    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
+    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+    return table[symbolValue].nbBits;
+}
+
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+/*
+ * HUF_setMaxHeight():
+ * Enforces maxNbBits on the Huffman tree described in huffNode.
+ *
+ * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
+ * the tree to so that it is a valid canonical Huffman tree.
+ *
+ * @pre               The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits == huffNode[lastNonNull].nbBits.
+ * @post              The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits is the return value <= maxNbBits.
+ *
+ * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
+ * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
+ *                    may not respect. After this function the Huffman tree will
+ *                    respect maxNbBits.
+ * @return            The maximum number of bits of the Huffman tree after adjustment,
+ *                    necessarily no more than maxNbBits.
+ */
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+    const U32 largestBits = huffNode[lastNonNull].nbBits;
+    /* early exit : no elt > maxNbBits, so the tree is already valid. */
+    if (largestBits <= maxNbBits) return largestBits;
+
+    /* there are several too large elements (at least >= 2) */
+    {   int totalCost = 0;
+        const U32 baseCost = 1 << (largestBits - maxNbBits);
+        int n = (int)lastNonNull;
+
+        /* Adjust any ranks > maxNbBits to maxNbBits.
+         * Compute totalCost, which is how far the sum of the ranks is
+         * we are over 2^largestBits after adjust the offending ranks.
+         */
+        while (huffNode[n].nbBits > maxNbBits) {
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)maxNbBits;
+            n--;
+        }
+        /* n stops at huffNode[n].nbBits <= maxNbBits */
+        assert(huffNode[n].nbBits <= maxNbBits);
+        /* n end at index of smallest symbol using < maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) --n;
+
+        /* renorm totalCost from 2^largestBits to 2^maxNbBits
+         * note : totalCost is necessarily a multiple of baseCost */
+        assert((totalCost & (baseCost - 1)) == 0);
+        totalCost >>= (largestBits - maxNbBits);
+        assert(totalCost > 0);
+
+        /* repay normalized cost */
+        {   U32 const noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_TABLELOG_MAX+2];
+
+            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
+            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
+            {   U32 currentNbBits = maxNbBits;
+                int pos;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
+            }   }
+
+            while (totalCost > 0) {
+                /* Try to reduce the next power of 2 above totalCost because we
+                 * gain back half the rank.
+                 */
+                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                    U32 const highPos = rankLast[nBitsToDecrease];
+                    U32 const lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    /* Decrease highPos if no symbols of lowPos or if it is
+                     * not cheaper to remove 2 lowPos than highPos.
+                     */
+                    if (lowPos == noSymbol) break;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                }   }
+                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
+                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
+                    nBitsToDecrease++;
+                assert(rankLast[nBitsToDecrease] != noSymbol);
+                /* Increase the number of bits to gain back half the rank cost. */
+                totalCost -= 1 << (nBitsToDecrease-1);
+                huffNode[rankLast[nBitsToDecrease]].nbBits++;
+
+                /* Fix up the new rank.
+                 * If the new rank was empty, this symbol is now its smallest.
+                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
+                 */
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
+                /* Fix up the old rank.
+                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
+                 * it must be the only symbol in its rank, so the old rank now has no symbols.
+                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
+                 * the smallest node in the rank. If the previous position belongs to a different rank,
+                 * then the rank is now empty.
+                 */
+                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+                }
+            }   /* while (totalCost > 0) */
+
+            /* If we've removed too much weight, then we have to add it back.
+             * To avoid overshooting again, we only adjust the smallest rank.
+             * We take the largest nodes from the lowest rank 0 and move them
+             * to rank 1. There's guaranteed to be enough rank 0 symbols because
+             * TODO.
+             */
+            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
+                /* special case : no rank 1 symbol (using maxNbBits-1);
+                 * let's create one from largest rank 0 (using maxNbBits).
+                 */
+                if (rankLast[1] == noSymbol) {
+                    while (huffNode[n].nbBits == maxNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    assert(n >= 0);
+                    rankLast[1] = (U32)(n+1);
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;
+                rankLast[1]++;
+                totalCost ++;
+            }
+        }   /* repay normalized cost */
+    }   /* there are several too large elements (at least >= 2) */
+
+    return maxNbBits;
+}
+
+typedef struct {
+    U32 base;
+    U32 curr;
+} rankPos;
+
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
+#define RANK_POSITION_TABLE_SIZE 32
+
+typedef struct {
+  huffNodeTable huffNodeTbl;
+  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
+} HUF_buildCTable_wksp_tables;
+
+/*
+ * HUF_sort():
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ *
+ * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
+ *                            Must have (maxSymbolValue + 1) entries.
+ * @param[in]  count          Histogram of the symbols.
+ * @param[in]  maxSymbolValue Maximum symbol value.
+ * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
+ */
+static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
+{
+    int n;
+    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
+
+    /* Compute base and set curr to base.
+     * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
+     * Then 2^lowerRank <= count[n]+1 <= 2^rank.
+     * We attribute each symbol to lowerRank's base value, because we want to know where
+     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
+     */
+    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 lowerRank = BIT_highbit32(count[n] + 1);
+        rankPosition[lowerRank].base++;
+    }
+    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
+        rankPosition[n-1].base += rankPosition[n].base;
+        rankPosition[n-1].curr = rankPosition[n-1].base;
+    }
+    /* Sort */
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 const c = count[n];
+        U32 const r = BIT_highbit32(c+1) + 1;
+        U32 pos = rankPosition[r].curr++;
+        /* Insert into the correct position in the rank.
+         * We have at most 256 symbols, so this insertion should be fine.
+         */
+        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
+            huffNode[pos] = huffNode[pos-1];
+            pos--;
+        }
+        huffNode[pos].count = c;
+        huffNode[pos].byte  = (BYTE)n;
+    }
+}
+
+
+/* HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
+ */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
+
+/* HUF_buildTree():
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
+ *
+ * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
+ * @param maxSymbolValue  The maximum symbol value.
+ * @return                The smallest node in the Huffman tree (by count).
+ */
+static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
+{
+    nodeElt* const huffNode0 = huffNode - 1;
+    int nonNullRank;
+    int lowS, lowN;
+    int nodeNb = STARTNODE;
+    int n, nodeRoot;
+    /* init for parents */
+    nonNullRank = (int)maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
+
+    /* create parents */
+    while (nodeNb <= nodeRoot) {
+        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    return nonNullRank;
+}
+
+/*
+ * HUF_buildCTableFromTree():
+ * Build the CTable given the Huffman tree in huffNode.
+ *
+ * @param[out] CTable         The output Huffman CTable.
+ * @param      huffNode       The Huffman tree.
+ * @param      nonNullRank    The last and smallest node in the Huffman tree.
+ * @param      maxSymbolValue The maximum symbol value.
+ * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
+ */
+static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
+{
+    /* fill result into ctable (val, nbBits) */
+    int n;
+    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    int const alphabetSize = (int)(maxSymbolValue + 1);
+    for (n=0; n<=nonNullRank; n++)
+        nbPerRank[huffNode[n].nbBits]++;
+    /* determine starting value per rank */
+    {   U16 min = 0;
+        for (n=(int)maxNbBits; n>0; n--) {
+            valPerRank[n] = min;      /* get starting value within each rank */
+            min += nbPerRank[n];
+            min >>= 1;
+    }   }
+    for (n=0; n<alphabetSize; n++)
+        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+    for (n=0; n<alphabetSize; n++)
+        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+}
+
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;
+    int nonNullRank;
+
+    /* safety checks */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+      return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+      return ERROR(maxSymbolValue_tooLarge);
+    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+
+    /* build tree */
+    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+
+    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+
+    return maxNbBits;
+}
+
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += CTable[s].nbBits * count[s];
+    }
+    return nbBits >> 3;
+}
+
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  int bad = 0;
+  int s;
+  for (s = 0; s <= (int)maxSymbolValue; ++s) {
+    bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+  }
+  return !bad;
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+{
+    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+}
+
+#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
+
+#define HUF_FLUSHBITS_1(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
+
+#define HUF_FLUSHBITS_2(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    const BYTE* ip = (const BYTE*) src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t n;
+    BIT_CStream_t bitC;
+
+    /* init */
+    if (dstSize < 8) return 0;   /* not enough space to compress */
+    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
+      if (HUF_isError(initErr)) return 0; }
+
+    n = srcSize & ~3;  /* join to mod 4 */
+    switch (srcSize & 3)
+    {
+        case 3:
+            HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
+            HUF_FLUSHBITS_2(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 2:
+            HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
+            HUF_FLUSHBITS_1(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 1:
+            HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
+            HUF_FLUSHBITS(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 0: ZSTD_FALLTHROUGH;
+        default: break;
+    }
+
+    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
+        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
+        HUF_FLUSHBITS_2(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
+        HUF_FLUSHBITS(&bitC);
+    }
+
+    return BIT_closeCStream(&bitC);
+}
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+                                      const void* src, size_t srcSize,
+                                      const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    if (bmi2) {
+        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
+    }
+    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+}
+
+#else
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    (void)bmi2;
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+#endif
+
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+
+static size_t
+HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, int bmi2)
+{
+    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
+    if (srcSize < 12) return 0;   /* no saving possible : too small input */
+    op += 6;   /* jumpTable */
+
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart+2, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart+4, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    assert(ip <= iend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
+        if (cSize==0) return 0;
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
+
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
+
+static size_t HUF_compressCTable_internal(
+                BYTE* const ostart, BYTE* op, BYTE* const oend,
+                const void* src, size_t srcSize,
+                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
+{
+    size_t const cSize = (nbStreams==HUF_singleStream) ?
+                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
+                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
+    if (HUF_isError(cSize)) { return cSize; }
+    if (cSize==0) { return 0; }   /* uncompressible */
+    op += cSize;
+    /* check compressibility */
+    assert(op >= ostart);
+    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
+    return (size_t)(op-ostart);
+}
+
+typedef struct {
+    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
+    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+    union {
+        HUF_buildCTable_wksp_tables buildCTable_wksp;
+        HUF_WriteCTableWksp writeCTable_wksp;
+    } wksps;
+} HUF_compress_tables_t;
+
+/* HUF_compress_internal() :
+ * `workSpace_align4` must be aligned on 4-bytes boundaries,
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
+static size_t
+HUF_compress_internal (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned huffLog,
+                       HUF_nbStreams_e nbStreams,
+                       void* workSpace_align4, size_t wkspSize,
+                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+                 const int bmi2)
+{
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
+    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */
+
+    /* checks & inits */
+    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
+    if (!srcSize) return 0;  /* Uncompressed */
+    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
+    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
+    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
+
+    /* Heuristic : If old table is valid, use it for small inputs */
+    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Scan input and build symbol stats */
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
+        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
+        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
+    }
+
+    /* Check validity of previous table */
+    if ( repeat
+      && *repeat == HUF_repeat_check
+      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
+        *repeat = HUF_repeat_none;
+    }
+    /* Heuristic : use existing table for small inputs */
+    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Build Huffman Tree */
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
+                                            maxSymbolValue, huffLog,
+                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
+        CHECK_F(maxBits);
+        huffLog = (U32)maxBits;
+        /* Zero unused symbols in CTable, so we can check it for validity */
+        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
+               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
+    }
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
+        /* Check if using previous huffman table is beneficial */
+        if (repeat && *repeat != HUF_repeat_none) {
+            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
+            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
+            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                return HUF_compressCTable_internal(ostart, op, oend,
+                                                   src, srcSize,
+                                                   nbStreams, oldHufTable, bmi2);
+        }   }
+
+        /* Use the new huffman table */
+        if (hSize + 12ul >= srcSize) { return 0; }
+        op += hSize;
+        if (repeat) { *repeat = HUF_repeat_none; }
+        if (oldHufTable)
+            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
+    }
+    return HUF_compressCTable_internal(ostart, op, oend,
+                                       src, srcSize,
+                                       nbStreams, table->CTable, bmi2);
+}
+
+
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_singleStream,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
+}
+
+size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_singleStream,
+                                 workSpace, wkspSize, hufTable,
+                                 repeat, preferRepeat, bmi2);
+}
+
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * provide workspace to generate compression tables */
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_fourStreams,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
+}
+
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * re-use an existing huffman compression table */
+size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_fourStreams,
+                                 workSpace, wkspSize,
+                                 hufTable, repeat, preferRepeat, bmi2);
+}
+
diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c
new file mode 100644
index 000000000000..a4e916008b3a
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress.c
@@ -0,0 +1,5109 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
+#include "../common/cpu.h"
+#include "../common/mem.h"
+#include "hist.h"           /* HIST_countFast_wksp */
+#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+#include "zstd_fast.h"
+#include "zstd_double_fast.h"
+#include "zstd_lazy.h"
+#include "zstd_opt.h"
+#include "zstd_ldm.h"
+#include "zstd_compress_superblock.h"
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * COMPRESS_HEAPMODE :
+ * Select how default decompression function ZSTD_compress() allocates its context,
+ * on stack (0, default), or into heap (1).
+ * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
+ */
+
+
+/*-*************************************
+*  Helper functions
+***************************************/
+/* ZSTD_compressBound()
+ * Note that the result from this function is only compatible with the "normal"
+ * full-block strategy.
+ * When there are a lot of small blocks due to frequent flush in streaming mode
+ * the overhead of headers can make the compressed data to be larger than the
+ * return value of ZSTD_compressBound().
+ */
+size_t ZSTD_compressBound(size_t srcSize) {
+    return ZSTD_COMPRESSBOUND(srcSize);
+}
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+struct ZSTD_CDict_s {
+    const void* dictContent;
+    size_t dictContentSize;
+    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
+    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+    ZSTD_cwksp workspace;
+    ZSTD_matchState_t matchState;
+    ZSTD_compressedBlockState_t cBlockState;
+    ZSTD_customMem customMem;
+    U32 dictID;
+    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+};  /* typedef'd to ZSTD_CDict within "zstd.h" */
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{
+    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
+}
+
+static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
+{
+    assert(cctx != NULL);
+    ZSTD_memset(cctx, 0, sizeof(*cctx));
+    cctx->customMem = memManager;
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
+        assert(!ZSTD_isError(err));
+        (void)err;
+    }
+}
+
+ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
+{
+    ZSTD_STATIC_ASSERT(zcss_init==0);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
+        if (!cctx) return NULL;
+        ZSTD_initCCtx(cctx, customMem);
+        return cctx;
+    }
+}
+
+ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
+{
+    ZSTD_cwksp ws;
+    ZSTD_CCtx* cctx;
+    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
+    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
+    ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
+
+    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
+    if (cctx == NULL) return NULL;
+
+    ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
+    ZSTD_cwksp_move(&cctx->workspace, &ws);
+    cctx->staticSize = workspaceSize;
+
+    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+    if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
+    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    return cctx;
+}
+
+/*
+ * Clears and frees all of the dictionaries in the CCtx.
+ */
+static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
+{
+    ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
+    ZSTD_freeCDict(cctx->localDict.cdict);
+    ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
+    cctx->cdict = NULL;
+}
+
+static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
+{
+    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
+    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
+    return bufferSize + cdictSize;
+}
+
+static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
+{
+    assert(cctx != NULL);
+    assert(cctx->staticSize == 0);
+    ZSTD_clearAllDicts(cctx);
+    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                    "not compatible with static CCtx");
+    {
+        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
+        ZSTD_freeCCtxContent(cctx);
+        if (!cctxInWorkspace) {
+            ZSTD_customFree(cctx, cctx->customMem);
+        }
+    }
+    return 0;
+}
+
+
+static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
+{
+    (void)cctx;
+    return 0;
+}
+
+
+size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return 0;   /* support sizeof on NULL */
+    /* cctx may be in the workspace */
+    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
+           + ZSTD_cwksp_sizeof(&cctx->workspace)
+           + ZSTD_sizeof_localDict(cctx->localDict)
+           + ZSTD_sizeof_mtctx(cctx);
+}
+
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
+{
+    return ZSTD_sizeof_CCtx(zcs);  /* same object */
+}
+
+/* private API call, for dictBuilder only */
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
+
+/* Returns 1 if compression parameters are such that we should
+ * enable long distance matching (wlog >= 27, strategy >= btopt).
+ * Returns 0 otherwise.
+ */
+static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
+}
+
+static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
+        ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params cctxParams;
+    /* should not matter, as all cParams are presumed properly defined */
+    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
+    cctxParams.cParams = cParams;
+
+    if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
+        cctxParams.ldmParams.enableLdm = 1;
+        /* LDM is enabled by default for optimal parser and window size >= 128MB */
+        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
+        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
+        assert(cctxParams.ldmParams.hashRateLog < 32);
+    }
+
+    assert(!ZSTD_checkCParams(cParams));
+    return cctxParams;
+}
+
+static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
+        ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params* params;
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
+            sizeof(ZSTD_CCtx_params), customMem);
+    if (!params) { return NULL; }
+    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+    params->customMem = customMem;
+    return params;
+}
+
+ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
+{
+    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
+}
+
+size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
+{
+    if (params == NULL) { return 0; }
+    ZSTD_customFree(params, params->customMem);
+    return 0;
+}
+
+size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
+{
+    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+}
+
+size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->compressionLevel = compressionLevel;
+    cctxParams->fParams.contentSizeFlag = 1;
+    return 0;
+}
+
+#define ZSTD_NO_CLEVEL 0
+
+/*
+ * Initializes the cctxParams from params and compressionLevel.
+ * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
+ */
+static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = compressionLevel;
+}
+
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+{
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
+    return 0;
+}
+
+/*
+ * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
+ * @param param Validated zstd parameters.
+ */
+static void ZSTD_CCtxParams_setZstdParams(
+        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
+}
+
+ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
+{
+    ZSTD_bounds bounds = { 0, 0, 0 };
+
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+        bounds.lowerBound = ZSTD_minCLevel();
+        bounds.upperBound = ZSTD_maxCLevel();
+        return bounds;
+
+    case ZSTD_c_windowLog:
+        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
+        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_hashLog:
+        bounds.lowerBound = ZSTD_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_chainLog:
+        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
+        bounds.upperBound = ZSTD_CHAINLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_searchLog:
+        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
+        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_minMatch:
+        bounds.lowerBound = ZSTD_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_targetLength:
+        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
+        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
+        return bounds;
+
+    case ZSTD_c_strategy:
+        bounds.lowerBound = ZSTD_STRATEGY_MIN;
+        bounds.upperBound = ZSTD_STRATEGY_MAX;
+        return bounds;
+
+    case ZSTD_c_contentSizeFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_checksumFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_dictIDFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_nbWorkers:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_jobSize:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_overlapLog:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_enableDedicatedDictSearch:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_enableLongDistanceMatching:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_ldmHashLog:
+        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmMinMatch:
+        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmBucketSizeLog:
+        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmHashRateLog:
+        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
+        return bounds;
+
+    /* experimental parameters */
+    case ZSTD_c_rsyncable:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_forceMaxWindow :
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_format:
+        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+        bounds.lowerBound = ZSTD_f_zstd1;
+        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_forceAttachDict:
+        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
+        bounds.lowerBound = ZSTD_dictDefaultAttach;
+        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_literalCompressionMode:
+        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
+        bounds.lowerBound = ZSTD_lcm_auto;
+        bounds.upperBound = ZSTD_lcm_uncompressed;
+        return bounds;
+
+    case ZSTD_c_targetCBlockSize:
+        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+        return bounds;
+
+    case ZSTD_c_srcSizeHint:
+        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
+        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
+        return bounds;
+
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+        bounds.lowerBound = (int)ZSTD_bm_buffered;
+        bounds.upperBound = (int)ZSTD_bm_stable;
+        return bounds;
+
+    case ZSTD_c_blockDelimiters:
+        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
+        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
+        return bounds;
+
+    case ZSTD_c_validateSequences:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    default:
+        bounds.error = ERROR(parameter_unsupported);
+        return bounds;
+    }
+}
+
+/* ZSTD_cParam_clampBounds:
+ * Clamps the value into the bounded range.
+ */
+static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
+{
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return bounds.error;
+    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
+    if (*value > bounds.upperBound) *value = bounds.upperBound;
+    return 0;
+}
+
+#define BOUNDCHECK(cParam, val) { \
+    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
+                    parameter_outOfBound, "Param out of bounds"); \
+}
+
+
+static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+{
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+        return 1;
+
+    case ZSTD_c_format:
+    case ZSTD_c_windowLog:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow :
+    case ZSTD_c_nbWorkers:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableDedicatedDictSearch:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
+    default:
+        return 0;
+    }
+}
+
+size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
+    if (cctx->streamStage != zcss_init) {
+        if (ZSTD_isUpdateAuthorized(param)) {
+            cctx->cParamsChanged = 1;
+        } else {
+            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
+    }   }
+
+    switch(param)
+    {
+    case ZSTD_c_nbWorkers:
+        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
+                        "MT not compatible with static alloc");
+        break;
+
+    case ZSTD_c_compressionLevel:
+    case ZSTD_c_windowLog:
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_format:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableDedicatedDictSearch:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
+        break;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
+}
+
+size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+                                    ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
+    switch(param)
+    {
+    case ZSTD_c_format :
+        BOUNDCHECK(ZSTD_c_format, value);
+        CCtxParams->format = (ZSTD_format_e)value;
+        return (size_t)CCtxParams->format;
+
+    case ZSTD_c_compressionLevel : {
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
+        if (value == 0)
+            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
+        else
+            CCtxParams->compressionLevel = value;
+        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
+        return 0;  /* return type (size_t) cannot represent negative values */
+    }
+
+    case ZSTD_c_windowLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_windowLog, value);
+        CCtxParams->cParams.windowLog = (U32)value;
+        return CCtxParams->cParams.windowLog;
+
+    case ZSTD_c_hashLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_hashLog, value);
+        CCtxParams->cParams.hashLog = (U32)value;
+        return CCtxParams->cParams.hashLog;
+
+    case ZSTD_c_chainLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_chainLog, value);
+        CCtxParams->cParams.chainLog = (U32)value;
+        return CCtxParams->cParams.chainLog;
+
+    case ZSTD_c_searchLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_searchLog, value);
+        CCtxParams->cParams.searchLog = (U32)value;
+        return (size_t)value;
+
+    case ZSTD_c_minMatch :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_minMatch, value);
+        CCtxParams->cParams.minMatch = value;
+        return CCtxParams->cParams.minMatch;
+
+    case ZSTD_c_targetLength :
+        BOUNDCHECK(ZSTD_c_targetLength, value);
+        CCtxParams->cParams.targetLength = value;
+        return CCtxParams->cParams.targetLength;
+
+    case ZSTD_c_strategy :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_strategy, value);
+        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
+        return (size_t)CCtxParams->cParams.strategy;
+
+    case ZSTD_c_contentSizeFlag :
+        /* Content size written in frame header _when known_ (default:1) */
+        DEBUGLOG(4, "set content size flag = %u", (value!=0));
+        CCtxParams->fParams.contentSizeFlag = value != 0;
+        return CCtxParams->fParams.contentSizeFlag;
+
+    case ZSTD_c_checksumFlag :
+        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
+        CCtxParams->fParams.checksumFlag = value != 0;
+        return CCtxParams->fParams.checksumFlag;
+
+    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
+        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
+        CCtxParams->fParams.noDictIDFlag = !value;
+        return !CCtxParams->fParams.noDictIDFlag;
+
+    case ZSTD_c_forceMaxWindow :
+        CCtxParams->forceWindow = (value != 0);
+        return CCtxParams->forceWindow;
+
+    case ZSTD_c_forceAttachDict : {
+        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
+        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
+        CCtxParams->attachDictPref = pref;
+        return CCtxParams->attachDictPref;
+    }
+
+    case ZSTD_c_literalCompressionMode : {
+        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
+        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
+        CCtxParams->literalCompressionMode = lcm;
+        return CCtxParams->literalCompressionMode;
+    }
+
+    case ZSTD_c_nbWorkers :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_jobSize :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_overlapLog :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_rsyncable :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_enableDedicatedDictSearch :
+        CCtxParams->enableDedicatedDictSearch = (value!=0);
+        return CCtxParams->enableDedicatedDictSearch;
+
+    case ZSTD_c_enableLongDistanceMatching :
+        CCtxParams->ldmParams.enableLdm = (value!=0);
+        return CCtxParams->ldmParams.enableLdm;
+
+    case ZSTD_c_ldmHashLog :
+        if (value!=0)   /* 0 ==> auto */
+            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
+        CCtxParams->ldmParams.hashLog = value;
+        return CCtxParams->ldmParams.hashLog;
+
+    case ZSTD_c_ldmMinMatch :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
+        CCtxParams->ldmParams.minMatchLength = value;
+        return CCtxParams->ldmParams.minMatchLength;
+
+    case ZSTD_c_ldmBucketSizeLog :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
+        CCtxParams->ldmParams.bucketSizeLog = value;
+        return CCtxParams->ldmParams.bucketSizeLog;
+
+    case ZSTD_c_ldmHashRateLog :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
+        CCtxParams->ldmParams.hashRateLog = value;
+        return CCtxParams->ldmParams.hashRateLog;
+
+    case ZSTD_c_targetCBlockSize :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        CCtxParams->targetCBlockSize = value;
+        return CCtxParams->targetCBlockSize;
+
+    case ZSTD_c_srcSizeHint :
+        if (value!=0)    /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+        CCtxParams->srcSizeHint = value;
+        return CCtxParams->srcSizeHint;
+
+    case ZSTD_c_stableInBuffer:
+        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
+        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->inBufferMode;
+
+    case ZSTD_c_stableOutBuffer:
+        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
+        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->outBufferMode;
+
+    case ZSTD_c_blockDelimiters:
+        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
+        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
+        return CCtxParams->blockDelimiters;
+
+    case ZSTD_c_validateSequences:
+        BOUNDCHECK(ZSTD_c_validateSequences, value);
+        CCtxParams->validateSequences = value;
+        return CCtxParams->validateSequences;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+}
+
+size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
+{
+    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
+}
+
+size_t ZSTD_CCtxParams_getParameter(
+        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
+{
+    switch(param)
+    {
+    case ZSTD_c_format :
+        *value = CCtxParams->format;
+        break;
+    case ZSTD_c_compressionLevel :
+        *value = CCtxParams->compressionLevel;
+        break;
+    case ZSTD_c_windowLog :
+        *value = (int)CCtxParams->cParams.windowLog;
+        break;
+    case ZSTD_c_hashLog :
+        *value = (int)CCtxParams->cParams.hashLog;
+        break;
+    case ZSTD_c_chainLog :
+        *value = (int)CCtxParams->cParams.chainLog;
+        break;
+    case ZSTD_c_searchLog :
+        *value = CCtxParams->cParams.searchLog;
+        break;
+    case ZSTD_c_minMatch :
+        *value = CCtxParams->cParams.minMatch;
+        break;
+    case ZSTD_c_targetLength :
+        *value = CCtxParams->cParams.targetLength;
+        break;
+    case ZSTD_c_strategy :
+        *value = (unsigned)CCtxParams->cParams.strategy;
+        break;
+    case ZSTD_c_contentSizeFlag :
+        *value = CCtxParams->fParams.contentSizeFlag;
+        break;
+    case ZSTD_c_checksumFlag :
+        *value = CCtxParams->fParams.checksumFlag;
+        break;
+    case ZSTD_c_dictIDFlag :
+        *value = !CCtxParams->fParams.noDictIDFlag;
+        break;
+    case ZSTD_c_forceMaxWindow :
+        *value = CCtxParams->forceWindow;
+        break;
+    case ZSTD_c_forceAttachDict :
+        *value = CCtxParams->attachDictPref;
+        break;
+    case ZSTD_c_literalCompressionMode :
+        *value = CCtxParams->literalCompressionMode;
+        break;
+    case ZSTD_c_nbWorkers :
+        assert(CCtxParams->nbWorkers == 0);
+        *value = CCtxParams->nbWorkers;
+        break;
+    case ZSTD_c_jobSize :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_overlapLog :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_rsyncable :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_enableDedicatedDictSearch :
+        *value = CCtxParams->enableDedicatedDictSearch;
+        break;
+    case ZSTD_c_enableLongDistanceMatching :
+        *value = CCtxParams->ldmParams.enableLdm;
+        break;
+    case ZSTD_c_ldmHashLog :
+        *value = CCtxParams->ldmParams.hashLog;
+        break;
+    case ZSTD_c_ldmMinMatch :
+        *value = CCtxParams->ldmParams.minMatchLength;
+        break;
+    case ZSTD_c_ldmBucketSizeLog :
+        *value = CCtxParams->ldmParams.bucketSizeLog;
+        break;
+    case ZSTD_c_ldmHashRateLog :
+        *value = CCtxParams->ldmParams.hashRateLog;
+        break;
+    case ZSTD_c_targetCBlockSize :
+        *value = (int)CCtxParams->targetCBlockSize;
+        break;
+    case ZSTD_c_srcSizeHint :
+        *value = (int)CCtxParams->srcSizeHint;
+        break;
+    case ZSTD_c_stableInBuffer :
+        *value = (int)CCtxParams->inBufferMode;
+        break;
+    case ZSTD_c_stableOutBuffer :
+        *value = (int)CCtxParams->outBufferMode;
+        break;
+    case ZSTD_c_blockDelimiters :
+        *value = (int)CCtxParams->blockDelimiters;
+        break;
+    case ZSTD_c_validateSequences :
+        *value = (int)CCtxParams->validateSequences;
+        break;
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    return 0;
+}
+
+/* ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  just applies `params` into `cctx`
+ *  no action is performed, parameters are merely stored.
+ *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
+ *    This is possible even if a compression is ongoing.
+ *    In which case, new parameters will be applied on the fly, starting with next compression job.
+ */
+size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "The context is in the wrong stage!");
+    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
+                    "Can't override parameters with cdict attached (some must "
+                    "be inherited from the cdict).");
+
+    cctx->requestedParams = *params;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't set pledgedSrcSize when not in init stage.");
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    return 0;
+}
+
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
+        int const compressionLevel,
+        size_t const dictSize);
+static int ZSTD_dedicatedDictSearch_isSupported(
+        const ZSTD_compressionParameters* cParams);
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams);
+
+/*
+ * Initializes the local dict using the requested parameters.
+ * NOTE: This does not use the pledged src size, because it may be used for more
+ * than one compression.
+ */
+static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+{
+    ZSTD_localDict* const dl = &cctx->localDict;
+    if (dl->dict == NULL) {
+        /* No local dictionary. */
+        assert(dl->dictBuffer == NULL);
+        assert(dl->cdict == NULL);
+        assert(dl->dictSize == 0);
+        return 0;
+    }
+    if (dl->cdict != NULL) {
+        assert(cctx->cdict == dl->cdict);
+        /* Local dictionary already initialized. */
+        return 0;
+    }
+    assert(dl->dictSize > 0);
+    assert(cctx->cdict == NULL);
+    assert(cctx->prefixDict.dict == NULL);
+
+    dl->cdict = ZSTD_createCDict_advanced2(
+            dl->dict,
+            dl->dictSize,
+            ZSTD_dlm_byRef,
+            dl->dictContentType,
+            &cctx->requestedParams,
+            cctx->customMem);
+    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
+    cctx->cdict = dl->cdict;
+    return 0;
+}
+
+size_t ZSTD_CCtx_loadDictionary_advanced(
+        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
+        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't load a dictionary when ctx is not in init stage.");
+    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
+    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
+    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
+        return 0;
+    if (dictLoadMethod == ZSTD_dlm_byRef) {
+        cctx->localDict.dict = dict;
+    } else {
+        void* dictBuffer;
+        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                        "no malloc for static CCtx");
+        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
+        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
+        ZSTD_memcpy(dictBuffer, dict, dictSize);
+        cctx->localDict.dictBuffer = dictBuffer;
+        cctx->localDict.dict = dictBuffer;
+    }
+    cctx->localDict.dictSize = dictSize;
+    cctx->localDict.dictContentType = dictContentType;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
+      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_CCtx_loadDictionary_advanced(
+            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_CCtx_loadDictionary_advanced(
+            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+
+size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a dict when ctx not in init stage.");
+    /* Free the existing local cdict (if any) to save memory. */
+    ZSTD_clearAllDicts(cctx);
+    cctx->cdict = cdict;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a pool when ctx not in init stage.");
+    cctx->pool = pool;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
+{
+    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+size_t ZSTD_CCtx_refPrefix_advanced(
+        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a prefix when ctx not in init stage.");
+    ZSTD_clearAllDicts(cctx);
+    if (prefix != NULL && prefixSize > 0) {
+        cctx->prefixDict.dict = prefix;
+        cctx->prefixDict.dictSize = prefixSize;
+        cctx->prefixDict.dictContentType = dictContentType;
+    }
+    return 0;
+}
+
+/*! ZSTD_CCtx_reset() :
+ *  Also dumps dictionary */
+size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        cctx->streamStage = zcss_init;
+        cctx->pledgedSrcSizePlusOne = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                        "Can't reset parameters only when not in init stage.");
+        ZSTD_clearAllDicts(cctx);
+        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
+    }
+    return 0;
+}
+
+
+/* ZSTD_checkCParams() :
+    control CParam values remain within authorized range.
+    @return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
+    return 0;
+}
+
+/* ZSTD_clampCParams() :
+ *  make CParam values within valid range.
+ *  @return : valid CParams */
+static ZSTD_compressionParameters
+ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP_TYPE(cParam, val, type) {                                \
+        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+    }
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
+    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
+    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
+    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
+    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
+    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
+    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
+    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
+    return cParams;
+}
+
+/* ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    return hashLog - btScale;
+}
+
+/* ZSTD_dictAndWindowLog() :
+ * Returns an adjusted window log that is large enough to fit the source and the dictionary.
+ * The zstd format says that the entire dictionary is valid if one byte of the dictionary
+ * is within the window. So the hashLog and chainLog should be large enough to reference both
+ * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
+ * the hashLog and windowLog.
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
+ */
+static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
+{
+    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
+    /* No dictionary ==> No change */
+    if (dictSize == 0) {
+        return windowLog;
+    }
+    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
+    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
+    {
+        U64 const windowSize = 1ULL << windowLog;
+        U64 const dictAndWindowSize = dictSize + windowSize;
+        /* If the window size is already large enough to fit both the source and the dictionary
+         * then just use the window size. Otherwise adjust so that it fits the dictionary and
+         * the window.
+         */
+        if (windowSize >= dictSize + srcSize) {
+            return windowLog; /* Window size large enough already */
+        } else if (dictAndWindowSize >= maxWindowSize) {
+            return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
+        } else  {
+            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
+        }
+    }
+}
+
+/* ZSTD_adjustCParams_internal() :
+ *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
+ *  mostly downsize to reduce memory consumption and initialization latency.
+ * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
+ * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
+ *  note : `srcSize==0` means 0!
+ *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
+static ZSTD_compressionParameters
+ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+                            unsigned long long srcSize,
+                            size_t dictSize,
+                            ZSTD_cParamMode_e mode)
+{
+    const U64 minSrcSize = 513; /* (1<<9) + 1 */
+    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
+    assert(ZSTD_checkCParams(cPar)==0);
+
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+        /* If we don't know the source size, don't make any
+         * assumptions about it. We will already have selected
+         * smaller parameters if a dictionary is in use.
+         */
+        break;
+    case ZSTD_cpm_createCDict:
+        /* Assume a small source size when creating a dictionary
+         * with an unkown source size.
+         */
+        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            srcSize = minSrcSize;
+        break;
+    case ZSTD_cpm_attachDict:
+        /* Dictionary has its own dedicated parameters which have
+         * already been selected. We are selecting parameters
+         * for only the source.
+         */
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+
+    /* resize windowLog if input is small enough, to use less memory */
+    if ( (srcSize < maxWindowResize)
+      && (dictSize < maxWindowResize) )  {
+        U32 const tSize = (U32)(srcSize + dictSize);
+        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
+        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
+                            ZSTD_highbit32(tSize-1) + 1;
+        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
+    }
+    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
+        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
+        if (cycleLog > dictAndWindowLog)
+            cPar.chainLog -= (cycleLog - dictAndWindowLog);
+    }
+
+    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
+        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
+
+    return cPar;
+}
+
+ZSTD_compressionParameters
+ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
+                   unsigned long long srcSize,
+                   size_t dictSize)
+{
+    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
+    if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
+}
+
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+
+static void ZSTD_overrideCParams(
+              ZSTD_compressionParameters* cParams,
+        const ZSTD_compressionParameters* overrides)
+{
+    if (overrides->windowLog)    cParams->windowLog    = overrides->windowLog;
+    if (overrides->hashLog)      cParams->hashLog      = overrides->hashLog;
+    if (overrides->chainLog)     cParams->chainLog     = overrides->chainLog;
+    if (overrides->searchLog)    cParams->searchLog    = overrides->searchLog;
+    if (overrides->minMatch)     cParams->minMatch     = overrides->minMatch;
+    if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
+    if (overrides->strategy)     cParams->strategy     = overrides->strategy;
+}
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    ZSTD_compressionParameters cParams;
+    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
+      srcSizeHint = CCtxParams->srcSizeHint;
+    }
+    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
+    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
+    assert(!ZSTD_checkCParams(cParams));
+    /* srcSizeHint == 0 means 0 */
+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
+}
+
+static size_t
+ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const U32 forCCtx)
+{
+    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+     * surrounded by redzones in ASAN. */
+    size_t const tableSpace = chainSize * sizeof(U32)
+                            + hSize * sizeof(U32)
+                            + h3Size * sizeof(U32);
+    size_t const optPotentialSpace =
+        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
+                                ? optPotentialSpace
+                                : 0;
+    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+                (U32)chainSize, (U32)hSize, (U32)h3Size);
+    return tableSpace + optSpace;
+}
+
+static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        const ZSTD_compressionParameters* cParams,
+        const ldmParams_t* ldmParams,
+        const int isStatic,
+        const size_t buffInSize,
+        const size_t buffOutSize,
+        const U64 pledgedSrcSize)
+{
+    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
+    size_t const maxNbSeq = blockSize / divider;
+    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
+    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+
+    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
+    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
+    size_t const ldmSeqSpace = ldmParams->enableLdm ?
+        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+
+
+    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
+                             + ZSTD_cwksp_alloc_size(buffOutSize);
+
+    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+
+    size_t const neededSpace =
+        cctxSpace +
+        entropySpace +
+        blockStateSpace +
+        ldmSpace +
+        ldmSeqSpace +
+        matchStateSize +
+        tokenSpace +
+        bufferSpace;
+
+    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+    return neededSpace;
+}
+
+size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    /* estimateCCtxSize is for one-shot compression. So no buffers should
+     * be needed. However, we still allocate two 0-sized buffers, which can
+     * take space under ASAN. */
+    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
+}
+
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
+        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+
+        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+            &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+            ZSTD_CONTENTSIZE_UNKNOWN);
+    }
+}
+
+size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    return ZSTD_estimateCStreamSize_usingCParams(cParams);
+}
+
+size_t ZSTD_estimateCStreamSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads (non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+    {   ZSTD_frameProgression fp;
+        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
+                                cctx->inBuffPos - cctx->inToCompress;
+        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
+        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
+        fp.ingested = cctx->consumedSrcSize + buffered;
+        fp.consumed = cctx->consumedSrcSize;
+        fp.produced = cctx->producedCSize;
+        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
+        fp.currentJobID = 0;
+        fp.nbActiveWorkers = 0;
+        return fp;
+}   }
+
+/*! ZSTD_toFlushNow()
+ *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
+ */
+size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
+{
+    (void)cctx;
+    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
+}
+
+static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
+                                    ZSTD_compressionParameters cParams2)
+{
+    (void)cParams1;
+    (void)cParams2;
+    assert(cParams1.windowLog    == cParams2.windowLog);
+    assert(cParams1.chainLog     == cParams2.chainLog);
+    assert(cParams1.hashLog      == cParams2.hashLog);
+    assert(cParams1.searchLog    == cParams2.searchLog);
+    assert(cParams1.minMatch     == cParams2.minMatch);
+    assert(cParams1.targetLength == cParams2.targetLength);
+    assert(cParams1.strategy     == cParams2.strategy);
+}
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
+{
+    int i;
+    for (i = 0; i < ZSTD_REP_NUM; ++i)
+        bs->rep[i] = repStartValue[i];
+    bs->entropy.huf.repeatMode = HUF_repeat_none;
+    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
+}
+
+/*! ZSTD_invalidateMatchState()
+ *  Invalidate all the matches in the match finder tables.
+ *  Requires nextSrc and base to be set (can be NULL).
+ */
+static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
+{
+    ZSTD_window_clear(&ms->window);
+
+    ms->nextToUpdate = ms->window.dictLimit;
+    ms->loadedDictEnd = 0;
+    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
+    ms->dictMatchState = NULL;
+}
+
+/*
+ * Controls, for this matchState reset, whether the tables need to be cleared /
+ * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
+ * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
+ * subsequent operation will overwrite the table space anyways (e.g., copying
+ * the matchState contents in from a CDict).
+ */
+typedef enum {
+    ZSTDcrp_makeClean,
+    ZSTDcrp_leaveDirty
+} ZSTD_compResetPolicy_e;
+
+/*
+ * Controls, for this matchState reset, whether indexing can continue where it
+ * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
+ * (ZSTDirp_reset).
+ */
+typedef enum {
+    ZSTDirp_continue,
+    ZSTDirp_reset
+} ZSTD_indexResetPolicy_e;
+
+typedef enum {
+    ZSTD_resetTarget_CDict,
+    ZSTD_resetTarget_CCtx
+} ZSTD_resetTarget_e;
+
+static size_t
+ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+                      ZSTD_cwksp* ws,
+                const ZSTD_compressionParameters* cParams,
+                const ZSTD_compResetPolicy_e crp,
+                const ZSTD_indexResetPolicy_e forceResetIndex,
+                const ZSTD_resetTarget_e forWho)
+{
+    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+
+    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    if (forceResetIndex == ZSTDirp_reset) {
+        ZSTD_window_init(&ms->window);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+    }
+
+    ms->hashLog3 = hashLog3;
+
+    ZSTD_invalidateMatchState(ms);
+
+    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+    ZSTD_cwksp_clear_tables(ws);
+
+    DEBUGLOG(5, "reserving table space");
+    /* table Space */
+    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+    if (crp!=ZSTDcrp_leaveDirty) {
+        /* reset tables only */
+        ZSTD_cwksp_clean_tables(ws);
+    }
+
+    /* opt parser space */
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+        DEBUGLOG(4, "reserving optimal parser space");
+        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    }
+
+    ms->cParams = *cParams;
+
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    return 0;
+}
+
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
+
+/*! ZSTD_resetCCtx_internal() :
+    note : `params` are assumed fully validated at this stage */
+static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+                                      ZSTD_CCtx_params params,
+                                      U64 const pledgedSrcSize,
+                                      ZSTD_compResetPolicy_e const crp,
+                                      ZSTD_buffered_policy_e const zbuff)
+{
+    ZSTD_cwksp* const ws = &zc->workspace;
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
+                (U32)pledgedSrcSize, params.cParams.windowLog);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+
+    zc->isFirstBlock = 1;
+
+    if (params.ldmParams.enableLdm) {
+        /* Adjust long distance matching parameters */
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashRateLog < 32);
+    }
+
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        size_t const maxNbSeq = blockSize / divider;
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+                ? windowSize + blockSize
+                : 0;
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+
+        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        ZSTD_indexResetPolicy_e needsIndexReset =
+            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+
+        size_t const neededSpace =
+            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                buffInSize, buffOutSize, pledgedSrcSize);
+        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
+
+        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+        /* Check if workspace is large enough, alloc a new one if needed */
+        {
+            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
+
+            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
+            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+
+            if (workspaceTooSmall || workspaceWasteful) {
+                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+                            ZSTD_cwksp_sizeof(ws) >> 10,
+                            neededSpace >> 10);
+
+                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
+
+                needsIndexReset = ZSTDirp_reset;
+
+                ZSTD_cwksp_free(ws, zc->customMem);
+                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
+
+                DEBUGLOG(5, "reserving object space");
+                /* Statically sized space.
+                 * entropyWorkspace never moves,
+                 * though prev/next block swap places */
+                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
+                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
+                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
+                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
+        }   }
+
+        ZSTD_cwksp_clear(ws);
+
+        /* init params */
+        zc->appliedParams = params;
+        zc->blockState.matchState.cParams = params.cParams;
+        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+        zc->consumedSrcSize = 0;
+        zc->producedCSize = 0;
+        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            zc->appliedParams.fParams.contentSizeFlag = 0;
+        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
+            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
+        zc->blockSize = blockSize;
+
+        xxh64_reset(&zc->xxhState, 0);
+        zc->stage = ZSTDcs_init;
+        zc->dictID = 0;
+        zc->dictContentSize = 0;
+
+        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+
+        /* ZSTD_wildcopy() is used to copy into the literals buffer,
+         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+         */
+        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
+        zc->seqStore.maxNbLit = blockSize;
+
+        /* buffers */
+        zc->bufferedPolicy = zbuff;
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
+
+        /* ldm bucketOffsets table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const numBuckets =
+                  ((size_t)1) << (params.ldmParams.hashLog -
+                                  params.ldmParams.bucketSizeLog);
+            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
+            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
+        }
+
+        /* sequences storage */
+        ZSTD_referenceExternalSequences(zc, NULL, 0);
+        zc->seqStore.maxNbSeq = maxNbSeq;
+        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+        FORWARD_IF_ERROR(ZSTD_reset_matchState(
+            &zc->blockState.matchState,
+            ws,
+            &params.cParams,
+            crp,
+            needsIndexReset,
+            ZSTD_resetTarget_CCtx), "");
+
+        /* ldm hash table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+            zc->maxNbLdmSequences = maxNbLdmSeq;
+
+            ZSTD_window_init(&zc->ldmState.window);
+            ZSTD_window_clear(&zc->ldmState.window);
+            zc->ldmState.loadedDictEnd = 0;
+        }
+
+        /* Due to alignment, when reusing a workspace, we can actually consume
+         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
+         */
+        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
+               ZSTD_cwksp_used(ws) <= neededSpace + 3);
+
+        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+        zc->initialized = 1;
+
+        return 0;
+    }
+}
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
+    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+}
+
+/* These are the approximate sizes for each strategy past which copying the
+ * dictionary tables into the working context is faster than using them
+ * in-place.
+ */
+static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
+    8 KB,  /* unused */
+    8 KB,  /* ZSTD_fast */
+    16 KB, /* ZSTD_dfast */
+    32 KB, /* ZSTD_greedy */
+    32 KB, /* ZSTD_lazy */
+    32 KB, /* ZSTD_lazy2 */
+    32 KB, /* ZSTD_btlazy2 */
+    32 KB, /* ZSTD_btopt */
+    8 KB,  /* ZSTD_btultra */
+    8 KB   /* ZSTD_btultra2 */
+};
+
+static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
+                                 const ZSTD_CCtx_params* params,
+                                 U64 pledgedSrcSize)
+{
+    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
+    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
+    return dedicatedDictSearch
+        || ( ( pledgedSrcSize <= cutoff
+            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+            || params->attachDictPref == ZSTD_dictForceAttach )
+          && params->attachDictPref != ZSTD_dictForceCopy
+          && !params->forceWindow ); /* dictMatchState isn't correctly
+                                      * handled in _enforceMaxDist */
+}
+
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+                        const ZSTD_CDict* cdict,
+                        ZSTD_CCtx_params params,
+                        U64 pledgedSrcSize,
+                        ZSTD_buffered_policy_e zbuff)
+{
+    {
+        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
+        unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
+        /* Resize working context table params for input only, since the dict
+         * has its own tables. */
+        /* pledgedSrcSize == 0 means 0! */
+
+        if (cdict->matchState.dedicatedDictSearch) {
+            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
+        }
+
+        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
+                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
+        params.cParams.windowLog = windowLog;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_makeClean, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
+    }
+
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+                                  - cdict->matchState.window.base);
+        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
+        if (cdictLen == 0) {
+            /* don't even attach dictionaries with no contents */
+            DEBUGLOG(4, "skipping attaching empty dictionary");
+        } else {
+            DEBUGLOG(4, "attaching dictionary into context");
+            cctx->blockState.matchState.dictMatchState = &cdict->matchState;
+
+            /* prep working match state so dict matches never have negative indices
+             * when they are translated to the working context's index space. */
+            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
+                cctx->blockState.matchState.window.nextSrc =
+                    cctx->blockState.matchState.window.base + cdictEnd;
+                ZSTD_window_clear(&cctx->blockState.matchState.window);
+            }
+            /* loadedDictEnd is expressed within the referential of the active context */
+            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
+    }   }
+
+    cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            ZSTD_CCtx_params params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+
+    assert(!cdict->matchState.dedicatedDictSearch);
+
+    DEBUGLOG(4, "copying dictionary into context");
+
+    {   unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
+        /* Copy only compression parameters related to tables. */
+        params.cParams = *cdict_cParams;
+        params.cParams.windowLog = windowLog;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_leaveDirty, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
+        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
+        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
+
+        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
+               cdict->matchState.hashTable,
+               hSize * sizeof(U32));
+        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+               cdict->matchState.chainTable,
+               chainSize * sizeof(U32));
+    }
+
+    /* Zero the hashTable3, since the cdict never fills it */
+    {   int const h3log = cctx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+        assert(cdict->matchState.hashLog3 == 0);
+        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
+    }
+
+    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
+
+    /* copy dictionary offsets */
+    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+
+    cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+/* We have a choice between copying the dictionary context into the working
+ * context, or referencing the dictionary context from the working context
+ * in-place. We decide here which strategy to use. */
+static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            const ZSTD_CCtx_params* params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+
+    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
+                (unsigned)pledgedSrcSize);
+
+    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
+        return ZSTD_resetCCtx_byAttachingCDict(
+            cctx, cdict, *params, pledgedSrcSize, zbuff);
+    } else {
+        return ZSTD_resetCCtx_byCopyingCDict(
+            cctx, cdict, *params, pledgedSrcSize, zbuff);
+    }
+}
+
+/*! ZSTD_copyCCtx_internal() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  The "context", in this case, refers to the hash and chain tables,
+ *  entropy tables, and dictionary references.
+ * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
+ * @return : 0, or an error code */
+static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
+                            const ZSTD_CCtx* srcCCtx,
+                            ZSTD_frameParameters fParams,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
+    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
+                    "Can't copy a ctx that's not in init stage.");
+
+    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
+    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
+        /* Copy only compression parameters related to tables. */
+        params.cParams = srcCCtx->appliedParams.cParams;
+        params.fParams = fParams;
+        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+                                ZSTDcrp_leaveDirty, zbuff);
+        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
+        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
+        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
+        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
+        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
+        int const h3log = srcCCtx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+
+        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
+               srcCCtx->blockState.matchState.hashTable,
+               hSize * sizeof(U32));
+        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
+               srcCCtx->blockState.matchState.chainTable,
+               chainSize * sizeof(U32));
+        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
+               srcCCtx->blockState.matchState.hashTable3,
+               h3Size * sizeof(U32));
+    }
+
+    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
+
+    /* copy dictionary offsets */
+    {
+        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
+        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+    dstCCtx->dictID = srcCCtx->dictID;
+    dstCCtx->dictContentSize = srcCCtx->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
+
+    return 0;
+}
+
+/*! ZSTD_copyCCtx() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize==0 means "unknown".
+*   @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+{
+    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
+    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
+    if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
+
+    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
+                                fParams, pledgedSrcSize,
+                                zbuff);
+}
+
+
+#define ZSTD_ROWSIZE 16
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ *  It must be set to a clear 0/1 value, to remove branch during inlining.
+ *  Presume table size is a multiple of ZSTD_ROWSIZE
+ *  to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
+{
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
+    int cellNb = 0;
+    int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));   /* can be casted to int */
+
+
+    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+        int column;
+        for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
+            if (table[cellNb] < reducerValue) table[cellNb] = 0;
+            else table[cellNb] -= reducerValue;
+            cellNb++;
+    }   }
+}
+
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
+}
+
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}
+
+/*! ZSTD_reduceIndex() :
+*   rescale all indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
+{
+    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
+        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
+    }
+
+    if (params->cParams.strategy != ZSTD_fast) {
+        U32 const chainSize = (U32)1 << params->cParams.chainLog;
+        if (params->cParams.strategy == ZSTD_btlazy2)
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+    }
+
+    if (ms->hashLog3) {
+        U32 const h3Size = (U32)1 << ms->hashLog3;
+        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
+    }
+}
+
+
+/*-*******************************************************
+*  Block entropic compression
+*********************************************************/
+
+/* See doc/zstd_compression_format.md for detailed format description */
+
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
+{
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    BYTE* const llCodeTable = seqStorePtr->llCode;
+    BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    U32 u;
+    assert(nbSeq <= seqStorePtr->maxNbSeq);
+    for (u=0; u<nbSeq; u++) {
+        U32 const llv = sequences[u].litLength;
+        U32 const mlv = sequences[u].matchLength;
+        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
+        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
+        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
+    }
+    if (seqStorePtr->longLengthID==1)
+        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+    if (seqStorePtr->longLengthID==2)
+        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+}
+
+/* ZSTD_useTargetCBlockSize():
+ * Returns if target compressed block size param is being used.
+ * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
+ * Returns 1 if true, 0 otherwise. */
+static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
+{
+    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
+    return (cctxParams->targetCBlockSize != 0);
+}
+
+/* ZSTD_entropyCompressSequences_internal():
+ * actually compresses both literals and sequences */
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+                          const ZSTD_entropyCTables_t* prevEntropy,
+                                ZSTD_entropyCTables_t* nextEntropy,
+                          const ZSTD_CCtx_params* cctxParams,
+                                void* dst, size_t dstCapacity,
+                                void* entropyWorkspace, size_t entropyWkspSize,
+                          const int bmi2)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    unsigned* count = (unsigned*)entropyWorkspace;
+    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    BYTE* seqHead;
+    BYTE* lastNCount = NULL;
+
+    entropyWorkspace = count + (MaxSeq + 1);
+    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
+
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
+
+    /* Compress literals */
+    {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
+        size_t const cSize = ZSTD_compressLiterals(
+                                    &prevEntropy->huf, &nextEntropy->huf,
+                                    cctxParams->cParams.strategy,
+                                    ZSTD_disableLiteralsCompression(cctxParams),
+                                    op, dstCapacity,
+                                    literals, litSize,
+                                    entropyWorkspace, entropyWkspSize,
+                                    bmi2);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
+        assert(cSize <= dstCapacity);
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
+    if (nbSeq < 128) {
+        *op++ = (BYTE)nbSeq;
+    } else if (nbSeq < LONGNBSEQ) {
+        op[0] = (BYTE)((nbSeq>>8) + 0x80);
+        op[1] = (BYTE)nbSeq;
+        op+=2;
+    } else {
+        op[0]=0xFF;
+        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+        op+=3;
+    }
+    assert(op <= oend);
+    if (nbSeq==0) {
+        /* Copy the old tables over as if we repeated them */
+        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
+        return (size_t)(op - ostart);
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+    assert(op <= oend);
+
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->fse.litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                count, max, llCodeTable, nbSeq,
+                LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                prevEntropy->fse.litlengthCTable,
+                sizeof(prevEntropy->fse.litlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for Offsets */
+    {   unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->fse.offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                count, max, ofCodeTable, nbSeq,
+                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                prevEntropy->fse.offcodeCTable,
+                sizeof(prevEntropy->fse.offcodeCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for MatchLengths */
+    {   unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                count, max, mlCodeTable, nbSeq,
+                ML_defaultNorm, ML_defaultNormLog, MaxML,
+                prevEntropy->fse.matchlengthCTable,
+                sizeof(prevEntropy->fse.matchlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+
+    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, (size_t)(oend - op),
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        assert(op <= oend);
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+        if (lastNCount && (op - lastNCount) < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(op - lastNCount == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+    }
+
+    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
+    return (size_t)(op - ostart);
+}
+
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+                       const ZSTD_entropyCTables_t* prevEntropy,
+                             ZSTD_entropyCTables_t* nextEntropy,
+                       const ZSTD_CCtx_params* cctxParams,
+                             void* dst, size_t dstCapacity,
+                             size_t srcSize,
+                             void* entropyWorkspace, size_t entropyWkspSize,
+                             int bmi2)
+{
+    size_t const cSize = ZSTD_entropyCompressSequences_internal(
+                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
+                            dst, dstCapacity,
+                            entropyWorkspace, entropyWkspSize, bmi2);
+    if (cSize == 0) return 0;
+    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+     * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
+     */
+    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+        return 0;  /* block not compressed */
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+
+    /* Check compressibility */
+    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
+        if (cSize >= maxCSize) return 0;  /* block not compressed */
+    }
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+    return cSize;
+}
+
+/* ZSTD_selectBlockCompressor() :
+ * Not static, but internal use only (used by long distance matcher)
+ * assumption : strat is a valid strategy */
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+{
+    static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
+        { ZSTD_compressBlock_fast  /* default for 0 */,
+          ZSTD_compressBlock_fast,
+          ZSTD_compressBlock_doubleFast,
+          ZSTD_compressBlock_greedy,
+          ZSTD_compressBlock_lazy,
+          ZSTD_compressBlock_lazy2,
+          ZSTD_compressBlock_btlazy2,
+          ZSTD_compressBlock_btopt,
+          ZSTD_compressBlock_btultra,
+          ZSTD_compressBlock_btultra2 },
+        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+          ZSTD_compressBlock_fast_extDict,
+          ZSTD_compressBlock_doubleFast_extDict,
+          ZSTD_compressBlock_greedy_extDict,
+          ZSTD_compressBlock_lazy_extDict,
+          ZSTD_compressBlock_lazy2_extDict,
+          ZSTD_compressBlock_btlazy2_extDict,
+          ZSTD_compressBlock_btopt_extDict,
+          ZSTD_compressBlock_btultra_extDict,
+          ZSTD_compressBlock_btultra_extDict },
+        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
+          ZSTD_compressBlock_fast_dictMatchState,
+          ZSTD_compressBlock_doubleFast_dictMatchState,
+          ZSTD_compressBlock_greedy_dictMatchState,
+          ZSTD_compressBlock_lazy_dictMatchState,
+          ZSTD_compressBlock_lazy2_dictMatchState,
+          ZSTD_compressBlock_btlazy2_dictMatchState,
+          ZSTD_compressBlock_btopt_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState },
+        { NULL  /* default for 0 */,
+          NULL,
+          NULL,
+          ZSTD_compressBlock_greedy_dedicatedDictSearch,
+          ZSTD_compressBlock_lazy_dedicatedDictSearch,
+          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
+          NULL,
+          NULL,
+          NULL,
+          NULL }
+    };
+    ZSTD_blockCompressor selectedCompressor;
+    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
+
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    assert(selectedCompressor != NULL);
+    return selectedCompressor;
+}
+
+static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
+                                   const BYTE* anchor, size_t lastLLSize)
+{
+    ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
+    seqStorePtr->lit += lastLLSize;
+}
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+    ssPtr->lit = ssPtr->litStart;
+    ssPtr->sequences = ssPtr->sequencesStart;
+    ssPtr->longLengthID = 0;
+}
+
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+{
+    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    /* Assert that we have correctly flushed the ctx params into the ms's copy */
+    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
+    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
+            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
+        } else {
+            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
+        }
+        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
+    }
+    ZSTD_resetSeqStore(&(zc->seqStore));
+    /* required for optimal parser to read stats from dictionary */
+    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
+    /* tell the optimal parser how we expect to compress literals */
+    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
+    /* a gap between an attached dict and the current window is not safe,
+     * they must remain adjacent,
+     * and when that stops being the case, the dict must be unset */
+    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
+
+    /* limited update after a very long match */
+    {   const BYTE* const base = ms->window.base;
+        const BYTE* const istart = (const BYTE*)src;
+        const U32 curr = (U32)(istart-base);
+        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
+        if (curr > ms->nextToUpdate + 384)
+            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
+    }
+
+    /* select and store sequences */
+    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
+        size_t lastLLSize;
+        {   int i;
+            for (i = 0; i < ZSTD_REP_NUM; ++i)
+                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+        }
+        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+            assert(!zc->appliedParams.ldmParams.enableLdm);
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&zc->externSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
+        } else if (zc->appliedParams.ldmParams.enableLdm) {
+            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
+
+            ldmSeqStore.seq = zc->ldmSequences;
+            ldmSeqStore.capacity = zc->maxNbLdmSequences;
+            /* Updates ldmSeqStore.size */
+            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
+                                               &zc->appliedParams.ldmParams,
+                                               src, srcSize), "");
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&ldmSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(ldmSeqStore.pos == ldmSeqStore.size);
+        } else {   /* not long range mode */
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            ms->ldmSeqStore = NULL;
+            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
+        }
+        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
+            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
+    }   }
+    return ZSTDbss_compress;
+}
+
+static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+{
+    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
+    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
+    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
+    size_t literalsRead = 0;
+    size_t lastLLSize;
+
+    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+    size_t i;
+    repcodes_t updatedRepcodes;
+
+    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+    /* Ensure we have enough space for last literals "sequence" */
+    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
+    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    for (i = 0; i < seqStoreSeqSize; ++i) {
+        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
+        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
+        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
+        outSeqs[i].rep = 0;
+
+        if (i == seqStore->longLengthPos) {
+            if (seqStore->longLengthID == 1) {
+                outSeqs[i].litLength += 0x10000;
+            } else if (seqStore->longLengthID == 2) {
+                outSeqs[i].matchLength += 0x10000;
+            }
+        }
+
+        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
+            /* Derive the correct offset corresponding to a repcode */
+            outSeqs[i].rep = seqStoreSeqs[i].offset;
+            if (outSeqs[i].litLength != 0) {
+                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+            } else {
+                if (outSeqs[i].rep == 3) {
+                    rawOffset = updatedRepcodes.rep[0] - 1;
+                } else {
+                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
+                }
+            }
+        }
+        outSeqs[i].offset = rawOffset;
+        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
+           so we provide seqStoreSeqs[i].offset - 1 */
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
+                                         seqStoreSeqs[i].offset - 1,
+                                         seqStoreSeqs[i].litLength == 0);
+        literalsRead += outSeqs[i].litLength;
+    }
+    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
+     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
+     * for the block boundary, according to the API.
+     */
+    assert(seqStoreLiteralsSize >= literalsRead);
+    lastLLSize = seqStoreLiteralsSize - literalsRead;
+    outSeqs[i].litLength = (U32)lastLLSize;
+    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
+    seqStoreSeqSize++;
+    zc->seqCollector.seqIndex += seqStoreSeqSize;
+}
+
+size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                              size_t outSeqsSize, const void* src, size_t srcSize)
+{
+    const size_t dstCapacity = ZSTD_compressBound(srcSize);
+    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
+    SeqCollector seqCollector;
+
+    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
+
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqIndex = 0;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+    ZSTD_customFree(dst, ZSTD_defaultCMem);
+    return zc->seqCollector.seqIndex;
+}
+
+size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
+    size_t in = 0;
+    size_t out = 0;
+    for (; in < seqsSize; ++in) {
+        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
+            if (in != seqsSize - 1) {
+                sequences[in+1].litLength += sequences[in].litLength;
+            }
+        } else {
+            sequences[out] = sequences[in];
+            ++out;
+        }
+    }
+    return out;
+}
+
+/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
+static int ZSTD_isRLE(const BYTE* src, size_t length) {
+    const BYTE* ip = src;
+    const BYTE value = ip[0];
+    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
+    const size_t unrollSize = sizeof(size_t) * 4;
+    const size_t unrollMask = unrollSize - 1;
+    const size_t prefixLength = length & unrollMask;
+    size_t i;
+    size_t u;
+    if (length == 1) return 1;
+    /* Check if prefix is RLE first before using unrolled loop */
+    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
+        return 0;
+    }
+    for (i = prefixLength; i != length; i += unrollSize) {
+        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
+            if (MEM_readST(ip + i + u) != valueST) {
+                return 0;
+            }
+        }
+    }
+    return 1;
+}
+
+/* Returns true if the given block may be RLE.
+ * This is just a heuristic based on the compressibility.
+ * It may return both false positives and false negatives.
+ */
+static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+{
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
+
+    return nbSeqs < 4 && nbLits < 10;
+}
+
+static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+{
+    ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+    zc->blockState.prevCBlock = zc->blockState.nextCBlock;
+    zc->blockState.nextCBlock = tmp;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 frame)
+{
+    /* This the upper bound for the length of an rle block.
+     * This isn't the actual upper bound. Finding the real threshold
+     * needs further investigation.
+     */
+    const U32 rleMaxLength = 25;
+    size_t cSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        return 0;
+    }
+
+    /* encode sequences and literals */
+    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            srcSize,
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+            zc->bmi2);
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        return 0;
+    }
+
+
+    if (frame &&
+        /* We don't want to emit our first block as a RLE even if it qualifies because
+         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+         * This is only an issue for zstd <= v1.4.3
+         */
+        !zc->isFirstBlock &&
+        cSize < rleMaxLength &&
+        ZSTD_isRLE(ip, srcSize))
+    {
+        cSize = 1;
+        op[0] = ip[0];
+    }
+
+out:
+    if (!ZSTD_isError(cSize) && cSize > 1) {
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+    }
+    /* We check that dictionaries have offset codes available for the first
+     * block. After the first block, the offcode table might not have large
+     * enough codes to represent the offsets in the data.
+     */
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const size_t bss, U32 lastBlock)
+{
+    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
+    if (bss == ZSTDbss_compress) {
+        if (/* We don't want to emit our first block as a RLE even if it qualifies because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+            * This is only an issue for zstd <= v1.4.3
+            */
+            !zc->isFirstBlock &&
+            ZSTD_maybeRLE(&zc->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize))
+        {
+            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
+        }
+        /* Attempt superblock compression.
+         *
+         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
+         * standard ZSTD_compressBound(). This is a problem, because even if we have
+         * space now, taking an extra byte now could cause us to run out of space later
+         * and violate ZSTD_compressBound().
+         *
+         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
+         *
+         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
+         * uncompressed block in these cases:
+         *   * cSize == 0: Return code for an uncompressed block.
+         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
+         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
+         *     output space.
+         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
+         *     emit an uncompressed block.
+         */
+        {
+            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+            if (cSize != ERROR(dstSize_tooSmall)) {
+                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
+                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
+                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    return cSize;
+                }
+            }
+        }
+    }
+
+    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
+    /* Superblock compression failed, attempt to emit a single no compress block.
+     * The decoder will be able to stream this block since it is uncompressed.
+     */
+    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               U32 lastBlock)
+{
+    size_t cSize = 0;
+    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
+    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+
+    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");
+
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         void const* ip,
+                                         void const* iend)
+{
+    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
+        U32 const maxDist = (U32)1 << params->cParams.windowLog;
+        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
+        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+        ZSTD_reduceIndex(ms, params, correction);
+        ZSTD_cwksp_mark_tables_clean(ws);
+        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+        else ms->nextToUpdate -= correction;
+        /* invalidate dictionaries on overflow correction */
+        ms->loadedDictEnd = 0;
+        ms->dictMatchState = NULL;
+    }
+}
+
+/*! ZSTD_compress_frameChunk() :
+*   Compress a chunk of data into one or multiple blocks.
+*   All blocks will be terminated, all input will be consumed.
+*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
+*   Frame is supposed already started (header already produced)
+*   @return : compressed size, or an error code
+*/
+static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     U32 lastFrameChunk)
+{
+    size_t blockSize = cctx->blockSize;
+    size_t remaining = srcSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
+
+    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
+
+    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
+        xxh64_update(&cctx->xxhState, src, srcSize);
+
+    while (remaining) {
+        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
+
+        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
+                        dstSize_tooSmall,
+                        "not enough space to store compressed block");
+        if (remaining < blockSize) blockSize = remaining;
+
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
+        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+
+        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
+        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
+
+        {   size_t cSize;
+            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
+                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
+                assert(cSize > 0);
+                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+            } else {
+                cSize = ZSTD_compressBlock_internal(cctx,
+                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
+                                        ip, blockSize, 1 /* frame */);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
+
+                if (cSize == 0) {  /* block is not compressible */
+                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+                } else {
+                    U32 const cBlockHeader = cSize == 1 ?
+                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+                    MEM_writeLE24(op, cBlockHeader);
+                    cSize += ZSTD_blockHeaderSize;
+                }
+            }
+
+
+            ip += blockSize;
+            assert(remaining >= blockSize);
+            remaining -= blockSize;
+            op += cSize;
+            assert(dstCapacity >= cSize);
+            dstCapacity -= cSize;
+            cctx->isFirstBlock = 0;
+            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
+                        (unsigned)cSize);
+    }   }
+
+    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
+    return (size_t)(op-ostart);
+}
+
+
+static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
+{   BYTE* const op = (BYTE*)dst;
+    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
+    U32   const checksumFlag = params->fParams.checksumFlag>0;
+    U32   const windowSize = (U32)1 << params->cParams.windowLog;
+    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+    U32   const fcsCode = params->fParams.contentSizeFlag ?
+                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
+    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
+    size_t pos=0;
+
+    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
+                    "dst buf is too small to fit worst-case frame header size.");
+    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+    if (params->format == ZSTD_f_zstd1) {
+        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+        pos = 4;
+    }
+    op[pos++] = frameHeaderDescriptionByte;
+    if (!singleSegment) op[pos++] = windowLogByte;
+    switch(dictIDSizeCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : break;
+        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
+        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
+        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
+    }
+    switch(fcsCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
+        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
+        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
+        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
+    }
+    return pos;
+}
+
+/* ZSTD_writeSkippableFrame_advanced() :
+ * Writes out a skippable frame with the specified magic number variant (16 are supported),
+ * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
+ *
+ * Returns the total number of bytes written, or a ZSTD error code.
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
+                    dstSize_tooSmall, "Not enough room for skippable frame");
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+                    "wrong cctx stage");
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
+                    parameter_unsupported,
+                    "incompatible with ldm");
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
+    return 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                               U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
+        cctx->consumedSrcSize += srcSize;
+        cctx->producedCSize += (cSize + fhSize);
+        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+            RETURN_ERROR_IF(
+                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
+                srcSize_wrong,
+                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
+                (unsigned)cctx->pledgedSrcSizePlusOne-1,
+                (unsigned)cctx->consumedSrcSize);
+        }
+        return cSize + fhSize;
+    }
+}
+
+size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
+}
+
+
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+{
+    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+    assert(!ZSTD_checkCParams(cParams));
+    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
+}
+
+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
+
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
+}
+
+/*! ZSTD_loadDictionaryContent() :
+ *  @return : 0, or an error code
+ */
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                                         ldmState_t* ls,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         const void* src, size_t srcSize,
+                                         ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+
+    ZSTD_window_update(&ms->window, src, srcSize);
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+
+    if (params->ldmParams.enableLdm && ls != NULL) {
+        ZSTD_window_update(&ls->window, src, srcSize);
+        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+    }
+
+    /* Assert that we the ms params match the params we're being given */
+    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    while (iend - ip > HASH_READ_SIZE) {
+        size_t const remaining = (size_t)(iend - ip);
+        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+        const BYTE* const ichunk = ip + chunk;
+
+        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+
+        if (params->ldmParams.enableLdm && ls != NULL)
+            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+
+        switch(params->cParams.strategy)
+        {
+        case ZSTD_fast:
+            ZSTD_fillHashTable(ms, ichunk, dtlm);
+            break;
+        case ZSTD_dfast:
+            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+            break;
+
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
+                assert(chunk == remaining); /* must load everything in one go */
+                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
+            } else if (chunk >= HASH_READ_SIZE) {
+                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+            }
+            break;
+
+        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+            break;
+
+        default:
+            assert(0);  /* not possible : not a valid strategy id */
+        }
+
+        ip = ichunk;
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up causes problems
+ * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
+ * and only dictionaries with 100% valid symbols can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) {
+        return FSE_repeat_check;
+    }
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) {
+            return FSE_repeat_check;
+        }
+    }
+    return FSE_repeat_valid;
+}
+
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize)
+{
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights. Otherwise, we set it to check */
+        if (!hasZeroWeights)
+            bs->entropy.huf.repeatMode = HUF_repeat_valid;
+
+        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
+        dictPtr += hufHeaderSize;
+    }
+
+    {   unsigned offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        /* fill all offset symbols to avoid garbage at end of table */
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.offcodeCTable,
+                offcodeNCount, MaxOff, offcodeLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.matchlengthCTable,
+                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.litlengthCTable,
+                litlengthNCount, litlengthMaxValue, litlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+    bs->rep[0] = MEM_readLE32(dictPtr+0);
+    bs->rep[1] = MEM_readLE32(dictPtr+4);
+    bs->rep[2] = MEM_readLE32(dictPtr+8);
+    dictPtr += 12;
+
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+        }
+        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
+        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));
+
+        /* All repCodes must be <= dictContentSize and != 0 */
+        {   U32 u;
+            for (u=0; u<3; u++) {
+                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
+                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
+    }   }   }
+
+    return dictPtr - (const BYTE*)dict;
+}
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : dictID, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed >= 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_cwksp* ws,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    size_t dictID;
+    size_t eSize;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize >= 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
+    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
+    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
+    dictPtr += eSize;
+
+    {
+        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
+    }
+    return dictID;
+}
+
+/* ZSTD_compress_insertDictionary() :
+*   @return : dictID, or an error code */
+static size_t
+ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                               ZSTD_matchState_t* ms,
+                               ldmState_t* ls,
+                               ZSTD_cwksp* ws,
+                         const ZSTD_CCtx_params* params,
+                         const void* dict, size_t dictSize,
+                               ZSTD_dictContentType_e dictContentType,
+                               ZSTD_dictTableLoadMethod_e dtlm,
+                               void* workspace)
+{
+    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+    if ((dict==NULL) || (dictSize<8)) {
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        return 0;
+    }
+
+    ZSTD_reset_compressedBlockState(bs);
+
+    /* dict restricted modes */
+    if (dictContentType == ZSTD_dct_rawContent)
+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
+
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictContentType == ZSTD_dct_auto) {
+            DEBUGLOG(4, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(
+                ms, ls, ws, params, dict, dictSize, dtlm);
+        }
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        assert(0);   /* impossible */
+    }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(
+        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
+}
+
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
+
+/*! ZSTD_compressBegin_internal() :
+ * @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
+                                    ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if ( (cdict)
+      && (cdict->dictContentSize > 0)
+      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+        || cdict->compressionLevel == 0)
+      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
+        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
+    }
+
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+                                     ZSTDcrp_makeClean, zbuff) , "");
+    {   size_t const dictID = cdict ?
+                ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+                        cdict->dictContentSize, cdict->dictContentType, dtlm,
+                        cctx->entropyWorkspace)
+              : ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+                        dictContentType, dtlm, cctx->entropyWorkspace);
+        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+        assert(dictID <= UINT_MAX);
+        cctx->dictID = (U32)dictID;
+        cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
+    /* compression parameters verification and optimization */
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
+    return ZSTD_compressBegin_internal(cctx,
+                                       dict, dictSize, dictContentType, dtlm,
+                                       cdict,
+                                       params, pledgedSrcSize,
+                                       ZSTDb_not_buffered);
+}
+
+/*! ZSTD_compressBegin_advanced() :
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+                             const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compressBegin_advanced_internal(cctx,
+                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                            NULL /*cdict*/,
+                                            &cctxParams, pledgedSrcSize);
+}
+
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+}
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
+{
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
+}
+
+
+/*! ZSTD_writeEpilogue() :
+*   Ends a frame.
+*   @return : nb of bytes written into dst (or an error code) */
+static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    size_t fhSize = 0;
+
+    DEBUGLOG(4, "ZSTD_writeEpilogue");
+    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
+
+    /* special case : empty frame */
+    if (cctx->stage == ZSTDcs_init) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        dstCapacity -= fhSize;
+        op += fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (cctx->stage != ZSTDcs_ending) {
+        /* write one last empty block, make it the "last" block */
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+    }
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32(op, checksum);
+        op += 4;
+    }
+
+    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
+    return op-ostart;
+}
+
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
+{
+    (void)cctx;
+    (void)extraCSize;
+}
+
+size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize)
+{
+    size_t endResult;
+    size_t const cSize = ZSTD_compressContinue_internal(cctx,
+                                dst, dstCapacity, src, srcSize,
+                                1 /* frame mode */, 1 /* last chunk */);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
+    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
+    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
+    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+        DEBUGLOG(4, "end of frame : controlling src size");
+        RETURN_ERROR_IF(
+            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
+            srcSize_wrong,
+             "error : pledgedSrcSize = %u, while realSrcSize = %u",
+            (unsigned)cctx->pledgedSrcSizePlusOne-1,
+            (unsigned)cctx->consumedSrcSize);
+    }
+    ZSTD_CCtx_trace(cctx, endResult);
+    return cSize + endResult;
+}
+
+size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict,size_t dictSize,
+                               ZSTD_parameters params)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compress_advanced");
+    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compress_advanced_internal(cctx,
+                                           dst, dstCapacity,
+                                           src, srcSize,
+                                           dict, dictSize,
+                                           &cctxParams);
+}
+
+/* Internal */
+size_t ZSTD_compress_advanced_internal(
+        ZSTD_CCtx* cctx,
+        void* dst, size_t dstCapacity,
+        const void* src, size_t srcSize,
+        const void* dict,size_t dictSize,
+        const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
+    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                         params, srcSize, ZSTDb_not_buffered) , "");
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict, size_t dictSize,
+                               int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
+        assert(params.fParams.contentSizeFlag == 1);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+}
+
+size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize,
+                         int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
+    assert(cctx != NULL);
+    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+
+size_t ZSTD_compress(void* dst, size_t dstCapacity,
+               const void* src, size_t srcSize,
+                     int compressionLevel)
+{
+    size_t result;
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
+    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
+    ZSTD_freeCCtx(cctx);
+    return result;
+}
+
+
+/* =====  Dictionary API  ===== */
+
+/*! ZSTD_estimateCDictSize_advanced() :
+ *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
+size_t ZSTD_estimateCDictSize_advanced(
+        size_t dictSize, ZSTD_compressionParameters cParams,
+        ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
+    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
+}
+
+size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
+}
+
+size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support sizeof on NULL */
+    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
+    /* cdict may be in the workspace */
+    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
+        + ZSTD_cwksp_sizeof(&cdict->workspace);
+}
+
+static size_t ZSTD_initCDict_internal(
+                    ZSTD_CDict* cdict,
+              const void* dictBuffer, size_t dictSize,
+                    ZSTD_dictLoadMethod_e dictLoadMethod,
+                    ZSTD_dictContentType_e dictContentType,
+                    ZSTD_CCtx_params params)
+{
+    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
+    assert(!ZSTD_checkCParams(params.cParams));
+    cdict->matchState.cParams = params.cParams;
+    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
+    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
+        cdict->matchState.dedicatedDictSearch = 0;
+    }
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
+        cdict->dictContent = dictBuffer;
+    } else {
+         void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
+        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
+        cdict->dictContent = internalBuffer;
+        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
+    }
+    cdict->dictContentSize = dictSize;
+    cdict->dictContentType = dictContentType;
+
+    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
+
+
+    /* Reset the state to no dictionary */
+    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
+    FORWARD_IF_ERROR(ZSTD_reset_matchState(
+        &cdict->matchState,
+        &cdict->workspace,
+        &params.cParams,
+        ZSTDcrp_makeClean,
+        ZSTDirp_reset,
+        ZSTD_resetTarget_CDict), "");
+    /* (Maybe) load the dictionary
+     * Skips loading the dictionary if it is < 8 bytes.
+     */
+    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+        params.fParams.contentSizeFlag = 1;
+        {   size_t const dictID = ZSTD_compress_insertDictionary(
+                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
+                    &params, cdict->dictContent, cdict->dictContentSize,
+                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
+            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+            assert(dictID <= (size_t)(U32)-1);
+            cdict->dictID = (U32)dictID;
+        }
+    }
+
+    return 0;
+}
+
+static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   size_t const workspaceSize =
+            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
+            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
+            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            (dictLoadMethod == ZSTD_dlm_byRef ? 0
+             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
+        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
+        ZSTD_cwksp ws;
+        ZSTD_CDict* cdict;
+
+        if (!workspace) {
+            ZSTD_customFree(workspace, customMem);
+            return NULL;
+        }
+
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
+
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        assert(cdict != NULL);
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+        cdict->customMem = customMem;
+        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
+
+        return cdict;
+    }
+}
+
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
+    ZSTD_CCtxParams_init(&cctxParams, 0);
+    cctxParams.cParams = cParams;
+    cctxParams.customMem = customMem;
+    return ZSTD_createCDict_advanced2(
+        dictBuffer, dictSize,
+        dictLoadMethod, dictContentType,
+        &cctxParams, customMem);
+}
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+        const void* dict, size_t dictSize,
+        ZSTD_dictLoadMethod_e dictLoadMethod,
+        ZSTD_dictContentType_e dictContentType,
+        const ZSTD_CCtx_params* originalCctxParams,
+        ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params cctxParams = *originalCctxParams;
+    ZSTD_compressionParameters cParams;
+    ZSTD_CDict* cdict;
+
+    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+
+    if (cctxParams.enableDedicatedDictSearch) {
+        cParams = ZSTD_dedicatedDictSearch_getCParams(
+            cctxParams.compressionLevel, dictSize);
+        ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
+    } else {
+        cParams = ZSTD_getCParamsFromCCtxParams(
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    }
+
+    if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
+        /* Fall back to non-DDSS params */
+        cctxParams.enableDedicatedDictSearch = 0;
+        cParams = ZSTD_getCParamsFromCCtxParams(
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    }
+
+    cctxParams.cParams = cParams;
+
+    cdict = ZSTD_createCDict_advanced_internal(dictSize,
+                        dictLoadMethod, cctxParams.cParams,
+                        customMem);
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                    dict, dictSize,
+                                    dictLoadMethod, dictContentType,
+                                    cctxParams) )) {
+        ZSTD_freeCDict(cdict);
+        return NULL;
+    }
+
+    return cdict;
+}
+
+ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
+                                                  cParams, ZSTD_defaultCMem);
+    if (cdict)
+        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+    return cdict;
+}
+
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                     ZSTD_dlm_byRef, ZSTD_dct_auto,
+                                     cParams, ZSTD_defaultCMem);
+    if (cdict)
+        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+    return cdict;
+}
+
+size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = cdict->customMem;
+        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
+        ZSTD_cwksp_free(&cdict->workspace, cMem);
+        if (!cdictInWorkspace) {
+            ZSTD_customFree(cdict, cMem);
+        }
+        return 0;
+    }
+}
+
+/*! ZSTD_initStaticCDict_advanced() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevants cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+const ZSTD_CDict* ZSTD_initStaticCDict(
+                                 void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 ZSTD_dictLoadMethod_e dictLoadMethod,
+                                 ZSTD_dictContentType_e dictContentType,
+                                 ZSTD_compressionParameters cParams)
+{
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
+                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+                            + matchStateSize;
+    ZSTD_CDict* cdict;
+    ZSTD_CCtx_params params;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+
+    {
+        ZSTD_cwksp ws;
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        if (cdict == NULL) return NULL;
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+    }
+
+    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    ZSTD_CCtxParams_init(&params, 0);
+    params.cParams = cParams;
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                              dict, dictSize,
+                                              dictLoadMethod, dictContentType,
+                                              params) ))
+        return NULL;
+
+    return cdict;
+}
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+    assert(cdict != NULL);
+    return cdict->matchState.cParams;
+}
+
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;
+    return cdict->dictID;
+}
+
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
+    /* Initialize the cctxParams from the cdict */
+    {
+        ZSTD_parameters params;
+        params.fParams = fParams;
+        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+                        || cdict->compressionLevel == 0 ) ?
+                ZSTD_getCParamsFromCDict(cdict)
+              : ZSTD_getCParams(cdict->compressionLevel,
+                                pledgedSrcSize,
+                                cdict->dictContentSize);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
+    }
+    /* Increase window log to fit the entire dictionary and source if the
+     * source size is known. Limit the increase to 19, which is the
+     * window log for compression level 1 with the largest source size.
+     */
+    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
+        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
+    }
+    return ZSTD_compressBegin_internal(cctx,
+                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                        cdict,
+                                        &cctxParams, pledgedSrcSize,
+                                        ZSTDb_not_buffered);
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * pledgedSrcSize=0 means "unknown"
+ * if pledgedSrcSize>0, it will enable contentSizeFlag */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
+    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression parameters are decided at CDict creation time
+ *  while frame parameters are hardcoded */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+}
+
+
+
+/* ******************************************************************
+*  Streaming
+********************************************************************/
+
+ZSTD_CStream* ZSTD_createCStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createCStream");
+    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticCCtx(workspace, workspaceSize);
+}
+
+ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
+{   /* CStream and CCtx are now same object */
+    return ZSTD_createCCtx_advanced(customMem);
+}
+
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
+{
+    return ZSTD_freeCCtx(zcs);   /* same object */
+}
+
+
+
+/*======   Initialization   ======*/
+
+size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_CStreamOutSize(void)
+{
+    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
+}
+
+static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
+{
+    if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
+        return ZSTD_cpm_attachDict;
+    else
+        return ZSTD_cpm_noAttachDict;
+}
+
+/* ZSTD_resetCStream():
+ * pledgedSrcSize == 0 means "unknown" */
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+/*! ZSTD_initCStream_internal() :
+ *  Note : for lib/compress only. Used by zstdmt_compress.c.
+ *  Assumption 1 : params are valid
+ *  Assumption 2 : either dict, or cdict, is defined, not both */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    const ZSTD_CCtx_params* params,
+                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_internal");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    zcs->requestedParams = *params;
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if (dict) {
+        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    } else {
+        /* Dictionary is cleared if !cdict */
+        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    }
+    return 0;
+}
+
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                            const ZSTD_CDict* cdict,
+                                            ZSTD_frameParameters fParams,
+                                            unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    zcs->requestedParams.fParams = fParams;
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+/* note : cdict must outlive compression session */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+
+/* ZSTD_initCStream_advanced() :
+ * pledgedSrcSize must be exact.
+ * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                                 const void* dict, size_t dictSize,
+                                 ZSTD_parameters params, unsigned long long pss)
+{
+    /* for compatibility with older programs relying on this behavior.
+     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
+     * This line will be removed in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_initCStream_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    return 0;
+}
+
+/*======   Compression   ======*/
+
+static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
+{
+    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
+    if (hintInSize==0) hintInSize = cctx->blockSize;
+    return hintInSize;
+}
+
+/* ZSTD_compressStream_generic():
+ *  internal function for all *compressStream*() variants
+ *  non-static, because can be called from zstdmt_compress.c
+ * @return : hint size for next input */
+static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                          ZSTD_outBuffer* output,
+                                          ZSTD_inBuffer* input,
+                                          ZSTD_EndDirective const flushMode)
+{
+    const char* const istart = (const char*)input->src;
+    const char* const iend = input->size != 0 ? istart + input->size : istart;
+    const char* ip = input->pos != 0 ? istart + input->pos : istart;
+    char* const ostart = (char*)output->dst;
+    char* const oend = output->size != 0 ? ostart + output->size : ostart;
+    char* op = output->pos != 0 ? ostart + output->pos : ostart;
+    U32 someMoreWork = 1;
+
+    /* check expectations */
+    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
+    if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->inBuff != NULL);
+        assert(zcs->inBuffSize > 0);
+    }
+    if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->outBuff !=  NULL);
+        assert(zcs->outBuffSize > 0);
+    }
+    assert(output->pos <= output->size);
+    assert(input->pos <= input->size);
+    assert((U32)flushMode <= (U32)ZSTD_e_end);
+
+    while (someMoreWork) {
+        switch(zcs->streamStage)
+        {
+        case zcss_init:
+            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
+
+        case zcss_load:
+            if ( (flushMode == ZSTD_e_end)
+              && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
+                || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
+              && (zcs->inBuffPos == 0) ) {
+                /* shortcut to compression pass directly into output buffer */
+                size_t const cSize = ZSTD_compressEnd(zcs,
+                                                op, oend-op, ip, iend-ip);
+                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
+                ip = iend;
+                op += cSize;
+                zcs->frameEnded = 1;
+                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                someMoreWork = 0; break;
+            }
+            /* complete loading into inBuffer in buffered mode */
+            if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+                size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
+                size_t const loaded = ZSTD_limitCopy(
+                                        zcs->inBuff + zcs->inBuffPos, toLoad,
+                                        ip, iend-ip);
+                zcs->inBuffPos += loaded;
+                if (loaded != 0)
+                    ip += loaded;
+                if ( (flushMode == ZSTD_e_continue)
+                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
+                    /* not enough input to fill full block : stop here */
+                    someMoreWork = 0; break;
+                }
+                if ( (flushMode == ZSTD_e_flush)
+                  && (zcs->inBuffPos == zcs->inToCompress) ) {
+                    /* empty */
+                    someMoreWork = 0; break;
+                }
+            }
+            /* compress current block (note : this stage cannot be stopped in the middle) */
+            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
+            {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
+                void* cDst;
+                size_t cSize;
+                size_t oSize = oend-op;
+                size_t const iSize = inputBuffered
+                    ? zcs->inBuffPos - zcs->inToCompress
+                    : MIN((size_t)(iend - ip), zcs->blockSize);
+                if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
+                    cDst = op;   /* compress into output buffer, to skip flush stage */
+                else
+                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
+                if (inputBuffered) {
+                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize);
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    /* prepare next block */
+                    zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
+                    if (zcs->inBuffTarget > zcs->inBuffSize)
+                        zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+                    DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
+                            (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
+                    if (!lastBlock)
+                        assert(zcs->inBuffTarget <= zcs->inBuffSize);
+                    zcs->inToCompress = zcs->inBuffPos;
+                } else {
+                    unsigned const lastBlock = (ip + iSize == iend);
+                    assert(flushMode == ZSTD_e_end /* Already validated */);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
+                    /* Consume the input prior to error checking to mirror buffered mode. */
+                    if (iSize > 0)
+                        ip += iSize;
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    if (lastBlock)
+                        assert(ip == iend);
+                }
+                if (cDst == op) {  /* no need to flush */
+                    op += cSize;
+                    if (zcs->frameEnded) {
+                        DEBUGLOG(5, "Frame completed directly in outBuffer");
+                        someMoreWork = 0;
+                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    }
+                    break;
+                }
+                zcs->outBuffContentSize = cSize;
+                zcs->outBuffFlushedSize = 0;
+                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
+            }
+	    ZSTD_FALLTHROUGH;
+        case zcss_flush:
+            DEBUGLOG(5, "flush stage");
+            assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
+            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
+                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
+                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
+                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
+                if (flushed)
+                    op += flushed;
+                zcs->outBuffFlushedSize += flushed;
+                if (toFlush!=flushed) {
+                    /* flush not fully completed, presumably because dst is too small */
+                    assert(op==oend);
+                    someMoreWork = 0;
+                    break;
+                }
+                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
+                if (zcs->frameEnded) {
+                    DEBUGLOG(5, "Frame completed on flush");
+                    someMoreWork = 0;
+                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    break;
+                }
+                zcs->streamStage = zcss_load;
+                break;
+            }
+
+        default: /* impossible */
+            assert(0);
+        }
+    }
+
+    input->pos = ip - istart;
+    output->pos = op - ostart;
+    if (zcs->frameEnded) return 0;
+    return ZSTD_nextInputSizeHint(zcs);
+}
+
+static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
+{
+    return ZSTD_nextInputSizeHint(cctx);
+
+}
+
+size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
+    return ZSTD_nextInputSizeHint_MTorST(zcs);
+}
+
+/* After a compression call set the expected input/output buffer.
+ * This is validated at the start of the next compression call.
+ */
+static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        cctx->expectedInBuffer = *input;
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        cctx->expectedOutBufferSize = output->size - output->pos;
+    }
+}
+
+/* Validate that the input/output buffers match the expectations set by
+ * ZSTD_setBufferExpectations.
+ */
+static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
+                                        ZSTD_outBuffer const* output,
+                                        ZSTD_inBuffer const* input,
+                                        ZSTD_EndDirective endOp)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        ZSTD_inBuffer const expect = cctx->expectedInBuffer;
+        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
+        if (endOp != ZSTD_e_end)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        size_t const outBufferSize = output->size - output->pos;
+        if (cctx->expectedOutBufferSize != outBufferSize)
+            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
+    }
+    return 0;
+}
+
+static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+                                             ZSTD_EndDirective endOp,
+                                             size_t inSize) {
+    ZSTD_CCtx_params params = cctx->requestedParams;
+    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
+    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
+    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
+    if (cctx->cdict)
+        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
+    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
+    {
+        size_t const dictSize = prefixDict.dict
+                ? prefixDict.dictSize
+                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
+        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
+        params.cParams = ZSTD_getCParamsFromCCtxParams(
+                &params, cctx->pledgedSrcSizePlusOne-1,
+                dictSize, mode);
+    }
+
+    if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
+        /* Enable LDM by default for optimal parser and window size >= 128MB */
+        DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
+        params.ldmParams.enableLdm = 1;
+    }
+
+    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
+        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
+                cctx->cdict,
+                &params, pledgedSrcSize,
+                ZSTDb_buffered) , "");
+        assert(cctx->appliedParams.nbWorkers == 0);
+        cctx->inToCompress = 0;
+        cctx->inBuffPos = 0;
+        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+            /* for small input: avoid automatic flush on reaching end of block, since
+            * it would require to add a 3-bytes null block to end frame
+            */
+            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
+        } else {
+            cctx->inBuffTarget = 0;
+        }
+        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+        cctx->streamStage = zcss_load;
+        cctx->frameEnded = 0;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                             ZSTD_outBuffer* output,
+                             ZSTD_inBuffer* input,
+                             ZSTD_EndDirective endOp)
+{
+    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
+    /* check conditions */
+    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
+    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
+    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
+    assert(cctx != NULL);
+
+    /* transparent initialization stage */
+    if (cctx->streamStage == zcss_init) {
+        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
+        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
+    }
+    /* end of transparent initialization stage */
+
+    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
+    /* compression stage */
+    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
+    DEBUGLOG(5, "completed ZSTD_compressStream2");
+    ZSTD_setBufferExpectations(cctx, output, input);
+    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
+}
+
+size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
+}
+
+size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+                      void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
+    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
+    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+    /* Enable stable input/output buffers. */
+    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
+    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
+    {   size_t oPos = 0;
+        size_t iPos = 0;
+        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
+                                        dst, dstCapacity, &oPos,
+                                        src, srcSize, &iPos,
+                                        ZSTD_e_end);
+        /* Reset to the original values. */
+        cctx->requestedParams.inBufferMode = originalInBufferMode;
+        cctx->requestedParams.outBufferMode = originalOutBufferMode;
+        FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
+        if (result != 0) {  /* compression not completed, due to lack of output space */
+            assert(oPos == dstCapacity);
+            RETURN_ERROR(dstSize_tooSmall, "");
+        }
+        assert(iPos == srcSize);   /* all input is expected consumed */
+        return oPos;
+    }
+}
+
+typedef struct {
+    U32 idx;             /* Index in array of ZSTD_Sequence */
+    U32 posInSequence;   /* Position within sequence at idx */
+    size_t posInSrc;        /* Number of bytes given by sequences provided so far */
+} ZSTD_sequencePosition;
+
+/* Returns a ZSTD error code if sequence is not valid */
+static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
+                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
+    size_t offsetBound;
+    U32 windowSize = 1 << windowLog;
+    /* posInSrc represents the amount of data the the decoder would decode up to this point.
+     * As long as the amount of data decoded is less than or equal to window size, offsets may be
+     * larger than the total length of output decoded in order to reference the dict, even larger than
+     * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
+     */
+    offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+    RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
+    RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
+    return 0;
+}
+
+/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
+static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
+    U32 offCode = rawOffset + ZSTD_REP_MOVE;
+    U32 repCode = 0;
+
+    if (!ll0 && rawOffset == rep[0]) {
+        repCode = 1;
+    } else if (rawOffset == rep[1]) {
+        repCode = 2 - ll0;
+    } else if (rawOffset == rep[2]) {
+        repCode = 3 - ll0;
+    } else if (ll0 && rawOffset == rep[0] - 1) {
+        repCode = 3;
+    }
+    if (repCode) {
+        /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
+        offCode = repCode - 1;
+    }
+    return offCode;
+}
+
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+ */
+static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                             const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                             const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    BYTE const* ip = (BYTE const*)(src);
+    const BYTE* const iend = ip + blockSize;
+    repcodes_t updatedRepcodes;
+    U32 dictSize;
+    U32 litLength;
+    U32 matchLength;
+    U32 ll0;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = (U32)cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = (U32)cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
+        litLength = inSeqs[idx].litLength;
+        matchLength = inSeqs[idx].matchLength;
+        ll0 = litLength == 0;
+        offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                cctx->appliedParams.cParams.windowLog, dictSize,
+                                                cctx->appliedParams.cParams.minMatch),
+                                                "Sequence validation failed");
+        }
+        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
+                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    if (inSeqs[idx].litLength) {
+        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
+        ip += inSeqs[idx].litLength;
+        seqPos->posInSrc += inSeqs[idx].litLength;
+    }
+    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
+    seqPos->idx = idx+1;
+    return 0;
+}
+
+/* Returns the number of bytes to move the current read position back by. Only non-zero
+ * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
+ * went wrong.
+ *
+ * This function will attempt to scan through blockSize bytes represented by the sequences
+ * in inSeqs, storing any (partial) sequences.
+ *
+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
+ */
+static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                       const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    U32 startPosInSequence = seqPos->posInSequence;
+    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
+    size_t dictSize;
+    BYTE const* ip = (BYTE const*)(src);
+    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
+    repcodes_t updatedRepcodes;
+    U32 bytesAdjustment = 0;
+    U32 finalMatchSplit = 0;
+    U32 litLength;
+    U32 matchLength;
+    U32 rawOffset;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
+    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
+        const ZSTD_Sequence currSeq = inSeqs[idx];
+        litLength = currSeq.litLength;
+        matchLength = currSeq.matchLength;
+        rawOffset = currSeq.offset;
+
+        /* Modify the sequence depending on where endPosInSequence lies */
+        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
+            if (startPosInSequence >= litLength) {
+                startPosInSequence -= litLength;
+                litLength = 0;
+                matchLength -= startPosInSequence;
+            } else {
+                litLength -= startPosInSequence;
+            }
+            /* Move to the next sequence */
+            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
+            startPosInSequence = 0;
+            idx++;
+        } else {
+            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
+               does not reach the end of the match. So, we have to split the sequence */
+            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
+                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
+            if (endPosInSequence > litLength) {
+                U32 firstHalfMatchLength;
+                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
+                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
+                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
+                    /* Only ever split the match if it is larger than the block size */
+                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
+                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
+                        /* Move the endPosInSequence backward so that it creates match of minMatch length */
+                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        firstHalfMatchLength -= bytesAdjustment;
+                    }
+                    matchLength = firstHalfMatchLength;
+                    /* Flag that we split the last match - after storing the sequence, exit the loop,
+                       but keep the value of endPosInSequence */
+                    finalMatchSplit = 1;
+                } else {
+                    /* Move the position in sequence backwards so that we don't split match, and break to store
+                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
+                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
+                     * would cause the first half of the match to be too small
+                     */
+                    bytesAdjustment = endPosInSequence - currSeq.litLength;
+                    endPosInSequence = currSeq.litLength;
+                    break;
+                }
+            } else {
+                /* This sequence ends inside the literals, break to store the last literals */
+                break;
+            }
+        }
+        /* Check if this offset can be represented with a repcode */
+        {   U32 ll0 = (litLength == 0);
+            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
+            updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+        }
+
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                   cctx->appliedParams.cParams.windowLog, dictSize,
+                                                   cctx->appliedParams.cParams.minMatch),
+                                                   "Sequence validation failed");
+        }
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
+                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
+    seqPos->idx = idx;
+    seqPos->posInSequence = endPosInSequence;
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    iend -= bytesAdjustment;
+    if (ip != iend) {
+        /* Store any last literals */
+        U32 lastLLSize = (U32)(iend - ip);
+        assert(ip <= iend);
+        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
+        seqPos->posInSrc += lastLLSize;
+    }
+
+    return bytesAdjustment;
+}
+
+typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                       const void* src, size_t blockSize);
+static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
+    ZSTD_sequenceCopier sequenceCopier = NULL;
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
+    if (mode == ZSTD_sf_explicitBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
+    } else if (mode == ZSTD_sf_noBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
+    }
+    assert(sequenceCopier != NULL);
+    return sequenceCopier;
+}
+
+/* Compress, block-by-block, all of the sequences given.
+ *
+ * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
+ */
+static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                              const void* src, size_t srcSize) {
+    size_t cSize = 0;
+    U32 lastBlock;
+    size_t blockSize;
+    size_t compressedSeqsSize;
+    size_t remaining = srcSize;
+    ZSTD_sequencePosition seqPos = {0, 0, 0};
+
+    BYTE const* ip = (BYTE const*)src;
+    BYTE* op = (BYTE*)dst;
+    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
+
+    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
+    /* Special case: empty frame */
+    if (remaining == 0) {
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+        cSize += ZSTD_blockHeaderSize;
+    }
+
+    while (remaining) {
+        size_t cBlockSize;
+        size_t additionalByteAdjustment;
+        lastBlock = remaining <= cctx->blockSize;
+        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
+        ZSTD_resetSeqStore(&cctx->seqStore);
+        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
+
+        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
+        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
+        blockSize -= additionalByteAdjustment;
+
+        /* If blocks are too small, emit as a nocompress block */
+        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
+            cSize += cBlockSize;
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            continue;
+        }
+
+        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
+                                &cctx->appliedParams,
+                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
+                                blockSize,
+                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                                cctx->bmi2);
+        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
+        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
+
+        if (!cctx->isFirstBlock &&
+            ZSTD_maybeRLE(&cctx->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize)) {
+            /* We don't want to emit our first block as a RLE even if it qualifies because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+            * This is only an issue for zstd <= v1.4.3
+            */
+            compressedSeqsSize = 1;
+        }
+
+        if (compressedSeqsSize == 0) {
+            /* ZSTD_noCompressBlock writes the block header as well */
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
+        } else if (compressedSeqsSize == 1) {
+            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
+            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
+        } else {
+            U32 cBlockHeader;
+            /* Error checking and repcodes update */
+            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+            /* Write block header into beginning of block*/
+            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
+            MEM_writeLE24(op, cBlockHeader);
+            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
+            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
+        }
+
+        cSize += cBlockSize;
+        DEBUGLOG(4, "cSize running total: %zu", cSize);
+
+        if (lastBlock) {
+            break;
+        } else {
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            cctx->isFirstBlock = 0;
+        }
+    }
+
+    return cSize;
+}
+
+size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
+                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                              const void* src, size_t srcSize) {
+    BYTE* op = (BYTE*)dst;
+    size_t cSize = 0;
+    size_t compressedBlocksSize = 0;
+    size_t frameHeaderSize = 0;
+
+    /* Transparent initialization stage, same as compressStream2() */
+    DEBUGLOG(3, "ZSTD_compressSequences()");
+    assert(cctx != NULL);
+    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
+    /* Begin writing output, starting with frame header */
+    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
+    op += frameHeaderSize;
+    dstCapacity -= frameHeaderSize;
+    cSize += frameHeaderSize;
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
+        xxh64_update(&cctx->xxhState, src, srcSize);
+    }
+    /* cSize includes block header size and compressed sequences size */
+    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
+                                                           op, dstCapacity,
+                                                           inSeqs, inSeqsSize,
+                                                           src, srcSize);
+    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
+    cSize += compressedBlocksSize;
+    dstCapacity -= compressedBlocksSize;
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32((char*)dst + cSize, checksum);
+        cSize += 4;
+    }
+
+    DEBUGLOG(3, "Final compressed size: %zu", cSize);
+    return cSize;
+}
+
+/*======   Finalize   ======*/
+
+/*! ZSTD_flushStream() :
+ * @return : amount of data remaining to flush */
+size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
+}
+
+
+size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
+    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
+    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
+    /* single thread mode : attempt to calculate remaining to flush more precisely */
+    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
+        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
+        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
+        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
+        return toFlush;
+    }
+}
+
+
+/*-=====  Pre-defined compression levels  =====-*/
+
+#define ZSTD_MAX_CLEVEL     22
+int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
+int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" - for any srcSize > 256 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
+    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
+    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
+    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
+    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
+    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
+    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
+    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
+    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
+    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
+    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
+    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
+    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
+    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
+    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
+    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
+    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
+    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
+    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
+},
+{   /* for srcSize <= 256 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
+    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
+    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
+    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
+    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
+    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
+    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
+    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 128 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
+    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
+    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
+    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
+    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
+    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
+    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
+    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
+    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 16 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
+    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
+    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
+    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
+    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
+    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
+    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
+    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
+    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+};
+
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
+    switch (cParams.strategy) {
+        case ZSTD_fast:
+        case ZSTD_dfast:
+            break;
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
+            break;
+        case ZSTD_btlazy2:
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            break;
+    }
+    return cParams;
+}
+
+static int ZSTD_dedicatedDictSearch_isSupported(
+        ZSTD_compressionParameters const* cParams)
+{
+    return (cParams->strategy >= ZSTD_greedy)
+        && (cParams->strategy <= ZSTD_lazy2)
+        && (cParams->hashLog >= cParams->chainLog)
+        && (cParams->chainLog <= 24);
+}
+
+/*
+ * Reverses the adjustment applied to cparams when enabling dedicated dict
+ * search. This is used to recover the params set to be used in the working
+ * context. (Otherwise, those tables would also grow.)
+ */
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams) {
+    switch (cParams->strategy) {
+        case ZSTD_fast:
+        case ZSTD_dfast:
+            break;
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+            break;
+        case ZSTD_btlazy2:
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            break;
+    }
+}
+
+static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+    case ZSTD_cpm_createCDict:
+        break;
+    case ZSTD_cpm_attachDict:
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
+        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
+        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
+    }
+}
+
+/*! ZSTD_getCParams_internal() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
+ *        Use dictSize == 0 for unknown or unused.
+ *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
+    int row;
+    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
+
+    /* row */
+    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
+    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
+    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+    else row = compressionLevel;
+
+    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        /* acceleration factor */
+        if (compressionLevel < 0) {
+            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
+            cp.targetLength = (unsigned)(-clampedCompressionLevel);
+        }
+        /* refine parameters based on srcSize & dictSize */
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
+    }
+}
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Size values are optional, provide 0 if not known or unused */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
+    ZSTD_parameters params;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
+    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
+    ZSTD_memset(&params, 0, sizeof(params));
+    params.cParams = cParams;
+    params.fParams.contentSizeFlag = 1;
+    return params;
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h
new file mode 100644
index 000000000000..685d2f996cc2
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_internal.h
@@ -0,0 +1,1188 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This header contains definitions
+ * that shall **only** be used by modules within lib/compress.
+ */
+
+#ifndef ZSTD_COMPRESS_H
+#define ZSTD_COMPRESS_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+#include "zstd_cwksp.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define kSearchStrength      8
+#define HASH_READ_SIZE       8
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though : candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
+typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
+
+typedef struct ZSTD_prefixDict_s {
+    const void* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+} ZSTD_prefixDict;
+
+typedef struct {
+    void* dictBuffer;
+    void const* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+    ZSTD_CDict* cdict;
+} ZSTD_localDict;
+
+typedef struct {
+    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
+    HUF_repeat repeatMode;
+} ZSTD_hufCTables_t;
+
+typedef struct {
+    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    FSE_repeat offcode_repeatMode;
+    FSE_repeat matchlength_repeatMode;
+    FSE_repeat litlength_repeatMode;
+} ZSTD_fseCTables_t;
+
+typedef struct {
+    ZSTD_hufCTables_t huf;
+    ZSTD_fseCTables_t fse;
+} ZSTD_entropyCTables_t;
+
+typedef struct {
+    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    U32 len;            /* Raw length of match */
+} ZSTD_match_t;
+
+typedef struct {
+    U32 offset;         /* Offset of sequence */
+    U32 litLength;      /* Length of literals prior to match */
+    U32 matchLength;    /* Raw length of match */
+} rawSeq;
+
+typedef struct {
+  rawSeq* seq;          /* The start of the sequences */
+  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
+  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
+                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
+  size_t size;          /* The number of sequences. <= capacity. */
+  size_t capacity;      /* The capacity starting from `seq` pointer */
+} rawSeqStore_t;
+
+UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
+
+typedef struct {
+    int price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_optimal_t;
+
+typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
+
+typedef struct {
+    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
+    unsigned* litFreq;           /* table of literals statistics, of size 256 */
+    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
+    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
+    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+
+    U32  litSum;                 /* nb of literals */
+    U32  litLengthSum;           /* nb of litLength codes */
+    U32  matchLengthSum;         /* nb of matchLength codes */
+    U32  offCodeSum;             /* nb of offset codes */
+    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
+    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
+    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
+    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
+    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
+    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+} optState_t;
+
+typedef struct {
+  ZSTD_entropyCTables_t entropy;
+  U32 rep[ZSTD_REP_NUM];
+} ZSTD_compressedBlockState_t;
+
+typedef struct {
+    BYTE const* nextSrc;    /* next block here to continue on current prefix */
+    BYTE const* base;       /* All regular indexes relative to this position */
+    BYTE const* dictBase;   /* extDict indexes relative to this position */
+    U32 dictLimit;          /* below that point, need extDict */
+    U32 lowLimit;           /* below that point, no more valid data */
+} ZSTD_window_t;
+
+typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+struct ZSTD_matchState_t {
+    ZSTD_window_t window;   /* State for window round buffer management */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
+                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+                             * When dict referential is copied into active context (i.e. not attached),
+                             * loadedDictEnd == dictSize, since referential starts from zero.
+                             */
+    U32 nextToUpdate;       /* index from which to continue table update */
+    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+    U32* hashTable;
+    U32* hashTable3;
+    U32* chainTable;
+    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
+                               * dedicated dictionary search structure.
+                               */
+    optState_t opt;         /* optimal parser state */
+    const ZSTD_matchState_t* dictMatchState;
+    ZSTD_compressionParameters cParams;
+    const rawSeqStore_t* ldmSeqStore;
+};
+
+typedef struct {
+    ZSTD_compressedBlockState_t* prevCBlock;
+    ZSTD_compressedBlockState_t* nextCBlock;
+    ZSTD_matchState_t matchState;
+} ZSTD_blockState_t;
+
+typedef struct {
+    U32 offset;
+    U32 checksum;
+} ldmEntry_t;
+
+typedef struct {
+    BYTE const* split;
+    U32 hash;
+    U32 checksum;
+    ldmEntry_t* bucket;
+} ldmMatchCandidate_t;
+
+#define LDM_BATCH_SIZE 64
+
+typedef struct {
+    ZSTD_window_t window;   /* State for the window round buffer management */
+    ldmEntry_t* hashTable;
+    U32 loadedDictEnd;
+    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
+    size_t splitIndices[LDM_BATCH_SIZE];
+    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
+} ldmState_t;
+
+typedef struct {
+    U32 enableLdm;          /* 1 if enable long distance matching */
+    U32 hashLog;            /* Log size of hashTable */
+    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
+    U32 minMatchLength;     /* Minimum match length */
+    U32 hashRateLog;       /* Log number of entries to skip */
+    U32 windowLog;          /* Window log for the LDM */
+} ldmParams_t;
+
+typedef struct {
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
+struct ZSTD_CCtx_params_s {
+    ZSTD_format_e format;
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+
+    int compressionLevel;
+    int forceWindow;           /* force back-references to respect limit of
+                                * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */
+
+    ZSTD_dictAttachPref_e attachDictPref;
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+
+    /* Multithreading: used to pass parameters to mtctx */
+    int nbWorkers;
+    size_t jobSize;
+    int overlapLog;
+    int rsyncable;
+
+    /* Long distance matching parameters */
+    ldmParams_t ldmParams;
+
+    /* Dedicated dict search algorithm trigger */
+    int enableDedicatedDictSearch;
+
+    /* Input/output buffer modes */
+    ZSTD_bufferMode_e inBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
+
+    /* Sequence compression API */
+    ZSTD_sequenceFormat_e blockDelimiters;
+    int validateSequences;
+
+    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
+    ZSTD_customMem customMem;
+};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
+
+#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
+#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
+
+/*
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+    ZSTDb_not_buffered,
+    ZSTDb_buffered
+} ZSTD_buffered_policy_e;
+
+struct ZSTD_CCtx_s {
+    ZSTD_compressionStage_e stage;
+    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
+    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+    ZSTD_CCtx_params requestedParams;
+    ZSTD_CCtx_params appliedParams;
+    U32   dictID;
+    size_t dictContentSize;
+
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
+    size_t blockSize;
+    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
+    unsigned long long consumedSrcSize;
+    unsigned long long producedCSize;
+    struct xxh64_state xxhState;
+    ZSTD_customMem customMem;
+    ZSTD_threadPool* pool;
+    size_t staticSize;
+    SeqCollector seqCollector;
+    int isFirstBlock;
+    int initialized;
+
+    seqStore_t seqStore;      /* sequences storage ptrs */
+    ldmState_t ldmState;      /* long distance matching state */
+    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
+    size_t maxNbLdmSequences;
+    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
+    ZSTD_blockState_t blockState;
+    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
+
+    /* Wether we are streaming or not */
+    ZSTD_buffered_policy_e bufferedPolicy;
+
+    /* streaming */
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inToCompress;
+    size_t inBuffPos;
+    size_t inBuffTarget;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outBuffContentSize;
+    size_t outBuffFlushedSize;
+    ZSTD_cStreamStage streamStage;
+    U32    frameEnded;
+
+    /* Stable in/out buffer verification */
+    ZSTD_inBuffer expectedInBuffer;
+    size_t expectedOutBufferSize;
+
+    /* Dictionary */
+    ZSTD_localDict localDict;
+    const ZSTD_CDict* cdict;
+    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
+
+    /* Multi-threading */
+
+    /* Tracing */
+};
+
+typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
+
+typedef enum {
+    ZSTD_noDict = 0,
+    ZSTD_extDict = 1,
+    ZSTD_dictMatchState = 2,
+    ZSTD_dedicatedDictSearch = 3
+} ZSTD_dictMode_e;
+
+typedef enum {
+    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
+                                 * In this mode we use both the srcSize and the dictSize
+                                 * when selecting and adjusting parameters.
+                                 */
+    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
+                                 * In this mode we only take the srcSize into account when selecting
+                                 * and adjusting parameters.
+                                 */
+    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
+                                 * In this mode we take both the source size and the dictionary size
+                                 * into account when selecting and adjusting the parameters.
+                                 */
+    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
+                                 * We don't know what these parameters are for. We default to the legacy
+                                 * behavior of taking both the source size and the dict size into account
+                                 * when selecting and adjusting parameters.
+                                 */
+} ZSTD_cParamMode_e;
+
+typedef size_t (*ZSTD_blockCompressor) (
+        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+
+
+MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
+{
+    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
+                                       8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 16, 17, 17, 18, 18, 19, 19,
+                                      20, 20, 20, 20, 21, 21, 21, 21,
+                                      22, 22, 22, 22, 22, 22, 22, 22,
+                                      23, 23, 23, 23, 23, 23, 23, 23,
+                                      24, 24, 24, 24, 24, 24, 24, 24,
+                                      24, 24, 24, 24, 24, 24, 24, 24 };
+    static const U32 LL_deltaCode = 19;
+    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
+}
+
+/* ZSTD_MLcode() :
+ * note : mlBase = matchLength - MINMATCH;
+ *        because it's the format it's stored in seqStore->sequences */
+MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
+{
+    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+    static const U32 ML_deltaCode = 36;
+    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
+}
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+    repcodes_t newReps;
+    if (offset >= ZSTD_REP_NUM) {  /* full offset */
+        newReps.rep[2] = rep[1];
+        newReps.rep[1] = rep[0];
+        newReps.rep[0] = offset - ZSTD_REP_MOVE;
+    } else {   /* repcode */
+        U32 const repCode = offset + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            newReps.rep[1] = rep[0];
+            newReps.rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+        }
+    }
+    return newReps;
+}
+
+/* ZSTD_cParam_withinBounds:
+ * @return 1 if value is within cParam bounds,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+{
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+/* ZSTD_noCompressBlock() :
+ * Writes uncompressed block to dst buffer from given src.
+ * Returns the size of the block */
+MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+{
+    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+                    dstSize_tooSmall, "dst buf too small for uncompressed block");
+    MEM_writeLE24(dst, cBlockHeader24);
+    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+    return ZSTD_blockHeaderSize + srcSize;
+}
+
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+    BYTE* const op = (BYTE*)dst;
+    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+    MEM_writeLE24(op, cBlockHeader);
+    op[3] = src;
+    return 4;
+}
+
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    return (srcSize >> minlog) + 2;
+}
+
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+    switch (cctxParams->literalCompressionMode) {
+    case ZSTD_lcm_huffman:
+        return 0;
+    case ZSTD_lcm_uncompressed:
+        return 1;
+    default:
+        assert(0 /* impossible: pre-validated */);
+        ZSTD_FALLTHROUGH;
+    case ZSTD_lcm_auto:
+        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+    }
+}
+
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
+/*! ZSTD_storeSeq() :
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
+*/
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+{
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+    static const BYTE* g_start = NULL;
+    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
+    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
+        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+    }
+#endif
+    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
+    /* copy Literals */
+    assert(seqStorePtr->maxNbLit <= 128 KB);
+    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case we can use wildcopy.
+	 * First copy 16 bytes, because literals are likely short.
+	 */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
+    seqStorePtr->lit += litLength;
+
+    /* literal Length */
+    if (litLength>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 1;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].litLength = (U16)litLength;
+
+    /* match offset */
+    seqStorePtr->sequences[0].offset = offCode + 1;
+
+    /* match Length */
+    if (mlBase>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 2;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
+
+    seqStorePtr->sequences++;
+}
+
+
+/*-*************************************
+*  Match length counter
+***************************************/
+static unsigned ZSTD_NbCommonBytes (size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+#       if (__GNUC__ >= 4)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        } else { /* 32 bits */
+#       if (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    } else {  /* Big Endian CPU */
+        if (MEM_64bits()) {
+#       if (__GNUC__ >= 4)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        } else { /* 32 bits */
+#       if (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+    }   }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+{
+    const BYTE* const pStart = pIn;
+    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
+
+    if (pIn < pInLoopLimit) {
+        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+          if (diff) return ZSTD_NbCommonBytes(diff); }
+        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
+        while (pIn < pInLoopLimit) {
+            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (size_t)(pIn - pStart);
+    }   }
+    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+/* ZSTD_count_2segments() :
+ *  can count match length with `ip` & `match` in 2 different segments.
+ *  convention : on reaching mEnd, match count continue starting from iStart
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+{
+    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
+    size_t const matchLength = ZSTD_count(ip, match, vEnd);
+    if (match + matchLength != mEnd) return matchLength;
+    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
+    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
+    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
+    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
+    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
+    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
+}
+
+
+/*-*************************************
+ *  Hashes
+ ***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+
+static const U32 prime4bytes = 2654435761U;
+static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+
+MEM_STATIC FORCE_INLINE_ATTR
+size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+{
+    switch(mls)
+    {
+    default:
+    case 4: return ZSTD_hash4Ptr(p, hBits);
+    case 5: return ZSTD_hash5Ptr(p, hBits);
+    case 6: return ZSTD_hash6Ptr(p, hBits);
+    case 7: return ZSTD_hash7Ptr(p, hBits);
+    case 8: return ZSTD_hash8Ptr(p, hBits);
+    }
+}
+
+/* ZSTD_ipow() :
+ * Return base^exponent.
+ */
+static U64 ZSTD_ipow(U64 base, U64 exponent)
+{
+    U64 power = 1;
+    while (exponent) {
+      if (exponent & 1) power *= base;
+      exponent >>= 1;
+      base *= base;
+    }
+    return power;
+}
+
+#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
+
+/* ZSTD_rollingHash_append() :
+ * Add the buffer to the hash value.
+ */
+static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
+{
+    BYTE const* istart = (BYTE const*)buf;
+    size_t pos;
+    for (pos = 0; pos < size; ++pos) {
+        hash *= prime8bytes;
+        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    }
+    return hash;
+}
+
+/* ZSTD_rollingHash_compute() :
+ * Compute the rolling hash value of the buffer.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
+{
+    return ZSTD_rollingHash_append(0, buf, size);
+}
+
+/* ZSTD_rollingHash_primePower() :
+ * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
+ * over a window of length bytes.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
+{
+    return ZSTD_ipow(prime8bytes, length - 1);
+}
+
+/* ZSTD_rollingHash_rotate() :
+ * Rotate the rolling hash by one byte.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
+{
+    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
+    hash *= prime8bytes;
+    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    return hash;
+}
+
+/*-*************************************
+*  Round buffer management
+***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
+/* Max current allowed */
+#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
+/* Maximum chunk size before overflow correction needs to be called again */
+#define ZSTD_CHUNKSIZE_MAX                                                     \
+    ( ((U32)-1)                  /* Maximum ending current index */            \
+    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
+
+/*
+ * ZSTD_window_clear():
+ * Clears the window containing the history by simply setting it to empty.
+ */
+MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
+{
+    size_t const endT = (size_t)(window->nextSrc - window->base);
+    U32 const end = (U32)endT;
+
+    window->lowLimit = end;
+    window->dictLimit = end;
+}
+
+/*
+ * ZSTD_window_hasExtDict():
+ * Returns non-zero if the window has a non-empty extDict.
+ */
+MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
+{
+    return window.lowLimit < window.dictLimit;
+}
+
+/*
+ * ZSTD_matchState_dictMode():
+ * Inspects the provided matchState and figures out what dictMode should be
+ * passed to the compressor.
+ */
+MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
+{
+    return ZSTD_window_hasExtDict(ms->window) ?
+        ZSTD_extDict :
+        ms->dictMatchState != NULL ?
+            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
+            ZSTD_noDict;
+}
+
+/*
+ * ZSTD_window_needOverflowCorrection():
+ * Returns non-zero if the indices are getting too large and need overflow
+ * protection.
+ */
+MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  void const* srcEnd)
+{
+    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    return curr > ZSTD_CURRENT_MAX;
+}
+
+/*
+ * ZSTD_window_correctOverflow():
+ * Reduces the indices to protect from index overflow.
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ * NOTE: (maxDist & cycleMask) must be zero.
+ */
+MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+                                           U32 maxDist, void const* src)
+{
+    /* preemptive overflow correction:
+     * 1. correction is large enough:
+     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
+     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+     *
+     *    current - newCurrent
+     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+     *    > (3<<29) - (1<<chainLog)
+     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
+     *    > 1<<29
+     *
+     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
+     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+     *    In 32-bit mode we are safe, because (chainLog <= 29), so
+     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+     */
+    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const curr = (U32)((BYTE const*)src - window->base);
+    U32 const currentCycle0 = curr & cycleMask;
+    /* Exclude zero so that newCurrent - maxDist >= 1. */
+    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
+    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const correction = curr - newCurrent;
+    assert((maxDist & cycleMask) == 0);
+    assert(curr > newCurrent);
+    /* Loose bound, should be around 1<<29 (see above) */
+    assert(correction > 1<<28);
+
+    window->base += correction;
+    window->dictBase += correction;
+    if (window->lowLimit <= correction) window->lowLimit = 1;
+    else window->lowLimit -= correction;
+    if (window->dictLimit <= correction) window->dictLimit = 1;
+    else window->dictLimit -= correction;
+
+    /* Ensure we can still reference the full window. */
+    assert(newCurrent >= maxDist);
+    assert(newCurrent - maxDist >= 1);
+    /* Ensure that lowLimit and dictLimit didn't underflow. */
+    assert(window->lowLimit <= newCurrent);
+    assert(window->dictLimit <= newCurrent);
+
+    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+             window->lowLimit);
+    return correction;
+}
+
+/*
+ * ZSTD_window_enforceMaxDist():
+ * Updates lowLimit so that:
+ *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ *
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
+ *
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
+ */
+MEM_STATIC void
+ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+                     const void* blockEnd,
+                           U32   maxDist,
+                           U32*  loadedDictEndPtr,
+                     const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
+    if (blockEndIdx > maxDist + loadedDictEnd) {
+        U32 const newLowLimit = blockEndIdx - maxDist;
+        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
+        if (window->dictLimit < window->lowLimit) {
+            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
+                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
+            window->dictLimit = window->lowLimit;
+        }
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+    }
+}
+
+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ *              loadedDictEnd uses same referential as window->base
+ *              maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+                       const void* blockEnd,
+                             U32   maxDist,
+                             U32*  loadedDictEndPtr,
+                       const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    assert(loadedDictEndPtr != NULL);
+    assert(dictMatchStatePtr != NULL);
+    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+        U32 const loadedDictEnd = *loadedDictEndPtr;
+        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+        assert(blockEndIdx >= loadedDictEnd);
+
+        if (blockEndIdx > loadedDictEnd + maxDist) {
+            /* On reaching window size, dictionaries are invalidated.
+             * For simplification, if window size is reached anywhere within next block,
+             * the dictionary is invalidated for the full block.
+             */
+            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+            *loadedDictEndPtr = 0;
+            *dictMatchStatePtr = NULL;
+        } else {
+            if (*loadedDictEndPtr != 0) {
+                DEBUGLOG(6, "dictionary considered valid for current block");
+    }   }   }
+}
+
+MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+    ZSTD_memset(window, 0, sizeof(*window));
+    window->base = (BYTE const*)"";
+    window->dictBase = (BYTE const*)"";
+    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
+    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
+    window->nextSrc = window->base + 1;   /* see issue #1241 */
+}
+
+/*
+ * ZSTD_window_update():
+ * Updates the window by appending [src, src + srcSize) to the window.
+ * If it is not contiguous, the current prefix becomes the extDict, and we
+ * forget about the extDict. Handles overlap of the prefix and extDict.
+ * Returns non-zero if the segment is contiguous.
+ */
+MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+                                  void const* src, size_t srcSize)
+{
+    BYTE const* const ip = (BYTE const*)src;
+    U32 contiguous = 1;
+    DEBUGLOG(5, "ZSTD_window_update");
+    if (srcSize == 0)
+        return contiguous;
+    assert(window->base != NULL);
+    assert(window->dictBase != NULL);
+    /* Check if blocks follow each other */
+    if (src != window->nextSrc) {
+        /* not contiguous */
+        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
+        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
+        window->lowLimit = window->dictLimit;
+        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
+        window->dictLimit = (U32)distanceFromBase;
+        window->dictBase = window->base;
+        window->base = ip - distanceFromBase;
+        /* ms->nextToUpdate = window->dictLimit; */
+        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
+        contiguous = 0;
+    }
+    window->nextSrc = ip + srcSize;
+    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+    if ( (ip+srcSize > window->dictBase + window->lowLimit)
+       & (ip < window->dictBase + window->dictLimit)) {
+        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
+        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
+        window->lowLimit = lowLimitMax;
+        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
+    }
+    return contiguous;
+}
+
+/*
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32    const maxDistance = 1U << windowLog;
+    U32    const lowestValid = ms->window.lowLimit;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
+     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
+     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
+     */
+    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
+
+/*
+ * Returns the lowest allowed match index in the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32    const maxDistance = 1U << windowLog;
+    U32    const lowestValid = ms->window.dictLimit;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When computing the lowest prefix index we need to take the dictionary into account to handle
+     * the edge case where the dictionary and the source are contiguous in memory.
+     */
+    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
+
+
+
+/* debug functions */
+#if (DEBUGLEVEL>=2)
+
+MEM_STATIC double ZSTD_fWeight(U32 rawStat)
+{
+    U32 const fp_accuracy = 8;
+    U32 const fp_multiplier = (1 << fp_accuracy);
+    U32 const newStat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(newStat);
+    U32 const BWeight = hb * fp_multiplier;
+    U32 const FWeight = (newStat << fp_accuracy) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + fp_accuracy < 31);
+    return (double)weight / fp_multiplier;
+}
+
+/* display a table content,
+ * listing each element, its frequency, and its predicted bit cost */
+MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
+{
+    unsigned u, sum;
+    for (u=0, sum=0; u<=max; u++) sum += table[u];
+    DEBUGLOG(2, "total nb elts: %u", sum);
+    for (u=0; u<=max; u++) {
+        DEBUGLOG(2, "%2u: %5u  (%.2f)",
+                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
+    }
+}
+
+#endif
+
+
+
+/* ===============================================================
+ * Shared internal declarations
+ * These prototypes may be called from sources not in lib/compress
+ * =============================================================== */
+
+/* ZSTD_loadCEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
+ * assumptions : magic number supposed already checked
+ *               and dictSize >= 8 */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize);
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
+
+/* ==============================================================
+ * Private declarations
+ * These prototypes shall only be called from within lib/compress
+ * ============================================================== */
+
+/* ZSTD_getCParamsFromCCtxParams() :
+ * cParams are built depending on compressionLevel, src size hints,
+ * LDM and manually set compression parameters.
+ * Note: srcSizeHint == 0 means 0!
+ */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+
+/*! ZSTD_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                     const ZSTD_CDict* cdict,
+                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr);
+
+/*! ZSTD_getCParamsFromCDict() :
+ *  as the name implies */
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
+
+/* ZSTD_compressBegin_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize);
+
+/* ZSTD_compress_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                 const void* dict,size_t dictSize,
+                                 const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/* ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+/* ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
+#endif /* ZSTD_COMPRESS_H */
diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c
new file mode 100644
index 000000000000..655bcda4d1f1
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_literals.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_literals.h"
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream = srcSize < 256;
+    symbolEncodingType_e hType = set_compressed;
+    size_t cLitSize;
+
+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
+                disableLiteralCompression, (U32)srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralCompression)
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+
+    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
+    {   HUF_repeat repeat = prevHuf->repeatMode;
+        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        if (repeat != HUF_repeat_none) {
+            /* reused the existing table */
+            DEBUGLOG(5, "Reusing previous huffman table");
+            hType = set_repeat;
+        }
+    }
+
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+    if (cLitSize==1) {
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
+
+    if (hType == set_compressed) {
+        /* using a newly constructed table */
+        nextHuf->repeatMode = HUF_repeat_check;
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
+    return lhSize+cLitSize;
+}
diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h
new file mode 100644
index 000000000000..9904c0cd30a0
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_literals.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_LITERALS_H
+#define ZSTD_COMPRESS_LITERALS_H
+
+#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c
new file mode 100644
index 000000000000..dcfcdc9cc5e8
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_sequences.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_sequences.h"
+
+/*
+ * -log2(x / 256) lookup table for x in [0, 256).
+ * If x == 0: Return 0
+ * Else: Return floor(-log2(x / 256) * 256)
+ */
+static unsigned const kInverseProbabilityLog256[256] = {
+    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
+    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
+    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
+    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
+    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
+    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
+    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
+    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
+    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
+    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
+    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
+    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
+    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
+    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
+    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
+    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
+    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
+    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
+    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
+    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
+    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
+    5,    4,    2,    1,
+};
+
+static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
+  void const* ptr = ctable;
+  U16 const* u16ptr = (U16 const*)ptr;
+  U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
+  return maxSymbolValue;
+}
+
+/*
+ * Returns true if we should use ncount=-1 else we should
+ * use ncount=1 for low probability symbols instead.
+ */
+static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
+{
+    /* Heuristic: This should cover most blocks <= 16K and
+     * start to fade out after 16K to about 32K depending on
+     * comprssibility.
+     */
+    return nbSeq >= 2048;
+}
+
+/*
+ * Returns the cost in bytes of encoding the normalized count header.
+ * Returns an error if any of the helper functions return an error.
+ */
+static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
+                              size_t const nbSeq, unsigned const FSELog)
+{
+    BYTE wksp[FSE_NCOUNTBOUND];
+    S16 norm[MaxSeq + 1];
+    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
+    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution described by count
+ * using the entropy bound.
+ */
+static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
+{
+    unsigned cost = 0;
+    unsigned s;
+    for (s = 0; s <= max; ++s) {
+        unsigned norm = (unsigned)((256 * count[s]) / total);
+        if (count[s] != 0 && norm == 0)
+            norm = 1;
+        assert(count[s] < total);
+        cost += count[s] * kInverseProbabilityLog256[norm];
+    }
+    return cost >> 8;
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using ctable.
+ * Returns an error if ctable cannot represent all the symbols in count.
+ */
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max)
+{
+    unsigned const kAccuracyLog = 8;
+    size_t cost = 0;
+    unsigned s;
+    FSE_CState_t cstate;
+    FSE_initCState(&cstate, ctable);
+    if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
+        DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
+                    ZSTD_getFSEMaxSymbolValue(ctable), max);
+        return ERROR(GENERIC);
+    }
+    for (s = 0; s <= max; ++s) {
+        unsigned const tableLog = cstate.stateLog;
+        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
+        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
+        if (count[s] == 0)
+            continue;
+        if (bitCost >= badCost) {
+            DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
+            return ERROR(GENERIC);
+        }
+        cost += (size_t)count[s] * bitCost;
+    }
+    return cost >> kAccuracyLog;
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using the
+ * table described by norm. The max symbol support by norm is assumed >= max.
+ * norm must be valid for every symbol with non-zero probability in count.
+ */
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max)
+{
+    unsigned const shift = 8 - accuracyLog;
+    size_t cost = 0;
+    unsigned s;
+    assert(accuracyLog <= 8);
+    for (s = 0; s <= max; ++s) {
+        unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
+        unsigned const norm256 = normAcc << shift;
+        assert(norm256 > 0);
+        assert(norm256 < 256);
+        cost += count[s] * kInverseProbabilityLog256[norm256];
+    }
+    return cost >> 8;
+}
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy)
+{
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+    if (mostFrequent == nbSeq) {
+        *repeatMode = FSE_repeat_none;
+        if (isDefaultAllowed && nbSeq <= 2) {
+            /* Prefer set_basic over set_rle when there are 2 or less symbols,
+             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+             * If basic encoding isn't possible, always choose RLE.
+             */
+            DEBUGLOG(5, "Selected set_basic");
+            return set_basic;
+        }
+        DEBUGLOG(5, "Selected set_rle");
+        return set_rle;
+    }
+    if (strategy < ZSTD_lazy) {
+        if (isDefaultAllowed) {
+            size_t const staticFse_nbSeq_max = 1000;
+            size_t const mult = 10 - strategy;
+            size_t const baseLog = 3;
+            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
+            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
+            assert(mult <= 9 && mult >= 7);
+            if ( (*repeatMode == FSE_repeat_valid)
+              && (nbSeq < staticFse_nbSeq_max) ) {
+                DEBUGLOG(5, "Selected set_repeat");
+                return set_repeat;
+            }
+            if ( (nbSeq < dynamicFse_nbSeq_min)
+              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
+                DEBUGLOG(5, "Selected set_basic");
+                /* The format allows default tables to be repeated, but it isn't useful.
+                 * When using simple heuristics to select encoding type, we don't want
+                 * to confuse these tables with dictionaries. When running more careful
+                 * analysis, we don't need to waste time checking both repeating tables
+                 * and default tables.
+                 */
+                *repeatMode = FSE_repeat_none;
+                return set_basic;
+            }
+        }
+    } else {
+        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
+        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
+        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
+        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
+
+        if (isDefaultAllowed) {
+            assert(!ZSTD_isError(basicCost));
+            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
+        }
+        assert(!ZSTD_isError(NCountCost));
+        assert(compressedCost < ERROR(maxCode));
+        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
+                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
+        if (basicCost <= repeatCost && basicCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_basic");
+            assert(isDefaultAllowed);
+            *repeatMode = FSE_repeat_none;
+            return set_basic;
+        }
+        if (repeatCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_repeat");
+            assert(!ZSTD_isError(repeatCost));
+            return set_repeat;
+        }
+        assert(compressedCost < basicCost && compressedCost < repeatCost);
+    }
+    DEBUGLOG(5, "Selected set_compressed");
+    *repeatMode = FSE_repeat_check;
+    return set_compressed;
+}
+
+typedef struct {
+    S16 norm[MaxSeq + 1];
+    U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
+} ZSTD_BuildCTableWksp;
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    const BYTE* const oend = op + dstCapacity;
+    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
+
+    switch (type) {
+    case set_rle:
+        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
+        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
+        *op = codeTable[0];
+        return 1;
+    case set_repeat:
+        ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
+        return 0;
+    case set_basic:
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
+        return 0;
+    case set_compressed: {
+        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        if (count[codeTable[nbSeq-1]] > 1) {
+            count[codeTable[nbSeq-1]]--;
+            nbSeq_1--;
+        }
+        assert(nbSeq_1 > 1);
+        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+        (void)entropyWorkspaceSize;
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+            FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+            return NCountSize;
+        }
+    }
+    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        (unsigned)sequences[n].litLength,
+                        (unsigned)sequences[n].matchLength + MINMATCH,
+                        (unsigned)sequences[n].offset);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                    /* (7)*/
+            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+    }   }
+
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+        return streamSize;
+    }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+                                         CTable_MatchLength, mlCodeTable,
+                                         CTable_OffsetBits, ofCodeTable,
+                                         CTable_LitLength, llCodeTable,
+                                         sequences, nbSeq, longOffsets);
+    }
+#endif
+    (void)bmi2;
+    return ZSTD_encodeSequences_default(dst, dstCapacity,
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq, longOffsets);
+}
diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h
new file mode 100644
index 000000000000..7991364c2f71
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_sequences.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_SEQUENCES_H
+#define ZSTD_COMPRESS_SEQUENCES_H
+
+#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
+#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
+
+typedef enum {
+    ZSTD_defaultDisallowed = 0,
+    ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy);
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize);
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
+
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max);
+
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max);
+#endif /* ZSTD_COMPRESS_SEQUENCES_H */
diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c
new file mode 100644
index 000000000000..ee03e0aedb03
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_superblock.c
@@ -0,0 +1,850 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_superblock.h"
+
+#include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
+#include "hist.h"                     /* HIST_countFast_wksp */
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+
+/*-*************************************
+*  Superblock entropy buffer structs
+***************************************/
+/* ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
+typedef struct {
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/* ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+
+/* ZSTD_buildSuperBlockEntropy_literal() :
+ *  Builds entropy for the super-block literals.
+ *  Stores literals block type (raw, rle, compressed, repeat) and
+ *  huffman description table to hufMetadata.
+ *  @return : size of huffman description table or error code */
+static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                                  ZSTD_hufCTables_t* nextHuf,
+                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int disableLiteralsCompression,
+                                                  void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = 255;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Scan input and build symbol stats */
+    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+        if (largest == srcSize) {
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Validate the previous Huffman table */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
+    /* Build Huffman Tree */
+    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                    maxSymbolValue, huffLog,
+                                                    nodeWksp, nodeWkspSize);
+        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+        huffLog = (U32)maxBits;
+        {   /* Build and write the CTable */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            size_t const hSize = HUF_writeCTable_wksp(
+                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+                    nodeWksp, nodeWkspSize);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+            hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
+            return hSize;
+        }
+    }
+}
+
+/* ZSTD_buildSuperBlockEntropy_sequences() :
+ *  Builds entropy for the super-block sequences.
+ *  Stores symbol compression modes and fse table to fseMetadata.
+ *  @return : size of fse tables or error code */
+static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
+                                              const ZSTD_fseCTables_t* prevEntropy,
+                                                    ZSTD_fseCTables_t* nextEntropy,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                    void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
+    BYTE* const cTableWksp = countWkspStart + countWkspSize;
+    const size_t cTableWkspSize = wkspEnd-cTableWksp;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* const ostart = fseMetadata->fseTablesBuffer;
+    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+    BYTE* op = ostart;
+
+    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
+    ZSTD_memset(workspace, 0, wkspSize);
+
+    fseMetadata->lastCountSize = 0;
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   U32 LLtype;
+        unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->llType = (symbolEncodingType_e) LLtype;
+    }   }
+    /* build CTable for Offsets */
+    {   U32 Offtype;
+        unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
+    }   }
+    /* build CTable for MatchLengths */
+    {   U32 MLtype;
+        unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
+    }   }
+    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
+    return op-ostart;
+}
+
+
+/* ZSTD_buildSuperBlockEntropy() :
+ *  Builds entropy for the super-block.
+ *  @return : 0 on success or error code */
+static size_t
+ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
+                      const ZSTD_entropyCTables_t* prevEntropy,
+                            ZSTD_entropyCTables_t* nextEntropy,
+                      const ZSTD_CCtx_params* cctxParams,
+                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            void* workspace, size_t wkspSize)
+{
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
+    entropyMetadata->hufMetadata.hufDesSize =
+        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
+                                            &prevEntropy->huf, &nextEntropy->huf,
+                                            &entropyMetadata->hufMetadata,
+                                            ZSTD_disableLiteralsCompression(cctxParams),
+                                            workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
+    entropyMetadata->fseMetadata.fseTablesSize =
+        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                              cctxParams,
+                                              &entropyMetadata->fseMetadata,
+                                              workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
+    return 0;
+}
+
+/* ZSTD_compressSubBlock_literal() :
+ *  Compresses literals section for a sub-block.
+ *  When we have to write the Huffman table we will sometimes choose a header
+ *  size larger than necessary. This is because we have to pick the header size
+ *  before we know the table size + compressed size, so we have a bound on the
+ *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
+ *
+ *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
+ *  in writing the header, otherwise it is set to 0.
+ *
+ *  hufMetadata->hType has literals block type info.
+ *      If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
+ *      If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
+ *      If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
+ *      If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
+ *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
+ *  @return : compressed size of literals section of a sub-block
+ *            Or 0 if it unable to compress.
+ *            Or error code */
+static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                    const BYTE* literals, size_t litSize,
+                                    void* dst, size_t dstSize,
+                                    const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    size_t const header = writeEntropy ? 200 : 0;
+    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart + lhSize;
+    U32 const singleStream = lhSize == 3;
+    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+    size_t cLitSize = 0;
+
+    (void)bmi2; /* TODO bmi2... */
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
+
+    *entropyWritten = 0;
+    if (litSize == 0 || hufMetadata->hType == set_basic) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
+      return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+    } else if (hufMetadata->hType == set_rle) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
+      return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
+    }
+
+    assert(litSize > 0);
+    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
+
+    if (writeEntropy && hufMetadata->hType == set_compressed) {
+        ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
+        op += hufMetadata->hufDesSize;
+        cLitSize += hufMetadata->hufDesSize;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
+    }
+
+    /* TODO bmi2 */
+    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
+                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
+        op += cSize;
+        cLitSize += cSize;
+        if (cSize == 0 || ERR_isError(cSize)) {
+            DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
+            return 0;
+        }
+        /* If we expand and we aren't writing a header then emit uncompressed */
+        if (!writeEntropy && cLitSize >= litSize) {
+            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        /* If we are writing headers then allow expansion that doesn't change our header size. */
+        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
+            assert(cLitSize > litSize);
+            DEBUGLOG(5, "Literals expanded beyond allowed header size");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    *entropyWritten = 1;
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
+    return op-ostart;
+}
+
+static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
+    const seqDef* const sstart = sequences;
+    const seqDef* const send = sequences + nbSeq;
+    const seqDef* sp = sstart;
+    size_t matchLengthSum = 0;
+    size_t litLengthSum = 0;
+    while (send-sp > 0) {
+        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+        litLengthSum += seqLen.litLength;
+        matchLengthSum += seqLen.matchLength;
+        sp++;
+    }
+    assert(litLengthSum <= litSize);
+    if (!lastSequence) {
+        assert(litLengthSum == litSize);
+    }
+    return matchLengthSum + litSize;
+}
+
+/* ZSTD_compressSubBlock_sequences() :
+ *  Compresses sequences section for a sub-block.
+ *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
+ *  symbol compression modes for the super-block.
+ *  The first successfully compressed block will have these in its header.
+ *  We set entropyWritten=1 when we succeed in compressing the sequences.
+ *  The following sub-blocks will always have repeat mode.
+ *  @return : compressed size of sequences section of a sub-block
+ *            Or 0 if it is unable to compress
+ *            Or error code. */
+static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
+                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                              const seqDef* sequences, size_t nbSeq,
+                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                              void* dst, size_t dstCapacity,
+                                              const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    BYTE* seqHead;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
+
+    *entropyWritten = 0;
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "");
+    if (nbSeq < 0x7F)
+        *op++ = (BYTE)nbSeq;
+    else if (nbSeq < LONGNBSEQ)
+        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+    else
+        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    if (nbSeq==0) {
+        return op - ostart;
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
+
+    if (writeEntropy) {
+        const U32 LLtype = fseMetadata->llType;
+        const U32 Offtype = fseMetadata->ofType;
+        const U32 MLtype = fseMetadata->mlType;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
+        *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+        ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
+        op += fseMetadata->fseTablesSize;
+    } else {
+        const U32 repeat = set_repeat;
+        *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
+    }
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, oend - op,
+                                        fseTables->matchlengthCTable, mlCode,
+                                        fseTables->offcodeCTable, ofCode,
+                                        fseTables->litlengthCTable, llCode,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+        if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(fseMetadata->lastCountSize + bitstreamSize == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+#endif
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
+    }
+
+    /* zstd versions <= 1.4.0 mistakenly report error when
+     * sequences section body size is less than 3 bytes.
+     * Fixed by https://github.com/facebook/zstd/pull/1664.
+     * This can happen when the previous sequences section block is compressed
+     * with rle mode and the current block's sequences section is compressed
+     * with repeat mode where sequences section body size can be 1 byte.
+     */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (op-seqHead < 4) {
+        DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
+                    "an uncompressed block when sequences are < 4 bytes");
+        return 0;
+    }
+#endif
+
+    *entropyWritten = 1;
+    return op - ostart;
+}
+
+/* ZSTD_compressSubBlock() :
+ *  Compresses a single sub-block.
+ *  @return : compressed size of the sub-block
+ *            Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
+                                    const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                    const seqDef* sequences, size_t nbSeq,
+                                    const BYTE* literals, size_t litSize,
+                                    const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                    const ZSTD_CCtx_params* cctxParams,
+                                    void* dst, size_t dstCapacity,
+                                    const int bmi2,
+                                    int writeLitEntropy, int writeSeqEntropy,
+                                    int* litEntropyWritten, int* seqEntropyWritten,
+                                    U32 lastBlock)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart + ZSTD_blockHeaderSize;
+    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
+                litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
+    {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
+                                                        &entropyMetadata->hufMetadata, literals, litSize,
+                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+        FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
+        if (cLitSize == 0) return 0;
+        op += cLitSize;
+    }
+    {   size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
+                                                  &entropyMetadata->fseMetadata,
+                                                  sequences, nbSeq,
+                                                  llCode, mlCode, ofCode,
+                                                  cctxParams,
+                                                  op, oend-op,
+                                                  bmi2, writeSeqEntropy, seqEntropyWritten);
+        FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
+        if (cSeqSize == 0) return 0;
+        op += cSeqSize;
+    }
+    /* Write block header */
+    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+        U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+        MEM_writeLE24(ostart, cBlockHeader24);
+    }
+    return op-ostart;
+}
+
+static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
+                                                const ZSTD_hufCTables_t* huf,
+                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                void* workspace, size_t wkspSize,
+                                                int writeEntropy)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    unsigned maxSymbolValue = 255;
+    size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+
+    if (hufMetadata->hType == set_basic) return litSize;
+    else if (hufMetadata->hType == set_rle) return 1;
+    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+        if (ZSTD_isError(largest)) return litSize;
+        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+            return cLitSizeEstimate + literalSectionHeaderSize;
+    }   }
+    assert(0); /* impossible */
+    return 0;
+}
+
+static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
+                        const BYTE* codeTable, unsigned maxCode,
+                        size_t nbSeq, const FSE_CTable* fseCTable,
+                        const U32* additionalBits,
+                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                        void* workspace, size_t wkspSize)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    const BYTE* ctp = codeTable;
+    const BYTE* const ctStart = ctp;
+    const BYTE* const ctEnd = ctStart + nbSeq;
+    size_t cSymbolTypeSizeEstimateInBits = 0;
+    unsigned max = maxCode;
+
+    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+    if (type == set_basic) {
+        /* We selected this encoding type, so it must be valid. */
+        assert(max <= defaultMax);
+        cSymbolTypeSizeEstimateInBits = max <= defaultMax
+                ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
+                : ERROR(GENERIC);
+    } else if (type == set_rle) {
+        cSymbolTypeSizeEstimateInBits = 0;
+    } else if (type == set_compressed || type == set_repeat) {
+        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+    }
+    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
+    while (ctp < ctEnd) {
+        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+        ctp++;
+    }
+    return cSymbolTypeSizeEstimateInBits / 8;
+}
+
+static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+    size_t cSeqSizeEstimate = 0;
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
+                                         nbSeq, fseTables->offcodeCTable, NULL,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
+                                         nbSeq, fseTables->litlengthCTable, LL_bits,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
+                                         nbSeq, fseTables->matchlengthCTable, ML_bits,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                         workspace, wkspSize);
+    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+    return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+                                        const BYTE* ofCodeTable,
+                                        const BYTE* llCodeTable,
+                                        const BYTE* mlCodeTable,
+                                        size_t nbSeq,
+                                        const ZSTD_entropyCTables_t* entropy,
+                                        const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                        void* workspace, size_t wkspSize,
+                                        int writeLitEntropy, int writeSeqEntropy) {
+    size_t cSizeEstimate = 0;
+    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
+                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+                                                         workspace, wkspSize, writeLitEntropy);
+    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                         workspace, wkspSize, writeSeqEntropy);
+    return cSizeEstimate + ZSTD_blockHeaderSize;
+}
+
+static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+{
+    if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
+        return 1;
+    if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
+        return 1;
+    if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
+        return 1;
+    return 0;
+}
+
+/* ZSTD_compressSubBlock_multi() :
+ *  Breaks super-block into multiple sub-blocks and compresses them.
+ *  Entropy will be written to the first block.
+ *  The following blocks will use repeat mode to compress.
+ *  All sub-blocks are compressed blocks (no raw or rle blocks).
+ *  @return : compressed size of the super block (which is multiple ZSTD blocks)
+ *            Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+                            const ZSTD_compressedBlockState_t* prevCBlock,
+                            ZSTD_compressedBlockState_t* nextCBlock,
+                            const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            const ZSTD_CCtx_params* cctxParams,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const int bmi2, U32 lastBlock,
+                            void* workspace, size_t wkspSize)
+{
+    const seqDef* const sstart = seqStorePtr->sequencesStart;
+    const seqDef* const send = seqStorePtr->sequences;
+    const seqDef* sp = sstart;
+    const BYTE* const lstart = seqStorePtr->litStart;
+    const BYTE* const lend = seqStorePtr->lit;
+    const BYTE* lp = lstart;
+    BYTE const* ip = (BYTE const*)src;
+    BYTE const* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    const BYTE* llCodePtr = seqStorePtr->llCode;
+    const BYTE* mlCodePtr = seqStorePtr->mlCode;
+    const BYTE* ofCodePtr = seqStorePtr->ofCode;
+    size_t targetCBlockSize = cctxParams->targetCBlockSize;
+    size_t litSize, seqCount;
+    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+    int writeSeqEntropy = 1;
+    int lastSequence = 0;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
+                (unsigned)(lend-lp), (unsigned)(send-sstart));
+
+    litSize = 0;
+    seqCount = 0;
+    do {
+        size_t cBlockSizeEstimate = 0;
+        if (sstart == send) {
+            lastSequence = 1;
+        } else {
+            const seqDef* const sequence = sp + seqCount;
+            lastSequence = sequence == send - 1;
+            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
+            seqCount++;
+        }
+        if (lastSequence) {
+            assert(lp <= lend);
+            assert(litSize <= (size_t)(lend - lp));
+            litSize = (size_t)(lend - lp);
+        }
+        /* I think there is an optimization opportunity here.
+         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
+         * since it recalculates estimate from scratch.
+         * For example, it would recount literal distribution and symbol codes everytime.
+         */
+        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
+                                                       &nextCBlock->entropy, entropyMetadata,
+                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
+        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+            int litEntropyWritten = 0;
+            int seqEntropyWritten = 0;
+            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                                       sp, seqCount,
+                                                       lp, litSize,
+                                                       llCodePtr, mlCodePtr, ofCodePtr,
+                                                       cctxParams,
+                                                       op, oend-op,
+                                                       bmi2, writeLitEntropy, writeSeqEntropy,
+                                                       &litEntropyWritten, &seqEntropyWritten,
+                                                       lastBlock && lastSequence);
+            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+            if (cSize > 0 && cSize < decompressedSize) {
+                DEBUGLOG(5, "Committed the sub-block");
+                assert(ip + decompressedSize <= iend);
+                ip += decompressedSize;
+                sp += seqCount;
+                lp += litSize;
+                op += cSize;
+                llCodePtr += seqCount;
+                mlCodePtr += seqCount;
+                ofCodePtr += seqCount;
+                litSize = 0;
+                seqCount = 0;
+                /* Entropy only needs to be written once */
+                if (litEntropyWritten) {
+                    writeLitEntropy = 0;
+                }
+                if (seqEntropyWritten) {
+                    writeSeqEntropy = 0;
+                }
+            }
+        }
+    } while (!lastSequence);
+    if (writeLitEntropy) {
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+        ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
+    }
+    if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
+        /* If we haven't written our entropy tables, then we've violated our contract and
+         * must emit an uncompressed block.
+         */
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+        return 0;
+    }
+    if (ip < iend) {
+        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+        assert(cSize != 0);
+        op += cSize;
+        /* We have to regenerate the repcodes because we've skipped some sequences */
+        if (sp < send) {
+            seqDef const* seq;
+            repcodes_t rep;
+            ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
+            for (seq = sstart; seq < sp; ++seq) {
+                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+            }
+            ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
+        }
+    }
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
+    return op-ostart;
+}
+
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock) {
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+
+    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+          &zc->blockState.prevCBlock->entropy,
+          &zc->blockState.nextCBlock->entropy,
+          &zc->appliedParams,
+          &entropyMetadata,
+          zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+
+    return ZSTD_compressSubBlock_multi(&zc->seqStore,
+            zc->blockState.prevCBlock,
+            zc->blockState.nextCBlock,
+            &entropyMetadata,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            src, srcSize,
+            zc->bmi2, lastBlock,
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
+}
diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h
new file mode 100644
index 000000000000..224ece79546e
--- /dev/null
+++ b/lib/zstd/compress/zstd_compress_superblock.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_ADVANCED_H
+#define ZSTD_COMPRESS_ADVANCED_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+
+#include <linux/zstd.h> /* ZSTD_CCtx */
+
+/*-*************************************
+*  Target Compressed Block Size
+***************************************/
+
+/* ZSTD_compressSuperBlock() :
+ * Used to compress a super block when targetCBlockSize is being used.
+ * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock);
+
+#endif /* ZSTD_COMPRESS_ADVANCED_H */
diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h
new file mode 100644
index 000000000000..98e359adf5d4
--- /dev/null
+++ b/lib/zstd/compress/zstd_cwksp.h
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+*  Structures
+***************************************/
+typedef enum {
+    ZSTD_cwksp_alloc_objects,
+    ZSTD_cwksp_alloc_buffers,
+    ZSTD_cwksp_alloc_aligned
+} ZSTD_cwksp_alloc_phase_e;
+
+/*
+ * Used to describe whether the workspace is statically allocated (and will not
+ * necessarily ever be freed), or if it's dynamically allocated and we can
+ * expect a well-formed caller to free this.
+ */
+typedef enum {
+    ZSTD_cwksp_dynamic_alloc,
+    ZSTD_cwksp_static_alloc
+} ZSTD_cwksp_static_alloc_e;
+
+/*
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ *   - The static objects need to be cleared once and can then be trivially
+ *     reused for each compression.
+ *
+ *   - Various buffers don't need to be initialized at all--they are always
+ *     written into before they're read.
+ *
+ *   - The matchstate tables have a unique requirement that they don't need
+ *     their memory to be totally cleared, but they do need the memory to have
+ *     some bound, i.e., a guarantee that all values in the memory they've been
+ *     allocated is less than some maximum value (which is the starting value
+ *     for the indices that they will then use for compression). When this
+ *     guarantee is provided to them, they can use the memory without any setup
+ *     work. When it can't, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ *   compressions without having to perform any expensive reallocation or
+ *   reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ *   multiple compressions **even when the compression parameters change** and
+ *   we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [                        ... workspace ...                         ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ *   so that literally everything fits in a single buffer. Note: if present,
+ *   this must be the first object in the workspace, since ZSTD_customFree{CCtx,
+ *   CDict}() rely on a pointer comparison to see whether one or two frees are
+ *   required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ *   nonetheless "dynamically" allocated in the workspace so that we can
+ *   control how they're initialized separately from the broader ZSTD_CCtx.
+ *   Examples:
+ *   - Entropy Workspace
+ *   - 2 x ZSTD_compressedBlockState_t
+ *   - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ *   chain tables, binary trees) that all respect a common format: they are
+ *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ *   Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ *   alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ *   any alignment or initialization before they're used. This means they can
+ *   be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+    void* workspace;
+    void* workspaceEnd;
+
+    void* objectEnd;
+    void* tableEnd;
+    void* tableValidEnd;
+    void* allocStart;
+
+    BYTE allocFailed;
+    int workspaceOversizedDuration;
+    ZSTD_cwksp_alloc_phase_e phase;
+    ZSTD_cwksp_static_alloc_e isStatic;
+} ZSTD_cwksp;
+
+/*-*************************************
+*  Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+    (void)ws;
+    assert(ws->workspace <= ws->objectEnd);
+    assert(ws->objectEnd <= ws->tableEnd);
+    assert(ws->objectEnd <= ws->tableValidEnd);
+    assert(ws->tableEnd <= ws->allocStart);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/*
+ * Align must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+    size_t const mask = align - 1;
+    assert((align & mask) == 0);
+    return (size + mask) & ~mask;
+}
+
+/*
+ * Use this to determine how much space in the workspace we will consume to
+ * allocate this object. (Normally it should be exactly the size of the object,
+ * but under special conditions, like ASAN, where we pad each object, it might
+ * be larger.)
+ *
+ * Since tables aren't currently redzoned, you don't need to call through this
+ * to figure out how much space you need for the matchState tables. Everything
+ * else is though.
+ */
+MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+    if (size == 0)
+        return 0;
+    return size;
+}
+
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+    assert(phase >= ws->phase);
+    if (phase > ws->phase) {
+        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+                phase >= ZSTD_cwksp_alloc_buffers) {
+            ws->tableValidEnd = ws->objectEnd;
+        }
+        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+                phase >= ZSTD_cwksp_alloc_aligned) {
+            /* If unaligned allocations down from a too-large top have left us
+             * unaligned, we need to realign our alloc ptr. Technically, this
+             * can consume space that is unaccounted for in the neededSpace
+             * calculation. However, I believe this can only happen when the
+             * workspace is too large, and specifically when it is too large
+             * by a larger margin than the space that will be consumed. */
+            /* TODO: cleaner, compiler warning friendly way to do this??? */
+            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+            if (ws->allocStart < ws->tableValidEnd) {
+                ws->tableValidEnd = ws->allocStart;
+            }
+        }
+        ws->phase = phase;
+    }
+}
+
+/*
+ * Returns whether this object/buffer/etc was allocated in this workspace.
+ */
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+}
+
+/*
+ * Internal function. Do not use directly.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+    void* alloc;
+    void* bottom = ws->tableEnd;
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    alloc = (BYTE *)ws->allocStart - bytes;
+
+    if (bytes == 0)
+        return NULL;
+
+
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+
+
+    return alloc;
+}
+
+/*
+ * Reserves and returns unaligned memory.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+    return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+}
+
+/*
+ * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+}
+
+/*
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+    void* alloc = ws->tableEnd;
+    void* end = (BYTE *)alloc + bytes;
+    void* top = ws->allocStart;
+
+    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(end <= top);
+    if (end > top) {
+        DEBUGLOG(4, "cwksp: table alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->tableEnd = end;
+
+
+    return alloc;
+}
+
+/*
+ * Aligned on sizeof(void*).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+    void* alloc = ws->objectEnd;
+    void* end = (BYTE*)alloc + roundedBytes;
+
+
+    DEBUGLOG(5,
+        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
+        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
+    assert((bytes & (sizeof(void*)-1)) == 0);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    /* we must be in the first phase, no advance is possible */
+    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
+        DEBUGLOG(4, "cwksp: object alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->objectEnd = end;
+    ws->tableEnd = end;
+    ws->tableValidEnd = end;
+
+
+    return alloc;
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    ws->tableValidEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        ws->tableValidEnd = ws->tableEnd;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * Zero the part of the allocated tables not already marked clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+    }
+    ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/*
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing tables!");
+
+
+    ws->tableEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing!");
+
+
+
+    ws->tableEnd = ws->objectEnd;
+    ws->allocStart = ws->workspaceEnd;
+    ws->allocFailed = 0;
+    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+        ws->phase = ZSTD_cwksp_alloc_buffers;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * The provided workspace takes ownership of the buffer [start, start+size).
+ * Any existing values in the workspace are ignored (the previously managed
+ * buffer, if present, must be separately freed).
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
+    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+    ws->workspace = start;
+    ws->workspaceEnd = (BYTE*)start + size;
+    ws->objectEnd = ws->workspace;
+    ws->tableValidEnd = ws->objectEnd;
+    ws->phase = ZSTD_cwksp_alloc_objects;
+    ws->isStatic = isStatic;
+    ZSTD_cwksp_clear(ws);
+    ws->workspaceOversizedDuration = 0;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+    void* workspace = ZSTD_customMalloc(size, customMem);
+    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+    RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
+    ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
+    return 0;
+}
+
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+    void *ptr = ws->workspace;
+    DEBUGLOG(4, "cwksp: freeing workspace");
+    ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
+    ZSTD_customFree(ptr, customMem);
+}
+
+/*
+ * Moves the management of a workspace from one cwksp to another. The src cwksp
+ * is left in an invalid state (src must be re-init()'ed before it's used again).
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+    *dst = *src;
+    ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
+}
+
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+}
+
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+    return ws->allocFailed;
+}
+
+/*-*************************************
+*  Functions Checking Free Space
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
+}
+
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_available(
+        ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
+        && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+        ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
+        ws->workspaceOversizedDuration++;
+    } else {
+        ws->workspaceOversizedDuration = 0;
+    }
+}
+
+
+#endif /* ZSTD_CWKSP_H */
diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c
new file mode 100644
index 000000000000..b0424d23ac57
--- /dev/null
+++ b/lib/zstd/compress/zstd_double_fast.c
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "zstd_double_fast.h"
+
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashLarge = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog;
+    U32  const mls = cParams->minMatch;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
+
+    /* Always insert every fastHashFillStep position into the hash tables.
+     * Insert the other positions into the large hash table if their entry
+     * is empty.
+     */
+    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        U32 i;
+        for (i = 0; i < fastHashFillStep; ++i) {
+            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
+            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
+            if (i == 0)
+                hashSmall[smHash] = curr + i;
+            if (i == 0 || hashLarge[lgHash] == 0)
+                hashLarge[lgHash] = curr + i;
+            /* Only load extra positions for ZSTD_dtlm_full */
+            if (dtlm == ZSTD_dtlm_fast)
+                break;
+    }   }
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_doubleFast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    const U32 hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    const U32 hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    /* presumes that, if there is a dictionary, it must be using Attach mode */
+    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams =
+                                     dictMode == ZSTD_dictMatchState ?
+                                     &dms->cParams : NULL;
+    const U32* const dictHashLong  = dictMode == ZSTD_dictMatchState ?
+                                     dms->hashTable : NULL;
+    const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
+                                     dms->chainTable : NULL;
+    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.base : NULL;
+    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
+                                     dictBase + dictStartIndex : NULL;
+    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictHBitsL           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->hashLog : hBitsL;
+    const U32 dictHBitsS           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->chainLog : hBitsS;
+    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
+
+    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+
+    /* if a dictionary is attached, it must be within window range */
+    if (dictMode == ZSTD_dictMatchState) {
+        assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
+    }
+
+    /* init */
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+    if (dictMode == ZSTD_dictMatchState) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        U32 offset;
+        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
+        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
+        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
+        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        U32 const curr = (U32)(ip-base);
+        U32 const matchIndexL = hashLong[h2];
+        U32 matchIndexS = hashSmall[h];
+        const BYTE* matchLong = base + matchIndexL;
+        const BYTE* match = base + matchIndexS;
+        const U32 repIndex = curr + 1 - offset_1;
+        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
+                            && repIndex < prefixLowestIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
+
+        /* check dictMatchState repcode */
+        if (dictMode == ZSTD_dictMatchState
+            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        /* check noDict repcode */
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        if (matchIndexL > prefixLowestIndex) {
+            /* check prefix long match */
+            if (MEM_read64(matchLong) == MEM_read64(ip)) {
+                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
+                offset = (U32)(ip-matchLong);
+                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
+                goto _match_found;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState long match */
+            U32 const dictMatchIndexL = dictHashLong[dictHL];
+            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
+            assert(dictMatchL < dictEnd);
+
+            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
+                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
+                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
+                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
+                goto _match_found;
+        }   }
+
+        if (matchIndexS > prefixLowestIndex) {
+            /* check prefix short match */
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState short match */
+            U32 const dictMatchIndexS = dictHashSmall[dictHS];
+            match = dictBase + dictMatchIndexS;
+            matchIndexS = dictMatchIndexS + dictIndexDelta;
+
+            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+        }   }
+
+        ip += ((ip-anchor) >> kSearchStrength) + 1;
+#if defined(__aarch64__)
+        PREFETCH_L1(ip+256);
+#endif
+        continue;
+
+_search_next_long:
+
+        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
+            U32 const matchIndexL3 = hashLong[hl3];
+            const BYTE* matchL3 = base + matchIndexL3;
+            hashLong[hl3] = curr + 1;
+
+            /* check prefix long +1 match */
+            if (matchIndexL3 > prefixLowestIndex) {
+                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
+                    ip++;
+                    offset = (U32)(ip-matchL3);
+                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+                }
+            } else if (dictMode == ZSTD_dictMatchState) {
+                /* check dict long +1 match */
+                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
+                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
+                assert(dictMatchL3 < dictEnd);
+                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
+                    ip++;
+                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
+                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+        }   }   }
+
+        /* if no long +1 match, explore the short match we found */
+        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
+            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
+            offset = (U32)(curr - matchIndexS);
+            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        } else {
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            offset = (U32)(ip - match);
+            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        }
+
+_match_found:
+        offset_2 = offset_1;
+        offset_1 = offset;
+
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+_match_stored:
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            if (dictMode == ZSTD_dictMatchState) {
+                while (ip <= ilimit) {
+                    U32 const current2 = (U32)(ip-base);
+                    U32 const repIndex2 = current2 - offset_2;
+                    const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
+                        && repIndex2 < prefixLowestIndex ?
+                            dictBase + repIndex2 - dictIndexDelta :
+                            base + repIndex2;
+                    if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
+                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                        ip += repLength2;
+                        anchor = ip;
+                        continue;
+                    }
+                    break;
+            }   }
+
+            if (dictMode == ZSTD_noDict) {
+                while ( (ip <= ilimit)
+                     && ( (offset_2>0)
+                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
+                    ip += rLength;
+                    anchor = ip;
+                    continue;   /* faster when present ... (?) */
+        }   }   }
+    }   /* while (ip < ilimit) */
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    const U32 mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
+    }
+}
+
+
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    const U32 mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
+
+    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
+        const U32 matchIndex = hashSmall[hSmall];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* match = matchBase + matchIndex;
+
+        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
+        const U32 matchLongIndex = hashLong[hLong];
+        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* matchLong = matchLongBase + matchLongIndex;
+
+        const U32 curr = (U32)(ip-base);
+        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        size_t mLength;
+        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
+
+        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
+            & (repIndex > dictStartIndex))
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else {
+            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
+                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 offset;
+                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
+                offset = curr - matchLongIndex;
+                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
+                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+                U32 const matchIndex3 = hashLong[h3];
+                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
+                const BYTE* match3 = match3Base + matchIndex3;
+                U32 offset;
+                hashLong[h3] = curr + 1;
+                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
+                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
+                    ip++;
+                    offset = curr+1 - matchIndex3;
+                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
+                } else {
+                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                    offset = curr - matchIndex;
+                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                }
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else {
+                ip += ((ip-anchor) >> kSearchStrength) + 1;
+                continue;
+        }   }
+
+        /* move to next sequence start */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
+                    & (repIndex2 > dictStartIndex))
+                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h
new file mode 100644
index 000000000000..6822bde65a1d
--- /dev/null
+++ b/lib/zstd/compress/zstd_double_fast.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_DOUBLE_FAST_H
+#define ZSTD_DOUBLE_FAST_H
+
+
+#include "../common/mem.h"      /* U32 */
+#include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+
+#endif /* ZSTD_DOUBLE_FAST_H */
diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c
new file mode 100644
index 000000000000..96b7d48e2868
--- /dev/null
+++ b/lib/zstd/compress/zstd_fast.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
+#include "zstd_fast.h"
+
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hBits = cParams->hashLog;
+    U32  const mls = cParams->minMatch;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
+
+    /* Always insert every fastHashFillStep position into the hash table.
+     * Insert the other positions if their hash entry is empty.
+     */
+    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
+        hashTable[hash0] = curr;
+        if (dtlm == ZSTD_dtlm_fast) continue;
+        /* Only load extra positions for ZSTD_dtlm_full */
+        {   U32 p;
+            for (p = 1; p < fastHashFillStep; ++p) {
+                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
+                if (hashTable[hash] == 0) {  /* not yet filled */
+                    hashTable[hash] = curr + p;
+    }   }   }   }
+}
+
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
+    const BYTE* anchor = istart;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
+    ip0 += (ip0 == prefixStart);
+    ip1 = ip0 + 1;
+    {   U32 const curr = (U32)(ip0 - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+
+    /* Main Search Loop */
+#ifdef __INTEL_COMPILER
+    /* From intel 'The vector pragma indicates that the loop should be
+     * vectorized if it is legal to do so'. Can be used together with
+     * #pragma ivdep (but have opted to exclude that because intel
+     * warns against using it).*/
+    #pragma vector always
+#endif
+    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
+        size_t mLength;
+        BYTE const* ip2 = ip0 + 2;
+        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
+        U32 const val0 = MEM_read32(ip0);
+        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
+        U32 const val1 = MEM_read32(ip1);
+        U32 const current0 = (U32)(ip0-base);
+        U32 const current1 = (U32)(ip1-base);
+        U32 const matchIndex0 = hashTable[h0];
+        U32 const matchIndex1 = hashTable[h1];
+        BYTE const* repMatch = ip2 - offset_1;
+        const BYTE* match0 = base + matchIndex0;
+        const BYTE* match1 = base + matchIndex1;
+        U32 offcode;
+
+#if defined(__aarch64__)
+        PREFETCH_L1(ip0+256);
+#endif
+
+        hashTable[h0] = current0;   /* update hash table */
+        hashTable[h1] = current1;   /* update hash table */
+
+        assert(ip0 + 1 == ip1);
+
+        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
+            mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
+            ip0 = ip2 - mLength;
+            match0 = repMatch - mLength;
+            mLength += 4;
+            offcode = 0;
+            goto _match;
+        }
+        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
+            /* found a regular match */
+            goto _offset;
+        }
+        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
+            /* found a regular match after one literal */
+            ip0 = ip1;
+            match0 = match1;
+            goto _offset;
+        }
+        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+            assert(step >= 2);
+            ip0 += step;
+            ip1 += step;
+            continue;
+        }
+_offset: /* Requires: ip0, match0 */
+        /* Compute the offset code */
+        offset_2 = offset_1;
+        offset_1 = (U32)(ip0-match0);
+        offcode = offset_1 + ZSTD_REP_MOVE;
+        mLength = 4;
+        /* Count the backwards match length */
+        while (((ip0>anchor) & (match0>prefixStart))
+             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
+
+_match: /* Requires: ip0, match0, offcode */
+        /* Count the forward length */
+        mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
+        /* match found */
+        ip0 += mLength;
+        anchor = ip0;
+
+        if (ip0 <= ilimit) {
+            /* Fill Table */
+            assert(base+current0+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+            if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
+                while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+                    { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                    hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                    ip0 += rLength;
+                    ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                    anchor = ip0;
+                    continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+        }   }   }
+        ip1 = ip0 + 1;
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    assert(ms->dictMatchState == NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   prefixStartIndex = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
+    const U32* const dictHashTable = dms->hashTable;
+    const U32 dictStartIndex       = dms->window.dictLimit;
+    const BYTE* const dictBase     = dms->window.base;
+    const BYTE* const dictStart    = dictBase + dictStartIndex;
+    const BYTE* const dictEnd      = dms->window.nextSrc;
+    const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
+    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
+    const U32 dictHLog             = dictCParams->hashLog;
+
+    /* if a dictionary is still attached, it necessarily means that
+     * it is within window size. So we just check it. */
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
+    assert(endIndex - prefixStartIndex <= maxDistance);
+    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
+
+    /* ensure there will be no underflow
+     * when translating a dict index into a local index */
+    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
+    ip += (dictAndPrefixLength == 0);
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
+        U32 const curr = (U32)(ip-base);
+        U32 const matchIndex = hashTable[h];
+        const BYTE* match = base + matchIndex;
+        const U32 repIndex = curr + 1 - offset_1;
+        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashTable[h] = curr;   /* update hash table */
+
+        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else if ( (matchIndex <= prefixStartIndex) ) {
+            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
+            U32 const dictMatchIndex = dictHashTable[dictHash];
+            const BYTE* dictMatch = dictBase + dictMatchIndex;
+            if (dictMatchIndex <= dictStartIndex ||
+                MEM_read32(dictMatch) != MEM_read32(ip)) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+                continue;
+            } else {
+                /* found a dict match */
+                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
+                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
+                while (((ip>anchor) & (dictMatch>dictStart))
+                     && (ip[-1] == dictMatch[-1])) {
+                    ip--; dictMatch--; mLength++;
+                } /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            }
+        } else if (MEM_read32(match) != MEM_read32(ip)) {
+            /* it's not a match, and we're not going to check the dictionary */
+            assert(stepSize >= 1);
+            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+            continue;
+        } else {
+            /* found a regular match */
+            U32 const offset = (U32)(ip-match);
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            while (((ip>anchor) & (match>prefixStart))
+                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+            offset_2 = offset_1;
+            offset_1 = offset;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        }
+
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            assert(base+curr+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
+                        dictBase - dictIndexDelta + repIndex2 :
+                        base + repIndex2;
+                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    assert(ms->dictMatchState != NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+
+static size_t ZSTD_compressBlock_fast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
+
+    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
+        const U32    matchIndex = hashTable[h];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE*  match = matchBase + matchIndex;
+        const U32    curr = (U32)(ip-base);
+        const U32    repIndex = curr + 1 - offset_1;
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        hashTable[h] = curr;   /* update hash table */
+        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
+        assert(offset_1 <= curr +1);   /* check repIndex */
+
+        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ip += rLength;
+            anchor = ip;
+        } else {
+            if ( (matchIndex < dictStartIndex) ||
+                 (MEM_read32(match) != MEM_read32(ip)) ) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+                continue;
+            }
+            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 const offset = curr - matchIndex;
+                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ip += mLength;
+                anchor = ip;
+        }   }
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h
new file mode 100644
index 000000000000..fddc2f532d21
--- /dev/null
+++ b/lib/zstd/compress/zstd_fast.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_FAST_H
+#define ZSTD_FAST_H
+
+
+#include "../common/mem.h"      /* U32 */
+#include "zstd_compress_internal.h"
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+#endif /* ZSTD_FAST_H */
diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c
new file mode 100644
index 000000000000..fb54d4e28a2b
--- /dev/null
+++ b/lib/zstd/compress/zstd_lazy.c
@@ -0,0 +1,1414 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "zstd_lazy.h"
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+
+static void
+ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+                const BYTE* ip, const BYTE* iend,
+                U32 mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hashLog = cParams->hashLog;
+
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    if (idx != target)
+        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
+                    idx, target, ms->window.dictLimit);
+    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
+    (void)iend;
+
+    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
+    for ( ; idx < target ; idx++) {
+        size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
+        U32    const matchIndex = hashTable[h];
+
+        U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
+        U32*   const sortMarkPtr  = nextCandidatePtr + 1;
+
+        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
+        hashTable[h] = idx;   /* Update Hash Table */
+        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
+        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
+    }
+    ms->nextToUpdate = target;
+}
+
+
+/* ZSTD_insertDUBT1() :
+ *  sort one already inserted but unsorted position
+ *  assumption : curr >= btlow == (curr - btmask)
+ *  doesn't fail */
+static void
+ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
+                 U32 curr, const BYTE* inputEnd,
+                 U32 nbCompares, U32 btLow,
+                 const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
+    const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowValid = ms->window.lowLimit;
+    U32 const maxDistance = 1U << cParams->windowLog;
+    U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
+
+
+    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
+                curr, dictLimit, windowLow);
+    assert(curr >= btLow);
+    assert(ip < iend);   /* condition for ZSTD_count */
+
+    for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < curr);
+        /* note : all candidates are now supposed sorted,
+         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
+         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
+
+        if ( (dictMode != ZSTD_extDict)
+          || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
+          || (curr < dictLimit) /* both in extDict */) {
+            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
+                                     || (matchIndex+matchLength >= dictLimit)) ?
+                                        base : dictBase;
+            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
+                 || (curr < dictLimit) );
+            match = mBase + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
+        }
+
+        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
+                    curr, matchIndex, (U32)matchLength);
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
+                        matchIndex, btLow, nextPtr[1]);
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
+                        matchIndex, btLow, nextPtr[0]);
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+}
+
+
+static size_t
+ZSTD_DUBT_findBetterDictMatch (
+        ZSTD_matchState_t* ms,
+        const BYTE* const ip, const BYTE* const iend,
+        size_t* offsetPtr,
+        size_t bestLength,
+        U32 nbCompares,
+        U32 const mls,
+        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_matchState_t * const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
+    const U32 * const dictHashTable = dms->hashTable;
+    U32         const hashLog = dmsCParams->hashLog;
+    size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32               dictMatchIndex = dictHashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    U32         const curr = (U32)(ip-base);
+    const BYTE* const dictBase = dms->window.base;
+    const BYTE* const dictEnd = dms->window.nextSrc;
+    U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
+    U32         const dictLowLimit = dms->window.lowLimit;
+    U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
+
+    U32*        const dictBt = dms->chainTable;
+    U32         const btLog  = dmsCParams->chainLog - 1;
+    U32         const btMask = (1 << btLog) - 1;
+    U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
+
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+
+    (void)dictMode;
+    assert(dictMode == ZSTD_dictMatchState);
+
+    for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
+        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        const BYTE* match = dictBase + dictMatchIndex;
+        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+        if (dictMatchIndex+matchLength >= dictHighLimit)
+            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+        if (matchLength > bestLength) {
+            U32 matchIndex = dictMatchIndex + dictIndexDelta;
+            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
+                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
+                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
+                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+            }
+            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
+                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        } else {
+            /* match is larger than current */
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthLarger = matchLength;
+            dictMatchIndex = nextPtr[0];
+        }
+    }
+
+    if (bestLength >= MINMATCH) {
+        U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                    curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+    }
+    return bestLength;
+
+}
+
+
+static size_t
+ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iend,
+                        size_t* offsetPtr,
+                        U32 const mls,
+                        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32          matchIndex  = hashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    U32    const curr = (U32)(ip-base);
+    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
+
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32    const btLow = (btMask >= curr) ? 0 : curr - btMask;
+    U32    const unsortLimit = MAX(btLow, windowLow);
+
+    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
+    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+    U32          nbCompares = 1U << cParams->searchLog;
+    U32          nbCandidates = nbCompares;
+    U32          previousCandidate = 0;
+
+    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
+    assert(ip <= iend-8);   /* required for h calculation */
+    assert(dictMode != ZSTD_dedicatedDictSearch);
+
+    /* reach end of unsorted candidates list */
+    while ( (matchIndex > unsortLimit)
+         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
+         && (nbCandidates > 1) ) {
+        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
+                    matchIndex);
+        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
+        previousCandidate = matchIndex;
+        matchIndex = *nextCandidate;
+        nextCandidate = bt + 2*(matchIndex&btMask);
+        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+        nbCandidates --;
+    }
+
+    /* nullify last candidate if it's still unsorted
+     * simplification, detrimental to compression ratio, beneficial for speed */
+    if ( (matchIndex > unsortLimit)
+      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
+        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
+                    matchIndex);
+        *nextCandidate = *unsortedMark = 0;
+    }
+
+    /* batch sort stacked candidates */
+    matchIndex = previousCandidate;
+    while (matchIndex) {  /* will end on matchIndex == 0 */
+        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
+        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
+        ZSTD_insertDUBT1(ms, matchIndex, iend,
+                         nbCandidates, unsortLimit, dictMode);
+        matchIndex = nextCandidateIdx;
+        nbCandidates++;
+    }
+
+    /* find longest match */
+    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
+        const BYTE* const dictBase = ms->window.dictBase;
+        const U32 dictLimit = ms->window.dictLimit;
+        const BYTE* const dictEnd = dictBase + dictLimit;
+        const BYTE* const prefixStart = base + dictLimit;
+        U32* smallerPtr = bt + 2*(curr&btMask);
+        U32* largerPtr  = bt + 2*(curr&btMask) + 1;
+        U32 matchEndIdx = curr + 8 + 1;
+        U32 dummy32;   /* to be nullified at the end */
+        size_t bestLength = 0;
+
+        matchIndex  = hashTable[h];
+        hashTable[h] = curr;   /* Update Hash Table */
+
+        for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
+            U32* const nextPtr = bt + 2*(matchIndex & btMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match;
+
+            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
+                match = base + matchIndex;
+                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+            } else {
+                match = dictBase + matchIndex;
+                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+                if (matchIndex+matchLength >= dictLimit)
+                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+            }
+
+            if (matchLength > bestLength) {
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+                    if (dictMode == ZSTD_dictMatchState) {
+                        nbCompares = 0; /* in addition to avoiding checking any
+                                         * further in this loop, make sure we
+                                         * skip checking in the dictionary. */
+                    }
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (match[matchLength] < ip[matchLength]) {
+                /* match is smaller than current */
+                *smallerPtr = matchIndex;             /* update smaller idx */
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                *largerPtr = matchIndex;
+                commonLengthLarger = matchLength;
+                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                largerPtr = nextPtr;
+                matchIndex = nextPtr[0];
+        }   }
+
+        *smallerPtr = *largerPtr = 0;
+
+        assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+        if (dictMode == ZSTD_dictMatchState && nbCompares) {
+            bestLength = ZSTD_DUBT_findBetterDictMatch(
+                    ms, ip, iend,
+                    offsetPtr, bestLength, nbCompares,
+                    mls, dictMode);
+        }
+
+        assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
+        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
+        if (bestLength >= MINMATCH) {
+            U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+        }
+        return bestLength;
+    }
+}
+
+
+/* ZSTD_BtFindBestMatch() : Tree updater, providing best match */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iLimit,
+                      size_t* offsetPtr,
+                const U32 mls /* template */,
+                const ZSTD_dictMode_e dictMode)
+{
+    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
+    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
+    ZSTD_updateDUBT(ms, ip, iLimit, mls);
+    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
+}
+
+
+static size_t
+ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
+                            const BYTE* ip, const BYTE* const iLimit,
+                                  size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    }
+}
+
+
+static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    }
+}
+
+
+
+/* *********************************
+*  Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_matchState_t* ms,
+                        const ZSTD_compressionParameters* const cParams,
+                        const BYTE* ip, U32 const mls)
+{
+    U32* const hashTable  = ms->hashTable;
+    const U32 hashLog = cParams->hashLog;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainMask = (1 << cParams->chainLog) - 1;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    while(idx < target) { /* catch up */
+        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    ms->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+}
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
+{
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32* const hashTable = ms->hashTable;
+    U32* const chainTable = ms->chainTable;
+    U32 const chainSize = 1 << ms->cParams.chainLog;
+    U32 idx = ms->nextToUpdate;
+    U32 const minChain = chainSize < target ? target - chainSize : idx;
+    U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
+    U32 const cacheSize = bucketSize - 1;
+    U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
+    U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
+
+    /* We know the hashtable is oversized by a factor of `bucketSize`.
+     * We are going to temporarily pretend `bucketSize == 1`, keeping only a
+     * single entry. We will use the rest of the space to construct a temporary
+     * chaintable.
+     */
+    U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+    U32* const tmpHashTable = hashTable;
+    U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
+    U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
+    U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
+
+    U32 hashIdx;
+
+    assert(ms->cParams.chainLog <= 24);
+    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
+    assert(idx != 0);
+    assert(tmpMinChain <= minChain);
+
+    /* fill conventional hash table and conventional chain table */
+    for ( ; idx < target; idx++) {
+        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
+        if (idx >= tmpMinChain) {
+            tmpChainTable[idx - tmpMinChain] = hashTable[h];
+        }
+        tmpHashTable[h] = idx;
+    }
+
+    /* sort chains into ddss chain table */
+    {
+        U32 chainPos = 0;
+        for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
+            U32 count;
+            U32 countBeyondMinChain = 0;
+            U32 i = tmpHashTable[hashIdx];
+            for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
+                /* skip through the chain to the first position that won't be
+                 * in the hash cache bucket */
+                if (i < minChain) {
+                    countBeyondMinChain++;
+                }
+                i = tmpChainTable[i - tmpMinChain];
+            }
+            if (count == cacheSize) {
+                for (count = 0; count < chainLimit;) {
+                    if (i < minChain) {
+                        if (!i || countBeyondMinChain++ > cacheSize) {
+                            /* only allow pulling `cacheSize` number of entries
+                             * into the cache or chainTable beyond `minChain`,
+                             * to replace the entries pulled out of the
+                             * chainTable into the cache. This lets us reach
+                             * back further without increasing the total number
+                             * of entries in the chainTable, guaranteeing the
+                             * DDSS chain table will fit into the space
+                             * allocated for the regular one. */
+                            break;
+                        }
+                    }
+                    chainTable[chainPos++] = i;
+                    count++;
+                    if (i < tmpMinChain) {
+                        break;
+                    }
+                    i = tmpChainTable[i - tmpMinChain];
+                }
+            } else {
+                count = 0;
+            }
+            if (count) {
+                tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
+            } else {
+                tmpHashTable[hashIdx] = 0;
+            }
+        }
+        assert(chainPos <= chainSize); /* I believe this is guaranteed... */
+    }
+
+    /* move chain pointers into the last entry of each hash bucket */
+    for (hashIdx = (1 << hashLog); hashIdx; ) {
+        U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
+        U32 const chainPackedPointer = tmpHashTable[hashIdx];
+        U32 i;
+        for (i = 0; i < cacheSize; i++) {
+            hashTable[bucketIdx + i] = 0;
+        }
+        hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
+    }
+
+    /* fill the buckets of the hash table */
+    for (idx = ms->nextToUpdate; idx < target; idx++) {
+        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
+                   << ZSTD_LAZY_DDSS_BUCKET_LOG;
+        U32 i;
+        /* Shift hash cache down 1. */
+        for (i = cacheSize - 1; i; i--)
+            hashTable[h + i] = hashTable[h + i - 1];
+        hashTable[h] = idx;
+    }
+
+    ms->nextToUpdate = target;
+}
+
+
+/* inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_HcFindBestMatch_generic (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainSize = (1 << cParams->chainLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 curr = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 minChain = curr > chainSize ? curr - chainSize : 0;
+    U32 nbAttempts = 1U << cParams->searchLog;
+    size_t ml=4-1;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
+                         ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+    const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
+                        ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+
+    U32 matchIndex;
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32* entry = &dms->hashTable[ddsIdx];
+        PREFETCH_L1(entry);
+    }
+
+    /* HC4 match finder */
+    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+
+    for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
+        size_t currentMl=0;
+        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+            const BYTE* const match = base + matchIndex;
+            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+            if (match[ml] == ip[ml])   /* potentially better */
+                currentMl = ZSTD_count(ip, match, iLimit);
+        } else {
+            const BYTE* const match = dictBase + matchIndex;
+            assert(match+4 <= dictEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+        }
+
+        if (matchIndex <= minChain) break;
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32 ddsLowestIndex  = dms->window.dictLimit;
+        const BYTE* const ddsBase = dms->window.base;
+        const BYTE* const ddsEnd  = dms->window.nextSrc;
+        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
+        const U32 ddsIndexDelta   = dictLimit - ddsSize;
+        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+        U32 ddsAttempt;
+
+        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+        }
+
+        {
+            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+            U32 const chainIndex = chainPackedPointer >> 8;
+
+            PREFETCH_L1(&dms->chainTable[chainIndex]);
+        }
+
+        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+            size_t currentMl=0;
+            const BYTE* match;
+            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+            match = ddsBase + matchIndex;
+
+            if (!matchIndex) {
+                return ml;
+            }
+
+            /* guaranteed by table construction */
+            (void)ddsLowestIndex;
+            assert(matchIndex >= ddsLowestIndex);
+            assert(match+4 <= ddsEnd);
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+            }
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) {
+                    /* best possible, avoids read overflow on next attempt */
+                    return ml;
+                }
+            }
+        }
+
+        {
+            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+            U32 chainIndex = chainPackedPointer >> 8;
+            U32 const chainLength = chainPackedPointer & 0xFF;
+            U32 const chainAttempts = nbAttempts - ddsAttempt;
+            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+            U32 chainAttempt;
+
+            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+            }
+
+            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+                size_t currentMl=0;
+                const BYTE* match;
+                matchIndex = dms->chainTable[chainIndex];
+                match = ddsBase + matchIndex;
+
+                /* guaranteed by table construction */
+                assert(matchIndex >= ddsLowestIndex);
+                assert(match+4 <= ddsEnd);
+                if (MEM_read32(match) == MEM_read32(ip)) {
+                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+                }
+
+                /* save best solution */
+                if (currentMl > ml) {
+                    ml = currentMl;
+                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+                }
+            }
+        }
+    } else if (dictMode == ZSTD_dictMatchState) {
+        const U32* const dmsChainTable = dms->chainTable;
+        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
+        const U32 dmsChainMask         = dmsChainSize - 1;
+        const U32 dmsLowestIndex       = dms->window.dictLimit;
+        const BYTE* const dmsBase      = dms->window.base;
+        const BYTE* const dmsEnd       = dms->window.nextSrc;
+        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+        const U32 dmsIndexDelta        = dictLimit - dmsSize;
+        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
+
+        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
+
+        for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
+            size_t currentMl=0;
+            const BYTE* const match = dmsBase + matchIndex;
+            assert(match+4 <= dmsEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+
+            if (matchIndex <= dmsMinChain) break;
+
+            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
+        }
+    }
+
+    return ml;
+}
+
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    }
+}
+
+
+static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
+    }
+}
+
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    }
+}
+
+
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_lazy_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth,
+                        ZSTD_dictMode_e const dictMode)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 prefixLowestIndex = ms->window.dictLimit;
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+
+    /*
+     * This table is indexed first by the four ZSTD_dictMode_e values, and then
+     * by the two searchMethod_e values. NULLs are placed for configurations
+     * that should never occur (extDict modes go to the other implementation
+     * below and there is no DDSS for binary tree search yet).
+     */
+    const searchMax_f searchFuncs[4][2] = {
+        {
+            ZSTD_HcFindBestMatch_selectMLS,
+            ZSTD_BtFindBestMatch_selectMLS
+        },
+        {
+            NULL,
+            NULL
+        },
+        {
+            ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
+            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+        },
+        {
+            ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
+            NULL
+        }
+    };
+
+    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
+    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
+
+    const int isDMS = dictMode == ZSTD_dictMatchState;
+    const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
+    const int isDxS = isDMS || isDDS;
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 dictLowestIndex      = isDxS ? dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = isDxS ? dms->window.base : NULL;
+    const BYTE* const dictLowest   = isDxS ? dictBase + dictLowestIndex : NULL;
+    const BYTE* const dictEnd      = isDxS ? dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = isDxS ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
+
+    assert(searchMax != NULL);
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
+
+    /* init */
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
+    }
+    if (isDxS) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Match Loop */
+#if defined(__x86_64__)
+    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+
+        /* check repCode */
+        if (isDxS) {
+            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
+            const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
+                                && repIndex < prefixLowestIndex) ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                if (depth==0) goto _storeSequence;
+            }
+        }
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            if (depth==0) goto _storeSequence;
+        }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            if ( (dictMode == ZSTD_noDict)
+              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                int const gain2 = (int)(mlRep * 3);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                if ((mlRep >= 4) && (gain2 > gain1))
+                    matchLength = mlRep, offset = 0, start = ip;
+            }
+            if (isDxS) {
+                const U32 repIndex = (U32)(ip - base) - offset_1;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                    int const gain2 = (int)(mlRep * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+            }
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                if ( (dictMode == ZSTD_noDict)
+                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                    int const gain2 = (int)(mlRep * 4);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+                if (isDxS) {
+                    const U32 repIndex = (U32)(ip - base) - offset_1;
+                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                        int const gain2 = (int)(mlRep * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((mlRep >= 4) && (gain2 > gain1))
+                            matchLength = mlRep, offset = 0, start = ip;
+                    }
+                }
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* NOTE:
+         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
+         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
+         * overflows the pointer, which is undefined behavior.
+         */
+        /* catch up */
+        if (offset) {
+            if (dictMode == ZSTD_noDict) {
+                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
+                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
+                    { start--; matchLength++; }
+            }
+            if (isDxS) {
+                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
+                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
+                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            }
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        if (isDxS) {
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex = current2 - offset_2;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                        dictBase - dictIndexDelta + repIndex :
+                        base + repIndex;
+                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
+                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                    ip += matchLength;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        if (dictMode == ZSTD_noDict) {
+            while ( ((ip <= ilimit) & (offset_2>0))
+                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
+                /* store sequence */
+                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+    }   }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1 ? offset_1 : savedOffset;
+    rep[1] = offset_2 ? offset_2 : savedOffset;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_lazy_extDict_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictEnd  = dictBase + dictLimit;
+    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
+    const U32 windowLog = ms->cParams.windowLog;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+    U32 offset_1 = rep[0], offset_2 = rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+
+    /* init */
+    ip += (ip == prefixStart);
+
+    /* Match Loop */
+#if defined(__x86_64__)
+    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+        U32 curr = (U32)(ip-base);
+
+        /* check repCode */
+        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
+            const U32 repIndex = (U32)(curr+1 - offset_1);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                if (depth==0) goto _storeSequence;
+        }   }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            curr++;
+            /* check repCode */
+            if (offset) {
+                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                const U32 repIndex = (U32)(curr - offset_1);
+                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                const BYTE* const repMatch = repBase + repIndex;
+                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                    /* repcode detected */
+                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                    int const gain2 = (int)(repLength * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((repLength >= 4) && (gain2 > gain1))
+                        matchLength = repLength, offset = 0, start = ip;
+            }   }
+
+            /* search match, depth 1 */
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                curr++;
+                /* check repCode */
+                if (offset) {
+                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                    const U32 repIndex = (U32)(curr - offset_1);
+                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                    const BYTE* const repMatch = repBase + repIndex;
+                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                        /* repcode detected */
+                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                        int const gain2 = (int)(repLength * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((repLength >= 4) && (gain2 > gain1))
+                            matchLength = repLength, offset = 0, start = ip;
+                }   }
+
+                /* search match, depth 2 */
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* catch up */
+        if (offset) {
+            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        while (ip <= ilimit) {
+            const U32 repCurrent = (U32)(ip-base);
+            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
+            const U32 repIndex = repCurrent - offset_2;
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+            if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+            break;
+    }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+}
+
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+}
diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h
new file mode 100644
index 000000000000..2fc5a6182134
--- /dev/null
+++ b/lib/zstd/compress/zstd_lazy.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LAZY_H
+#define ZSTD_LAZY_H
+
+
+#include "zstd_compress_internal.h"
+
+/*
+ * Dedicated Dictionary Search Structure bucket log. In the
+ * ZSTD_dedicatedDictSearch mode, the hashTable has
+ * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
+ * one.
+ */
+#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+#endif /* ZSTD_LAZY_H */
diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c
new file mode 100644
index 000000000000..8ef7e88a5add
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm.c
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_ldm.h"
+
+#include "../common/debug.h"
+#include <linux/xxhash.h>
+#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
+#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+#include "zstd_ldm_geartab.h"
+
+#define LDM_BUCKET_SIZE_LOG 3
+#define LDM_MIN_MATCH_LENGTH 64
+#define LDM_HASH_RLOG 7
+
+typedef struct {
+    U64 rolling;
+    U64 stopMask;
+} ldmRollingHashState_t;
+
+/* ZSTD_ldm_gear_init():
+ *
+ * Initializes the rolling hash state such that it will honor the
+ * settings in params. */
+static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
+{
+    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
+    unsigned hashRateLog = params->hashRateLog;
+
+    state->rolling = ~(U32)0;
+
+    /* The choice of the splitting criterion is subject to two conditions:
+     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
+     *   2. ideally, it has to depend on a window of minMatchLength bytes.
+     *
+     * In the gear hash algorithm, bit n depends on the last n bytes;
+     * so in order to obtain a good quality splitting criterion it is
+     * preferable to use bits with high weight.
+     *
+     * To match condition 1 we use a mask with hashRateLog bits set
+     * and, because of the previous remark, we make sure these bits
+     * have the highest possible weight while still respecting
+     * condition 2.
+     */
+    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
+        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
+    } else {
+        /* In this degenerate case we simply honor the hash rate. */
+        state->stopMask = ((U64)1 << hashRateLog) - 1;
+    }
+}
+
+/* ZSTD_ldm_gear_feed():
+ *
+ * Registers in the splits array all the split points found in the first
+ * size bytes following the data pointer. This function terminates when
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
+ * present in the splits array.
+ *
+ * Precondition: The splits array must not be full.
+ * Returns: The number of bytes processed. */
+static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
+                                 BYTE const* data, size_t size,
+                                 size_t* splits, unsigned* numSplits)
+{
+    size_t n;
+    U64 hash, mask;
+
+    hash = state->rolling;
+    mask = state->stopMask;
+    n = 0;
+
+#define GEAR_ITER_ONCE() do { \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1; \
+        if (UNLIKELY((hash & mask) == 0)) { \
+            splits[*numSplits] = n; \
+            *numSplits += 1; \
+            if (*numSplits == LDM_BATCH_SIZE) \
+                goto done; \
+        } \
+    } while (0)
+
+    while (n + 3 < size) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < size) {
+        GEAR_ITER_ONCE();
+    }
+
+#undef GEAR_ITER_ONCE
+
+done:
+    state->rolling = hash;
+    return n;
+}
+
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
+{
+    params->windowLog = cParams->windowLog;
+    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    if (params->hashLog == 0) {
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
+        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
+    }
+    if (params->hashRateLog == 0) {
+        params->hashRateLog = params->windowLog < params->hashLog
+                                   ? 0
+                                   : params->windowLog - params->hashLog;
+    }
+    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
+}
+
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
+}
+
+/* ZSTD_ldm_getBucket() :
+ *  Returns a pointer to the start of the bucket associated with hash. */
+static ldmEntry_t* ZSTD_ldm_getBucket(
+        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+{
+    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
+}
+
+/* ZSTD_ldm_insertEntry() :
+ *  Insert the entry with corresponding hash into the hash table */
+static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                 size_t const hash, const ldmEntry_t entry,
+                                 ldmParams_t const ldmParams)
+{
+    BYTE* const pOffset = ldmState->bucketOffsets + hash;
+    unsigned const offset = *pOffset;
+
+    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
+    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
+
+}
+
+/* ZSTD_ldm_countBackwardsMatch() :
+ *  Returns the number of bytes that match backwards before pIn and pMatch.
+ *
+ *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
+static size_t ZSTD_ldm_countBackwardsMatch(
+            const BYTE* pIn, const BYTE* pAnchor,
+            const BYTE* pMatch, const BYTE* pMatchBase)
+{
+    size_t matchLength = 0;
+    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
+        pIn--;
+        pMatch--;
+        matchLength++;
+    }
+    return matchLength;
+}
+
+/* ZSTD_ldm_countBackwardsMatch_2segments() :
+ *  Returns the number of bytes that match backwards from pMatch,
+ *  even with the backwards match spanning 2 different segments.
+ *
+ *  On reaching `pMatchBase`, start counting from mEnd */
+static size_t ZSTD_ldm_countBackwardsMatch_2segments(
+                    const BYTE* pIn, const BYTE* pAnchor,
+                    const BYTE* pMatch, const BYTE* pMatchBase,
+                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
+{
+    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
+    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
+        /* If backwards match is entirely in the extDict or prefix, immediately return */
+        return matchLength;
+    }
+    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
+    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
+    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
+    return matchLength;
+}
+
+/* ZSTD_ldm_fillFastTables() :
+ *
+ *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
+ *  This is similar to ZSTD_loadDictionaryContent.
+ *
+ *  The tables for the other strategies are filled within their
+ *  block compressors. */
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      void const* end)
+{
+    const BYTE* const iend = (const BYTE*)end;
+
+    switch(ms->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+    case ZSTD_btlazy2:
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        break;
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    return 0;
+}
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* ldmState, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params)
+{
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const istart = ip;
+    ldmRollingHashState_t hashState;
+    size_t* const splits = ldmState->splitIndices;
+    unsigned numSplits;
+
+    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+
+    ZSTD_ldm_gear_init(&hashState, params);
+    while (ip < iend) {
+        size_t hashed;
+        unsigned n;
+        
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            if (ip + splits[n] >= istart + minMatchLength) {
+                BYTE const* const split = ip + splits[n] - minMatchLength;
+                U64 const xxhash = xxh64(split, minMatchLength, 0);
+                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+                ldmEntry_t entry;
+
+                entry.offset = (U32)(split - base);
+                entry.checksum = (U32)(xxhash >> 32);
+                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
+            }
+        }
+
+        ip += hashed;
+    }
+}
+
+
+/* ZSTD_ldm_limitTableUpdate() :
+ *
+ *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
+ *  if it is far way
+ *  (after a long match, only update tables a limited amount). */
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
+{
+    U32 const curr = (U32)(anchor - ms->window.base);
+    if (curr > ms->nextToUpdate + 1024) {
+        ms->nextToUpdate =
+            curr - MIN(512, curr - ms->nextToUpdate - 1024);
+    }
+}
+
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const entsPerBucket = 1U << params->bucketSizeLog;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - HASH_READ_SIZE;
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash state */
+    ldmRollingHashState_t hashState;
+    /* Arrays for staged-processing */
+    size_t* const splits = ldmState->splitIndices;
+    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
+    unsigned numSplits;
+
+    if (srcSize < minMatchLength)
+        return iend - anchor;
+
+    /* Initialize the rolling hash state with the first minMatchLength bytes */
+    ZSTD_ldm_gear_init(&hashState, params);
+    {
+        size_t n = 0;
+
+        while (n < minMatchLength) {
+            numSplits = 0;
+            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
+                                    splits, &numSplits);
+        }
+        ip += minMatchLength;
+    }
+
+    while (ip < ilimit) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
+                                    splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            BYTE const* const split = ip + splits[n] - minMatchLength;
+            U64 const xxhash = xxh64(split, minMatchLength, 0);
+            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+
+            candidates[n].split = split;
+            candidates[n].hash = hash;
+            candidates[n].checksum = (U32)(xxhash >> 32);
+            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
+            PREFETCH_L1(candidates[n].bucket);
+        }
+
+        for (n = 0; n < numSplits; n++) {
+            size_t forwardMatchLength = 0, backwardMatchLength = 0,
+                   bestMatchLength = 0, mLength;
+            BYTE const* const split = candidates[n].split;
+            U32 const checksum = candidates[n].checksum;
+            U32 const hash = candidates[n].hash;
+            ldmEntry_t* const bucket = candidates[n].bucket;
+            ldmEntry_t const* cur;
+            ldmEntry_t const* bestEntry = NULL;
+            ldmEntry_t newEntry;
+
+            newEntry.offset = (U32)(split - base);
+            newEntry.checksum = checksum;
+
+            /* If a split point would generate a sequence overlapping with
+             * the previous one, we merely register it in the hash table and
+             * move on */
+            if (split < anchor) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
+
+            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
+                size_t curForwardMatchLength, curBackwardMatchLength,
+                       curTotalMatchLength;
+                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                    continue;
+                }
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+                    curForwardMatchLength =
+                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
+                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
+                }
+                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
+
+                if (curTotalMatchLength > bestMatchLength) {
+                    bestMatchLength = curTotalMatchLength;
+                    forwardMatchLength = curForwardMatchLength;
+                    backwardMatchLength = curBackwardMatchLength;
+                    bestEntry = cur;
+                }
+            }
+
+            /* No match found -- insert an entry into the hash table
+             * and process the next candidate match */
+            if (bestEntry == NULL) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
+
+            /* Match found */
+            mLength = forwardMatchLength + backwardMatchLength;
+            {
+                U32 const offset = (U32)(split - base) - bestEntry->offset;
+                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+                /* Out of sequence storage */
+                if (rawSeqStore->size == rawSeqStore->capacity)
+                    return ERROR(dstSize_tooSmall);
+                seq->litLength = (U32)(split - backwardMatchLength - anchor);
+                seq->matchLength = (U32)mLength;
+                seq->offset = offset;
+                rawSeqStore->size++;
+            }
+
+            /* Insert the current entry into the hash table --- it must be
+             * done after the previous block to avoid clobbering bestEntry */
+            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+
+            anchor = split + forwardMatchLength;
+        }
+
+        ip += hashed;
+    }
+
+    return iend - anchor;
+}
+
+/*! ZSTD_ldm_reduceTable() :
+ *  reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+                                 U32 const reducerValue)
+{
+    U32 u;
+    for (u = 0; u < size; u++) {
+        if (table[u].offset < reducerValue) table[u].offset = 0;
+        else table[u].offset -= reducerValue;
+    }
+}
+
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    size_t const kMaxChunkSize = 1 << 20;
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+    size_t chunk;
+    size_t leftoverSize = 0;
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    /* The input could be very large (in zstdmt), so it must be broken up into
+     * chunks to enforce the maximum distance and handle overflow correction.
+     */
+    assert(sequences->pos <= sequences->size);
+    assert(sequences->size <= sequences->capacity);
+    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+        size_t const remaining = (size_t)(iend - chunkStart);
+        BYTE const *const chunkEnd =
+            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+        size_t const chunkSize = chunkEnd - chunkStart;
+        size_t newLeftoverSize;
+        size_t const prevSize = sequences->size;
+
+        assert(chunkStart < iend);
+        /* 1. Perform overflow correction if necessary. */
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+            /* invalidate dictionaries on overflow correction */
+            ldmState->loadedDictEnd = 0;
+        }
+        /* 2. We enforce the maximum offset allowed.
+         *
+         * kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test the
+         *         the offset against maxDist directly.
+         *
+         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+         * that any offset used is valid at the END of the sequence, since it may
+         * be split into two sequences. This condition holds when using
+         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+         * against maxDist directly, we'll have to carefully handle that case.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
+        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences, params, chunkStart, chunkSize);
+        if (ZSTD_isError(newLeftoverSize))
+            return newLeftoverSize;
+        /* 4. We add the leftover literals from previous iterations to the first
+         *    newly generated sequence, or add the `newLeftoverSize` if none are
+         *    generated.
+         */
+        /* Prepend the leftover literals from the last call */
+        if (prevSize < sequences->size) {
+            sequences->seq[prevSize].litLength += (U32)leftoverSize;
+            leftoverSize = newLeftoverSize;
+        } else {
+            assert(newLeftoverSize == chunkSize);
+            leftoverSize += chunkSize;
+        }
+    }
+    return 0;
+}
+
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+        if (srcSize <= seq->litLength) {
+            /* Skip past srcSize literals */
+            seq->litLength -= (U32)srcSize;
+            return;
+        }
+        srcSize -= seq->litLength;
+        seq->litLength = 0;
+        if (srcSize < seq->matchLength) {
+            /* Skip past the first srcSize of the match */
+            seq->matchLength -= (U32)srcSize;
+            if (seq->matchLength < minMatch) {
+                /* The match is too short, omit it */
+                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                    seq[1].litLength += seq[0].matchLength;
+                }
+                rawSeqStore->pos++;
+            }
+            return;
+        }
+        srcSize -= seq->matchLength;
+        seq->matchLength = 0;
+        rawSeqStore->pos++;
+    }
+}
+
+/*
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+                                 U32 const remaining, U32 const minMatch)
+{
+    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+    assert(sequence.offset > 0);
+    /* Likely: No partial sequence */
+    if (remaining >= sequence.litLength + sequence.matchLength) {
+        rawSeqStore->pos++;
+        return sequence;
+    }
+    /* Cut the sequence short (offset == 0 ==> rest is literals). */
+    if (remaining <= sequence.litLength) {
+        sequence.offset = 0;
+    } else if (remaining < sequence.litLength + sequence.matchLength) {
+        sequence.matchLength = remaining - sequence.litLength;
+        if (sequence.matchLength < minMatch) {
+            sequence.offset = 0;
+        }
+    }
+    /* Skip past `remaining` bytes for the future sequences. */
+    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+    return sequence;
+}
+
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    void const* src, size_t srcSize)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    unsigned const minMatch = cParams->minMatch;
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+
+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
+    if (cParams->strategy >= ZSTD_btopt) {
+        size_t lastLLSize;
+        ms->ldmSeqStore = rawSeqStore;
+        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
+        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
+        return lastLLSize;
+    }
+
+    assert(rawSeqStore->pos <= rawSeqStore->size);
+    assert(rawSeqStore->size <= rawSeqStore->capacity);
+    /* Loop through each sequence and apply the block compressor to the literals */
+    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+        /* maybeSplitSequence updates rawSeqStore->pos */
+        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                   (U32)(iend - ip), minMatch);
+        int i;
+        /* End signal */
+        if (sequence.offset == 0)
+            break;
+
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
+
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, ip);
+        /* Run the block compressor */
+        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
+        {
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
+            ip += sequence.litLength;
+            /* Update the repcodes */
+            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                rep[i] = rep[i-1];
+            rep[0] = sequence.offset;
+            /* Store the sequence */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
+        }
+    }
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
+    /* Compress the last literals */
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
+}
diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h
new file mode 100644
index 000000000000..25b25270b72e
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_H
+#define ZSTD_LDM_H
+
+
+#include "zstd_compress_internal.h"   /* ldmParams_t, U32 */
+#include <linux/zstd.h>   /* ZSTD_CCtx, size_t */
+
+/*-*************************************
+*  Long distance matching
+***************************************/
+
+#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* state, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params);
+
+/*
+ * ZSTD_ldm_generateSequences():
+ *
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
+ *
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ *       sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+            ldmState_t* ldms, rawSeqStore_t* sequences,
+            ldmParams_t const* params, void const* src, size_t srcSize);
+
+/*
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            void const* src, size_t srcSize);
+
+/*
+ * ZSTD_ldm_skipSequences():
+ *
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+    U32 const minMatch);
+
+/* ZSTD_ldm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
+ * Not to be used in conjunction with ZSTD_ldm_skipSequences().
+ * Must be called for data with is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
+
+/* ZSTD_ldm_getTableSize() :
+ *  Estimate the space needed for long distance matching tables or 0 if LDM is
+ *  disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/* ZSTD_ldm_getSeqSpace() :
+ *  Return an upper bound on the number of sequences that can be produced by
+ *  the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
+
+/* ZSTD_ldm_adjustParameters() :
+ *  If the params->hashRateLog is not set, set it to its default value based on
+ *  windowLog and params->hashLog.
+ *
+ *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
+ *  params->hashLog if it is not).
+ *
+ *  Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams);
+
+
+#endif /* ZSTD_FAST_H */
diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h
new file mode 100644
index 000000000000..e5c24d856b0a
--- /dev/null
+++ b/lib/zstd/compress/zstd_ldm_geartab.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_GEARTAB_H
+#define ZSTD_LDM_GEARTAB_H
+
+static U64 ZSTD_ldm_gearTab[256] = {
+    0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
+    0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
+    0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
+    0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
+    0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
+    0x37b628620b628,    0x49a8d455d88caf5,  0x8556d711e6958140,
+    0x4f7ae74fc605c1f,  0x829f0c3468bd3a20, 0x4ffdc885c625179e,
+    0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
+    0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
+    0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
+    0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
+    0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
+    0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
+    0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
+    0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
+    0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
+    0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
+    0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
+    0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
+    0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
+    0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
+    0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
+    0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
+    0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
+    0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
+    0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
+    0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
+    0xff452823dbb010a,  0x9d42ed614f3dd267, 0x5b9313c06257c57b,
+    0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
+    0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
+    0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
+    0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
+    0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
+    0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
+    0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
+    0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
+    0x24a5483879c453e3, 0x88026889192b4b9,  0x28da96671782dbec,
+    0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
+    0xbc135a0a704b70ba, 0x69cd868f7622ada,  0xbc37ba89e0b9c0ab,
+    0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
+    0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
+    0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
+    0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
+    0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
+    0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
+    0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
+    0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
+    0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
+    0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
+    0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
+    0x820d471e20b348e,  0x1874383cb83d46dc, 0x97edeec7a1efe11c,
+    0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
+    0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
+    0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
+    0xaf846af6ab7d0bf4, 0xe5af208eb666e49,  0x5e6622f73534cd6a,
+    0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
+    0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
+    0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
+    0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
+    0x9f90e4c5fd508d8,  0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
+    0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
+    0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
+    0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
+    0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
+    0x4228e364c5b5ed7,  0x9d7a3edf0da43911, 0x8edcfeda24686756,
+    0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
+    0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
+    0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
+    0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
+    0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
+    0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
+    0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
+    0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
+    0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
+    0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
+    0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
+    0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
+    0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
+    0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
+    0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
+    0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
+    0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
+    0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
+    0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
+    0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
+    0x2b4da14f2613d8f4
+};
+
+#endif /* ZSTD_LDM_GEARTAB_H */
diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c
new file mode 100644
index 000000000000..04337050fe9a
--- /dev/null
+++ b/lib/zstd/compress/zstd_opt.c
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "hist.h"
+#include "zstd_opt.h"
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE     (1<<30)
+
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+
+#if 0    /* approximation at bit level */
+#  define BITCOST_ACCURACY 0
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
+
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+#endif
+
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+            const BYTE* const src, size_t const srcSize,
+                  int const optLevel)
+{
+    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
+    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
+    optPtr->priceType = zop_dynamic;
+
+    if (optPtr->litLengthSum == 0) {  /* first block : init */
+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
+            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+            optPtr->priceType = zop_predef;
+        }
+
+        assert(optPtr->symbolCosts != NULL);
+        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+            /* huffman table presumed generated by dictionary */
+            optPtr->priceType = zop_dynamic;
+
+            if (compressedLiterals) {
+                unsigned lit;
+                assert(optPtr->litFreq != NULL);
+                optPtr->litSum = 0;
+                for (lit=0; lit<=MaxLit; lit++) {
+                    U32 const scaleLog = 11;   /* scale to 2K */
+                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                    assert(bitCost <= scaleLog);
+                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litSum += optPtr->litFreq[lit];
+            }   }
+
+            {   unsigned ll;
+                FSE_CState_t llstate;
+                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
+                optPtr->litLengthSum = 0;
+                for (ll=0; ll<=MaxLL; ll++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
+                    assert(bitCost < scaleLog);
+                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
+            }   }
+
+            {   unsigned ml;
+                FSE_CState_t mlstate;
+                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
+                optPtr->matchLengthSum = 0;
+                for (ml=0; ml<=MaxML; ml++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
+                    assert(bitCost < scaleLog);
+                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
+            }   }
+
+            {   unsigned of;
+                FSE_CState_t ofstate;
+                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
+                optPtr->offCodeSum = 0;
+                for (of=0; of<=MaxOff; of++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
+                    assert(bitCost < scaleLog);
+                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
+            }   }
+
+        } else {  /* not a dictionary */
+
+            assert(optPtr->litFreq != NULL);
+            if (compressedLiterals) {
+                unsigned lit = MaxLit;
+                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+            }
+
+            {   unsigned ll;
+                for (ll=0; ll<=MaxLL; ll++)
+                    optPtr->litLengthFreq[ll] = 1;
+            }
+            optPtr->litLengthSum = MaxLL+1;
+
+            {   unsigned ml;
+                for (ml=0; ml<=MaxML; ml++)
+                    optPtr->matchLengthFreq[ml] = 1;
+            }
+            optPtr->matchLengthSum = MaxML+1;
+
+            {   unsigned of;
+                for (of=0; of<=MaxOff; of++)
+                    optPtr->offCodeFreq[of] = 1;
+            }
+            optPtr->offCodeSum = MaxOff+1;
+
+        }
+
+    } else {   /* new block : re-use previous statistics, scaled down */
+
+        if (compressedLiterals)
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+    }
+
+    ZSTD_setBasePrices(optPtr, optLevel);
+}
+
+/* ZSTD_rawLiteralsCost() :
+ * price of literals (only) in specified segment (which length can be 0).
+ * does not include price of literalLength symbol */
+static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
+                                const optState_t* const optPtr,
+                                int optLevel)
+{
+    if (litLength == 0) return 0;
+
+    if (!ZSTD_compressedLiterals(optPtr))
+        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
+
+    if (optPtr->priceType == zop_predef)
+        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
+
+    /* dynamic statistics */
+    {   U32 price = litLength * optPtr->litSumBasePrice;
+        U32 u;
+        for (u=0; u < litLength; u++) {
+            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
+            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+        }
+        return price;
+    }
+}
+
+/* ZSTD_litLengthPrice() :
+ * cost of literalLength symbol */
+static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
+{
+    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
+
+    /* dynamic statistics */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
+             + optPtr->litLengthSumBasePrice
+             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
+    }
+}
+
+/* ZSTD_getMatchPrice() :
+ * Provides the cost of the match part (offset + matchLength) of a sequence
+ * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+FORCE_INLINE_TEMPLATE U32
+ZSTD_getMatchPrice(U32 const offset,
+                   U32 const matchLength,
+             const optState_t* const optPtr,
+                   int const optLevel)
+{
+    U32 price;
+    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const mlBase = matchLength - MINMATCH;
+    assert(matchLength >= MINMATCH);
+
+    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
+        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+
+    /* dynamic statistics */
+    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
+    if ((optLevel<2) /*static*/ && offCode >= 20)
+        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
+
+    /* match Length */
+    {   U32 const mlCode = ZSTD_MLcode(mlBase);
+        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
+    }
+
+    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
+
+    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
+    return price;
+}
+
+/* ZSTD_updateStats() :
+ * assumption : literals + litLengtn <= iend */
+static void ZSTD_updateStats(optState_t* const optPtr,
+                             U32 litLength, const BYTE* literals,
+                             U32 offsetCode, U32 matchLength)
+{
+    /* literals */
+    if (ZSTD_compressedLiterals(optPtr)) {
+        U32 u;
+        for (u=0; u < litLength; u++)
+            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    }
+
+    /* literal Length */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        optPtr->litLengthFreq[llCode]++;
+        optPtr->litLengthSum++;
+    }
+
+    /* match offset code (0-2=>repCode; 3+=>offset+2) */
+    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+        assert(offCode <= MaxOff);
+        optPtr->offCodeFreq[offCode]++;
+        optPtr->offCodeSum++;
+    }
+
+    /* match Length */
+    {   U32 const mlBase = matchLength - MINMATCH;
+        U32 const mlCode = ZSTD_MLcode(mlBase);
+        optPtr->matchLengthFreq[mlCode]++;
+        optPtr->matchLengthSum++;
+    }
+}
+
+
+/* ZSTD_readMINMATCH() :
+ * function safe only for comparisons
+ * assumption : memPtr must be at least 4 bytes before end of buffer */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
+
+
+/* Update hashTable3 up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+                                              U32* nextToUpdate3,
+                                              const BYTE* const ip)
+{
+    U32* const hashTable3 = ms->hashTable3;
+    U32 const hashLog3 = ms->hashLog3;
+    const BYTE* const base = ms->window.base;
+    U32 idx = *nextToUpdate3;
+    U32 const target = (U32)(ip - base);
+    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+    assert(hashLog3 > 0);
+
+    while(idx < target) {
+        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
+        idx++;
+    }
+
+    *nextToUpdate3 = target;
+    return hashTable3[hash3];
+}
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+/* ZSTD_insertBt1() : add one or multiple positions to tree.
+ *  ip : assumed <= iend-8 .
+ * @return : nb of positions added */
+static U32 ZSTD_insertBt1(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                U32 const mls, const int extDict)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32 matchIndex = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    const U32 curr = (U32)(ip-base);
+    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowLow = ms->window.lowLimit;
+    U32 matchEndIdx = curr+8+1;
+    size_t bestLength = 8;
+    U32 nbCompares = 1U << cParams->searchLog;
+#ifdef ZSTD_C_PREDICT
+    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
+    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
+    predictedSmall += (predictedSmall>0);
+    predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
+
+    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
+
+    assert(ip <= iend-8);   /* required for h calculation */
+    hashTable[h] = curr;   /* Update Hash Table */
+
+    assert(windowLow > 0);
+    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < curr);
+
+#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
+        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
+        if (matchIndex == predictedSmall) {
+            /* no need to check length, result known */
+            *smallerPtr = matchIndex;
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+            continue;
+        }
+        if (matchIndex == predictedLarge) {
+            *largerPtr = matchIndex;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+            continue;
+        }
+#endif
+
+        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
+            match = base + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > bestLength) {
+            bestLength = matchLength;
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+        }
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+    {   U32 positions = 0;
+        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
+        assert(matchEndIdx > curr + 8);
+        return MAX(positions, matchEndIdx - (curr + 8));
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+void ZSTD_updateTree_internal(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                const U32 mls, const ZSTD_dictMode_e dictMode)
+{
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+                idx, target, dictMode);
+
+    while(idx < target) {
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        assert(idx < (U32)(idx + forward));
+        idx += forward;
+    }
+    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
+    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
+    ms->nextToUpdate = target;
+}
+
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
+    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
+}
+
+FORCE_INLINE_TEMPLATE
+U32 ZSTD_insertBtAndGetAllMatches (
+                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
+                    ZSTD_matchState_t* ms,
+                    U32* nextToUpdate3,
+                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
+                    const U32 rep[ZSTD_REP_NUM],
+                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+                    const U32 lengthToBeat,
+                    U32 const mls /* template */)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    const BYTE* const base = ms->window.base;
+    U32 const curr = (U32)(ip-base);
+    U32 const hashLog = cParams->hashLog;
+    U32 const minMatch = (mls==3) ? 3 : 4;
+    U32* const hashTable = ms->hashTable;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32 matchIndex  = hashTable[h];
+    U32* const bt   = ms->chainTable;
+    U32 const btLog = cParams->chainLog - 1;
+    U32 const btMask= (1U << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const dictBase = ms->window.dictBase;
+    U32 const dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
+    U32 const matchLow = windowLow ? windowLow : 1;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
+    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 mnum = 0;
+    U32 nbCompares = 1U << cParams->searchLog;
+
+    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
+    const ZSTD_compressionParameters* const dmsCParams =
+                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
+    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
+    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
+    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
+    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
+    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
+    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
+    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
+    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
+    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
+
+    size_t bestLength = lengthToBeat-1;
+    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
+
+    /* check repCode */
+    assert(ll0 <= 1);   /* necessarily 1 or 0 */
+    {   U32 const lastR = ZSTD_REP_NUM + ll0;
+        U32 repCode;
+        for (repCode = ll0; repCode < lastR; repCode++) {
+            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            U32 const repIndex = curr - repOffset;
+            U32 repLen = 0;
+            assert(curr >= dictLimit);
+            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
+                /* We must validate the repcode offset because when we're using a dictionary the
+                 * valid offset range shrinks when the dictionary goes out of bounds.
+                 */
+                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
+                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
+                }
+            } else {  /* repIndex < dictLimit || repIndex >= curr */
+                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
+                                             dmsBase + repIndex - dmsIndexDelta :
+                                             dictBase + repIndex;
+                assert(curr >= windowLow);
+                if ( dictMode == ZSTD_extDict
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
+                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
+                }
+                if (dictMode == ZSTD_dictMatchState
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
+                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
+            }   }
+            /* save longer solution */
+            if (repLen > bestLength) {
+                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
+                            repCode, ll0, repOffset, repLen);
+                bestLength = repLen;
+                matches[mnum].off = repCode - ll0;
+                matches[mnum].len = (U32)repLen;
+                mnum++;
+                if ( (repLen > sufficient_len)
+                   | (ip+repLen == iLimit) ) {  /* best possible */
+                    return mnum;
+    }   }   }   }
+
+    /* HC3 match finder */
+    if ((mls == 3) /*static*/ && (bestLength < mls)) {
+        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
+        if ((matchIndex3 >= matchLow)
+          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
+            size_t mlen;
+            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
+                const BYTE* const match = base + matchIndex3;
+                mlen = ZSTD_count(ip, match, iLimit);
+            } else {
+                const BYTE* const match = dictBase + matchIndex3;
+                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
+            }
+
+            /* save best solution */
+            if (mlen >= mls /* == 3 > bestLength */) {
+                DEBUGLOG(8, "found small match with hlog3, of length %u",
+                            (U32)mlen);
+                bestLength = mlen;
+                assert(curr > matchIndex3);
+                assert(mnum==0);  /* no prior solution */
+                matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
+                matches[0].len = (U32)mlen;
+                mnum = 1;
+                if ( (mlen > sufficient_len) |
+                     (ip+mlen == iLimit) ) {  /* best possible length */
+                    ms->nextToUpdate = curr+1;  /* skip insertion */
+                    return 1;
+        }   }   }
+        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
+    }
+
+    hashTable[h] = curr;   /* Update Hash Table */
+
+    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        const BYTE* match;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(curr > matchIndex);
+
+        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
+            match = base + matchIndex;
+            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
+        } else {
+            match = dictBase + matchIndex;
+            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* prepare for match[matchLength] read */
+        }
+
+        if (matchLength > bestLength) {
+            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
+                    (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+            assert(matchEndIdx > matchIndex);
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+            bestLength = matchLength;
+            matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+            matches[mnum].len = (U32)matchLength;
+            mnum++;
+            if ( (matchLength > ZSTD_OPT_NUM)
+               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
+                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            /* match smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
+        } else {
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+
+    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+    if (dictMode == ZSTD_dictMatchState && nbCompares) {
+        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
+        U32 dictMatchIndex = dms->hashTable[dmsH];
+        const U32* const dmsBt = dms->chainTable;
+        commonLengthSmaller = commonLengthLarger = 0;
+        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
+            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match = dmsBase + dictMatchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
+            if (dictMatchIndex+matchLength >= dmsHighLimit)
+                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+            if (matchLength > bestLength) {
+                matchIndex = dictMatchIndex + dmsIndexDelta;
+                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
+                        (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                bestLength = matchLength;
+                matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].len = (U32)matchLength;
+                mnum++;
+                if ( (matchLength > ZSTD_OPT_NUM)
+                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
+            if (match[matchLength] < ip[matchLength]) {
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                commonLengthLarger = matchLength;
+                dictMatchIndex = nextPtr[0];
+            }
+        }
+    }
+
+    assert(matchEndIdx > curr+8);
+    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
+    return mnum;
+}
+
+
+FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
+                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
+                        ZSTD_matchState_t* ms,
+                        U32* nextToUpdate3,
+                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
+                        const U32 rep[ZSTD_REP_NUM],
+                        U32 const ll0,
+                        U32 const lengthToBeat)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32 const matchLengthSearch = cParams->minMatch;
+    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
+    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
+    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
+    switch(matchLengthSearch)
+    {
+    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
+    default :
+    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
+    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
+    case 7 :
+    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+    }
+}
+
+/* ***********************
+*  LDM helper functions  *
+*************************/
+
+/* Struct containing info needed to make decision about ldm inclusion */
+typedef struct {
+    rawSeqStore_t seqStore;         /* External match candidates store for this block */
+    U32 startPosInBlock;            /* Start position of the current match candidate */
+    U32 endPosInBlock;              /* End position of the current match candidate */
+    U32 offset;                     /* Offset of the match candidate */
+} ZSTD_optLdm_t;
+
+/* ZSTD_optLdm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+ */
+static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
+/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
+ * Calculates the beginning and end of the next match in the current block.
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
+ */
+static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                                   U32 blockBytesRemaining) {
+    rawSeq currSeq;
+    U32 currBlockEndPos;
+    U32 literalsBytesRemaining;
+    U32 matchBytesRemaining;
+
+    /* Setting match end position to MAX to ensure we never use an LDM during this block */
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        return;
+    }
+    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
+       based on ldmSeqStore->posInSequence */
+    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
+    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
+    currBlockEndPos = currPosInBlock + blockBytesRemaining;
+    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
+            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
+            0;
+    matchBytesRemaining = (literalsBytesRemaining == 0) ?
+            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
+            currSeq.matchLength;
+
+    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
+    if (literalsBytesRemaining >= blockBytesRemaining) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
+        return;
+    }
+
+    /* Matches may be < MINMATCH by this process. In that case, we will reject them
+       when we are deciding whether or not to add the ldm */
+    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
+    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
+    optLdm->offset = currSeq.offset;
+
+    if (optLdm->endPosInBlock > currBlockEndPos) {
+        /* Match ends after the block ends, we can't use the whole match */
+        optLdm->endPosInBlock = currBlockEndPos;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
+    } else {
+        /* Consume nb of bytes equal to size of sequence left */
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
+    }
+}
+
+/* ZSTD_optLdm_maybeAddMatch():
+ * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
+ * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
+ */
+static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
+                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
+    U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
+    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
+    U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
+    U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+
+    /* Ensure that current block position is not outside of the match */
+    if (currPosInBlock < optLdm->startPosInBlock
+      || currPosInBlock >= optLdm->endPosInBlock
+      || candidateMatchLength < MINMATCH) {
+        return;
+    }
+
+    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
+        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
+                 candidateOffCode, candidateMatchLength, currPosInBlock);
+        matches[*nbMatches].len = candidateMatchLength;
+        matches[*nbMatches].off = candidateOffCode;
+        (*nbMatches)++;
+    }
+}
+
+/* ZSTD_optLdm_processMatchCandidate():
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
+ */
+static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
+                                              U32 currPosInBlock, U32 remainingBytes) {
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        return;
+    }
+
+    if (currPosInBlock >= optLdm->endPosInBlock) {
+        if (currPosInBlock > optLdm->endPosInBlock) {
+            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
+             * at the end of a match from the ldm seq store, and will often be some bytes
+             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
+             */
+            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
+        } 
+        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
+    }
+    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
+}
+
+/*-*******************************
+*  Optimal parser
+*********************************/
+
+
+static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
+{
+    return sol.litlen + sol.mlen;
+}
+
+#if 0 /* debug */
+
+static void
+listStats(const U32* table, int lastEltID)
+{
+    int const nbElts = lastEltID + 1;
+    int enb;
+    for (enb=0; enb < nbElts; enb++) {
+        (void)table;
+        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
+        RAWLOG(2, "%4i,", table[enb]);
+    }
+    RAWLOG(2, " \n");
+}
+
+#endif
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                               seqStore_t* seqStore,
+                               U32 rep[ZSTD_REP_NUM],
+                         const void* src, size_t srcSize,
+                         const int optLevel,
+                         const ZSTD_dictMode_e dictMode)
+{
+    optState_t* const optStatePtr = &ms->opt;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;
+
+    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
+    ZSTD_match_t* const matches = optStatePtr->matchTable;
+    ZSTD_optimal_t lastSequence;
+    ZSTD_optLdm_t optLdm;
+
+    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
+    assert(optLevel <= 2);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
+    ip += (ip==prefixStart);
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 cur, last_pos = 0;
+
+        /* find first match */
+        {   U32 const litlen = (U32)(ip - anchor);
+            U32 const ll0 = !litlen;
+            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                              (U32)(ip-istart), (U32)(iend - ip));
+            if (!nbMatches) { ip++; continue; }
+
+            /* initialize opt[0] */
+            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+            opt[0].mlen = 0;  /* means is_a_literal */
+            opt[0].litlen = litlen;
+            /* We don't need to include the actual price of the literals because
+             * it is static for the duration of the forward pass, and is included
+             * in every price. We include the literal length to avoid negative
+             * prices when we subtract the previous literal length.
+             */
+            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+
+            /* large match -> immediate encoding */
+            {   U32 const maxML = matches[nbMatches-1].len;
+                U32 const maxOffset = matches[nbMatches-1].off;
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
+                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+
+                if (maxML > sufficient_len) {
+                    lastSequence.litlen = litlen;
+                    lastSequence.mlen = maxML;
+                    lastSequence.off = maxOffset;
+                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                                maxML, sufficient_len);
+                    cur = 0;
+                    last_pos = ZSTD_totalLen(lastSequence);
+                    goto _shortestPath;
+            }   }
+
+            /* set prices for first matches starting position == 0 */
+            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 pos;
+                U32 matchNb;
+                for (pos = 1; pos < minMatch; pos++) {
+                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                }
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const end = matches[matchNb].len;
+                    for ( ; pos <= end ; pos++ ) {
+                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                    pos, ZSTD_fCost(sequencePrice));
+                        opt[pos].mlen = pos;
+                        opt[pos].off = offset;
+                        opt[pos].litlen = litlen;
+                        opt[pos].price = sequencePrice;
+                }   }
+                last_pos = pos-1;
+            }
+        }
+
+        /* check further positions */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const inr = ip + cur;
+            assert(cur < ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+
+            /* Fix current position with one literal if cheaper */
+            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+                int const price = opt[cur-1].price
+                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                assert(price < 1000000000); /* overflow check */
+                if (price <= opt[cur].price) {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
+                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
+                    opt[cur].mlen = 0;
+                    opt[cur].off = 0;
+                    opt[cur].litlen = litlen;
+                    opt[cur].price = price;
+                } else {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
+                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                }
+            }
+
+            /* Set the repcodes of the current position. We must do it here
+             * because we rely on the repcodes of the 2nd to last sequence being
+             * correct to set the next chunks repcodes during the backward
+             * traversal.
+             */
+            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
+            assert(cur >= opt[cur].mlen);
+            if (opt[cur].mlen != 0) {
+                U32 const prev = cur - opt[cur].mlen;
+                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
+            } else {
+                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
+            }
+
+            /* last match must start at a minimum distance of 8 from oend */
+            if (inr > ilimit) continue;
+
+            if (cur == last_pos) break;
+
+            if ( (optLevel==0) /*static_test*/
+              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
+                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
+            }
+
+            {   U32 const ll0 = (opt[cur].mlen != 0);
+                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
+                U32 const previousPrice = opt[cur].price;
+                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 matchNb;
+
+                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                                  (U32)(inr-istart), (U32)(iend-inr));
+
+                if (!nbMatches) {
+                    DEBUGLOG(7, "rPos:%u : no match found", cur);
+                    continue;
+                }
+
+                {   U32 const maxML = matches[nbMatches-1].len;
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
+                                inr-istart, cur, nbMatches, maxML);
+
+                    if ( (maxML > sufficient_len)
+                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
+                        lastSequence.mlen = maxML;
+                        lastSequence.off = matches[nbMatches-1].off;
+                        lastSequence.litlen = litlen;
+                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
+                        last_pos = cur + ZSTD_totalLen(lastSequence);
+                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
+                        goto _shortestPath;
+                }   }
+
+                /* set prices using matches found at position == cur */
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const lastML = matches[matchNb].len;
+                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
+                    U32 mlen;
+
+                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
+                                matchNb, matches[matchNb].off, lastML, litlen);
+
+                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
+                        U32 const pos = cur + mlen;
+                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+
+                        if ((pos > last_pos) || (price < opt[pos].price)) {
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
+                            opt[pos].mlen = mlen;
+                            opt[pos].off = offset;
+                            opt[pos].litlen = litlen;
+                            opt[pos].price = price;
+                        } else {
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
+                        }
+            }   }   }
+        }  /* for (cur = 1; cur <= last_pos; cur++) */
+
+        lastSequence = opt[last_pos];
+        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
+        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
+
+_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
+        assert(opt[0].mlen == 0);
+
+        /* Set the next chunk's repcodes based on the repcodes of the beginning
+         * of the last match, and the last sequence. This avoids us having to
+         * update them while traversing the sequences.
+         */
+        if (lastSequence.mlen != 0) {
+            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+            ZSTD_memcpy(rep, &reps, sizeof(reps));
+        } else {
+            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+        }
+
+        {   U32 const storeEnd = cur + 1;
+            U32 storeStart = storeEnd;
+            U32 seqPos = cur;
+
+            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
+                        last_pos, cur); (void)last_pos;
+            assert(storeEnd < ZSTD_OPT_NUM);
+            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
+            opt[storeEnd] = lastSequence;
+            while (seqPos > 0) {
+                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+                storeStart--;
+                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
+                opt[storeStart] = opt[seqPos];
+                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore")
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offCode = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);   /* must be last sequence */
+                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;   /* will finish */
+                    }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    anchor += advance;
+                    ip = anchor;
+            }   }
+            ZSTD_setBasePrices(optStatePtr, optLevel);
+        }
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+}
+
+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] <<= ZSTD_FREQ_DIV+bonus;
+        table[s]--;
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* used in 2-pass strategy */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+}
+
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ */
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                     seqStore_t* seqStore,
+                     U32 rep[ZSTD_REP_NUM],
+               const void* src, size_t srcSize)
+{
+    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
+    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
+
+    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    assert(ms->opt.litLengthSum == 0);    /* first block */
+    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
+    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
+    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
+
+    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+
+    /* invalidate first scan from history */
+    ZSTD_resetSeqStore(seqStore);
+    ms->window.base -= srcSize;
+    ms->window.dictLimit += (U32)srcSize;
+    ms->window.lowLimit = ms->window.dictLimit;
+    ms->nextToUpdate = ms->window.dictLimit;
+
+    /* re-inforce weight of collected statistics */
+    ZSTD_upscaleStats(&ms->opt);
+}
+
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+
+    /* 2-pass strategy:
+     * this strategy makes a first pass over first block to collect statistics
+     * and seed next round's statistics with it.
+     * After 1st pass, function forgets everything, and starts a new block.
+     * Consequently, this can only work if no data has been previously loaded in tables,
+     * aka, no dictionary, no prefix, no ldm preprocessing.
+     * The compression ratio gain is generally small (~0.5% on first block),
+     * the cost is 2x cpu time on first block. */
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    if ( (ms->opt.litLengthSum==0)   /* first block */
+      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
+      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
+      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
+      && (srcSize > ZSTD_PREDEF_THRESHOLD)
+      ) {
+        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
+    }
+
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+}
+
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+}
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and is only specific for the first block (no prefix) */
diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h
new file mode 100644
index 000000000000..22b862858ba7
--- /dev/null
+++ b/lib/zstd/compress/zstd_opt.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_OPT_H
+#define ZSTD_OPT_H
+
+
+#include "zstd_compress_internal.h"
+
+/* used in ZSTD_loadDictionaryContent() */
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+        /* note : no btultra2 variant for extDict nor dictMatchState,
+         * because btultra2 is not meant to work with dictionaries
+         * and is only specific for the first block (no prefix) */
+
+
+#endif /* ZSTD_OPT_H */
diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
deleted file mode 100644
index 02e92c2cbf4f..000000000000
--- a/lib/zstd/decompress.c
+++ /dev/null
@@ -1,2571 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* ***************************************************************
-*  Tuning parameters
-*****************************************************************/
-/*!
-*  MAXWINDOWSIZE_DEFAULT :
-*  maximum window size accepted by DStream, by default.
-*  Frames requiring more memory will be rejected.
-*/
-#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
-#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */
-#endif
-
-/*-*******************************************************
-*  Dependencies
-*********************************************************/
-#include "fse.h"
-#include "huf.h"
-#include "mem.h" /* low level memory routines */
-#include "zstd_internal.h"
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h> /* memcpy, memmove, memset */
-
-#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
-
-/*-*************************************
-*  Macros
-***************************************/
-#define ZSTD_isError ERR_isError /* for inlining */
-#define FSE_isError ERR_isError
-#define HUF_isError ERR_isError
-
-/*_*******************************************************
-*  Memory operations
-**********************************************************/
-static void ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); }
-
-/*-*************************************************************
-*   Context management
-***************************************************************/
-typedef enum {
-	ZSTDds_getFrameHeaderSize,
-	ZSTDds_decodeFrameHeader,
-	ZSTDds_decodeBlockHeader,
-	ZSTDds_decompressBlock,
-	ZSTDds_decompressLastBlock,
-	ZSTDds_checkChecksum,
-	ZSTDds_decodeSkippableHeader,
-	ZSTDds_skipFrame
-} ZSTD_dStage;
-
-typedef struct {
-	FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
-	FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
-	FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
-	HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
-	U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2];
-	U32 rep[ZSTD_REP_NUM];
-} ZSTD_entropyTables_t;
-
-struct ZSTD_DCtx_s {
-	const FSE_DTable *LLTptr;
-	const FSE_DTable *MLTptr;
-	const FSE_DTable *OFTptr;
-	const HUF_DTable *HUFptr;
-	ZSTD_entropyTables_t entropy;
-	const void *previousDstEnd; /* detect continuity */
-	const void *base;	   /* start of curr segment */
-	const void *vBase;	  /* virtual start of previous segment if it was just before curr one */
-	const void *dictEnd;	/* end of previous segment */
-	size_t expected;
-	ZSTD_frameParams fParams;
-	blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
-	ZSTD_dStage stage;
-	U32 litEntropy;
-	U32 fseEntropy;
-	struct xxh64_state xxhState;
-	size_t headerSize;
-	U32 dictID;
-	const BYTE *litPtr;
-	ZSTD_customMem customMem;
-	size_t litSize;
-	size_t rleSize;
-	BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
-	BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
-
-size_t ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); }
-
-size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx)
-{
-	dctx->expected = ZSTD_frameHeaderSize_prefix;
-	dctx->stage = ZSTDds_getFrameHeaderSize;
-	dctx->previousDstEnd = NULL;
-	dctx->base = NULL;
-	dctx->vBase = NULL;
-	dctx->dictEnd = NULL;
-	dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
-	dctx->litEntropy = dctx->fseEntropy = 0;
-	dctx->dictID = 0;
-	ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
-	memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
-	dctx->LLTptr = dctx->entropy.LLTable;
-	dctx->MLTptr = dctx->entropy.MLTable;
-	dctx->OFTptr = dctx->entropy.OFTable;
-	dctx->HUFptr = dctx->entropy.hufTable;
-	return 0;
-}
-
-ZSTD_DCtx *ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_DCtx *dctx;
-
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem);
-	if (!dctx)
-		return NULL;
-	memcpy(&dctx->customMem, &customMem, sizeof(customMem));
-	ZSTD_decompressBegin(dctx);
-	return dctx;
-}
-
-ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	return ZSTD_createDCtx_advanced(stackMem);
-}
-
-size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx)
-{
-	if (dctx == NULL)
-		return 0; /* support free on NULL */
-	ZSTD_free(dctx, dctx->customMem);
-	return 0; /* reserved as a potential error code in the future */
-}
-
-void ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx)
-{
-	size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max;
-	memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
-}
-
-static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict);
-
-/*-*************************************************************
-*   Decompression section
-***************************************************************/
-
-/*! ZSTD_isFrame() :
- *  Tells if the content of `buffer` starts with a valid Frame Identifier.
- *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
- *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
- *  Note 3 : Skippable Frame Identifiers are considered valid. */
-unsigned ZSTD_isFrame(const void *buffer, size_t size)
-{
-	if (size < 4)
-		return 0;
-	{
-		U32 const magic = ZSTD_readLE32(buffer);
-		if (magic == ZSTD_MAGICNUMBER)
-			return 1;
-		if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START)
-			return 1;
-	}
-	return 0;
-}
-
-/** ZSTD_frameHeaderSize() :
-*   srcSize must be >= ZSTD_frameHeaderSize_prefix.
-*   @return : size of the Frame Header */
-static size_t ZSTD_frameHeaderSize(const void *src, size_t srcSize)
-{
-	if (srcSize < ZSTD_frameHeaderSize_prefix)
-		return ERROR(srcSize_wrong);
-	{
-		BYTE const fhd = ((const BYTE *)src)[4];
-		U32 const dictID = fhd & 3;
-		U32 const singleSegment = (fhd >> 5) & 1;
-		U32 const fcsId = fhd >> 6;
-		return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId);
-	}
-}
-
-/** ZSTD_getFrameParams() :
-*   decode Frame Header, or require larger `srcSize`.
-*   @return : 0, `fparamsPtr` is correctly filled,
-*            >0, `srcSize` is too small, result is expected `srcSize`,
-*             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize)
-{
-	const BYTE *ip = (const BYTE *)src;
-
-	if (srcSize < ZSTD_frameHeaderSize_prefix)
-		return ZSTD_frameHeaderSize_prefix;
-	if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) {
-		if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-			if (srcSize < ZSTD_skippableHeaderSize)
-				return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */
-			memset(fparamsPtr, 0, sizeof(*fparamsPtr));
-			fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4);
-			fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
-			return 0;
-		}
-		return ERROR(prefix_unknown);
-	}
-
-	/* ensure there is enough `srcSize` to fully read/decode frame header */
-	{
-		size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
-		if (srcSize < fhsize)
-			return fhsize;
-	}
-
-	{
-		BYTE const fhdByte = ip[4];
-		size_t pos = 5;
-		U32 const dictIDSizeCode = fhdByte & 3;
-		U32 const checksumFlag = (fhdByte >> 2) & 1;
-		U32 const singleSegment = (fhdByte >> 5) & 1;
-		U32 const fcsID = fhdByte >> 6;
-		U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
-		U32 windowSize = 0;
-		U32 dictID = 0;
-		U64 frameContentSize = 0;
-		if ((fhdByte & 0x08) != 0)
-			return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */
-		if (!singleSegment) {
-			BYTE const wlByte = ip[pos++];
-			U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-			if (windowLog > ZSTD_WINDOWLOG_MAX)
-				return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */
-			windowSize = (1U << windowLog);
-			windowSize += (windowSize >> 3) * (wlByte & 7);
-		}
-
-		switch (dictIDSizeCode) {
-		default: /* impossible */
-		case 0: break;
-		case 1:
-			dictID = ip[pos];
-			pos++;
-			break;
-		case 2:
-			dictID = ZSTD_readLE16(ip + pos);
-			pos += 2;
-			break;
-		case 3:
-			dictID = ZSTD_readLE32(ip + pos);
-			pos += 4;
-			break;
-		}
-		switch (fcsID) {
-		default: /* impossible */
-		case 0:
-			if (singleSegment)
-				frameContentSize = ip[pos];
-			break;
-		case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break;
-		case 2: frameContentSize = ZSTD_readLE32(ip + pos); break;
-		case 3: frameContentSize = ZSTD_readLE64(ip + pos); break;
-		}
-		if (!windowSize)
-			windowSize = (U32)frameContentSize;
-		if (windowSize > windowSizeMax)
-			return ERROR(frameParameter_windowTooLarge);
-		fparamsPtr->frameContentSize = frameContentSize;
-		fparamsPtr->windowSize = windowSize;
-		fparamsPtr->dictID = dictID;
-		fparamsPtr->checksumFlag = checksumFlag;
-	}
-	return 0;
-}
-
-/** ZSTD_getFrameContentSize() :
-*   compatible with legacy mode
-*   @return : decompressed size of the single frame pointed to be `src` if known, otherwise
-*             - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
-*             - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
-{
-	{
-		ZSTD_frameParams fParams;
-		if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0)
-			return ZSTD_CONTENTSIZE_ERROR;
-		if (fParams.windowSize == 0) {
-			/* Either skippable or empty frame, size == 0 either way */
-			return 0;
-		} else if (fParams.frameContentSize != 0) {
-			return fParams.frameContentSize;
-		} else {
-			return ZSTD_CONTENTSIZE_UNKNOWN;
-		}
-	}
-}
-
-/** ZSTD_findDecompressedSize() :
- *  compatible with legacy mode
- *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
- *      skippable frames
- *  @return : decompressed size of the frames contained */
-unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize)
-{
-	{
-		unsigned long long totalDstSize = 0;
-		while (srcSize >= ZSTD_frameHeaderSize_prefix) {
-			const U32 magicNumber = ZSTD_readLE32(src);
-
-			if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-				size_t skippableSize;
-				if (srcSize < ZSTD_skippableHeaderSize)
-					return ERROR(srcSize_wrong);
-				skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
-				if (srcSize < skippableSize) {
-					return ZSTD_CONTENTSIZE_ERROR;
-				}
-
-				src = (const BYTE *)src + skippableSize;
-				srcSize -= skippableSize;
-				continue;
-			}
-
-			{
-				unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
-				if (ret >= ZSTD_CONTENTSIZE_ERROR)
-					return ret;
-
-				/* check for overflow */
-				if (totalDstSize + ret < totalDstSize)
-					return ZSTD_CONTENTSIZE_ERROR;
-				totalDstSize += ret;
-			}
-			{
-				size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
-				if (ZSTD_isError(frameSrcSize)) {
-					return ZSTD_CONTENTSIZE_ERROR;
-				}
-
-				src = (const BYTE *)src + frameSrcSize;
-				srcSize -= frameSrcSize;
-			}
-		}
-
-		if (srcSize) {
-			return ZSTD_CONTENTSIZE_ERROR;
-		}
-
-		return totalDstSize;
-	}
-}
-
-/** ZSTD_decodeFrameHeader() :
-*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
-*   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize)
-{
-	size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize);
-	if (ZSTD_isError(result))
-		return result; /* invalid header */
-	if (result > 0)
-		return ERROR(srcSize_wrong); /* headerSize too small */
-	if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
-		return ERROR(dictionary_wrong);
-	if (dctx->fParams.checksumFlag)
-		xxh64_reset(&dctx->xxhState, 0);
-	return 0;
-}
-
-typedef struct {
-	blockType_e blockType;
-	U32 lastBlock;
-	U32 origSize;
-} blockProperties_t;
-
-/*! ZSTD_getcBlockSize() :
-*   Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr)
-{
-	if (srcSize < ZSTD_blockHeaderSize)
-		return ERROR(srcSize_wrong);
-	{
-		U32 const cBlockHeader = ZSTD_readLE24(src);
-		U32 const cSize = cBlockHeader >> 3;
-		bpPtr->lastBlock = cBlockHeader & 1;
-		bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
-		bpPtr->origSize = cSize; /* only useful for RLE */
-		if (bpPtr->blockType == bt_rle)
-			return 1;
-		if (bpPtr->blockType == bt_reserved)
-			return ERROR(corruption_detected);
-		return cSize;
-	}
-}
-
-static size_t ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	if (srcSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memcpy(dst, src, srcSize);
-	return srcSize;
-}
-
-static size_t ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize)
-{
-	if (srcSize != 1)
-		return ERROR(srcSize_wrong);
-	if (regenSize > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memset(dst, *(const BYTE *)src, regenSize);
-	return regenSize;
-}
-
-/*! ZSTD_decodeLiteralsBlock() :
-	@return : nb of bytes read from src (< srcSize ) */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
-{
-	if (srcSize < MIN_CBLOCK_SIZE)
-		return ERROR(corruption_detected);
-
-	{
-		const BYTE *const istart = (const BYTE *)src;
-		symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
-
-		switch (litEncType) {
-		case set_repeat:
-			if (dctx->litEntropy == 0)
-				return ERROR(dictionary_corrupted);
-			fallthrough;
-		case set_compressed:
-			if (srcSize < 5)
-				return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
-			{
-				size_t lhSize, litSize, litCSize;
-				U32 singleStream = 0;
-				U32 const lhlCode = (istart[0] >> 2) & 3;
-				U32 const lhc = ZSTD_readLE32(istart);
-				switch (lhlCode) {
-				case 0:
-				case 1:
-				default: /* note : default is impossible, since lhlCode into [0..3] */
-					/* 2 - 2 - 10 - 10 */
-					singleStream = !lhlCode;
-					lhSize = 3;
-					litSize = (lhc >> 4) & 0x3FF;
-					litCSize = (lhc >> 14) & 0x3FF;
-					break;
-				case 2:
-					/* 2 - 2 - 14 - 14 */
-					lhSize = 4;
-					litSize = (lhc >> 4) & 0x3FFF;
-					litCSize = lhc >> 18;
-					break;
-				case 3:
-					/* 2 - 2 - 18 - 18 */
-					lhSize = 5;
-					litSize = (lhc >> 4) & 0x3FFFF;
-					litCSize = (lhc >> 22) + (istart[4] << 10);
-					break;
-				}
-				if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-					return ERROR(corruption_detected);
-				if (litCSize + lhSize > srcSize)
-					return ERROR(corruption_detected);
-
-				if (HUF_isError(
-					(litEncType == set_repeat)
-					    ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)
-							    : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr))
-					    : (singleStream
-						   ? HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
-										 dctx->entropy.workspace, sizeof(dctx->entropy.workspace))
-						   : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
-										   dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
-					return ERROR(corruption_detected);
-
-				dctx->litPtr = dctx->litBuffer;
-				dctx->litSize = litSize;
-				dctx->litEntropy = 1;
-				if (litEncType == set_compressed)
-					dctx->HUFptr = dctx->entropy.hufTable;
-				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
-				return litCSize + lhSize;
-			}
-
-		case set_basic: {
-			size_t litSize, lhSize;
-			U32 const lhlCode = ((istart[0]) >> 2) & 3;
-			switch (lhlCode) {
-			case 0:
-			case 2:
-			default: /* note : default is impossible, since lhlCode into [0..3] */
-				lhSize = 1;
-				litSize = istart[0] >> 3;
-				break;
-			case 1:
-				lhSize = 2;
-				litSize = ZSTD_readLE16(istart) >> 4;
-				break;
-			case 3:
-				lhSize = 3;
-				litSize = ZSTD_readLE24(istart) >> 4;
-				break;
-			}
-
-			if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
-				if (litSize + lhSize > srcSize)
-					return ERROR(corruption_detected);
-				memcpy(dctx->litBuffer, istart + lhSize, litSize);
-				dctx->litPtr = dctx->litBuffer;
-				dctx->litSize = litSize;
-				memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
-				return lhSize + litSize;
-			}
-			/* direct reference into compressed stream */
-			dctx->litPtr = istart + lhSize;
-			dctx->litSize = litSize;
-			return lhSize + litSize;
-		}
-
-		case set_rle: {
-			U32 const lhlCode = ((istart[0]) >> 2) & 3;
-			size_t litSize, lhSize;
-			switch (lhlCode) {
-			case 0:
-			case 2:
-			default: /* note : default is impossible, since lhlCode into [0..3] */
-				lhSize = 1;
-				litSize = istart[0] >> 3;
-				break;
-			case 1:
-				lhSize = 2;
-				litSize = ZSTD_readLE16(istart) >> 4;
-				break;
-			case 3:
-				lhSize = 3;
-				litSize = ZSTD_readLE24(istart) >> 4;
-				if (srcSize < 4)
-					return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
-				break;
-			}
-			if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-				return ERROR(corruption_detected);
-			memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
-			dctx->litPtr = dctx->litBuffer;
-			dctx->litSize = litSize;
-			return lhSize + 1;
-		}
-		default:
-			return ERROR(corruption_detected); /* impossible */
-		}
-	}
-}
-
-typedef union {
-	FSE_decode_t realData;
-	U32 alignedBy4;
-} FSE_decode_t4;
-
-static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = {
-    {{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 4}},		 /* 0 : base, symbol, bits */
-    {{16, 0, 4}},
-    {{32, 1, 5}},
-    {{0, 3, 5}},
-    {{0, 4, 5}},
-    {{0, 6, 5}},
-    {{0, 7, 5}},
-    {{0, 9, 5}},
-    {{0, 10, 5}},
-    {{0, 12, 5}},
-    {{0, 14, 6}},
-    {{0, 16, 5}},
-    {{0, 18, 5}},
-    {{0, 19, 5}},
-    {{0, 21, 5}},
-    {{0, 22, 5}},
-    {{0, 24, 5}},
-    {{32, 25, 5}},
-    {{0, 26, 5}},
-    {{0, 27, 6}},
-    {{0, 29, 6}},
-    {{0, 31, 6}},
-    {{32, 0, 4}},
-    {{0, 1, 4}},
-    {{0, 2, 5}},
-    {{32, 4, 5}},
-    {{0, 5, 5}},
-    {{32, 7, 5}},
-    {{0, 8, 5}},
-    {{32, 10, 5}},
-    {{0, 11, 5}},
-    {{0, 13, 6}},
-    {{32, 16, 5}},
-    {{0, 17, 5}},
-    {{32, 19, 5}},
-    {{0, 20, 5}},
-    {{32, 22, 5}},
-    {{0, 23, 5}},
-    {{0, 25, 4}},
-    {{16, 25, 4}},
-    {{32, 26, 5}},
-    {{0, 28, 6}},
-    {{0, 30, 6}},
-    {{48, 0, 4}},
-    {{16, 1, 4}},
-    {{32, 2, 5}},
-    {{32, 3, 5}},
-    {{32, 5, 5}},
-    {{32, 6, 5}},
-    {{32, 8, 5}},
-    {{32, 9, 5}},
-    {{32, 11, 5}},
-    {{32, 12, 5}},
-    {{0, 15, 6}},
-    {{32, 17, 5}},
-    {{32, 18, 5}},
-    {{32, 20, 5}},
-    {{32, 21, 5}},
-    {{32, 23, 5}},
-    {{32, 24, 5}},
-    {{0, 35, 6}},
-    {{0, 34, 6}},
-    {{0, 33, 6}},
-    {{0, 32, 6}},
-}; /* LL_defaultDTable */
-
-static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = {
-    {{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 6}},		 /* 0 : base, symbol, bits */
-    {{0, 1, 4}},
-    {{32, 2, 5}},
-    {{0, 3, 5}},
-    {{0, 5, 5}},
-    {{0, 6, 5}},
-    {{0, 8, 5}},
-    {{0, 10, 6}},
-    {{0, 13, 6}},
-    {{0, 16, 6}},
-    {{0, 19, 6}},
-    {{0, 22, 6}},
-    {{0, 25, 6}},
-    {{0, 28, 6}},
-    {{0, 31, 6}},
-    {{0, 33, 6}},
-    {{0, 35, 6}},
-    {{0, 37, 6}},
-    {{0, 39, 6}},
-    {{0, 41, 6}},
-    {{0, 43, 6}},
-    {{0, 45, 6}},
-    {{16, 1, 4}},
-    {{0, 2, 4}},
-    {{32, 3, 5}},
-    {{0, 4, 5}},
-    {{32, 6, 5}},
-    {{0, 7, 5}},
-    {{0, 9, 6}},
-    {{0, 12, 6}},
-    {{0, 15, 6}},
-    {{0, 18, 6}},
-    {{0, 21, 6}},
-    {{0, 24, 6}},
-    {{0, 27, 6}},
-    {{0, 30, 6}},
-    {{0, 32, 6}},
-    {{0, 34, 6}},
-    {{0, 36, 6}},
-    {{0, 38, 6}},
-    {{0, 40, 6}},
-    {{0, 42, 6}},
-    {{0, 44, 6}},
-    {{32, 1, 4}},
-    {{48, 1, 4}},
-    {{16, 2, 4}},
-    {{32, 4, 5}},
-    {{32, 5, 5}},
-    {{32, 7, 5}},
-    {{32, 8, 5}},
-    {{0, 11, 6}},
-    {{0, 14, 6}},
-    {{0, 17, 6}},
-    {{0, 20, 6}},
-    {{0, 23, 6}},
-    {{0, 26, 6}},
-    {{0, 29, 6}},
-    {{0, 52, 6}},
-    {{0, 51, 6}},
-    {{0, 50, 6}},
-    {{0, 49, 6}},
-    {{0, 48, 6}},
-    {{0, 47, 6}},
-    {{0, 46, 6}},
-}; /* ML_defaultDTable */
-
-static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = {
-    {{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 5}},		 /* 0 : base, symbol, bits */
-    {{0, 6, 4}},
-    {{0, 9, 5}},
-    {{0, 15, 5}},
-    {{0, 21, 5}},
-    {{0, 3, 5}},
-    {{0, 7, 4}},
-    {{0, 12, 5}},
-    {{0, 18, 5}},
-    {{0, 23, 5}},
-    {{0, 5, 5}},
-    {{0, 8, 4}},
-    {{0, 14, 5}},
-    {{0, 20, 5}},
-    {{0, 2, 5}},
-    {{16, 7, 4}},
-    {{0, 11, 5}},
-    {{0, 17, 5}},
-    {{0, 22, 5}},
-    {{0, 4, 5}},
-    {{16, 8, 4}},
-    {{0, 13, 5}},
-    {{0, 19, 5}},
-    {{0, 1, 5}},
-    {{16, 6, 4}},
-    {{0, 10, 5}},
-    {{0, 16, 5}},
-    {{0, 28, 5}},
-    {{0, 27, 5}},
-    {{0, 26, 5}},
-    {{0, 25, 5}},
-    {{0, 24, 5}},
-}; /* OF_defaultDTable */
-
-/*! ZSTD_buildSeqTable() :
-	@return : nb bytes read from src,
-			  or an error code if it fails, testable with ZSTD_isError()
-*/
-static size_t ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr, symbolEncodingType_e type, U32 max, U32 maxLog, const void *src,
-				 size_t srcSize, const FSE_decode_t4 *defaultTable, U32 flagRepeatTable, void *workspace, size_t workspaceSize)
-{
-	const void *const tmpPtr = defaultTable; /* bypass strict aliasing */
-	switch (type) {
-	case set_rle:
-		if (!srcSize)
-			return ERROR(srcSize_wrong);
-		if ((*(const BYTE *)src) > max)
-			return ERROR(corruption_detected);
-		FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src);
-		*DTablePtr = DTableSpace;
-		return 1;
-	case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0;
-	case set_repeat:
-		if (!flagRepeatTable)
-			return ERROR(corruption_detected);
-		return 0;
-	default: /* impossible */
-	case set_compressed: {
-		U32 tableLog;
-		S16 *norm = (S16 *)workspace;
-		size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
-
-		if ((spaceUsed32 << 2) > workspaceSize)
-			return ERROR(GENERIC);
-		workspace = (U32 *)workspace + spaceUsed32;
-		workspaceSize -= (spaceUsed32 << 2);
-		{
-			size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-			if (FSE_isError(headerSize))
-				return ERROR(corruption_detected);
-			if (tableLog > maxLog)
-				return ERROR(corruption_detected);
-			FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize);
-			*DTablePtr = DTableSpace;
-			return headerSize;
-		}
-	}
-	}
-}
-
-size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize)
-{
-	const BYTE *const istart = (const BYTE *const)src;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *ip = istart;
-
-	/* check */
-	if (srcSize < MIN_SEQUENCES_SIZE)
-		return ERROR(srcSize_wrong);
-
-	/* SeqHead */
-	{
-		int nbSeq = *ip++;
-		if (!nbSeq) {
-			*nbSeqPtr = 0;
-			return 1;
-		}
-		if (nbSeq > 0x7F) {
-			if (nbSeq == 0xFF) {
-				if (ip + 2 > iend)
-					return ERROR(srcSize_wrong);
-				nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2;
-			} else {
-				if (ip >= iend)
-					return ERROR(srcSize_wrong);
-				nbSeq = ((nbSeq - 0x80) << 8) + *ip++;
-			}
-		}
-		*nbSeqPtr = nbSeq;
-	}
-
-	/* FSE table descriptors */
-	if (ip + 4 > iend)
-		return ERROR(srcSize_wrong); /* minimum possible size */
-	{
-		symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
-		symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
-		symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
-		ip++;
-
-		/* Build DTables */
-		{
-			size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip,
-								  LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-			if (ZSTD_isError(llhSize))
-				return ERROR(corruption_detected);
-			ip += llhSize;
-		}
-		{
-			size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip,
-								  OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-			if (ZSTD_isError(ofhSize))
-				return ERROR(corruption_detected);
-			ip += ofhSize;
-		}
-		{
-			size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip,
-								  ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-			if (ZSTD_isError(mlhSize))
-				return ERROR(corruption_detected);
-			ip += mlhSize;
-		}
-	}
-
-	return ip - istart;
-}
-
-typedef struct {
-	size_t litLength;
-	size_t matchLength;
-	size_t offset;
-	const BYTE *match;
-} seq_t;
-
-typedef struct {
-	BIT_DStream_t DStream;
-	FSE_DState_t stateLL;
-	FSE_DState_t stateOffb;
-	FSE_DState_t stateML;
-	size_t prevOffset[ZSTD_REP_NUM];
-	const BYTE *base;
-	size_t pos;
-	uPtrDiff gotoDict;
-} seqState_t;
-
-FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-			      const BYTE *const vBase, const BYTE *const dictEnd)
-{
-	BYTE *const oLitEnd = op + sequence.litLength;
-	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-	const BYTE *match = oLitEnd - sequence.offset;
-
-	/* check */
-	if (oMatchEnd > oend)
-		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-	if (iLitEnd > litLimit)
-		return ERROR(corruption_detected); /* over-read beyond lit buffer */
-	if (oLitEnd <= oend_w)
-		return ERROR(GENERIC); /* Precondition */
-
-	/* copy literals */
-	if (op < oend_w) {
-		ZSTD_wildcopy(op, *litPtr, oend_w - op);
-		*litPtr += oend_w - op;
-		op = oend_w;
-	}
-	while (op < oLitEnd)
-		*op++ = *(*litPtr)++;
-
-	/* copy Match */
-	if (sequence.offset > (size_t)(oLitEnd - base)) {
-		/* offset beyond prefix */
-		if (sequence.offset > (size_t)(oLitEnd - vBase))
-			return ERROR(corruption_detected);
-		match = dictEnd - (base - match);
-		if (match + sequence.matchLength <= dictEnd) {
-			memmove(oLitEnd, match, sequence.matchLength);
-			return sequenceLength;
-		}
-		/* span extDict & currPrefixSegment */
-		{
-			size_t const length1 = dictEnd - match;
-			memmove(oLitEnd, match, length1);
-			op = oLitEnd + length1;
-			sequence.matchLength -= length1;
-			match = base;
-		}
-	}
-	while (op < oMatchEnd)
-		*op++ = *match++;
-	return sequenceLength;
-}
-
-static seq_t ZSTD_decodeSequence(seqState_t *seqState)
-{
-	seq_t seq;
-
-	U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-	U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-	U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
-
-	U32 const llBits = LL_bits[llCode];
-	U32 const mlBits = ML_bits[mlCode];
-	U32 const ofBits = ofCode;
-	U32 const totalBits = llBits + mlBits + ofBits;
-
-	static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
-					       20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
-
-	static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
-					       21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
-					       43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
-
-	static const U32 OF_base[MaxOff + 1] = {0,       1,	1,	5,	0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
-						0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
-						0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
-
-	/* sequence */
-	{
-		size_t offset;
-		if (!ofCode)
-			offset = 0;
-		else {
-			offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-			if (ZSTD_32bits())
-				BIT_reloadDStream(&seqState->DStream);
-		}
-
-		if (ofCode <= 1) {
-			offset += (llCode == 0);
-			if (offset) {
-				size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-				temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
-				if (offset != 1)
-					seqState->prevOffset[2] = seqState->prevOffset[1];
-				seqState->prevOffset[1] = seqState->prevOffset[0];
-				seqState->prevOffset[0] = offset = temp;
-			} else {
-				offset = seqState->prevOffset[0];
-			}
-		} else {
-			seqState->prevOffset[2] = seqState->prevOffset[1];
-			seqState->prevOffset[1] = seqState->prevOffset[0];
-			seqState->prevOffset[0] = offset;
-		}
-		seq.offset = offset;
-	}
-
-	seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() && (mlBits + llBits > 24))
-		BIT_reloadDStream(&seqState->DStream);
-
-	seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-		BIT_reloadDStream(&seqState->DStream);
-
-	/* ANS state update */
-	FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
-	FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
-	if (ZSTD_32bits())
-		BIT_reloadDStream(&seqState->DStream);		   /* <= 18 bits */
-	FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
-
-	seq.match = NULL;
-
-	return seq;
-}
-
-FORCE_INLINE
-size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-			 const BYTE *const vBase, const BYTE *const dictEnd)
-{
-	BYTE *const oLitEnd = op + sequence.litLength;
-	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-	const BYTE *match = oLitEnd - sequence.offset;
-
-	/* check */
-	if (oMatchEnd > oend)
-		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-	if (iLitEnd > litLimit)
-		return ERROR(corruption_detected); /* over-read beyond lit buffer */
-	if (oLitEnd > oend_w)
-		return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
-
-	/* copy Literals */
-	ZSTD_copy8(op, *litPtr);
-	if (sequence.litLength > 8)
-		ZSTD_wildcopy(op + 8, (*litPtr) + 8,
-			      sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-	op = oLitEnd;
-	*litPtr = iLitEnd; /* update for next sequence */
-
-	/* copy Match */
-	if (sequence.offset > (size_t)(oLitEnd - base)) {
-		/* offset beyond prefix */
-		if (sequence.offset > (size_t)(oLitEnd - vBase))
-			return ERROR(corruption_detected);
-		match = dictEnd + (match - base);
-		if (match + sequence.matchLength <= dictEnd) {
-			memmove(oLitEnd, match, sequence.matchLength);
-			return sequenceLength;
-		}
-		/* span extDict & currPrefixSegment */
-		{
-			size_t const length1 = dictEnd - match;
-			memmove(oLitEnd, match, length1);
-			op = oLitEnd + length1;
-			sequence.matchLength -= length1;
-			match = base;
-			if (op > oend_w || sequence.matchLength < MINMATCH) {
-				U32 i;
-				for (i = 0; i < sequence.matchLength; ++i)
-					op[i] = match[i];
-				return sequenceLength;
-			}
-		}
-	}
-	/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-	/* match within prefix */
-	if (sequence.offset < 8) {
-		/* close range match, overlap */
-		static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-		static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
-		int const sub2 = dec64table[sequence.offset];
-		op[0] = match[0];
-		op[1] = match[1];
-		op[2] = match[2];
-		op[3] = match[3];
-		match += dec32table[sequence.offset];
-		ZSTD_copy4(op + 4, match);
-		match -= sub2;
-	} else {
-		ZSTD_copy8(op, match);
-	}
-	op += 8;
-	match += 8;
-
-	if (oMatchEnd > oend - (16 - MINMATCH)) {
-		if (op < oend_w) {
-			ZSTD_wildcopy(op, match, oend_w - op);
-			match += oend_w - op;
-			op = oend_w;
-		}
-		while (op < oMatchEnd)
-			*op++ = *match++;
-	} else {
-		ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
-	}
-	return sequenceLength;
-}
-
-static size_t ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
-{
-	const BYTE *ip = (const BYTE *)seqStart;
-	const BYTE *const iend = ip + seqSize;
-	BYTE *const ostart = (BYTE * const)dst;
-	BYTE *const oend = ostart + maxDstSize;
-	BYTE *op = ostart;
-	const BYTE *litPtr = dctx->litPtr;
-	const BYTE *const litEnd = litPtr + dctx->litSize;
-	const BYTE *const base = (const BYTE *)(dctx->base);
-	const BYTE *const vBase = (const BYTE *)(dctx->vBase);
-	const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
-	int nbSeq;
-
-	/* Build Decoding Tables */
-	{
-		size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
-		if (ZSTD_isError(seqHSize))
-			return seqHSize;
-		ip += seqHSize;
-	}
-
-	/* Regen sequences */
-	if (nbSeq) {
-		seqState_t seqState;
-		dctx->fseEntropy = 1;
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				seqState.prevOffset[i] = dctx->entropy.rep[i];
-		}
-		CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
-		FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-		FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-		FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
-		for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) {
-			nbSeq--;
-			{
-				seq_t const sequence = ZSTD_decodeSequence(&seqState);
-				size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
-				if (ZSTD_isError(oneSeqSize))
-					return oneSeqSize;
-				op += oneSeqSize;
-			}
-		}
-
-		/* check if reached exact end */
-		if (nbSeq)
-			return ERROR(corruption_detected);
-		/* save reps for next block */
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
-		}
-	}
-
-	/* last literal segment */
-	{
-		size_t const lastLLSize = litEnd - litPtr;
-		if (lastLLSize > (size_t)(oend - op))
-			return ERROR(dstSize_tooSmall);
-		memcpy(op, litPtr, lastLLSize);
-		op += lastLLSize;
-	}
-
-	return op - ostart;
-}
-
-FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets)
-{
-	seq_t seq;
-
-	U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-	U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-	U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
-
-	U32 const llBits = LL_bits[llCode];
-	U32 const mlBits = ML_bits[mlCode];
-	U32 const ofBits = ofCode;
-	U32 const totalBits = llBits + mlBits + ofBits;
-
-	static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
-					       20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
-
-	static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
-					       21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
-					       43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
-
-	static const U32 OF_base[MaxOff + 1] = {0,       1,	1,	5,	0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
-						0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
-						0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
-
-	/* sequence */
-	{
-		size_t offset;
-		if (!ofCode)
-			offset = 0;
-		else {
-			if (longOffsets) {
-				int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
-				offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-				if (ZSTD_32bits() || extraBits)
-					BIT_reloadDStream(&seqState->DStream);
-				if (extraBits)
-					offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-			} else {
-				offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-				if (ZSTD_32bits())
-					BIT_reloadDStream(&seqState->DStream);
-			}
-		}
-
-		if (ofCode <= 1) {
-			offset += (llCode == 0);
-			if (offset) {
-				size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-				temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
-				if (offset != 1)
-					seqState->prevOffset[2] = seqState->prevOffset[1];
-				seqState->prevOffset[1] = seqState->prevOffset[0];
-				seqState->prevOffset[0] = offset = temp;
-			} else {
-				offset = seqState->prevOffset[0];
-			}
-		} else {
-			seqState->prevOffset[2] = seqState->prevOffset[1];
-			seqState->prevOffset[1] = seqState->prevOffset[0];
-			seqState->prevOffset[0] = offset;
-		}
-		seq.offset = offset;
-	}
-
-	seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() && (mlBits + llBits > 24))
-		BIT_reloadDStream(&seqState->DStream);
-
-	seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
-	if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-		BIT_reloadDStream(&seqState->DStream);
-
-	{
-		size_t const pos = seqState->pos + seq.litLength;
-		seq.match = seqState->base + pos - seq.offset; /* single memory segment */
-		if (seq.offset > pos)
-			seq.match += seqState->gotoDict; /* separate memory segment */
-		seqState->pos = pos + seq.matchLength;
-	}
-
-	/* ANS state update */
-	FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
-	FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
-	if (ZSTD_32bits())
-		BIT_reloadDStream(&seqState->DStream);		   /* <= 18 bits */
-	FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
-
-	return seq;
-}
-
-static seq_t ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize)
-{
-	if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) {
-		return ZSTD_decodeSequenceLong_generic(seqState, 1);
-	} else {
-		return ZSTD_decodeSequenceLong_generic(seqState, 0);
-	}
-}
-
-FORCE_INLINE
-size_t ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-			     const BYTE *const vBase, const BYTE *const dictEnd)
-{
-	BYTE *const oLitEnd = op + sequence.litLength;
-	size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-	BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-	BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-	const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-	const BYTE *match = sequence.match;
-
-	/* check */
-	if (oMatchEnd > oend)
-		return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-	if (iLitEnd > litLimit)
-		return ERROR(corruption_detected); /* over-read beyond lit buffer */
-	if (oLitEnd > oend_w)
-		return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
-
-	/* copy Literals */
-	ZSTD_copy8(op, *litPtr);
-	if (sequence.litLength > 8)
-		ZSTD_wildcopy(op + 8, (*litPtr) + 8,
-			      sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-	op = oLitEnd;
-	*litPtr = iLitEnd; /* update for next sequence */
-
-	/* copy Match */
-	if (sequence.offset > (size_t)(oLitEnd - base)) {
-		/* offset beyond prefix */
-		if (sequence.offset > (size_t)(oLitEnd - vBase))
-			return ERROR(corruption_detected);
-		if (match + sequence.matchLength <= dictEnd) {
-			memmove(oLitEnd, match, sequence.matchLength);
-			return sequenceLength;
-		}
-		/* span extDict & currPrefixSegment */
-		{
-			size_t const length1 = dictEnd - match;
-			memmove(oLitEnd, match, length1);
-			op = oLitEnd + length1;
-			sequence.matchLength -= length1;
-			match = base;
-			if (op > oend_w || sequence.matchLength < MINMATCH) {
-				U32 i;
-				for (i = 0; i < sequence.matchLength; ++i)
-					op[i] = match[i];
-				return sequenceLength;
-			}
-		}
-	}
-	/* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-	/* match within prefix */
-	if (sequence.offset < 8) {
-		/* close range match, overlap */
-		static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-		static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
-		int const sub2 = dec64table[sequence.offset];
-		op[0] = match[0];
-		op[1] = match[1];
-		op[2] = match[2];
-		op[3] = match[3];
-		match += dec32table[sequence.offset];
-		ZSTD_copy4(op + 4, match);
-		match -= sub2;
-	} else {
-		ZSTD_copy8(op, match);
-	}
-	op += 8;
-	match += 8;
-
-	if (oMatchEnd > oend - (16 - MINMATCH)) {
-		if (op < oend_w) {
-			ZSTD_wildcopy(op, match, oend_w - op);
-			match += oend_w - op;
-			op = oend_w;
-		}
-		while (op < oMatchEnd)
-			*op++ = *match++;
-	} else {
-		ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
-	}
-	return sequenceLength;
-}
-
-static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
-{
-	const BYTE *ip = (const BYTE *)seqStart;
-	const BYTE *const iend = ip + seqSize;
-	BYTE *const ostart = (BYTE * const)dst;
-	BYTE *const oend = ostart + maxDstSize;
-	BYTE *op = ostart;
-	const BYTE *litPtr = dctx->litPtr;
-	const BYTE *const litEnd = litPtr + dctx->litSize;
-	const BYTE *const base = (const BYTE *)(dctx->base);
-	const BYTE *const vBase = (const BYTE *)(dctx->vBase);
-	const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
-	unsigned const windowSize = dctx->fParams.windowSize;
-	int nbSeq;
-
-	/* Build Decoding Tables */
-	{
-		size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
-		if (ZSTD_isError(seqHSize))
-			return seqHSize;
-		ip += seqHSize;
-	}
-
-	/* Regen sequences */
-	if (nbSeq) {
-#define STORED_SEQS 4
-#define STOSEQ_MASK (STORED_SEQS - 1)
-#define ADVANCED_SEQS 4
-		seq_t *sequences = (seq_t *)dctx->entropy.workspace;
-		int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
-		seqState_t seqState;
-		int seqNb;
-		ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS);
-		dctx->fseEntropy = 1;
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				seqState.prevOffset[i] = dctx->entropy.rep[i];
-		}
-		seqState.base = base;
-		seqState.pos = (size_t)(op - base);
-		seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */
-		CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
-		FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-		FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-		FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
-		/* prepare in advance */
-		for (seqNb = 0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) {
-			sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize);
-		}
-		if (seqNb < seqAdvance)
-			return ERROR(corruption_detected);
-
-		/* decode and decompress */
-		for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) {
-			seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize);
-			size_t const oneSeqSize =
-			    ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
-			if (ZSTD_isError(oneSeqSize))
-				return oneSeqSize;
-			ZSTD_PREFETCH(sequence.match);
-			sequences[seqNb & STOSEQ_MASK] = sequence;
-			op += oneSeqSize;
-		}
-		if (seqNb < nbSeq)
-			return ERROR(corruption_detected);
-
-		/* finish queue */
-		seqNb -= seqAdvance;
-		for (; seqNb < nbSeq; seqNb++) {
-			size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
-			if (ZSTD_isError(oneSeqSize))
-				return oneSeqSize;
-			op += oneSeqSize;
-		}
-
-		/* save reps for next block */
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
-		}
-	}
-
-	/* last literal segment */
-	{
-		size_t const lastLLSize = litEnd - litPtr;
-		if (lastLLSize > (size_t)(oend - op))
-			return ERROR(dstSize_tooSmall);
-		memcpy(op, litPtr, lastLLSize);
-		op += lastLLSize;
-	}
-
-	return op - ostart;
-}
-
-static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{ /* blockType == blockCompressed */
-	const BYTE *ip = (const BYTE *)src;
-
-	if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-		return ERROR(srcSize_wrong);
-
-	/* Decode literals section */
-	{
-		size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
-		if (ZSTD_isError(litCSize))
-			return litCSize;
-		ip += litCSize;
-		srcSize -= litCSize;
-	}
-	if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
-				/* likely because of register pressure */
-				/* if that's the correct cause, then 32-bits ARM should be affected differently */
-				/* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */
-		if (dctx->fParams.windowSize > (1 << 23))
-			return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
-	return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
-}
-
-static void ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst)
-{
-	if (dst != dctx->previousDstEnd) { /* not contiguous */
-		dctx->dictEnd = dctx->previousDstEnd;
-		dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
-		dctx->base = dst;
-		dctx->previousDstEnd = dst;
-	}
-}
-
-size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t dSize;
-	ZSTD_checkContinuity(dctx, dst);
-	dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
-	dctx->previousDstEnd = (char *)dst + dSize;
-	return dSize;
-}
-
-/** ZSTD_insertBlock() :
-	insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
-size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize)
-{
-	ZSTD_checkContinuity(dctx, blockStart);
-	dctx->previousDstEnd = (const char *)blockStart + blockSize;
-	return blockSize;
-}
-
-size_t ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length)
-{
-	if (length > dstCapacity)
-		return ERROR(dstSize_tooSmall);
-	memset(dst, byte, length);
-	return length;
-}
-
-/** ZSTD_findFrameCompressedSize() :
- *  compatible with legacy mode
- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
- *  `srcSize` must be at least as large as the frame contained
- *  @return : the compressed size of the frame starting at `src` */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
-{
-	if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-		return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4);
-	} else {
-		const BYTE *ip = (const BYTE *)src;
-		const BYTE *const ipstart = ip;
-		size_t remainingSize = srcSize;
-		ZSTD_frameParams fParams;
-
-		size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize);
-		if (ZSTD_isError(headerSize))
-			return headerSize;
-
-		/* Frame Header */
-		{
-			size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize);
-			if (ZSTD_isError(ret))
-				return ret;
-			if (ret > 0)
-				return ERROR(srcSize_wrong);
-		}
-
-		ip += headerSize;
-		remainingSize -= headerSize;
-
-		/* Loop on each block */
-		while (1) {
-			blockProperties_t blockProperties;
-			size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-			if (ZSTD_isError(cBlockSize))
-				return cBlockSize;
-
-			if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
-				return ERROR(srcSize_wrong);
-
-			ip += ZSTD_blockHeaderSize + cBlockSize;
-			remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
-
-			if (blockProperties.lastBlock)
-				break;
-		}
-
-		if (fParams.checksumFlag) { /* Frame content checksum */
-			if (remainingSize < 4)
-				return ERROR(srcSize_wrong);
-			ip += 4;
-			remainingSize -= 4;
-		}
-
-		return ip - ipstart;
-	}
-}
-
-/*! ZSTD_decompressFrame() :
-*   @dctx must be properly initialized */
-static size_t ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr)
-{
-	const BYTE *ip = (const BYTE *)(*srcPtr);
-	BYTE *const ostart = (BYTE * const)dst;
-	BYTE *const oend = ostart + dstCapacity;
-	BYTE *op = ostart;
-	size_t remainingSize = *srcSizePtr;
-
-	/* check */
-	if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize)
-		return ERROR(srcSize_wrong);
-
-	/* Frame Header */
-	{
-		size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
-		if (ZSTD_isError(frameHeaderSize))
-			return frameHeaderSize;
-		if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize)
-			return ERROR(srcSize_wrong);
-		CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize));
-		ip += frameHeaderSize;
-		remainingSize -= frameHeaderSize;
-	}
-
-	/* Loop on each block */
-	while (1) {
-		size_t decodedSize;
-		blockProperties_t blockProperties;
-		size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-		if (ZSTD_isError(cBlockSize))
-			return cBlockSize;
-
-		ip += ZSTD_blockHeaderSize;
-		remainingSize -= ZSTD_blockHeaderSize;
-		if (cBlockSize > remainingSize)
-			return ERROR(srcSize_wrong);
-
-		switch (blockProperties.blockType) {
-		case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break;
-		case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break;
-		case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break;
-		case bt_reserved:
-		default: return ERROR(corruption_detected);
-		}
-
-		if (ZSTD_isError(decodedSize))
-			return decodedSize;
-		if (dctx->fParams.checksumFlag)
-			xxh64_update(&dctx->xxhState, op, decodedSize);
-		op += decodedSize;
-		ip += cBlockSize;
-		remainingSize -= cBlockSize;
-		if (blockProperties.lastBlock)
-			break;
-	}
-
-	if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
-		U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
-		U32 checkRead;
-		if (remainingSize < 4)
-			return ERROR(checksum_wrong);
-		checkRead = ZSTD_readLE32(ip);
-		if (checkRead != checkCalc)
-			return ERROR(checksum_wrong);
-		ip += 4;
-		remainingSize -= 4;
-	}
-
-	/* Allow caller to get size read */
-	*srcPtr = ip;
-	*srcSizePtr = remainingSize;
-	return op - ostart;
-}
-
-static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict);
-static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict);
-
-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-					const ZSTD_DDict *ddict)
-{
-	void *const dststart = dst;
-
-	if (ddict) {
-		if (dict) {
-			/* programmer error, these two cases should be mutually exclusive */
-			return ERROR(GENERIC);
-		}
-
-		dict = ZSTD_DDictDictContent(ddict);
-		dictSize = ZSTD_DDictDictSize(ddict);
-	}
-
-	while (srcSize >= ZSTD_frameHeaderSize_prefix) {
-		U32 magicNumber;
-
-		magicNumber = ZSTD_readLE32(src);
-		if (magicNumber != ZSTD_MAGICNUMBER) {
-			if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-				size_t skippableSize;
-				if (srcSize < ZSTD_skippableHeaderSize)
-					return ERROR(srcSize_wrong);
-				skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
-				if (srcSize < skippableSize) {
-					return ERROR(srcSize_wrong);
-				}
-
-				src = (const BYTE *)src + skippableSize;
-				srcSize -= skippableSize;
-				continue;
-			} else {
-				return ERROR(prefix_unknown);
-			}
-		}
-
-		if (ddict) {
-			/* we were called from ZSTD_decompress_usingDDict */
-			ZSTD_refDDict(dctx, ddict);
-		} else {
-			/* this will initialize correctly with no dict if dict == NULL, so
-			 * use this in all cases but ddict */
-			CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
-		}
-		ZSTD_checkContinuity(dctx, dst);
-
-		{
-			const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize);
-			if (ZSTD_isError(res))
-				return res;
-			/* don't need to bounds check this, ZSTD_decompressFrame will have
-			 * already */
-			dst = (BYTE *)dst + res;
-			dstCapacity -= res;
-		}
-	}
-
-	if (srcSize)
-		return ERROR(srcSize_wrong); /* input not entirely consumed */
-
-	return (BYTE *)dst - (BYTE *)dststart;
-}
-
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize)
-{
-	return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
-}
-
-size_t ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
-}
-
-/*-**************************************
-*   Advanced Streaming Decompression API
-*   Bufferless and synchronous
-****************************************/
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; }
-
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx)
-{
-	switch (dctx->stage) {
-	default: /* should not happen */
-	case ZSTDds_getFrameHeaderSize:
-	case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader;
-	case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader;
-	case ZSTDds_decompressBlock: return ZSTDnit_block;
-	case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock;
-	case ZSTDds_checkChecksum: return ZSTDnit_checksum;
-	case ZSTDds_decodeSkippableHeader:
-	case ZSTDds_skipFrame: return ZSTDnit_skippableFrame;
-	}
-}
-
-int ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */
-
-/** ZSTD_decompressContinue() :
-*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
-*             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	/* Sanity check */
-	if (srcSize != dctx->expected)
-		return ERROR(srcSize_wrong);
-	if (dstCapacity)
-		ZSTD_checkContinuity(dctx, dst);
-
-	switch (dctx->stage) {
-	case ZSTDds_getFrameHeaderSize:
-		if (srcSize != ZSTD_frameHeaderSize_prefix)
-			return ERROR(srcSize_wrong);					/* impossible */
-		if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
-			memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
-			dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */
-			dctx->stage = ZSTDds_decodeSkippableHeader;
-			return 0;
-		}
-		dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix);
-		if (ZSTD_isError(dctx->headerSize))
-			return dctx->headerSize;
-		memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
-		if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) {
-			dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix;
-			dctx->stage = ZSTDds_decodeFrameHeader;
-			return 0;
-		}
-		dctx->expected = 0; /* not necessary to copy more */
-		fallthrough;
-
-	case ZSTDds_decodeFrameHeader:
-		memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
-		CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
-		dctx->expected = ZSTD_blockHeaderSize;
-		dctx->stage = ZSTDds_decodeBlockHeader;
-		return 0;
-
-	case ZSTDds_decodeBlockHeader: {
-		blockProperties_t bp;
-		size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
-		if (ZSTD_isError(cBlockSize))
-			return cBlockSize;
-		dctx->expected = cBlockSize;
-		dctx->bType = bp.blockType;
-		dctx->rleSize = bp.origSize;
-		if (cBlockSize) {
-			dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
-			return 0;
-		}
-		/* empty block */
-		if (bp.lastBlock) {
-			if (dctx->fParams.checksumFlag) {
-				dctx->expected = 4;
-				dctx->stage = ZSTDds_checkChecksum;
-			} else {
-				dctx->expected = 0; /* end of frame */
-				dctx->stage = ZSTDds_getFrameHeaderSize;
-			}
-		} else {
-			dctx->expected = 3; /* go directly to next header */
-			dctx->stage = ZSTDds_decodeBlockHeader;
-		}
-		return 0;
-	}
-	case ZSTDds_decompressLastBlock:
-	case ZSTDds_decompressBlock: {
-		size_t rSize;
-		switch (dctx->bType) {
-		case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break;
-		case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break;
-		case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break;
-		case bt_reserved: /* should never happen */
-		default: return ERROR(corruption_detected);
-		}
-		if (ZSTD_isError(rSize))
-			return rSize;
-		if (dctx->fParams.checksumFlag)
-			xxh64_update(&dctx->xxhState, dst, rSize);
-
-		if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
-			if (dctx->fParams.checksumFlag) {	/* another round for frame checksum */
-				dctx->expected = 4;
-				dctx->stage = ZSTDds_checkChecksum;
-			} else {
-				dctx->expected = 0; /* ends here */
-				dctx->stage = ZSTDds_getFrameHeaderSize;
-			}
-		} else {
-			dctx->stage = ZSTDds_decodeBlockHeader;
-			dctx->expected = ZSTD_blockHeaderSize;
-			dctx->previousDstEnd = (char *)dst + rSize;
-		}
-		return rSize;
-	}
-	case ZSTDds_checkChecksum: {
-		U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
-		U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */
-		if (check32 != h32)
-			return ERROR(checksum_wrong);
-		dctx->expected = 0;
-		dctx->stage = ZSTDds_getFrameHeaderSize;
-		return 0;
-	}
-	case ZSTDds_decodeSkippableHeader: {
-		memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
-		dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4);
-		dctx->stage = ZSTDds_skipFrame;
-		return 0;
-	}
-	case ZSTDds_skipFrame: {
-		dctx->expected = 0;
-		dctx->stage = ZSTDds_getFrameHeaderSize;
-		return 0;
-	}
-	default:
-		return ERROR(GENERIC); /* impossible */
-	}
-}
-
-static size_t ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-	dctx->dictEnd = dctx->previousDstEnd;
-	dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
-	dctx->base = dict;
-	dctx->previousDstEnd = (const char *)dict + dictSize;
-	return 0;
-}
-
-/* ZSTD_loadEntropy() :
- * dict : must point at beginning of a valid zstd dictionary
- * @return : size of entropy tables read */
-static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize)
-{
-	const BYTE *dictPtr = (const BYTE *)dict;
-	const BYTE *const dictEnd = dictPtr + dictSize;
-
-	if (dictSize <= 8)
-		return ERROR(dictionary_corrupted);
-	dictPtr += 8; /* skip header = magic + dictID */
-
-	{
-		size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace));
-		if (HUF_isError(hSize))
-			return ERROR(dictionary_corrupted);
-		dictPtr += hSize;
-	}
-
-	{
-		short offcodeNCount[MaxOff + 1];
-		U32 offcodeMaxValue = MaxOff, offcodeLog;
-		size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(offcodeHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (offcodeLog > OffFSELog)
-			return ERROR(dictionary_corrupted);
-		CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-		dictPtr += offcodeHeaderSize;
-	}
-
-	{
-		short matchlengthNCount[MaxML + 1];
-		unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-		size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(matchlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (matchlengthLog > MLFSELog)
-			return ERROR(dictionary_corrupted);
-		CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-		dictPtr += matchlengthHeaderSize;
-	}
-
-	{
-		short litlengthNCount[MaxLL + 1];
-		unsigned litlengthMaxValue = MaxLL, litlengthLog;
-		size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
-		if (FSE_isError(litlengthHeaderSize))
-			return ERROR(dictionary_corrupted);
-		if (litlengthLog > LLFSELog)
-			return ERROR(dictionary_corrupted);
-		CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-		dictPtr += litlengthHeaderSize;
-	}
-
-	if (dictPtr + 12 > dictEnd)
-		return ERROR(dictionary_corrupted);
-	{
-		int i;
-		size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12));
-		for (i = 0; i < 3; i++) {
-			U32 const rep = ZSTD_readLE32(dictPtr);
-			dictPtr += 4;
-			if (rep == 0 || rep >= dictContentSize)
-				return ERROR(dictionary_corrupted);
-			entropy->rep[i] = rep;
-		}
-	}
-
-	return dictPtr - (const BYTE *)dict;
-}
-
-static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-	if (dictSize < 8)
-		return ZSTD_refDictContent(dctx, dict, dictSize);
-	{
-		U32 const magic = ZSTD_readLE32(dict);
-		if (magic != ZSTD_DICT_MAGIC) {
-			return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
-		}
-	}
-	dctx->dictID = ZSTD_readLE32((const char *)dict + 4);
-
-	/* load entropy tables */
-	{
-		size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
-		if (ZSTD_isError(eSize))
-			return ERROR(dictionary_corrupted);
-		dict = (const char *)dict + eSize;
-		dictSize -= eSize;
-	}
-	dctx->litEntropy = dctx->fseEntropy = 1;
-
-	/* reference dictionary content */
-	return ZSTD_refDictContent(dctx, dict, dictSize);
-}
-
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-	CHECK_F(ZSTD_decompressBegin(dctx));
-	if (dict && dictSize)
-		CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
-	return 0;
-}
-
-/* ======   ZSTD_DDict   ====== */
-
-struct ZSTD_DDict_s {
-	void *dictBuffer;
-	const void *dictContent;
-	size_t dictSize;
-	ZSTD_entropyTables_t entropy;
-	U32 dictID;
-	U32 entropyPresent;
-	ZSTD_customMem cMem;
-}; /* typedef'd to ZSTD_DDict within "zstd.h" */
-
-size_t ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); }
-
-static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; }
-
-static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; }
-
-static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict)
-{
-	ZSTD_decompressBegin(dstDCtx); /* init */
-	if (ddict) {		       /* support refDDict on NULL */
-		dstDCtx->dictID = ddict->dictID;
-		dstDCtx->base = ddict->dictContent;
-		dstDCtx->vBase = ddict->dictContent;
-		dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize;
-		dstDCtx->previousDstEnd = dstDCtx->dictEnd;
-		if (ddict->entropyPresent) {
-			dstDCtx->litEntropy = 1;
-			dstDCtx->fseEntropy = 1;
-			dstDCtx->LLTptr = ddict->entropy.LLTable;
-			dstDCtx->MLTptr = ddict->entropy.MLTable;
-			dstDCtx->OFTptr = ddict->entropy.OFTable;
-			dstDCtx->HUFptr = ddict->entropy.hufTable;
-			dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
-			dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
-			dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
-		} else {
-			dstDCtx->litEntropy = 0;
-			dstDCtx->fseEntropy = 0;
-		}
-	}
-}
-
-static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict)
-{
-	ddict->dictID = 0;
-	ddict->entropyPresent = 0;
-	if (ddict->dictSize < 8)
-		return 0;
-	{
-		U32 const magic = ZSTD_readLE32(ddict->dictContent);
-		if (magic != ZSTD_DICT_MAGIC)
-			return 0; /* pure content mode */
-	}
-	ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4);
-
-	/* load entropy tables */
-	CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted);
-	ddict->entropyPresent = 1;
-	return 0;
-}
-
-static ZSTD_DDict *ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
-{
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	{
-		ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
-		if (!ddict)
-			return NULL;
-		ddict->cMem = customMem;
-
-		if ((byReference) || (!dict) || (!dictSize)) {
-			ddict->dictBuffer = NULL;
-			ddict->dictContent = dict;
-		} else {
-			void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
-			if (!internalBuffer) {
-				ZSTD_freeDDict(ddict);
-				return NULL;
-			}
-			memcpy(internalBuffer, dict, dictSize);
-			ddict->dictBuffer = internalBuffer;
-			ddict->dictContent = internalBuffer;
-		}
-		ddict->dictSize = dictSize;
-		ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
-		/* parse dictionary content */
-		{
-			size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict);
-			if (ZSTD_isError(errorCode)) {
-				ZSTD_freeDDict(ddict);
-				return NULL;
-			}
-		}
-
-		return ddict;
-	}
-}
-
-/*! ZSTD_initDDict() :
-*   Create a digested dictionary, to start decompression without startup delay.
-*   `dict` content is copied inside DDict.
-*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
-ZSTD_DDict *ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem);
-}
-
-size_t ZSTD_freeDDict(ZSTD_DDict *ddict)
-{
-	if (ddict == NULL)
-		return 0; /* support free on NULL */
-	{
-		ZSTD_customMem const cMem = ddict->cMem;
-		ZSTD_free(ddict->dictBuffer, cMem);
-		ZSTD_free(ddict, cMem);
-		return 0;
-	}
-}
-
-/*! ZSTD_getDictID_fromDict() :
- *  Provides the dictID stored within dictionary.
- *  if @return == 0, the dictionary is not conformant with Zstandard specification.
- *  It can still be loaded, but as a content-only dictionary. */
-unsigned ZSTD_getDictID_fromDict(const void *dict, size_t dictSize)
-{
-	if (dictSize < 8)
-		return 0;
-	if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC)
-		return 0;
-	return ZSTD_readLE32((const char *)dict + 4);
-}
-
-/*! ZSTD_getDictID_fromDDict() :
- *  Provides the dictID of the dictionary loaded into `ddict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict)
-{
-	if (ddict == NULL)
-		return 0;
-	return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
-}
-
-/*! ZSTD_getDictID_fromFrame() :
- *  Provides the dictID required to decompressed the frame stored within `src`.
- *  If @return == 0, the dictID could not be decoded.
- *  This could for one of the following reasons :
- *  - The frame does not require a dictionary to be decoded (most common case).
- *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
- *    Note : this use case also happens when using a non-conformant dictionary.
- *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
- *  - This is not a Zstandard frame.
- *  When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
-unsigned ZSTD_getDictID_fromFrame(const void *src, size_t srcSize)
-{
-	ZSTD_frameParams zfp = {0, 0, 0, 0};
-	size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize);
-	if (ZSTD_isError(hError))
-		return 0;
-	return zfp.dictID;
-}
-
-/*! ZSTD_decompress_usingDDict() :
-*   Decompression using a pre-digested Dictionary
-*   Use dictionary without significant overhead. */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict)
-{
-	/* pass content and size in case legacy frames are encountered */
-	return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict);
-}
-
-/*=====================================
-*   Streaming decompression
-*====================================*/
-
-typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
-
-/* *** Resource management *** */
-struct ZSTD_DStream_s {
-	ZSTD_DCtx *dctx;
-	ZSTD_DDict *ddictLocal;
-	const ZSTD_DDict *ddict;
-	ZSTD_frameParams fParams;
-	ZSTD_dStreamStage stage;
-	char *inBuff;
-	size_t inBuffSize;
-	size_t inPos;
-	size_t maxWindowSize;
-	char *outBuff;
-	size_t outBuffSize;
-	size_t outStart;
-	size_t outEnd;
-	size_t blockSize;
-	BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */
-	size_t lhSize;
-	ZSTD_customMem customMem;
-	void *legacyContext;
-	U32 previousLegacyVersion;
-	U32 legacyVersion;
-	U32 hostageByte;
-}; /* typedef'd to ZSTD_DStream within "zstd.h" */
-
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize)
-{
-	size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-	size_t const inBuffSize = blockSize;
-	size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-	return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
-}
-
-static ZSTD_DStream *ZSTD_createDStream_advanced(ZSTD_customMem customMem)
-{
-	ZSTD_DStream *zds;
-
-	if (!customMem.customAlloc || !customMem.customFree)
-		return NULL;
-
-	zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem);
-	if (zds == NULL)
-		return NULL;
-	memset(zds, 0, sizeof(ZSTD_DStream));
-	memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem));
-	zds->dctx = ZSTD_createDCtx_advanced(customMem);
-	if (zds->dctx == NULL) {
-		ZSTD_freeDStream(zds);
-		return NULL;
-	}
-	zds->stage = zdss_init;
-	zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
-	return zds;
-}
-
-ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-	ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem);
-	if (!zds) {
-		return NULL;
-	}
-
-	zds->maxWindowSize = maxWindowSize;
-	zds->stage = zdss_loadHeader;
-	zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-	ZSTD_freeDDict(zds->ddictLocal);
-	zds->ddictLocal = NULL;
-	zds->ddict = zds->ddictLocal;
-	zds->legacyVersion = 0;
-	zds->hostageByte = 0;
-
-	{
-		size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-		size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-
-		zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem);
-		zds->inBuffSize = blockSize;
-		zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem);
-		zds->outBuffSize = neededOutSize;
-		if (zds->inBuff == NULL || zds->outBuff == NULL) {
-			ZSTD_freeDStream(zds);
-			return NULL;
-		}
-	}
-	return zds;
-}
-
-ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize)
-{
-	ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize);
-	if (zds) {
-		zds->ddict = ddict;
-	}
-	return zds;
-}
-
-size_t ZSTD_freeDStream(ZSTD_DStream *zds)
-{
-	if (zds == NULL)
-		return 0; /* support free on null */
-	{
-		ZSTD_customMem const cMem = zds->customMem;
-		ZSTD_freeDCtx(zds->dctx);
-		zds->dctx = NULL;
-		ZSTD_freeDDict(zds->ddictLocal);
-		zds->ddictLocal = NULL;
-		ZSTD_free(zds->inBuff, cMem);
-		zds->inBuff = NULL;
-		ZSTD_free(zds->outBuff, cMem);
-		zds->outBuff = NULL;
-		ZSTD_free(zds, cMem);
-		return 0;
-	}
-}
-
-/* *** Initialization *** */
-
-size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; }
-size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-
-size_t ZSTD_resetDStream(ZSTD_DStream *zds)
-{
-	zds->stage = zdss_loadHeader;
-	zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-	zds->legacyVersion = 0;
-	zds->hostageByte = 0;
-	return ZSTD_frameHeaderSize_prefix;
-}
-
-/* *****   Decompression   ***** */
-
-ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-	size_t const length = MIN(dstCapacity, srcSize);
-	memcpy(dst, src, length);
-	return length;
-}
-
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
-{
-	const char *const istart = (const char *)(input->src) + input->pos;
-	const char *const iend = (const char *)(input->src) + input->size;
-	const char *ip = istart;
-	char *const ostart = (char *)(output->dst) + output->pos;
-	char *const oend = (char *)(output->dst) + output->size;
-	char *op = ostart;
-	U32 someMoreWork = 1;
-
-	while (someMoreWork) {
-		switch (zds->stage) {
-		case zdss_init:
-			ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
-			fallthrough;
-
-		case zdss_loadHeader: {
-			size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize);
-			if (ZSTD_isError(hSize))
-				return hSize;
-			if (hSize != 0) {				   /* need more input */
-				size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
-				if (toLoad > (size_t)(iend - ip)) {	/* not enough input to load full header */
-					memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip);
-					zds->lhSize += iend - ip;
-					input->pos = input->size;
-					return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) +
-					       ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
-				}
-				memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad);
-				zds->lhSize = hSize;
-				ip += toLoad;
-				break;
-			}
-
-			/* check for single-pass mode opportunity */
-			if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
-			    && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) {
-				size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart);
-				if (cSize <= (size_t)(iend - istart)) {
-					size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict);
-					if (ZSTD_isError(decompressedSize))
-						return decompressedSize;
-					ip = istart + cSize;
-					op += decompressedSize;
-					zds->dctx->expected = 0;
-					zds->stage = zdss_init;
-					someMoreWork = 0;
-					break;
-				}
-			}
-
-			/* Consume header */
-			ZSTD_refDDict(zds->dctx, zds->ddict);
-			{
-				size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */
-				CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
-				{
-					size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-					CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size));
-				}
-			}
-
-			zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-			if (zds->fParams.windowSize > zds->maxWindowSize)
-				return ERROR(frameParameter_windowTooLarge);
-
-			/* Buffers are preallocated, but double check */
-			{
-				size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-				size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-				if (zds->inBuffSize < blockSize) {
-					return ERROR(GENERIC);
-				}
-				if (zds->outBuffSize < neededOutSize) {
-					return ERROR(GENERIC);
-				}
-				zds->blockSize = blockSize;
-			}
-			zds->stage = zdss_read;
-		}
-			fallthrough;
-
-		case zdss_read: {
-			size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-			if (neededInSize == 0) { /* end of frame */
-				zds->stage = zdss_init;
-				someMoreWork = 0;
-				break;
-			}
-			if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */
-				const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-				size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart,
-										   (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), ip, neededInSize);
-				if (ZSTD_isError(decodedSize))
-					return decodedSize;
-				ip += neededInSize;
-				if (!decodedSize && !isSkipFrame)
-					break; /* this was just a header */
-				zds->outEnd = zds->outStart + decodedSize;
-				zds->stage = zdss_flush;
-				break;
-			}
-			if (ip == iend) {
-				someMoreWork = 0;
-				break;
-			} /* no more input */
-			zds->stage = zdss_load;
-			/* pass-through */
-		}
-			fallthrough;
-
-		case zdss_load: {
-			size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-			size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */
-			size_t loadedSize;
-			if (toLoad > zds->inBuffSize - zds->inPos)
-				return ERROR(corruption_detected); /* should never happen */
-			loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip);
-			ip += loadedSize;
-			zds->inPos += loadedSize;
-			if (loadedSize < toLoad) {
-				someMoreWork = 0;
-				break;
-			} /* not enough input, wait for more */
-
-			/* decode loaded input */
-			{
-				const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-				size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
-										   zds->inBuff, neededInSize);
-				if (ZSTD_isError(decodedSize))
-					return decodedSize;
-				zds->inPos = 0; /* input is consumed */
-				if (!decodedSize && !isSkipFrame) {
-					zds->stage = zdss_read;
-					break;
-				} /* this was just a header */
-				zds->outEnd = zds->outStart + decodedSize;
-				zds->stage = zdss_flush;
-				/* pass-through */
-			}
-		}
-			fallthrough;
-
-		case zdss_flush: {
-			size_t const toFlushSize = zds->outEnd - zds->outStart;
-			size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize);
-			op += flushedSize;
-			zds->outStart += flushedSize;
-			if (flushedSize == toFlushSize) { /* flush completed */
-				zds->stage = zdss_read;
-				if (zds->outStart + zds->blockSize > zds->outBuffSize)
-					zds->outStart = zds->outEnd = 0;
-				break;
-			}
-			/* cannot complete flush */
-			someMoreWork = 0;
-			break;
-		}
-		default:
-			return ERROR(GENERIC); /* impossible */
-		}
-	}
-
-	/* result */
-	input->pos += (size_t)(ip - istart);
-	output->pos += (size_t)(op - ostart);
-	{
-		size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-		if (!nextSrcSizeHint) {			    /* frame fully decoded */
-			if (zds->outEnd == zds->outStart) { /* output fully flushed */
-				if (zds->hostageByte) {
-					if (input->pos >= input->size) {
-						zds->stage = zdss_read;
-						return 1;
-					}	     /* can't release hostage (not present) */
-					input->pos++; /* release hostage */
-				}
-				return 0;
-			}
-			if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
-				input->pos--;    /* note : pos > 0, otherwise, impossible to finish reading last block */
-				zds->hostageByte = 1;
-			}
-			return 1;
-		}
-		nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */
-		if (zds->inPos > nextSrcSizeHint)
-			return ERROR(GENERIC); /* should never happen */
-		nextSrcSizeHint -= zds->inPos; /* already loaded*/
-		return nextSrcSizeHint;
-	}
-}
-
-unsigned int zstd_is_error(size_t code)
-{
-	return ZSTD_isError(code);
-}
-EXPORT_SYMBOL(zstd_is_error);
-
-zstd_error_code zstd_get_error_code(size_t code)
-{
-	return ZSTD_getErrorCode(code);
-}
-EXPORT_SYMBOL(zstd_get_error_code);
-
-const char *zstd_get_error_name(size_t code)
-{
-	/* Real implementation in zstd-1.4.6. */
-	return "GENERIC";
-}
-EXPORT_SYMBOL(zstd_get_error_name);
-
-size_t zstd_dctx_workspace_bound(void)
-{
-	return ZSTD_DCtxWorkspaceBound();
-}
-EXPORT_SYMBOL(zstd_dctx_workspace_bound);
-
-zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
-{
-	return ZSTD_initDCtx(workspace, workspace_size);
-}
-EXPORT_SYMBOL(zstd_init_dctx);
-
-size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
-	const void *src, size_t src_size)
-{
-	return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
-}
-EXPORT_SYMBOL(zstd_decompress_dctx);
-
-size_t zstd_dstream_workspace_bound(size_t max_window_size)
-{
-	return ZSTD_DStreamWorkspaceBound(max_window_size);
-}
-EXPORT_SYMBOL(zstd_dstream_workspace_bound);
-
-zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
-	size_t workspace_size)
-{
-	return ZSTD_initDStream(max_window_size, workspace, workspace_size);
-}
-EXPORT_SYMBOL(zstd_init_dstream);
-
-size_t zstd_reset_dstream(zstd_dstream *dstream)
-{
-	return ZSTD_resetDStream(dstream);
-}
-EXPORT_SYMBOL(zstd_reset_dstream);
-
-size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
-	zstd_in_buffer *input)
-{
-	return ZSTD_decompressStream(dstream, output, input);
-}
-EXPORT_SYMBOL(zstd_decompress_stream);
-
-size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
-{
-	return ZSTD_findFrameCompressedSize(src, src_size);
-}
-EXPORT_SYMBOL(zstd_find_frame_compressed_size);
-
-size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
-	size_t src_size)
-{
-	return ZSTD_getFrameParams(header, src, src_size);
-}
-EXPORT_SYMBOL(zstd_get_frame_header);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
new file mode 100644
index 000000000000..05570ed5f8be
--- /dev/null
+++ b/lib/zstd/decompress/huf_decompress.c
@@ -0,0 +1,1206 @@
+/* ******************************************************************
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
+#include "../common/compiler.h"
+#include "../common/bitstream.h"  /* BIT_* */
+#include "../common/fse.h"        /* to compress headers */
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/error_private.h"
+
+/* **************************************************************
+*  Macros
+****************************************************************/
+
+/* These two optional macros force the use one way or another of the two
+ * Huffman decompression implementations. You can't force in both directions
+ * at the same time.
+ */
+#if defined(HUF_FORCE_DECOMPRESS_X1) && \
+    defined(HUF_FORCE_DECOMPRESS_X2)
+#error "Cannot force the use of the X1 and X2 decoders at the same time!"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+
+
+/* **************************************************************
+*  Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+/* **************************************************************
+*  BMI2 Variant Wrappers
+****************************************************************/
+#if DYNAMIC_BMI2
+
+#define HUF_DGEN(fn)                                                        \
+                                                                            \
+    static size_t fn##_default(                                             \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        if (bmi2) {                                                         \
+            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
+        }                                                                   \
+        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
+    }
+
+#else
+
+#define HUF_DGEN(fn)                                                        \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        (void)bmi2;                                                         \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }
+
+#endif
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+{
+    DTableDesc dtd;
+    ZSTD_memcpy(&dtd, table, sizeof(dtd));
+    return dtd;
+}
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
+
+/*
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
+ * a time.
+ */
+static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+    U64 D4;
+    if (MEM_isLittleEndian()) {
+        D4 = symbol + (nbBits << 8);
+    } else {
+        D4 = (symbol << 8) + nbBits;
+    }
+    D4 *= 0x0001000100010001ULL;
+    return D4;
+}
+
+typedef struct {
+        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
+        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+} HUF_ReadDTableX1_Workspace;
+
+
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+{
+    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
+
+    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
+    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
+
+    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* Table header */
+    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
+        dtd.tableType = 0;
+        dtd.tableLog = (BYTE)tableLog;
+        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Compute symbols and rankStart given rankVal:
+     *
+     * rankVal already contains the number of values of each weight.
+     *
+     * symbols contains the symbols ordered by weight. First are the rankVal[0]
+     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
+     * symbols[0] is filled (but unused) to avoid a branch.
+     *
+     * rankStart contains the offset where each rank belongs in the DTable.
+     * rankStart[0] is not filled because there are no entries in the table for
+     * weight 0.
+     */
+    {
+        int n;
+        int nextRankStart = 0;
+        int const unroll = 4;
+        int const nLimit = (int)nbSymbols - unroll + 1;
+        for (n=0; n<(int)tableLog+1; n++) {
+            U32 const curr = nextRankStart;
+            nextRankStart += wksp->rankVal[n];
+            wksp->rankStart[n] = curr;
+        }
+        for (n=0; n < nLimit; n += unroll) {
+            int u;
+            for (u=0; u < unroll; ++u) {
+                size_t const w = wksp->huffWeight[n+u];
+                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
+            }
+        }
+        for (; n < (int)nbSymbols; ++n) {
+            size_t const w = wksp->huffWeight[n];
+            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
+        }
+    }
+
+    /* fill DTable
+     * We fill all entries of each weight in order.
+     * That way length is a constant for each iteration of the outter loop.
+     * We can switch based on the length to a different inner loop which is
+     * optimized for that particular case.
+     */
+    {
+        U32 w;
+        int symbol=wksp->rankVal[0];
+        int rankStart=0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
+    return iSize;
+}
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+    }
+
+    /* [0-3] symbols remaining */
+    if (MEM_32bits())
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const void* dtPtr = DTable + 1;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+    BIT_DStream_t bitD;
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - 3;
+        const void* const dtPtr = DTable + 1;
+        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+        U32 endSignal = 1;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
+        for ( ; (endSignal) & (op4 < olimit) ; ) {
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+        }
+
+        /* check corruption */
+        /* note : should not be necessary : op# advance in lock step, and we control op4.
+         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+                                               const void *cSrc,
+                                               size_t cSrcSize,
+                                               const HUF_DTable *DTable);
+
+HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
+
+
+
+size_t HUF_decompress1X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X2 */
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X1
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+
+/* HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
+{
+    HUF_DEltX2 DElt;
+    U32* rankVal = wksp;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
+    /* get pre-calculated rankVal */
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+            const U32 symbol = sortedSymbols[s].symbol;
+            const U32 weight = sortedSymbols[s].weight;
+            const U32 nbBits = nbBitsBaseline - weight;
+            const U32 length = 1 << (sizeLog-nbBits);
+            const U32 start = rankVal[weight];
+            U32 i = start;
+            const U32 end = start + length;
+
+            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+            DElt.nbBits = (BYTE)(nbBits + consumed);
+            DElt.length = 2;
+            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+            rankVal[weight] += length;
+    }   }
+}
+
+
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+    U32* rankVal = wksp;
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
+
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol, wksp, wkspSize);
+        } else {
+            HUF_DEltX2 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                             void* workSpace, size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = wksp->weightList[s];
+            U32 const r = rankStart[w]++;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = wksp->rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = wksp->rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUF_fillDTableX2(dt, maxTableLog,
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+    }   }
+    return 1;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+                const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to end : up to 2 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BIT_DStream_t bitD;
+
+    /* Init */
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    /* decode */
+    {   BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
+    }
+
+    /* check */
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - (sizeof(size_t)-1);
+        const void* const dtPtr = DTable+1;
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal = 1;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        for ( ; (endSignal) & (op4 < olimit); ) {
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+#else
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = (U32)LIKELY(
+                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
+size_t HUF_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
+                                               workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+                                         workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X1 */
+
+
+/* ***********************************/
+/* Universal decompression selectors */
+/* ***********************************/
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+#endif
+
+/* HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    assert(dstSize > 0);
+    assert(dstSize <= 128*1024);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 0;
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 1;
+#else
+    /* decoder timing evaluation */
+    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
+        U32 const D256 = (U32)(dstSize >> 8);
+        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+        DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
+        return DTime1 < DTime0;
+    }
+#endif
+}
+
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+                                     size_t dstSize, const void* cSrc,
+                                     size_t cSrcSize, void* workSpace,
+                                     size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#else
+        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                            cSrcSize, workSpace, wkspSize):
+                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                  const void* cSrc, size_t cSrcSize,
+                                  void* workSpace, size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#else
+        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize):
+                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+#endif
+
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#else
+        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#endif
+    }
+}
+
diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c
new file mode 100644
index 000000000000..dbbc7919de53
--- /dev/null
+++ b/lib/zstd/decompress/zstd_ddict.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_ddict.c :
+ * concentrates all logic that needs to know the internals of ZSTD_DDict object */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "zstd_decompress_internal.h"
+#include "zstd_ddict.h"
+
+
+
+
+/*-*******************************************************
+*  Types
+*********************************************************/
+struct ZSTD_DDict_s {
+    void* dictBuffer;
+    const void* dictContent;
+    size_t dictSize;
+    ZSTD_entropyDTables_t entropy;
+    U32 dictID;
+    U32 entropyPresent;
+    ZSTD_customMem cMem;
+};  /* typedef'd to ZSTD_DDict within "zstd.h" */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
+{
+    assert(ddict != NULL);
+    return ddict->dictContent;
+}
+
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
+{
+    assert(ddict != NULL);
+    return ddict->dictSize;
+}
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_copyDDictParameters");
+    assert(dctx != NULL);
+    assert(ddict != NULL);
+    dctx->dictID = ddict->dictID;
+    dctx->prefixStart = ddict->dictContent;
+    dctx->virtualStart = ddict->dictContent;
+    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
+    dctx->previousDstEnd = dctx->dictEnd;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+    if (ddict->entropyPresent) {
+        dctx->litEntropy = 1;
+        dctx->fseEntropy = 1;
+        dctx->LLTptr = ddict->entropy.LLTable;
+        dctx->MLTptr = ddict->entropy.MLTable;
+        dctx->OFTptr = ddict->entropy.OFTable;
+        dctx->HUFptr = ddict->entropy.hufTable;
+        dctx->entropy.rep[0] = ddict->entropy.rep[0];
+        dctx->entropy.rep[1] = ddict->entropy.rep[1];
+        dctx->entropy.rep[2] = ddict->entropy.rep[2];
+    } else {
+        dctx->litEntropy = 0;
+        dctx->fseEntropy = 0;
+    }
+}
+
+
+static size_t
+ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
+                           ZSTD_dictContentType_e dictContentType)
+{
+    ddict->dictID = 0;
+    ddict->entropyPresent = 0;
+    if (dictContentType == ZSTD_dct_rawContent) return 0;
+
+    if (ddict->dictSize < 8) {
+        if (dictContentType == ZSTD_dct_fullDict)
+            return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
+        return 0;   /* pure content mode */
+    }
+    {   U32 const magic = MEM_readLE32(ddict->dictContent);
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
+            if (dictContentType == ZSTD_dct_fullDict)
+                return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
+            return 0;   /* pure content mode */
+        }
+    }
+    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
+            &ddict->entropy, ddict->dictContent, ddict->dictSize)),
+        dictionary_corrupted, "");
+    ddict->entropyPresent = 1;
+    return 0;
+}
+
+
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType)
+{
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
+        ddict->dictBuffer = NULL;
+        ddict->dictContent = dict;
+        if (!dict) dictSize = 0;
+    } else {
+        void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
+        ddict->dictBuffer = internalBuffer;
+        ddict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        ZSTD_memcpy(internalBuffer, dict, dictSize);
+    }
+    ddict->dictSize = dictSize;
+    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+
+    /* parse dictionary content */
+    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
+
+    return 0;
+}
+
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
+        if (ddict == NULL) return NULL;
+        ddict->cMem = customMem;
+        {   size_t const initResult = ZSTD_initDDict_internal(ddict,
+                                            dict, dictSize,
+                                            dictLoadMethod, dictContentType);
+            if (ZSTD_isError(initResult)) {
+                ZSTD_freeDDict(ddict);
+                return NULL;
+        }   }
+        return ddict;
+    }
+}
+
+/*! ZSTD_createDDict() :
+*   Create a digested dictionary, to start decompression without startup delay.
+*   `dict` content is copied inside DDict.
+*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
+ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
+}
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, to start decompression without startup delay.
+ *  Dictionary content is simply referenced, it will be accessed during decompression.
+ *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
+}
+
+
+const ZSTD_DDict* ZSTD_initStaticDDict(
+                                void* sBuffer, size_t sBufferSize,
+                                const void* dict, size_t dictSize,
+                                ZSTD_dictLoadMethod_e dictLoadMethod,
+                                ZSTD_dictContentType_e dictContentType)
+{
+    size_t const neededSpace = sizeof(ZSTD_DDict)
+                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+    ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
+    assert(sBuffer != NULL);
+    assert(dict != NULL);
+    if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
+    if (sBufferSize < neededSpace) return NULL;
+    if (dictLoadMethod == ZSTD_dlm_byCopy) {
+        ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
+        dict = ddict+1;
+    }
+    if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
+                                              dict, dictSize,
+                                              ZSTD_dlm_byRef, dictContentType) ))
+        return NULL;
+    return ddict;
+}
+
+
+size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = ddict->cMem;
+        ZSTD_customFree(ddict->dictBuffer, cMem);
+        ZSTD_customFree(ddict, cMem);
+        return 0;
+    }
+}
+
+/*! ZSTD_estimateDDictSize() :
+ *  Estimate amount of memory that will be needed to create a dictionary for decompression.
+ *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
+size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+}
+
+size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;   /* support sizeof on NULL */
+    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;
+    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
+}
diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h
new file mode 100644
index 000000000000..8c1a79d666f8
--- /dev/null
+++ b/lib/zstd/decompress/zstd_ddict.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DDICT_H
+#define ZSTD_DDICT_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>     /* ZSTD_DDict, and several public functions */
+
+
+/*-*******************************************************
+ *  Interface
+ *********************************************************/
+
+/* note: several prototypes are already published in `zstd.h` :
+ * ZSTD_createDDict()
+ * ZSTD_createDDict_byReference()
+ * ZSTD_createDDict_advanced()
+ * ZSTD_freeDDict()
+ * ZSTD_initStaticDDict()
+ * ZSTD_sizeof_DDict()
+ * ZSTD_estimateDDictSize()
+ * ZSTD_getDictID_fromDict()
+ */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+
+
+#endif /* ZSTD_DDICT_H */
diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
new file mode 100644
index 000000000000..b4d81d84479a
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -0,0 +1,2085 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif
+
+/*!
+*  LEGACY_SUPPORT :
+*  if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
+*/
+
+/*!
+ *  MAXWINDOWSIZE_DEFAULT :
+ *  maximum window size accepted by DStream __by default__.
+ *  Frames requiring more memory will be rejected.
+ *  It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ */
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+#  define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+/*!
+ *  NO_FORWARD_PROGRESS_MAX :
+ *  maximum allowed nb of calls to ZSTD_decompressStream()
+ *  without any forward progress
+ *  (defined as: no byte read from input, and no byte flushed to output)
+ *  before triggering an error.
+ */
+#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
+#  define ZSTD_NO_FORWARD_PROGRESS_MAX 16
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
+#include "../common/zstd_internal.h"  /* blockProperties_t */
+#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
+
+
+
+
+/* ***********************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
+
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+                                                     * Currently, that means a 0.75 load factor.
+                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+                                                     * the load factor of the ddict hash set.
+                                                     */
+
+#define DDICT_HASHSET_TABLE_BASE_SIZE 64
+#define DDICT_HASHSET_RESIZE_FACTOR 2
+
+/* Hash function to determine starting position of dict insertion within the table
+ * Returns an index between [0, hashSet->ddictPtrTableSize]
+ */
+static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    const U64 hash = xxh64(&dictID, sizeof(U32), 0);
+    /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
+    return hash & (hashSet->ddictPtrTableSize - 1);
+}
+
+/* Adds DDict to a hashset without resizing it.
+ * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
+ * Returns 0 if successful, or a zstd error code if something went wrong.
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    while (hashSet->ddictPtrTable[idx] != NULL) {
+        /* Replace existing ddict if inserting ddict with same dictID */
+        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+            hashSet->ddictPtrTable[idx] = ddict;
+            return 0;
+        }
+        idx &= idxRangeMask;
+        idx++;
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    hashSet->ddictPtrTable[idx] = ddict;
+    hashSet->ddictPtrCount++;
+    return 0;
+}
+
+/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
+ * rehashes all values, allocates new table, frees old table.
+ * Returns 0 on success, otherwise a zstd error code.
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+    size_t oldTableSize = hashSet->ddictPtrTableSize;
+    size_t i;
+
+    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+    hashSet->ddictPtrTable = newTable;
+    hashSet->ddictPtrTableSize = newTableSize;
+    hashSet->ddictPtrCount = 0;
+    for (i = 0; i < oldTableSize; ++i) {
+        if (oldTable[i] != NULL) {
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+        }
+    }
+    ZSTD_customFree((void*)oldTable, customMem);
+    DEBUGLOG(4, "Finished re-hash");
+    return 0;
+}
+
+/* Fetches a DDict with the given dictID
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    for (;;) {
+        size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
+        if (currDictID == dictID || currDictID == 0) {
+            /* currDictID == 0 implies a NULL ddict entry */
+            break;
+        } else {
+            idx &= idxRangeMask;    /* Goes to start of table when we reach the end */
+            idx++;
+        }
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates space for and returns a ddict hash set
+ * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
+ * Returns NULL if allocation failed.
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    if (!ret)
+        return NULL;
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    if (!ret->ddictPtrTable) {
+        ZSTD_customFree(ret, customMem);
+        return NULL;
+    }
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet && hashSet->ddictPtrTable) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    if (hashSet) {
+        ZSTD_customFree(hashSet, customMem);
+    }
+}
+
+/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
+ */
+static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
+    if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
+        FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
+    }
+    FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
+    return 0;
+}
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support sizeof NULL */
+    return sizeof(*dctx)
+           + ZSTD_sizeof_DDict(dctx->ddictLocal)
+           + dctx->inBuffSize + dctx->outBuffSize;
+}
+
+size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
+
+
+static size_t ZSTD_startingInputLength(ZSTD_format_e format)
+{
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
+    /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
+    assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
+    return startingInputLength;
+}
+
+static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
+{
+    assert(dctx->streamStage == zdss_init);
+    dctx->format = ZSTD_f_zstd1;
+    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+    dctx->outBufferMode = ZSTD_bm_buffered;
+    dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+    dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
+}
+
+static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+{
+    dctx->staticSize  = 0;
+    dctx->ddict       = NULL;
+    dctx->ddictLocal  = NULL;
+    dctx->dictEnd     = NULL;
+    dctx->ddictIsCold = 0;
+    dctx->dictUses = ZSTD_dont_use;
+    dctx->inBuff      = NULL;
+    dctx->inBuffSize  = 0;
+    dctx->outBuffSize = 0;
+    dctx->streamStage = zdss_init;
+    dctx->legacyContext = NULL;
+    dctx->previousLegacyVersion = 0;
+    dctx->noForwardProgress = 0;
+    dctx->oversizedDuration = 0;
+    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    dctx->ddictSet = NULL;
+    ZSTD_DCtx_resetParameters(dctx);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentEndForFuzzing = NULL;
+#endif
+}
+
+ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;  /* minimum size */
+
+    ZSTD_initDCtx_internal(dctx);
+    dctx->staticSize = workspaceSize;
+    dctx->inBuff = (char*)(dctx+1);
+    return dctx;
+}
+
+ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
+        if (!dctx) return NULL;
+        dctx->customMem = customMem;
+        ZSTD_initDCtx_internal(dctx);
+        return dctx;
+    }
+}
+
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    DEBUGLOG(3, "ZSTD_createDCtx");
+    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
+}
+
+static void ZSTD_clearDict(ZSTD_DCtx* dctx)
+{
+    ZSTD_freeDDict(dctx->ddictLocal);
+    dctx->ddictLocal = NULL;
+    dctx->ddict = NULL;
+    dctx->dictUses = ZSTD_dont_use;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
+    {   ZSTD_customMem const cMem = dctx->customMem;
+        ZSTD_clearDict(dctx);
+        ZSTD_customFree(dctx->inBuff, cMem);
+        dctx->inBuff = NULL;
+        if (dctx->ddictSet) {
+            ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
+            dctx->ddictSet = NULL;
+        }
+        ZSTD_customFree(dctx, cMem);
+        return 0;
+    }
+}
+
+/* no longer useful */
+void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
+{
+    size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
+    ZSTD_memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
+}
+
+/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
+ * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
+ * accordingly sets the ddict to be used to decompress the frame.
+ *
+ * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
+ *
+ * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
+ */
+static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
+    assert(dctx->refMultipleDDicts && dctx->ddictSet);
+    DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
+    if (dctx->ddict) {
+        const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
+        if (frameDDict) {
+            DEBUGLOG(4, "DDict found!");
+            ZSTD_clearDict(dctx);
+            dctx->dictID = dctx->fParams.dictID;
+            dctx->ddict = frameDDict;
+            dctx->dictUses = ZSTD_use_indefinitely;
+        }
+    }
+}
+
+
+/*-*************************************************************
+ *   Frame header decoding
+ ***************************************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if (magic == ZSTD_MAGICNUMBER) return 1;
+        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+    return 0;
+}
+
+/* ZSTD_frameHeaderSize_internal() :
+ *  srcSize must be large enough to reach header size fields.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
+ * @return : size of the Frame Header
+ *           or an error code, which can be tested with ZSTD_isError() */
+static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
+
+    {   BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
+        U32 const dictID= fhd & 3;
+        U32 const singleSegment = (fhd >> 5) & 1;
+        U32 const fcsId = fhd >> 6;
+        return minInputSize + !singleSegment
+             + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
+             + (singleSegment && !fcsId);
+    }
+}
+
+/* ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_frameHeaderSize_prefix.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+{
+    return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/* ZSTD_getFrameHeader_advanced() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+
+    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
+    if (srcSize < minInputSize) return minInputSize;
+    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
+
+    if ( (format != ZSTD_f_zstd1_magicless)
+      && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
+        if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            /* skippable frame */
+            if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
+                return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
+            ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
+            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
+            zfhPtr->frameType = ZSTD_skippableFrame;
+            return 0;
+        }
+        RETURN_ERROR(prefix_unknown, "");
+    }
+
+    /* ensure there is enough `srcSize` to fully read/decode frame header */
+    {   size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
+        if (srcSize < fhsize) return fhsize;
+        zfhPtr->headerSize = (U32)fhsize;
+    }
+
+    {   BYTE const fhdByte = ip[minInputSize-1];
+        size_t pos = minInputSize;
+        U32 const dictIDSizeCode = fhdByte&3;
+        U32 const checksumFlag = (fhdByte>>2)&1;
+        U32 const singleSegment = (fhdByte>>5)&1;
+        U32 const fcsID = fhdByte>>6;
+        U64 windowSize = 0;
+        U32 dictID = 0;
+        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
+        RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
+                        "reserved bits, must be zero");
+
+        if (!singleSegment) {
+            BYTE const wlByte = ip[pos++];
+            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+            RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
+            windowSize = (1ULL << windowLog);
+            windowSize += (windowSize >> 3) * (wlByte&7);
+        }
+        switch(dictIDSizeCode)
+        {
+            default:
+                assert(0);  /* impossible */
+                ZSTD_FALLTHROUGH;
+            case 0 : break;
+            case 1 : dictID = ip[pos]; pos++; break;
+            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
+            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
+        }
+        switch(fcsID)
+        {
+            default:
+                assert(0);  /* impossible */
+                ZSTD_FALLTHROUGH;
+            case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
+            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
+            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
+            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
+        }
+        if (singleSegment) windowSize = frameContentSize;
+
+        zfhPtr->frameType = ZSTD_frame;
+        zfhPtr->frameContentSize = frameContentSize;
+        zfhPtr->windowSize = windowSize;
+        zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+        zfhPtr->dictID = dictID;
+        zfhPtr->checksumFlag = checksumFlag;
+    }
+    return 0;
+}
+
+/* ZSTD_getFrameHeader() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : this function does not consume input, it only reads it.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
+{
+    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/* ZSTD_getFrameContentSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
+ *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+    {   ZSTD_frameHeader zfh;
+        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+            return ZSTD_CONTENTSIZE_ERROR;
+        if (zfh.frameType == ZSTD_skippableFrame) {
+            return 0;
+        } else {
+            return zfh.frameContentSize;
+    }   }
+}
+
+static size_t readSkippableFrameSize(void const* src, size_t srcSize)
+{
+    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
+    U32 sizeU32;
+
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+
+    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
+    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                    frameParameter_unsupported, "");
+    {
+        size_t const skippableSize = skippableHeaderSize + sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+        return skippableSize;
+    }
+}
+
+/* ZSTD_findDecompressedSize() :
+ *  compatible with legacy mode
+ *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ *      skippable frames
+ *  @return : decompressed size of the frames contained */
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long totalDstSize = 0;
+
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
+        U32 const magicNumber = MEM_readLE32(src);
+
+        if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+            if (ZSTD_isError(skippableSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+            assert(skippableSize <= srcSize);
+
+            src = (const BYTE *)src + skippableSize;
+            srcSize -= skippableSize;
+            continue;
+        }
+
+        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+
+            /* check for overflow */
+            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+            totalDstSize += ret;
+        }
+        {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+            if (ZSTD_isError(frameSrcSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+
+            src = (const BYTE *)src + frameSrcSize;
+            srcSize -= frameSrcSize;
+        }
+    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
+
+    if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
+
+    return totalDstSize;
+}
+
+/* ZSTD_getDecompressedSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size if known, 0 otherwise
+             note : 0 can mean any of the following :
+                   - frame content is empty
+                   - decompressed size field is not present in frame header
+                   - frame header unknown / not supported
+                   - frame header not complete (`srcSize` too small) */
+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
+    return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
+}
+
+
+/* ZSTD_decodeFrameHeader() :
+ * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+ * If multiple DDict references are enabled, also will choose the correct DDict to use.
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
+{
+    size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
+    if (ZSTD_isError(result)) return result;    /* invalid header */
+    RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+
+    /* Reference DDict requested by frame if dctx references multiple ddicts */
+    if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
+        ZSTD_DCtx_selectFrameDDict(dctx);
+    }
+
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    /* Skip the dictID check in fuzzing mode, because it makes the search
+     * harder.
+     */
+    RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
+                    dictionary_wrong, "");
+#endif
+    dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
+    if (dctx->validateChecksum) xxh64_reset(&dctx->xxhState, 0);
+    dctx->processedCSize += headerSize;
+    return 0;
+}
+
+static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    frameSizeInfo.compressedSize = ret;
+    frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    return frameSizeInfo;
+}
+
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+
+
+    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+        && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+        frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+               frameSizeInfo.compressedSize <= srcSize);
+        return frameSizeInfo;
+    } else {
+        const BYTE* ip = (const BYTE*)src;
+        const BYTE* const ipstart = ip;
+        size_t remainingSize = srcSize;
+        size_t nbBlocks = 0;
+        ZSTD_frameHeader zfh;
+
+        /* Extract Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+            if (ZSTD_isError(ret))
+                return ZSTD_errorFrameSizeInfo(ret);
+            if (ret > 0)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+        }
+
+        ip += zfh.headerSize;
+        remainingSize -= zfh.headerSize;
+
+        /* Iterate over each block */
+        while (1) {
+            blockProperties_t blockProperties;
+            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+            if (ZSTD_isError(cBlockSize))
+                return ZSTD_errorFrameSizeInfo(cBlockSize);
+
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+
+            ip += ZSTD_blockHeaderSize + cBlockSize;
+            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+            nbBlocks++;
+
+            if (blockProperties.lastBlock) break;
+        }
+
+        /* Final frame content checksum */
+        if (zfh.checksumFlag) {
+            if (remainingSize < 4)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+            ip += 4;
+        }
+
+        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
+        frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
+                                        ? zfh.frameContentSize
+                                        : nbBlocks * zfh.blockSizeMax;
+        return frameSizeInfo;
+    }
+}
+
+/* ZSTD_findFrameCompressedSize() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the compressed size of the frame starting at `src` */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+    return frameSizeInfo.compressedSize;
+}
+
+/* ZSTD_decompressBound() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame or a skippeable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the maximum decompressed size of the compressed source
+ */
+unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+{
+    unsigned long long bound = 0;
+    /* Iterate over each frame */
+    while (srcSize > 0) {
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+        size_t const compressedSize = frameSizeInfo.compressedSize;
+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+            return ZSTD_CONTENTSIZE_ERROR;
+        assert(srcSize >= compressedSize);
+        src = (const BYTE*)src + compressedSize;
+        srcSize -= compressedSize;
+        bound += decompressedBound;
+    }
+    return bound;
+}
+
+
+/*-*************************************************************
+ *   Frame decoding
+ ***************************************************************/
+
+/* ZSTD_insertBlock() :
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
+    ZSTD_checkContinuity(dctx, blockStart, blockSize);
+    dctx->previousDstEnd = (const char*)blockStart + blockSize;
+    return blockSize;
+}
+
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_copyRawBlock");
+    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
+    if (dst == NULL) {
+        if (srcSize == 0) return 0;
+        RETURN_ERROR(dstBuffer_null, "");
+    }
+    ZSTD_memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+                               BYTE b,
+                               size_t regenSize)
+{
+    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
+    if (dst == NULL) {
+        if (regenSize == 0) return 0;
+        RETURN_ERROR(dstBuffer_null, "");
+    }
+    ZSTD_memset(dst, b, regenSize);
+    return regenSize;
+}
+
+static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
+{
+    (void)dctx;
+    (void)uncompressedSize;
+    (void)compressedSize;
+    (void)streaming;
+}
+
+
+/*! ZSTD_decompressFrame() :
+ * @dctx must be properly initialized
+ *  will update *srcPtr and *srcSizePtr,
+ *  to make *srcPtr progress by one frame. */
+static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+                                   void* dst, size_t dstCapacity,
+                             const void** srcPtr, size_t *srcSizePtr)
+{
+    const BYTE* const istart = (const BYTE*)(*srcPtr);
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
+    BYTE* op = ostart;
+    size_t remainingSrcSize = *srcSizePtr;
+
+    DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
+
+    /* check */
+    RETURN_ERROR_IF(
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
+        srcSize_wrong, "");
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+        RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
+                        srcSize_wrong, "");
+        FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
+        ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1) {
+        size_t decodedSize;
+        blockProperties_t blockProperties;
+        size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSrcSize -= ZSTD_blockHeaderSize;
+        RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
+            break;
+        case bt_rle :
+            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
+            break;
+        case bt_reserved :
+        default:
+            RETURN_ERROR(corruption_detected, "invalid block type");
+        }
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        if (dctx->validateChecksum)
+            xxh64_update(&dctx->xxhState, op, decodedSize);
+        if (decodedSize != 0)
+            op += decodedSize;
+        assert(ip != NULL);
+        ip += cBlockSize;
+        remainingSrcSize -= cBlockSize;
+        if (blockProperties.lastBlock) break;
+    }
+
+    if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
+                        corruption_detected, "");
+    }
+    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
+        RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
+        if (!dctx->forceIgnoreChecksum) {
+            U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
+            U32 checkRead;
+            checkRead = MEM_readLE32(ip);
+            RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
+        }
+        ip += 4;
+        remainingSrcSize -= 4;
+    }
+    ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
+    /* Allow caller to get size read */
+    *srcPtr = ip;
+    *srcSizePtr = remainingSrcSize;
+    return (size_t)(op-ostart);
+}
+
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+                                        void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  const void* dict, size_t dictSize,
+                                  const ZSTD_DDict* ddict)
+{
+    void* const dststart = dst;
+    int moreThan1Frame = 0;
+
+    DEBUGLOG(5, "ZSTD_decompressMultiFrame");
+    assert(dict==NULL || ddict==NULL);  /* either dict or ddict set, not both */
+
+    if (ddict) {
+        dict = ZSTD_DDict_dictContent(ddict);
+        dictSize = ZSTD_DDict_dictSize(ddict);
+    }
+
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
+
+
+        {   U32 const magicNumber = MEM_readLE32(src);
+            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
+                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
+            if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+                size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+                assert(skippableSize <= srcSize);
+
+                src = (const BYTE *)src + skippableSize;
+                srcSize -= skippableSize;
+                continue;
+        }   }
+
+        if (ddict) {
+            /* we were called from ZSTD_decompress_usingDDict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
+        } else {
+            /* this will initialize correctly with no dict if dict == NULL, so
+             * use this in all cases but ddict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
+        }
+        ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
+                                                    &src, &srcSize);
+            RETURN_ERROR_IF(
+                (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
+             && (moreThan1Frame==1),
+                srcSize_wrong,
+                "At least one frame successfully completed, "
+                "but following bytes are garbage: "
+                "it's more likely to be a srcSize error, "
+                "specifying more input bytes than size of frame(s). "
+                "Note: one could be unlucky, it might be a corruption error instead, "
+                "happening right at the place where we expect zstd magic bytes. "
+                "But this is _much_ less likely than a srcSize field error.");
+            if (ZSTD_isError(res)) return res;
+            assert(res <= dstCapacity);
+            if (res != 0)
+                dst = (BYTE*)dst + res;
+            dstCapacity -= res;
+        }
+        moreThan1Frame = 1;
+    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
+
+    RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
+
+    return (size_t)((BYTE*)dst - (BYTE*)dststart);
+}
+
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize,
+                           const void* dict, size_t dictSize)
+{
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
+}
+
+
+static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
+{
+    switch (dctx->dictUses) {
+    default:
+        assert(0 /* Impossible */);
+        ZSTD_FALLTHROUGH;
+    case ZSTD_dont_use:
+        ZSTD_clearDict(dctx);
+        return NULL;
+    case ZSTD_use_indefinitely:
+        return dctx->ddict;
+    case ZSTD_use_once:
+        dctx->dictUses = ZSTD_dont_use;
+        return dctx->ddict;
+    }
+}
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
+}
+
+
+size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
+    size_t regenSize;
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
+    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+    ZSTD_freeDCtx(dctx);
+    return regenSize;
+#else   /* stack mode */
+    ZSTD_DCtx dctx;
+    ZSTD_initDCtx_internal(&dctx);
+    return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
+
+
+/*-**************************************
+*   Advanced Streaming Decompression API
+*   Bufferless and synchronous
+****************************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
+
+/*
+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
+ * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
+ * be streamed.
+ *
+ * For blocks that can be streamed, this allows us to reduce the latency until we produce
+ * output, and avoid copying the input.
+ *
+ * @param inputSize - The total amount of input that the caller currently has.
+ */
+static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
+    if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
+        return dctx->expected;
+    if (dctx->bType != bt_raw)
+        return dctx->expected;
+    return MIN(MAX(inputSize, 1), dctx->expected);
+}
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
+    switch(dctx->stage)
+    {
+    default:   /* should not happen */
+        assert(0);
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_getFrameHeaderSize:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_decodeFrameHeader:
+        return ZSTDnit_frameHeader;
+    case ZSTDds_decodeBlockHeader:
+        return ZSTDnit_blockHeader;
+    case ZSTDds_decompressBlock:
+        return ZSTDnit_block;
+    case ZSTDds_decompressLastBlock:
+        return ZSTDnit_lastBlock;
+    case ZSTDds_checkChecksum:
+        return ZSTDnit_checksum;
+    case ZSTDds_decodeSkippableHeader:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_skipFrame:
+        return ZSTDnit_skippableFrame;
+    }
+}
+
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
+
+/* ZSTD_decompressContinue() :
+ *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
+ *  @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
+ *            or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
+    /* Sanity check */
+    RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+    dctx->processedCSize += srcSize;
+
+    switch (dctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        assert(src != NULL);
+        if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
+            assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
+            if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
+                ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+                dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
+                dctx->stage = ZSTDds_decodeSkippableHeader;
+                return 0;
+        }   }
+        dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
+        if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
+        ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+        dctx->expected = dctx->headerSize - srcSize;
+        dctx->stage = ZSTDds_decodeFrameHeader;
+        return 0;
+
+    case ZSTDds_decodeFrameHeader:
+        assert(src != NULL);
+        ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
+        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
+        dctx->expected = ZSTD_blockHeaderSize;
+        dctx->stage = ZSTDds_decodeBlockHeader;
+        return 0;
+
+    case ZSTDds_decodeBlockHeader:
+        {   blockProperties_t bp;
+            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
+            dctx->expected = cBlockSize;
+            dctx->bType = bp.blockType;
+            dctx->rleSize = bp.origSize;
+            if (cBlockSize) {
+                dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+                return 0;
+            }
+            /* empty block */
+            if (bp.lastBlock) {
+                if (dctx->fParams.checksumFlag) {
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    dctx->expected = 0; /* end of frame */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
+                dctx->stage = ZSTDds_decodeBlockHeader;
+            }
+            return 0;
+        }
+
+    case ZSTDds_decompressLastBlock:
+    case ZSTDds_decompressBlock:
+        DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
+        {   size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
+                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_raw :
+                assert(srcSize <= dctx->expected);
+                rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
+                FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
+                assert(rSize == srcSize);
+                dctx->expected -= rSize;
+                break;
+            case bt_rle :
+                rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_reserved :   /* should never happen */
+            default:
+                RETURN_ERROR(corruption_detected, "invalid block type");
+            }
+            FORWARD_IF_ERROR(rSize, "");
+            RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
+            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
+            dctx->decodedSize += rSize;
+            if (dctx->validateChecksum) xxh64_update(&dctx->xxhState, dst, rSize);
+            dctx->previousDstEnd = (char*)dst + rSize;
+
+            /* Stay on the same stage until we are finished streaming the block. */
+            if (dctx->expected > 0) {
+                return rSize;
+            }
+
+            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
+                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
+                RETURN_ERROR_IF(
+                    dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                 && dctx->decodedSize != dctx->fParams.frameContentSize,
+                    corruption_detected, "");
+                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+                    dctx->expected = 0;   /* ends here */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->stage = ZSTDds_decodeBlockHeader;
+                dctx->expected = ZSTD_blockHeaderSize;
+            }
+            return rSize;
+        }
+
+    case ZSTDds_checkChecksum:
+        assert(srcSize == 4);  /* guaranteed by dctx->expected */
+        {
+            if (dctx->validateChecksum) {
+                U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
+                U32 const check32 = MEM_readLE32(src);
+                DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
+                RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
+            }
+            ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+            dctx->expected = 0;
+            dctx->stage = ZSTDds_getFrameHeaderSize;
+            return 0;
+        }
+
+    case ZSTDds_decodeSkippableHeader:
+        assert(src != NULL);
+        assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
+        ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
+        dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
+        dctx->stage = ZSTDds_skipFrame;
+        return 0;
+
+    case ZSTDds_skipFrame:
+        dctx->expected = 0;
+        dctx->stage = ZSTDds_getFrameHeaderSize;
+        return 0;
+
+    default:
+        assert(0);   /* impossible */
+        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
+    }
+}
+
+
+static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+    dctx->prefixStart = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+    return 0;
+}
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of entropy tables read */
+size_t
+ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                  const void* const dict, size_t const dictSize)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+
+    RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
+    assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);   /* dict must be valid */
+    dictPtr += 8;   /* skip header = magic + dictID */
+
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
+    ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
+    {   void* const workspace = &entropy->LLTable;   /* use fse tables as temporary workspace; implies fse tables are grouped together */
+        size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
+#ifdef HUF_FORCE_DECOMPRESS_X1
+        /* in minimal huffman, we always use X1 variants */
+        size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
+                                                dictPtr, dictEnd - dictPtr,
+                                                workspace, workspaceSize);
+#else
+        size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
+                                                dictPtr, (size_t)(dictEnd - dictPtr),
+                                                workspace, workspaceSize);
+#endif
+        RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
+        dictPtr += hSize;
+    }
+
+    {   short offcodeNCount[MaxOff+1];
+        unsigned offcodeMaxValue = MaxOff, offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->OFTable,
+                            offcodeNCount, offcodeMaxValue,
+                            OF_base, OF_bits,
+                            offcodeLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */0);
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->MLTable,
+                            matchlengthNCount, matchlengthMaxValue,
+                            ML_base, ML_bits,
+                            matchlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->LLTable,
+                            litlengthNCount, litlengthMaxValue,
+                            LL_base, LL_bits,
+                            litlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+    {   int i;
+        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
+        for (i=0; i<3; i++) {
+            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
+                            dictionary_corrupted, "");
+            entropy->rep[i] = rep;
+    }   }
+
+    return (size_t)(dictPtr - (const BYTE*)dict);
+}
+
+static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
+    {   U32 const magic = MEM_readLE32(dict);
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
+            return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
+    }   }
+    dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    {   size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
+        RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
+        dict = (const char*)dict + eSize;
+        dictSize -= eSize;
+    }
+    dctx->litEntropy = dctx->fseEntropy = 1;
+
+    /* reference dictionary content */
+    return ZSTD_refDictContent(dctx, dict, dictSize);
+}
+
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
+{
+    assert(dctx != NULL);
+    dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->processedCSize = 0;
+    dctx->decodedSize = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->prefixStart = NULL;
+    dctx->virtualStart = NULL;
+    dctx->dictEnd = NULL;
+    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+    dctx->litEntropy = dctx->fseEntropy = 0;
+    dctx->dictID = 0;
+    dctx->bType = bt_reserved;
+    ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+    ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+    dctx->LLTptr = dctx->entropy.LLTable;
+    dctx->MLTptr = dctx->entropy.MLTable;
+    dctx->OFTptr = dctx->entropy.OFTable;
+    dctx->HUFptr = dctx->entropy.hufTable;
+    return 0;
+}
+
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (dict && dictSize)
+        RETURN_ERROR_IF(
+            ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
+            dictionary_corrupted, "");
+    return 0;
+}
+
+
+/* ======   ZSTD_DDict   ====== */
+
+size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
+    assert(dctx != NULL);
+    if (ddict) {
+        const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
+        size_t const dictSize = ZSTD_DDict_dictSize(ddict);
+        const void* const dictEnd = dictStart + dictSize;
+        dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
+        DEBUGLOG(4, "DDict is %s",
+                    dctx->ddictIsCold ? "~cold~" : "hot!");
+    }
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (ddict) {   /* NULL ddict is equivalent to no dictionary */
+        ZSTD_copyDDictParameters(dctx, ddict);
+    }
+    return 0;
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could for one of the following reasons :
+ *  - The frame does not require a dictionary (most common case).
+ *  - The frame was built with dictID intentionally removed.
+ *    Needed dictionary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+ *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use
+ *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
+    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+    if (ZSTD_isError(hError)) return 0;
+    return zfp.dictID;
+}
+
+
+/*! ZSTD_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead. */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_DDict* ddict)
+{
+    /* pass content and size in case legacy frames are encountered */
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+                                     NULL, 0,
+                                     ddict);
+}
+
+
+/*=====================================
+*   Streaming decompression
+*====================================*/
+
+ZSTD_DStream* ZSTD_createDStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createDStream");
+    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticDCtx(workspace, workspaceSize);
+}
+
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDCtx_advanced(customMem);
+}
+
+size_t ZSTD_freeDStream(ZSTD_DStream* zds)
+{
+    return ZSTD_freeDCtx(zds);
+}
+
+
+/* ***  Initialization  *** */
+
+size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
+                                   const void* dict, size_t dictSize,
+                                         ZSTD_dictLoadMethod_e dictLoadMethod,
+                                         ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (dict && dictSize != 0) {
+        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
+        dctx->ddict = dctx->ddictLocal;
+        dctx->dictUses = ZSTD_use_indefinitely;
+    }
+    return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
+    dctx->dictUses = ZSTD_use_once;
+    return 0;
+}
+
+size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
+{
+    return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+
+/* ZSTD_initDStream_usingDict() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+{
+    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
+    return ZSTD_startingInputLength(zds->format);
+}
+
+/* note : this variant can't fail */
+size_t ZSTD_initDStream(ZSTD_DStream* zds)
+{
+    DEBUGLOG(4, "ZSTD_initDStream");
+    return ZSTD_initDStream_usingDDict(zds, NULL);
+}
+
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive decompression session
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
+{
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
+    return ZSTD_startingInputLength(dctx->format);
+}
+
+/* ZSTD_resetDStream() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
+    return ZSTD_startingInputLength(dctx->format);
+}
+
+
+size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (ddict) {
+        dctx->ddict = ddict;
+        dctx->dictUses = ZSTD_use_indefinitely;
+        if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
+            if (dctx->ddictSet == NULL) {
+                dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
+                if (!dctx->ddictSet) {
+                    RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
+                }
+            }
+            assert(!dctx->staticSize);  /* Impossible: ddictSet cannot have been allocated if static dctx */
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
+        }
+    }
+    return 0;
+}
+
+/* ZSTD_DCtx_setMaxWindowSize() :
+ * note : no direct equivalence in ZSTD_DCtx_setParameter,
+ * since this version sets windowSize, and the other sets windowLog */
+size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
+{
+    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
+    size_t const min = (size_t)1 << bounds.lowerBound;
+    size_t const max = (size_t)1 << bounds.upperBound;
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
+    RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
+    dctx->maxWindowSize = maxWindowSize;
+    return 0;
+}
+
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
+{
+    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
+}
+
+ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
+{
+    ZSTD_bounds bounds = { 0, 0, 0 };
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
+            bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+            return bounds;
+        case ZSTD_d_format:
+            bounds.lowerBound = (int)ZSTD_f_zstd1;
+            bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
+            ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+            return bounds;
+        case ZSTD_d_stableOutBuffer:
+            bounds.lowerBound = (int)ZSTD_bm_buffered;
+            bounds.upperBound = (int)ZSTD_bm_stable;
+            return bounds;
+        case ZSTD_d_forceIgnoreChecksum:
+            bounds.lowerBound = (int)ZSTD_d_validateChecksum;
+            bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
+            return bounds;
+        case ZSTD_d_refMultipleDDicts:
+            bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+            bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
+            return bounds;
+        default:;
+    }
+    bounds.error = ERROR(parameter_unsupported);
+    return bounds;
+}
+
+/* ZSTD_dParam_withinBounds:
+ * @return 1 if value is within dParam bounds,
+ * 0 otherwise */
+static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
+{
+    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+#define CHECK_DBOUNDS(p,v) {                \
+    RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
+}
+
+size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
+{
+    switch (param) {
+        case ZSTD_d_windowLogMax:
+            *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
+            return 0;
+        case ZSTD_d_format:
+            *value = (int)dctx->format;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            *value = (int)dctx->outBufferMode;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            *value = (int)dctx->forceIgnoreChecksum;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            *value = (int)dctx->refMultipleDDicts;
+            return 0;
+        default:;
+    }
+    RETURN_ERROR(parameter_unsupported, "");
+}
+
+size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
+            CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
+            dctx->maxWindowSize = ((size_t)1) << value;
+            return 0;
+        case ZSTD_d_format:
+            CHECK_DBOUNDS(ZSTD_d_format, value);
+            dctx->format = (ZSTD_format_e)value;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
+            dctx->outBufferMode = (ZSTD_bufferMode_e)value;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
+            dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
+            if (dctx->staticSize != 0) {
+                RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
+            }
+            dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
+            return 0;
+        default:;
+    }
+    RETURN_ERROR(parameter_unsupported, "");
+}
+
+size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        dctx->streamStage = zdss_init;
+        dctx->noForwardProgress = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+        ZSTD_clearDict(dctx);
+        ZSTD_DCtx_resetParameters(dctx);
+    }
+    return 0;
+}
+
+
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
+{
+    return ZSTD_sizeof_DCtx(dctx);
+}
+
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+{
+    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
+    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
+    size_t const minRBSize = (size_t) neededSize;
+    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
+                    frameParameter_windowTooLarge, "");
+    return minRBSize;
+}
+
+size_t ZSTD_estimateDStreamSize(size_t windowSize)
+{
+    size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    size_t const inBuffSize = blockSize;  /* no block can be larger */
+    size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
+}
+
+size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
+{
+    U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;   /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
+    ZSTD_frameHeader zfh;
+    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
+    if (ZSTD_isError(err)) return err;
+    RETURN_ERROR_IF(err>0, srcSize_wrong, "");
+    RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
+                    frameParameter_windowTooLarge, "");
+    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
+}
+
+
+/* *****   Decompression   ***** */
+
+static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
+}
+
+static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
+        zds->oversizedDuration++;
+    else
+        zds->oversizedDuration = 0;
+}
+
+static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
+{
+    return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */
+static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
+{
+    ZSTD_outBuffer const expect = zds->expectedOutBuffer;
+    /* No requirement when ZSTD_obm_stable is not enabled. */
+    if (zds->outBufferMode != ZSTD_bm_stable)
+        return 0;
+    /* Any buffer is allowed in zdss_init, this must be the same for every other call until
+     * the context is reset.
+     */
+    if (zds->streamStage == zdss_init)
+        return 0;
+    /* The buffer must match our expectation exactly. */
+    if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
+        return 0;
+    RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
+}
+
+/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
+ * and updates the stage and the output buffer state. This call is extracted so it can be
+ * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
+ * NOTE: You must break after calling this function since the streamStage is modified.
+ */
+static size_t ZSTD_decompressContinueStream(
+            ZSTD_DStream* zds, char** op, char* oend,
+            void const* src, size_t srcSize) {
+    int const isSkipFrame = ZSTD_isSkipFrame(zds);
+    if (zds->outBufferMode == ZSTD_bm_buffered) {
+        size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
+        size_t const decodedSize = ZSTD_decompressContinue(zds,
+                zds->outBuff + zds->outStart, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        if (!decodedSize && !isSkipFrame) {
+            zds->streamStage = zdss_read;
+        } else {
+            zds->outEnd = zds->outStart + decodedSize;
+            zds->streamStage = zdss_flush;
+        }
+    } else {
+        /* Write directly into the output buffer */
+        size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
+        size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        *op += decodedSize;
+        /* Flushing is not needed. */
+        zds->streamStage = zdss_read;
+        assert(*op <= oend);
+        assert(zds->outBufferMode == ZSTD_bm_stable);
+    }
+    return 0;
+}
+
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    const char* const src = (const char*)input->src;
+    const char* const istart = input->pos != 0 ? src + input->pos : src;
+    const char* const iend = input->size != 0 ? src + input->size : src;
+    const char* ip = istart;
+    char* const dst = (char*)output->dst;
+    char* const ostart = output->pos != 0 ? dst + output->pos : dst;
+    char* const oend = output->size != 0 ? dst + output->size : dst;
+    char* op = ostart;
+    U32 someMoreWork = 1;
+
+    DEBUGLOG(5, "ZSTD_decompressStream");
+    RETURN_ERROR_IF(
+        input->pos > input->size,
+        srcSize_wrong,
+        "forbidden. in: pos: %u   vs size: %u",
+        (U32)input->pos, (U32)input->size);
+    RETURN_ERROR_IF(
+        output->pos > output->size,
+        dstSize_tooSmall,
+        "forbidden. out: pos: %u   vs size: %u",
+        (U32)output->pos, (U32)output->size);
+    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
+    FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
+
+    while (someMoreWork) {
+        switch(zds->streamStage)
+        {
+        case zdss_init :
+            DEBUGLOG(5, "stage zdss_init => transparent reset ");
+            zds->streamStage = zdss_loadHeader;
+            zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+            zds->legacyVersion = 0;
+            zds->hostageByte = 0;
+            zds->expectedOutBuffer = *output;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_loadHeader :
+            DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
+            {   size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
+                if (zds->refMultipleDDicts && zds->ddictSet) {
+                    ZSTD_DCtx_selectFrameDDict(zds);
+                }
+                DEBUGLOG(5, "header size : %u", (U32)hSize);
+                if (ZSTD_isError(hSize)) {
+                    return hSize;   /* error */
+                }
+                if (hSize != 0) {   /* need more input */
+                    size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
+                    size_t const remainingInput = (size_t)(iend-ip);
+                    assert(iend >= ip);
+                    if (toLoad > remainingInput) {   /* not enough input to load full header */
+                        if (remainingInput > 0) {
+                            ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
+                            zds->lhSize += remainingInput;
+                        }
+                        input->pos = input->size;
+                        return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                    }
+                    assert(ip != NULL);
+                    ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
+                    break;
+            }   }
+
+            /* check for single-pass mode opportunity */
+            if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
+                if (cSize <= (size_t)(iend-istart)) {
+                    /* shortcut : using single-pass mode */
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
+                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
+                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
+                    ip = istart + cSize;
+                    op += decompressedSize;
+                    zds->expected = 0;
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+            }   }
+
+            /* Check output buffer is large enough for ZSTD_odm_stable. */
+            if (zds->outBufferMode == ZSTD_bm_stable
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
+                RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
+            }
+
+            /* Consume header (see ZSTDds_decodeFrameHeader) */
+            DEBUGLOG(4, "Consume header");
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
+
+            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+                zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
+                zds->stage = ZSTDds_skipFrame;
+            } else {
+                FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
+                zds->expected = ZSTD_blockHeaderSize;
+                zds->stage = ZSTDds_decodeBlockHeader;
+            }
+
+            /* control buffer memory usage */
+            DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
+                        (U32)(zds->fParams.windowSize >>10),
+                        (U32)(zds->maxWindowSize >> 10) );
+            zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
+            RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
+                            frameParameter_windowTooLarge, "");
+
+            /* Adapt buffer sizes to frame header instructions */
+            {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
+                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
+                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+                        : 0;
+
+                ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
+
+                {   int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
+                    int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
+
+                    if (tooSmall || tooLarge) {
+                        size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
+                        DEBUGLOG(4, "inBuff  : from %u to %u",
+                                    (U32)zds->inBuffSize, (U32)neededInBuffSize);
+                        DEBUGLOG(4, "outBuff : from %u to %u",
+                                    (U32)zds->outBuffSize, (U32)neededOutBuffSize);
+                        if (zds->staticSize) {  /* static DCtx */
+                            DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+                            assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
+                            RETURN_ERROR_IF(
+                                bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
+                                memory_allocation, "");
+                        } else {
+                            ZSTD_customFree(zds->inBuff, zds->customMem);
+                            zds->inBuffSize = 0;
+                            zds->outBuffSize = 0;
+                            zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem);
+                            RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
+                        }
+                        zds->inBuffSize = neededInBuffSize;
+                        zds->outBuff = zds->inBuff + zds->inBuffSize;
+                        zds->outBuffSize = neededOutBuffSize;
+            }   }   }
+            zds->streamStage = zdss_read;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_read:
+            DEBUGLOG(5, "stage zdss_read");
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
+                DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
+                if (neededInSize==0) {  /* end of frame */
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
+                    FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
+                    ip += neededInSize;
+                    /* Function modifies the stage so we must break */
+                    break;
+            }   }
+            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
+            zds->streamStage = zdss_load;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_load:
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+                size_t const toLoad = neededInSize - zds->inPos;
+                int const isSkipFrame = ZSTD_isSkipFrame(zds);
+                size_t loadedSize;
+                /* At this point we shouldn't be decompressing a block that we can stream. */
+                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
+                if (isSkipFrame) {
+                    loadedSize = MIN(toLoad, (size_t)(iend-ip));
+                } else {
+                    RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
+                                    corruption_detected,
+                                    "should never happen");
+                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
+                }
+                ip += loadedSize;
+                zds->inPos += loadedSize;
+                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
+
+                /* decode loaded input */
+                zds->inPos = 0;   /* input is consumed */
+                FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
+                /* Function modifies the stage so we must break */
+                break;
+            }
+        case zdss_flush:
+            {   size_t const toFlushSize = zds->outEnd - zds->outStart;
+                size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
+                op += flushedSize;
+                zds->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {  /* flush completed */
+                    zds->streamStage = zdss_read;
+                    if ( (zds->outBuffSize < zds->fParams.frameContentSize)
+                      && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
+                        DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
+                                (int)(zds->outBuffSize - zds->outStart),
+                                (U32)zds->fParams.blockSizeMax);
+                        zds->outStart = zds->outEnd = 0;
+                    }
+                    break;
+            }   }
+            /* cannot complete flush */
+            someMoreWork = 0;
+            break;
+
+        default:
+            assert(0);    /* impossible */
+            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
+    }   }
+
+    /* result */
+    input->pos = (size_t)(ip - (const char*)(input->src));
+    output->pos = (size_t)(op - (char*)(output->dst));
+
+    /* Update the expected output buffer for ZSTD_obm_stable. */
+    zds->expectedOutBuffer = *output;
+
+    if ((ip==istart) && (op==ostart)) {  /* no forward progress */
+        zds->noForwardProgress ++;
+        if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
+            RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
+            RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
+            assert(0);
+        }
+    } else {
+        zds->noForwardProgress = 0;
+    }
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
+        if (!nextSrcSizeHint) {   /* frame fully decoded */
+            if (zds->outEnd == zds->outStart) {  /* output fully flushed */
+                if (zds->hostageByte) {
+                    if (input->pos >= input->size) {
+                        /* can't release hostage (not present) */
+                        zds->streamStage = zdss_read;
+                        return 1;
+                    }
+                    input->pos++;  /* release hostage */
+                }   /* zds->hostageByte */
+                return 0;
+            }  /* zds->outEnd == zds->outStart */
+            if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
+                input->pos--;   /* note : pos > 0, otherwise, impossible to finish reading last block */
+                zds->hostageByte=1;
+            }
+            return 1;
+        }  /* nextSrcSizeHint==0 */
+        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block);   /* preload header of next block */
+        assert(zds->inPos <= nextSrcSizeHint);
+        nextSrcSizeHint -= zds->inPos;   /* part already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c
new file mode 100644
index 000000000000..2d101d9a842e
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_block.c
@@ -0,0 +1,1540 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_decompress_block :
+ * this module takes care of decompressing _compressed_ block */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/compiler.h"    /* prefetch */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/zstd_internal.h"
+#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"
+
+/*_*******************************************************
+*  Macros
+**********************************************************/
+
+/* These two optional macros force the use one way or another of the two
+ * ZSTD_decompressSequences implementations. You can't force in both directions
+ * at the same time.
+ */
+#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
+#endif
+
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
+
+
+/*-*************************************************************
+ *   Block decoding
+ ***************************************************************/
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr)
+{
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
+
+    {   U32 const cBlockHeader = MEM_readLE24(src);
+        U32 const cSize = cBlockHeader >> 3;
+        bpPtr->lastBlock = cBlockHeader & 1;
+        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
+        bpPtr->origSize = cSize;   /* only useful for RLE */
+        if (bpPtr->blockType == bt_rle) return 1;
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
+        return cSize;
+    }
+}
+
+
+/* Hidden declaration for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize);
+/*! ZSTD_decodeLiteralsBlock() :
+ * @return : nb of bytes read from src (< srcSize )
+ *  note : symbol not declared but exposed for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
+
+    {   const BYTE* const istart = (const BYTE*) src;
+        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+
+        switch(litEncType)
+        {
+        case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
+            ZSTD_FALLTHROUGH;
+
+        case set_compressed:
+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
+            {   size_t lhSize, litSize, litCSize;
+                U32 singleStream=0;
+                U32 const lhlCode = (istart[0] >> 2) & 3;
+                U32 const lhc = MEM_readLE32(istart);
+                size_t hufSuccess;
+                switch(lhlCode)
+                {
+                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    /* 2 - 2 - 10 - 10 */
+                    singleStream = !lhlCode;
+                    lhSize = 3;
+                    litSize  = (lhc >> 4) & 0x3FF;
+                    litCSize = (lhc >> 14) & 0x3FF;
+                    break;
+                case 2:
+                    /* 2 - 2 - 14 - 14 */
+                    lhSize = 4;
+                    litSize  = (lhc >> 4) & 0x3FFF;
+                    litCSize = lhc >> 18;
+                    break;
+                case 3:
+                    /* 2 - 2 - 18 - 18 */
+                    lhSize = 5;
+                    litSize  = (lhc >> 4) & 0x3FFFF;
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+
+                /* prefetch huffman table if cold */
+                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
+                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
+                }
+
+                if (litEncType==set_repeat) {
+                    if (singleStream) {
+                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    } else {
+                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    }
+                } else {
+                    if (singleStream) {
+#if defined(HUF_FORCE_DECOMPRESS_X2)
+                        hufSuccess = HUF_decompress1X_DCtx_wksp(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace));
+#else
+                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+#endif
+                    } else {
+                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+                    }
+                }
+
+                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
+
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                dctx->litEntropy = 1;
+                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
+                ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return litCSize + lhSize;
+            }
+
+        case set_basic:
+            {   size_t litSize, lhSize;
+                U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    break;
+                }
+
+                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
+                    ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                    dctx->litPtr = dctx->litBuffer;
+                    dctx->litSize = litSize;
+                    ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                    return lhSize+litSize;
+                }
+                /* direct reference into compressed stream */
+                dctx->litPtr = istart+lhSize;
+                dctx->litSize = litSize;
+                return lhSize+litSize;
+            }
+
+        case set_rle:
+            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                size_t litSize, lhSize;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                return lhSize+1;
+            }
+        default:
+            RETURN_ERROR(corruption_detected, "impossible");
+        }
+    }
+}
+
+/* Default FSE distribution tables.
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - printout the content of tables
+ * - pretify output, report below, test with fuzzer to ensure it's correct */
+
+/* Default FSE distribution table for Literal Lengths */
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+     /* nextState, nbAddBits, nbBits, baseVal */
+     {  0,  0,  4,    0},  { 16,  0,  4,    0},
+     { 32,  0,  5,    1},  {  0,  0,  5,    3},
+     {  0,  0,  5,    4},  {  0,  0,  5,    6},
+     {  0,  0,  5,    7},  {  0,  0,  5,    9},
+     {  0,  0,  5,   10},  {  0,  0,  5,   12},
+     {  0,  0,  6,   14},  {  0,  1,  5,   16},
+     {  0,  1,  5,   20},  {  0,  1,  5,   22},
+     {  0,  2,  5,   28},  {  0,  3,  5,   32},
+     {  0,  4,  5,   48},  { 32,  6,  5,   64},
+     {  0,  7,  5,  128},  {  0,  8,  6,  256},
+     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
+     { 32,  0,  4,    0},  {  0,  0,  4,    1},
+     {  0,  0,  5,    2},  { 32,  0,  5,    4},
+     {  0,  0,  5,    5},  { 32,  0,  5,    7},
+     {  0,  0,  5,    8},  { 32,  0,  5,   10},
+     {  0,  0,  5,   11},  {  0,  0,  6,   13},
+     { 32,  1,  5,   16},  {  0,  1,  5,   18},
+     { 32,  1,  5,   22},  {  0,  2,  5,   24},
+     { 32,  3,  5,   32},  {  0,  3,  5,   40},
+     {  0,  6,  4,   64},  { 16,  6,  4,   64},
+     { 32,  7,  5,  128},  {  0,  9,  6,  512},
+     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
+     { 16,  0,  4,    1},  { 32,  0,  5,    2},
+     { 32,  0,  5,    3},  { 32,  0,  5,    5},
+     { 32,  0,  5,    6},  { 32,  0,  5,    8},
+     { 32,  0,  5,    9},  { 32,  0,  5,   11},
+     { 32,  0,  5,   12},  {  0,  0,  6,   15},
+     { 32,  1,  5,   18},  { 32,  1,  5,   20},
+     { 32,  2,  5,   24},  { 32,  2,  5,   28},
+     { 32,  3,  5,   40},  { 32,  4,  5,   48},
+     {  0, 16,  6,65536},  {  0, 15,  6,32768},
+     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
+};   /* LL_defaultDTable */
+
+/* Default FSE distribution table for Offset Codes */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  5,    0},     {  0,  6,  4,   61},
+    {  0,  9,  5,  509},     {  0, 15,  5,32765},
+    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
+    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
+    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
+    {  0,  5,  5,   29},     {  0,  8,  4,  253},
+    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
+    {  0,  2,  5,    1},     { 16,  7,  4,  125},
+    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
+    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
+    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
+    {  0, 19,  5,524285},    {  0,  1,  5,    1},
+    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
+    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
+    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
+    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
+};   /* OF_defaultDTable */
+
+
+/* Default FSE distribution table for Match Lengths */
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  6,    3},  {  0,  0,  4,    4},
+    { 32,  0,  5,    5},  {  0,  0,  5,    6},
+    {  0,  0,  5,    8},  {  0,  0,  5,    9},
+    {  0,  0,  5,   11},  {  0,  0,  6,   13},
+    {  0,  0,  6,   16},  {  0,  0,  6,   19},
+    {  0,  0,  6,   22},  {  0,  0,  6,   25},
+    {  0,  0,  6,   28},  {  0,  0,  6,   31},
+    {  0,  0,  6,   34},  {  0,  1,  6,   37},
+    {  0,  1,  6,   41},  {  0,  2,  6,   47},
+    {  0,  3,  6,   59},  {  0,  4,  6,   83},
+    {  0,  7,  6,  131},  {  0,  9,  6,  515},
+    { 16,  0,  4,    4},  {  0,  0,  4,    5},
+    { 32,  0,  5,    6},  {  0,  0,  5,    7},
+    { 32,  0,  5,    9},  {  0,  0,  5,   10},
+    {  0,  0,  6,   12},  {  0,  0,  6,   15},
+    {  0,  0,  6,   18},  {  0,  0,  6,   21},
+    {  0,  0,  6,   24},  {  0,  0,  6,   27},
+    {  0,  0,  6,   30},  {  0,  0,  6,   33},
+    {  0,  1,  6,   35},  {  0,  1,  6,   39},
+    {  0,  2,  6,   43},  {  0,  3,  6,   51},
+    {  0,  4,  6,   67},  {  0,  5,  6,   99},
+    {  0,  8,  6,  259},  { 32,  0,  4,    4},
+    { 48,  0,  4,    4},  { 16,  0,  4,    5},
+    { 32,  0,  5,    7},  { 32,  0,  5,    8},
+    { 32,  0,  5,   10},  { 32,  0,  5,   11},
+    {  0,  0,  6,   14},  {  0,  0,  6,   17},
+    {  0,  0,  6,   20},  {  0,  0,  6,   23},
+    {  0,  0,  6,   26},  {  0,  0,  6,   29},
+    {  0,  0,  6,   32},  {  0, 16,  6,65539},
+    {  0, 15,  6,32771},  {  0, 14,  6,16387},
+    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
+    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
+};   /* ML_defaultDTable */
+
+
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+{
+    void* ptr = dt;
+    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
+    ZSTD_seqSymbol* const cell = dt + 1;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->nbBits = 0;
+    cell->nextState = 0;
+    assert(nbAddBits < 255);
+    cell->nbAdditionalBits = (BYTE)nbAddBits;
+    cell->baseValue = baseValue;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * cannot fail if input is valid =>
+ * all inputs are presumed validated at this stage */
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_seqSymbol* const tableDecode = dt+1;
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
+
+    /* Sanity Checks */
+    assert(maxSymbolValue <= MaxSeq);
+    assert(tableLog <= MaxFSELog);
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
+    /* Init, lay down lowprob symbols */
+    {   ZSTD_seqSymbol_header DTableH;
+        DTableH.tableLog = tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].baseValue = s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    assert(normalizedCounter[s]>=0);
+                    symbolNext[s] = (U16)normalizedCounter[s];
+        }   }   }
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols to hit this
+     * case, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what emperically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
+                tableDecode[position].baseValue = s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {
+        U32 u;
+        for (u=0; u<tableSize; u++) {
+            U32 const symbol = tableDecode[u].baseValue;
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+            assert(nbAdditionalBits[symbol] < 255);
+            tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+            tableDecode[u].baseValue = baseValue[symbol];
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+
+/*! ZSTD_buildSeqTable() :
+ * @return : nb bytes read from src,
+ *           or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
+                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const U32* baseValue, const U32* nbAdditionalBits,
+                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
+                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                                 int bmi2)
+{
+    switch(type)
+    {
+    case set_rle :
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
+        {   U32 const symbol = *(const BYTE*)src;
+            U32 const baseline = baseValue[symbol];
+            U32 const nbBits = nbAdditionalBits[symbol];
+            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
+        }
+        *DTablePtr = DTableSpace;
+        return 1;
+    case set_basic :
+        *DTablePtr = defaultTable;
+        return 0;
+    case set_repeat:
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
+        /* prefetch FSE table if used */
+        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
+            const void* const pStart = *DTablePtr;
+            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
+            PREFETCH_AREA(pStart, pSize);
+        }
+        return 0;
+    case set_compressed :
+        {   unsigned tableLog;
+            S16 norm[MaxSeq+1];
+            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
+            *DTablePtr = DTableSpace;
+            return headerSize;
+        }
+    default :
+        assert(0);
+        RETURN_ERROR(GENERIC, "impossible");
+    }
+}
+
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                             const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip = istart;
+    int nbSeq;
+    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
+
+    /* check */
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
+
+    /* SeqHead */
+    nbSeq = *ip++;
+    if (!nbSeq) {
+        *nbSeqPtr=0;
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
+        return 1;
+    }
+    if (nbSeq > 0x7F) {
+        if (nbSeq == 0xFF) {
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
+        } else {
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
+            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+        }
+    }
+    *nbSeqPtr = nbSeq;
+
+    /* FSE table descriptors */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
+        ip++;
+
+        /* Build DTables */
+        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
+                                                      LLtype, MaxLL, LLFSELog,
+                                                      ip, iend-ip,
+                                                      LL_base, LL_bits,
+                                                      LL_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += llhSize;
+        }
+
+        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
+                                                      OFtype, MaxOff, OffFSELog,
+                                                      ip, iend-ip,
+                                                      OF_base, OF_bits,
+                                                      OF_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += ofhSize;
+        }
+
+        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
+                                                      MLtype, MaxML, MLFSELog,
+                                                      ip, iend-ip,
+                                                      ML_base, ML_bits,
+                                                      ML_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += mlhSize;
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t matchLength;
+    size_t offset;
+    const BYTE* match;
+} seq_t;
+
+typedef struct {
+    size_t state;
+    const ZSTD_seqSymbol* table;
+} ZSTD_fseState;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    ZSTD_fseState stateLL;
+    ZSTD_fseState stateOffb;
+    ZSTD_fseState stateML;
+    size_t prevOffset[ZSTD_REP_NUM];
+    const BYTE* prefixStart;
+    const BYTE* dictEnd;
+    size_t pos;
+} seqState_t;
+
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *op >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
+
+    /* copy literals */
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix */
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart-match);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+    }   }
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+    return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequence(BYTE* op,
+                         BYTE* const oend, seq_t sequence,
+                         const BYTE** litPtr, const BYTE* const litLimit,
+                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLIMIT of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+            iLitEnd > litLimit ||
+            oMatchEnd > oend_w ||
+            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (UNLIKELY(sequence.litLength > 16)) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* Copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix -> go into extDict */
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+        match = dictEnd + (match - prefixStart);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+    }   }
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
+    }
+    assert(sequence.offset < WILDCOPY_VECLEN);
+
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
+    }
+    return sequenceLength;
+}
+
+static void
+ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
+{
+    const void* ptr = dt;
+    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
+                (U32)DStatePtr->state, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
+{
+    ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+{
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bytes we read
+ * after reloading when we are decoding long offsets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
+    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
+        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
+        : 0)
+
+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
+
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+{
+    seq_t seq;
+    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
+    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
+    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
+    U32 const llBase = llDInfo.baseValue;
+    U32 const mlBase = mlDInfo.baseValue;
+    U32 const ofBase = ofDInfo.baseValue;
+    BYTE const llBits = llDInfo.nbAdditionalBits;
+    BYTE const mlBits = mlDInfo.nbAdditionalBits;
+    BYTE const ofBits = ofDInfo.nbAdditionalBits;
+    BYTE const totalBits = llBits+mlBits+ofBits;
+
+    /* sequence */
+    {   size_t offset;
+        if (ofBits > 1) {
+            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+            assert(ofBits <= MaxOff);
+            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                BIT_reloadDStream(&seqState->DStream);
+                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
+            } else {
+                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+            }
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        } else {
+            U32 const ll0 = (llBase == 0);
+            if (LIKELY((ofBits == 0))) {
+                if (LIKELY(!ll0))
+                    offset = seqState->prevOffset[0];
+                else {
+                    offset = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset;
+                }
+            } else {
+                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset = temp;
+        }   }   }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = mlBase;
+    if (mlBits > 0)
+        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+        BIT_reloadDStream(&seqState->DStream);
+    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+        BIT_reloadDStream(&seqState->DStream);
+    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+    seq.litLength = llBase;
+    if (llBits > 0)
+        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+    if (MEM_32bits())
+        BIT_reloadDStream(&seqState->DStream);
+
+    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+    if (prefetch == ZSTD_p_prefetch) {
+        size_t const pos = seqState->pos + seq.litLength;
+        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
+        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
+        seqState->pos = pos + seq.matchLength;
+    }
+
+    /* ANS state update
+     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
+     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
+     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
+     * better option, so it is the default for other compilers. But, if you
+     * measure that it is worse, please put up a pull request.
+     */
+    {
+#if !defined(__clang__)
+        const int kUseUpdateFseState = 1;
+#else
+        const int kUseUpdateFseState = 0;
+#endif
+        if (kUseUpdateFseState) {
+            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+        } else {
+            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
+        }
+    }
+
+    return seq;
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+    size_t const windowSize = dctx->fParams.windowSize;
+    /* No dictionary used. */
+    if (dctx->dictContentEndForFuzzing == NULL) return 0;
+    /* Dictionary is our prefix. */
+    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+    /* Dictionary is not our ext-dict. */
+    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+    /* Dictionary is not within our window size. */
+    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+    /* Dictionary is active. */
+    return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+        ZSTD_DCtx const* dctx,
+        BYTE const* op, BYTE const* oend,
+        seq_t const seq,
+        BYTE const* prefixStart, BYTE const* virtualStart)
+{
+#if DEBUGLEVEL >= 1
+    size_t const windowSize = dctx->fParams.windowSize;
+    size_t const sequenceSize = seq.litLength + seq.matchLength;
+    BYTE const* const oLitEnd = op + seq.litLength;
+    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+    assert(op <= oend);
+    assert((size_t)(oend - op) >= sequenceSize);
+    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+        /* Offset must be within the dictionary. */
+        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+        assert(seq.offset <= windowSize + dictSize);
+    } else {
+        /* Offset must be within our window. */
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        size_t error = 0;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__x86_64__)
+        /* Align the decompression loop to 32 + 16 bytes.
+         *
+         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+         * speed swings based on the alignment of the decompression loop. This
+         * performance swing is caused by parts of the decompression loop falling
+         * out of the DSB. The entire decompression loop should fit in the DSB,
+         * when it can't we get much worse performance. You can measure if you've
+         * hit the good case or the bad case with this perf command for some
+         * compressed file test.zst:
+         *
+         *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+         *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+         *
+         * If you see most cycles served out of the MITE you've hit the bad case.
+         * If you see most cycles served out of the DSB you've hit the good case.
+         * If it is pretty even then you may be in an okay case.
+         *
+         * I've been able to reproduce this issue on the following CPUs:
+         *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+         *               Use Instruments->Counters to get DSB/MITE cycles.
+         *               I never got performance swings, but I was able to
+         *               go from the good case of mostly DSB to half of the
+         *               cycles served from MITE.
+         *   - Coffeelake: Intel i9-9900k
+         *
+         * I haven't been able to reproduce the instability or DSB misses on any
+         * of the following CPUS:
+         *   - Haswell
+         *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+         *   - Skylake
+         *
+         * If you are seeing performance stability this script can help test.
+         * It tests on 4 commits in zstd where I saw performance change.
+         *
+         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+         */
+        __asm__(".p2align 5");
+        __asm__("nop");
+        __asm__(".p2align 4");
+#endif
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            BIT_reloadDStream(&(seqState.DStream));
+            op += oneSeqSize;
+            /* gcc and clang both don't like early returns in this loop.
+             * Instead break and check for an error at the end of the loop.
+             */
+            if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
+                error = oneSeqSize;
+                break;
+            }
+            if (UNLIKELY(!--nbSeq)) break;
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+        if (ZSTD_isError(error)) return error;
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+#define STORED_SEQS 4
+#define STORED_SEQS_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS 4
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        dctx->fseEntropy = 1;
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        seqState.prefixStart = prefixStart;
+        seqState.pos = (size_t)(op-prefixStart);
+        seqState.dictEnd = dictEnd;
+        assert(dst != NULL);
+        assert(iend >= ip);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+        }
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+        /* decode and decompress */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            sequences[seqNb & STORED_SEQS_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+
+        /* finish queue */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if DYNAMIC_BMI2
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+#endif /* DYNAMIC_BMI2 */
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static size_t
+ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                   const void* seqStart, size_t seqSize, int nbSeq,
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong() :
+ * decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
+ * This function will try to mitigate main memory latency through the use of prefetching */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset,
+                             const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ *           compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+    const void* ptr = offTable;
+    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+    const ZSTD_seqSymbol* table = offTable + 1;
+    U32 const max = 1 << tableLog;
+    U32 u, total = 0;
+    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+    assert(max <= (1 << OffFSELog));  /* max not too large */
+    for (u=0; u<max; u++) {
+        if (table[u].nbAdditionalBits > 22) total += 1;
+    }
+
+    assert(tableLog <= OffFSELog);
+    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
+
+    return total;
+}
+#endif
+
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize, const int frame)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    /* isLongOffset must be true if there are long offsets.
+     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+     * We don't expect that to be the case in 64-bit mode.
+     * In block mode, window size is not known, so we have to be conservative.
+     * (note: but it could be evaluated from current-lowLimit)
+     */
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+
+    /* Build Decoding Tables */
+    {
+        /* These macros control at build-time which decompressor implementation
+         * we use. If neither is defined, we do some inspection and dispatch at
+         * runtime.
+         */
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        int usePrefetchDecoder = dctx->ddictIsCold;
+#endif
+        int nbSeq;
+        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+        srcSize -= seqHSize;
+
+        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if ( !usePrefetchDecoder
+          && (!frame || (dctx->fParams.windowSize > (1<<24)))
+          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
+            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+            usePrefetchDecoder = (shareLongOffsets >= minShare);
+        }
+#endif
+
+        dctx->ddictIsCold = 0;
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if (usePrefetchDecoder)
+#endif
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+        /* else */
+        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+    }
+}
+
+
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+{
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+        dctx->prefixStart = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    size_t dSize;
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
new file mode 100644
index 000000000000..e7f5f6689459
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_block.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DEC_BLOCK_H
+#define ZSTD_DEC_BLOCK_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>    /* DCtx, and some public functions */
+#include "../common/zstd_internal.h"  /* blockProperties_t, and some public functions */
+#include "zstd_decompress_internal.h"  /* ZSTD_seqSymbol */
+
+
+/* ===   Prototypes   === */
+
+/* note: prototypes already published within `zstd.h` :
+ * ZSTD_decompressBlock()
+ */
+
+/* note: prototypes already published within `zstd_internal.h` :
+ * ZSTD_getcBlockSize()
+ * ZSTD_decodeSeqHeaders()
+ */
+
+
+/* ZSTD_decompressBlock_internal() :
+ * decompress block, starting at `src`,
+ * into destination buffer `dst`.
+ * @return : decompressed block size,
+ *           or an error code (which can be tested using ZSTD_isError())
+ */
+size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize, const int frame);
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * this function must be called with valid parameters only
+ * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
+ * in which case it cannot fail.
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
+ * defined in zstd_decompress_internal.h.
+ * Internal use only.
+ */
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+             const short* normalizedCounter, unsigned maxSymbolValue,
+             const U32* baseValue, const U32* nbAdditionalBits,
+                   unsigned tableLog, void* wksp, size_t wkspSize,
+                   int bmi2);
+
+
+#endif /* ZSTD_DEC_BLOCK_H */
diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h
new file mode 100644
index 000000000000..4b9052f68755
--- /dev/null
+++ b/lib/zstd/decompress/zstd_decompress_internal.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* zstd_decompress_internal:
+ * objects and definitions shared within lib/decompress modules */
+
+ #ifndef ZSTD_DECOMPRESS_INTERNAL_H
+ #define ZSTD_DECOMPRESS_INTERNAL_H
+
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/mem.h"             /* BYTE, U16, U32 */
+#include "../common/zstd_internal.h"   /* ZSTD_seqSymbol */
+
+
+
+/*-*******************************************************
+ *  Constants
+ *********************************************************/
+static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
+                 0,    1,    2,     3,     4,     5,     6,      7,
+                 8,    9,   10,    11,    12,    13,    14,     15,
+                16,   18,   20,    22,    24,    28,    32,     40,
+                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                0x2000, 0x4000, 0x8000, 0x10000 };
+
+static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
+                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
+                     0,  1,  2,  3,  4,  5,  6,  7,
+                     8,  9, 10, 11, 12, 13, 14, 15,
+                    16, 17, 18, 19, 20, 21, 22, 23,
+                    24, 25, 26, 27, 28, 29, 30, 31 };
+
+static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
+                     3,  4,  5,    6,     7,     8,     9,    10,
+                    11, 12, 13,   14,    15,    16,    17,    18,
+                    19, 20, 21,   22,    23,    24,    25,    26,
+                    27, 28, 29,   30,    31,    32,    33,    34,
+                    35, 37, 39,   41,    43,    47,    51,    59,
+                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+
+/*-*******************************************************
+ *  Decompression types
+ *********************************************************/
+ typedef struct {
+     U32 fastMode;
+     U32 tableLog;
+ } ZSTD_seqSymbol_header;
+
+ typedef struct {
+     U16  nextState;
+     BYTE nbAdditionalBits;
+     BYTE nbBits;
+     U32  baseValue;
+ } ZSTD_seqSymbol;
+
+ #define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
+
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
+
+typedef struct {
+    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
+    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
+    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
+    U32 rep[ZSTD_REP_NUM];
+    U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
+} ZSTD_entropyDTables_t;
+
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
+
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+typedef enum {
+    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
+    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
+    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
+/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
+typedef struct {
+    const ZSTD_DDict** ddictPtrTable;
+    size_t ddictPtrTableSize;
+    size_t ddictPtrCount;
+} ZSTD_DDictHashSet;
+
+struct ZSTD_DCtx_s
+{
+    const ZSTD_seqSymbol* LLTptr;
+    const ZSTD_seqSymbol* MLTptr;
+    const ZSTD_seqSymbol* OFTptr;
+    const HUF_DTable* HUFptr;
+    ZSTD_entropyDTables_t entropy;
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
+    const void* previousDstEnd;   /* detect continuity */
+    const void* prefixStart;      /* start of current segment */
+    const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
+    size_t expected;
+    ZSTD_frameHeader fParams;
+    U64 processedCSize;
+    U64 decodedSize;
+    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
+    ZSTD_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    struct xxh64_state xxhState;
+    size_t headerSize;
+    ZSTD_format_e format;
+    ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
+    U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
+    const BYTE* litPtr;
+    ZSTD_customMem customMem;
+    size_t litSize;
+    size_t rleSize;
+    size_t staticSize;
+    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+
+    /* dictionary */
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
+    U32 dictID;
+    int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+    ZSTD_dictUses_e dictUses;
+    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
+    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
+
+    /* streaming */
+    ZSTD_dStreamStage streamStage;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    size_t maxWindowSize;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t lhSize;
+    void* legacyContext;
+    U32 previousLegacyVersion;
+    U32 legacyVersion;
+    U32 hostageByte;
+    int noForwardProgress;
+    ZSTD_bufferMode_e outBufferMode;
+    ZSTD_outBuffer expectedOutBuffer;
+
+    /* workspace */
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+
+    size_t oversizedDuration;
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    void const* dictContentBeginForFuzzing;
+    void const* dictContentEndForFuzzing;
+#endif
+
+    /* Tracing */
+};  /* typedef'd to ZSTD_DCtx within "zstd.h" */
+
+
+/*-*******************************************************
+ *  Shared internal functions
+ *********************************************************/
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
+size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                   const void* const dict, size_t const dictSize);
+
+/*! ZSTD_checkContinuity() :
+ *  check if next `dst` follows previous position, where decompression ended.
+ *  If yes, do nothing (continue on current segment).
+ *  If not, classify previous segment as "external dictionary", and start a new segment.
+ *  This function cannot fail. */
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
+
+
+#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h
index 9ba367b44149..0fbec508f285 100644
--- a/lib/zstd/decompress_sources.h
+++ b/lib/zstd/decompress_sources.h
@@ -16,8 +16,13 @@
  * decompression.
  */
 
-#include "entropy_common.c"
-#include "fse_decompress.c"
-#include "huf_decompress.c"
-#include "zstd_common.c"
-#include "decompress.c"
+#include "common/debug.c"
+#include "common/entropy_common.c"
+#include "common/error_private.c"
+#include "common/fse_decompress.c"
+#include "common/zstd_common.c"
+#include "decompress/huf_decompress.c"
+#include "decompress/zstd_ddict.c"
+#include "decompress/zstd_decompress.c"
+#include "decompress/zstd_decompress_block.c"
+#include "zstd_decompress_module.c"
diff --git a/lib/zstd/entropy_common.c b/lib/zstd/entropy_common.c
deleted file mode 100644
index 2b0a643c32c4..000000000000
--- a/lib/zstd/entropy_common.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Common functions of New Generation Entropy library
- * Copyright (C) 2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include "error_private.h" /* ERR_*, ERROR */
-#include "fse.h"
-#include "huf.h"
-#include "mem.h"
-
-/*===   Version   ===*/
-unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
-
-/*===   Error Management   ===*/
-unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
-unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
-/*-**************************************************************
-*  FSE NCount encoding-decoding
-****************************************************************/
-size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize)
-{
-	const BYTE *const istart = (const BYTE *)headerBuffer;
-	const BYTE *const iend = istart + hbSize;
-	const BYTE *ip = istart;
-	int nbBits;
-	int remaining;
-	int threshold;
-	U32 bitStream;
-	int bitCount;
-	unsigned charnum = 0;
-	int previous0 = 0;
-
-	if (hbSize < 4)
-		return ERROR(srcSize_wrong);
-	bitStream = ZSTD_readLE32(ip);
-	nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
-	if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX)
-		return ERROR(tableLog_tooLarge);
-	bitStream >>= 4;
-	bitCount = 4;
-	*tableLogPtr = nbBits;
-	remaining = (1 << nbBits) + 1;
-	threshold = 1 << nbBits;
-	nbBits++;
-
-	while ((remaining > 1) & (charnum <= *maxSVPtr)) {
-		if (previous0) {
-			unsigned n0 = charnum;
-			while ((bitStream & 0xFFFF) == 0xFFFF) {
-				n0 += 24;
-				if (ip < iend - 5) {
-					ip += 2;
-					bitStream = ZSTD_readLE32(ip) >> bitCount;
-				} else {
-					bitStream >>= 16;
-					bitCount += 16;
-				}
-			}
-			while ((bitStream & 3) == 3) {
-				n0 += 3;
-				bitStream >>= 2;
-				bitCount += 2;
-			}
-			n0 += bitStream & 3;
-			bitCount += 2;
-			if (n0 > *maxSVPtr)
-				return ERROR(maxSymbolValue_tooSmall);
-			while (charnum < n0)
-				normalizedCounter[charnum++] = 0;
-			if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
-				ip += bitCount >> 3;
-				bitCount &= 7;
-				bitStream = ZSTD_readLE32(ip) >> bitCount;
-			} else {
-				bitStream >>= 2;
-			}
-		}
-		{
-			int const max = (2 * threshold - 1) - remaining;
-			int count;
-
-			if ((bitStream & (threshold - 1)) < (U32)max) {
-				count = bitStream & (threshold - 1);
-				bitCount += nbBits - 1;
-			} else {
-				count = bitStream & (2 * threshold - 1);
-				if (count >= threshold)
-					count -= max;
-				bitCount += nbBits;
-			}
-
-			count--;				 /* extra accuracy */
-			remaining -= count < 0 ? -count : count; /* -1 means +1 */
-			normalizedCounter[charnum++] = (short)count;
-			previous0 = !count;
-			while (remaining < threshold) {
-				nbBits--;
-				threshold >>= 1;
-			}
-
-			if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
-				ip += bitCount >> 3;
-				bitCount &= 7;
-			} else {
-				bitCount -= (int)(8 * (iend - 4 - ip));
-				ip = iend - 4;
-			}
-			bitStream = ZSTD_readLE32(ip) >> (bitCount & 31);
-		}
-	} /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
-	if (remaining != 1)
-		return ERROR(corruption_detected);
-	if (bitCount > 32)
-		return ERROR(corruption_detected);
-	*maxSVPtr = charnum - 1;
-
-	ip += (bitCount + 7) >> 3;
-	return ip - istart;
-}
-
-/*! HUF_readStats() :
-	Read compact Huffman tree, saved by HUF_writeCTable().
-	`huffWeight` is destination buffer.
-	`rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
-	@return : size read from `src` , or an error Code .
-	Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
-*/
-size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 weightTotal;
-	const BYTE *ip = (const BYTE *)src;
-	size_t iSize;
-	size_t oSize;
-
-	if (!srcSize)
-		return ERROR(srcSize_wrong);
-	iSize = ip[0];
-	/* memset(huffWeight, 0, hwSize);   */ /* is not necessary, even though some analyzer complain ... */
-
-	if (iSize >= 128) { /* special header */
-		oSize = iSize - 127;
-		iSize = ((oSize + 1) / 2);
-		if (iSize + 1 > srcSize)
-			return ERROR(srcSize_wrong);
-		if (oSize >= hwSize)
-			return ERROR(corruption_detected);
-		ip += 1;
-		{
-			U32 n;
-			for (n = 0; n < oSize; n += 2) {
-				huffWeight[n] = ip[n / 2] >> 4;
-				huffWeight[n + 1] = ip[n / 2] & 15;
-			}
-		}
-	} else {						 /* header compressed with FSE (normal case) */
-		if (iSize + 1 > srcSize)
-			return ERROR(srcSize_wrong);
-		oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */
-		if (FSE_isError(oSize))
-			return oSize;
-	}
-
-	/* collect weight stats */
-	memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
-	weightTotal = 0;
-	{
-		U32 n;
-		for (n = 0; n < oSize; n++) {
-			if (huffWeight[n] >= HUF_TABLELOG_MAX)
-				return ERROR(corruption_detected);
-			rankStats[huffWeight[n]]++;
-			weightTotal += (1 << huffWeight[n]) >> 1;
-		}
-	}
-	if (weightTotal == 0)
-		return ERROR(corruption_detected);
-
-	/* get last non-null symbol weight (implied, total must be 2^n) */
-	{
-		U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-		if (tableLog > HUF_TABLELOG_MAX)
-			return ERROR(corruption_detected);
-		*tableLogPtr = tableLog;
-		/* determine last weight */
-		{
-			U32 const total = 1 << tableLog;
-			U32 const rest = total - weightTotal;
-			U32 const verif = 1 << BIT_highbit32(rest);
-			U32 const lastWeight = BIT_highbit32(rest) + 1;
-			if (verif != rest)
-				return ERROR(corruption_detected); /* last value must be a clean power of 2 */
-			huffWeight[oSize] = (BYTE)lastWeight;
-			rankStats[lastWeight]++;
-		}
-	}
-
-	/* check tree construction validity */
-	if ((rankStats[1] < 2) || (rankStats[1] & 1))
-		return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
-
-	/* results */
-	*nbSymbolsPtr = (U32)(oSize + 1);
-	return iSize + 1;
-}
diff --git a/lib/zstd/error_private.h b/lib/zstd/error_private.h
deleted file mode 100644
index 1a60b31f706c..000000000000
--- a/lib/zstd/error_private.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-/* ****************************************
-*  Dependencies
-******************************************/
-#include <linux/types.h> /* size_t */
-#include <linux/zstd.h>  /* enum list */
-
-/* ****************************************
-*  Compiler-specific
-******************************************/
-#define ERR_STATIC static __attribute__((unused))
-
-/*-****************************************
-*  Customization (error_public.h)
-******************************************/
-typedef ZSTD_ErrorCode ERR_enum;
-#define PREFIX(name) ZSTD_error_##name
-
-/*-****************************************
-*  Error codes handling
-******************************************/
-#define ERROR(name) ((size_t)-PREFIX(name))
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC ERR_enum ERR_getErrorCode(size_t code)
-{
-	if (!ERR_isError(code))
-		return (ERR_enum)0;
-	return (ERR_enum)(0 - code);
-}
-
-#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/fse.h b/lib/zstd/fse.h
deleted file mode 100644
index 7460ab04b191..000000000000
--- a/lib/zstd/fse.h
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * FSE : Finite State Entropy codec
- * Public Prototypes declaration
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef FSE_H
-#define FSE_H
-
-/*-*****************************************
-*  Dependencies
-******************************************/
-#include <linux/types.h> /* size_t, ptrdiff_t */
-
-/*-*****************************************
-*  FSE_PUBLIC_API : control library symbols visibility
-******************************************/
-#define FSE_PUBLIC_API
-
-/*------   Version   ------*/
-#define FSE_VERSION_MAJOR 0
-#define FSE_VERSION_MINOR 9
-#define FSE_VERSION_RELEASE 0
-
-#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
-#define FSE_QUOTE(str) #str
-#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
-#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
-
-#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE)
-FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
-
-/*-*****************************************
-*  Tool functions
-******************************************/
-FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
-
-/* Error Management */
-FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
-
-/*-*****************************************
-*  FSE detailed API
-******************************************/
-/*!
-FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[]
-2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-3. save normalized counters to memory buffer using writeNCount()
-4. build encoding table 'CTable' from normalized counters
-5. encode the data stream using encoding table 'CTable'
-
-FSE_decompress() does the following:
-1. read normalized counters with readNCount()
-2. build decoding table 'DTable' from normalized counters
-3. decode the data stream using decoding table 'DTable'
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and provide normalized distribution using external method.
-*/
-
-/* *** COMPRESSION *** */
-/*! FSE_optimalTableLog():
-	dynamically downsize 'tableLog' when conditions are met.
-	It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
-	@return : recommended tableLog (necessarily <= 'maxTableLog') */
-FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_normalizeCount():
-	normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
-	'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
-	@return : tableLog,
-			  or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_NCountWriteBound():
-	Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
-	Typically useful for allocation purpose. */
-FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_writeNCount():
-	Compactly save 'normalizedCounter' into 'buffer'.
-	@return : size of the compressed table,
-			  or an errorCode, which can be tested using FSE_isError(). */
-FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! Constructor and Destructor of FSE_CTable.
-	Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
-
-/*! FSE_compress_usingCTable():
-	Compress `src` using `ct` into `dst` which must be already allocated.
-	@return : size of compressed data (<= `dstCapacity`),
-			  or 0 if compressed data could not fit into `dst`,
-			  or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct);
-
-/*!
-Tutorial :
-----------
-The first step is to count all symbols. FSE_count() does this job very fast.
-Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
-'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
-maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
-FSE_count() will return the number of occurrence of the most frequent symbol.
-This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-The next step is to normalize the frequencies.
-FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
-It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
-You can use 'tableLog'==0 to mean "use default tableLog value".
-If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
-which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
-
-The result of FSE_normalizeCount() will be saved into a table,
-called 'normalizedCounter', which is a table of signed short.
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
-The return value is tableLog if everything proceeded as expected.
-It is 0 if there is a single symbol within distribution.
-If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
-'buffer' must be already allocated.
-For guaranteed success, buffer size must be at least FSE_headerBound().
-The result of the function is the number of bytes written into 'buffer'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
-
-'normalizedCounter' can then be used to create the compression table 'CTable'.
-The space required by 'CTable' must be already allocated, using FSE_createCTable().
-You can then use FSE_buildCTable() to fill 'CTable'.
-If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
-
-'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
-Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
-The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
-If it returns '0', compressed data could not fit into 'dst'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-*/
-
-/* *** DECOMPRESSION *** */
-
-/*! FSE_readNCount():
-	Read compactly saved 'normalizedCounter' from 'rBuffer'.
-	@return : size read from 'rBuffer',
-			  or an errorCode, which can be tested using FSE_isError().
-			  maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize);
-
-/*! Constructor and Destructor of FSE_DTable.
-	Note that its size depends on 'tableLog' */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-
-/*! FSE_buildDTable():
-	Builds 'dt', which must be already allocated, using FSE_createDTable().
-	return : 0, or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize);
-
-/*! FSE_decompress_usingDTable():
-	Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
-	into `dst` which must be already allocated.
-	@return : size of regenerated data (necessarily <= `dstCapacity`),
-			  or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt);
-
-/*!
-Tutorial :
-----------
-(Note : these functions only decompress FSE-compressed blocks.
- If block is uncompressed, use memcpy() instead
- If block is a single repeated byte, use memset() instead )
-
-The first step is to obtain the normalized frequencies of symbols.
-This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
-In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
-or size the table to handle worst case situations (typically 256).
-FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
-The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
-Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
-This is performed by the function FSE_buildDTable().
-The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
-`cSrcSize` must be strictly correct, otherwise decompression will fail.
-FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
-If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
-*/
-
-/* *** Dependency *** */
-#include "bitstream.h"
-
-/* *****************************************
-*  Static allocation
-*******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size >> 7))
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog))
-
-/* *****************************************
-*  FSE advanced API
-*******************************************/
-/* FSE_count_wksp() :
- * Same as FSE_count(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned
- */
-size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace);
-
-/* FSE_countFast_wksp() :
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
- * `workSpace` must be a table of minimum `1024` unsigned
- */
-size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace);
-
-/*! FSE_count_simple
- * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
- * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
-*/
-size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize);
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
-/**< same as FSE_optimalTableLog(), which used `minus==2` */
-
-size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits);
-/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
-
-size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue);
-/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` must be >= `(1<<tableLog)`.
- */
-size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, size_t wkspSize);
-
-size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits);
-/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
-
-size_t FSE_buildDTable_rle(FSE_DTable *dt, unsigned char symbolValue);
-/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize);
-/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
-
-/* *****************************************
-*  FSE symbol compression API
-*******************************************/
-/*!
-   This API consists of small unitary functions, which highly benefit from being inlined.
-   Hence their body are included in next section.
-*/
-typedef struct {
-	ptrdiff_t value;
-	const void *stateTable;
-	const void *symbolTT;
-	unsigned stateLog;
-} FSE_CState_t;
-
-static void FSE_initCState(FSE_CState_t *CStatePtr, const FSE_CTable *ct);
-
-static void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *CStatePtr, unsigned symbol);
-
-static void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *CStatePtr);
-
-/**<
-These functions are inner components of FSE_compress_usingCTable().
-They allow the creation of custom streams, mixing multiple tables and bit sources.
-
-A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
-So the first symbol you will encode is the last you will decode, like a LIFO stack.
-
-You will need a few variables to track your CStream. They are :
-
-FSE_CTable    ct;         // Provided by FSE_buildCTable()
-BIT_CStream_t bitStream;  // bitStream tracking structure
-FSE_CState_t  state;      // State tracking structure (can have several)
-
-
-The first thing to do is to init bitStream and state.
-	size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
-	FSE_initCState(&state, ct);
-
-Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
-You can then encode your input data, byte after byte.
-FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
-Remember decoding will be done in reverse direction.
-	FSE_encodeByte(&bitStream, &state, symbol);
-
-At any time, you can also add any bit sequence.
-Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
-	BIT_addBits(&bitStream, bitField, nbBits);
-
-The above methods don't commit data to memory, they just store it into local register, for speed.
-Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-Writing data to memory is a manual operation, performed by the flushBits function.
-	BIT_flushBits(&bitStream);
-
-Your last FSE encoding operation shall be to flush your last state value(s).
-	FSE_flushState(&bitStream, &state);
-
-Finally, you must close the bitStream.
-The function returns the size of CStream in bytes.
-If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
-If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
-	size_t size = BIT_closeCStream(&bitStream);
-*/
-
-/* *****************************************
-*  FSE symbol decompression API
-*******************************************/
-typedef struct {
-	size_t state;
-	const void *table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-static void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr);
-
-/**<
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream;    // Stream context
-FSE_DState_t  DState;     // State context. Multiple ones are possible
-FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
-	errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
-	errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
-	unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
-	size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
-	endSignal = FSE_reloadDStream(&DStream);
-
-BIT_reloadDStream() result tells if there is still some more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left into the DStream.
-BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
-	BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
-	BIT_endOfDStream(&DStream);
-Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
-	FSE_endOfDState(&DState);
-*/
-
-/* *****************************************
-*  FSE unsafe API
-*******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-/* *****************************************
-*  Implementation of inlined functions
-*******************************************/
-typedef struct {
-	int deltaFindState;
-	U32 deltaNbBits;
-} FSE_symbolCompressionTransform; /* total 8 bytes */
-
-ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct)
-{
-	const void *ptr = ct;
-	const U16 *u16ptr = (const U16 *)ptr;
-	const U32 tableLog = ZSTD_read16(ptr);
-	statePtr->value = (ptrdiff_t)1 << tableLog;
-	statePtr->stateTable = u16ptr + 2;
-	statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1));
-	statePtr->stateLog = tableLog;
-}
-
-/*! FSE_initCState2() :
-*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
-*   uses the smallest state value possible, saving the cost of this symbol */
-ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol)
-{
-	FSE_initCState(statePtr, ct);
-	{
-		const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
-		const U16 *stateTable = (const U16 *)(statePtr->stateTable);
-		U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16);
-		statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
-		statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-	}
-}
-
-ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol)
-{
-	const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
-	const U16 *const stateTable = (const U16 *)(statePtr->stateTable);
-	U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
-	BIT_addBits(bitC, statePtr->value, nbBitsOut);
-	statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-}
-
-ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr)
-{
-	BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
-	BIT_flushBits(bitC);
-}
-
-/* ======    Decompression    ====== */
-
-typedef struct {
-	U16 tableLog;
-	U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
-typedef struct {
-	unsigned short newState;
-	unsigned char symbol;
-	unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt)
-{
-	const void *ptr = dt;
-	const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr;
-	DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
-	BIT_reloadDStream(bitD);
-	DStatePtr->table = dt + 1;
-}
-
-ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	return DInfo.symbol;
-}
-
-ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	U32 const nbBits = DInfo.nbBits;
-	size_t const lowBits = BIT_readBits(bitD, nbBits);
-	DStatePtr->state = DInfo.newState + lowBits;
-}
-
-ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	U32 const nbBits = DInfo.nbBits;
-	BYTE const symbol = DInfo.symbol;
-	size_t const lowBits = BIT_readBits(bitD, nbBits);
-
-	DStatePtr->state = DInfo.newState + lowBits;
-	return symbol;
-}
-
-/*! FSE_decodeSymbolFast() :
-	unsafe, only works if no symbol has a probability > 50% */
-ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-	FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-	U32 const nbBits = DInfo.nbBits;
-	BYTE const symbol = DInfo.symbol;
-	size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
-
-	DStatePtr->state = DInfo.newState + lowBits;
-	return symbol;
-}
-
-ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; }
-
-/* **************************************************************
-*  Tuning parameters
-****************************************************************/
-/*!MEMORY_USAGE :
-*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-*  Increasing memory usage improves compression ratio
-*  Reduced memory usage can improve speed, due to cache effect
-*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#ifndef FSE_MAX_MEMORY_USAGE
-#define FSE_MAX_MEMORY_USAGE 14
-#endif
-#ifndef FSE_DEFAULT_MEMORY_USAGE
-#define FSE_DEFAULT_MEMORY_USAGE 13
-#endif
-
-/*!FSE_MAX_SYMBOL_VALUE :
-*  Maximum symbol value authorized.
-*  Required for proper stack allocation */
-#ifndef FSE_MAX_SYMBOL_VALUE
-#define FSE_MAX_SYMBOL_VALUE 255
-#endif
-
-/* **************************************************************
-*  template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-#define FSE_DECODE_TYPE FSE_decode_t
-
-/* ***************************************************************
-*  Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2)
-#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3)
-
-#endif /* FSE_H */
diff --git a/lib/zstd/fse_compress.c b/lib/zstd/fse_compress.c
deleted file mode 100644
index ef3d1741d532..000000000000
--- a/lib/zstd/fse_compress.c
+++ /dev/null
@@ -1,795 +0,0 @@
-/*
- * FSE : Finite State Entropy encoder
- * Copyright (C) 2013-2015, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/math64.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X, Y) X##Y
-#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
-#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
-
-/* Function templates */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
- * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
- */
-size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
-{
-	U32 const tableSize = 1 << tableLog;
-	U32 const tableMask = tableSize - 1;
-	void *const ptr = ct;
-	U16 *const tableU16 = ((U16 *)ptr) + 2;
-	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
-	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
-	U32 const step = FSE_TABLESTEP(tableSize);
-	U32 highThreshold = tableSize - 1;
-
-	U32 *cumul;
-	FSE_FUNCTION_TYPE *tableSymbol;
-	size_t spaceUsed32 = 0;
-
-	cumul = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
-	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* CTable header */
-	tableU16[-2] = (U16)tableLog;
-	tableU16[-1] = (U16)maxSymbolValue;
-
-	/* For explanations on how to distribute symbol values over the table :
-	*  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
-
-	/* symbol start positions */
-	{
-		U32 u;
-		cumul[0] = 0;
-		for (u = 1; u <= maxSymbolValue + 1; u++) {
-			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
-				cumul[u] = cumul[u - 1] + 1;
-				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
-			} else {
-				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
-			}
-		}
-		cumul[maxSymbolValue + 1] = tableSize + 1;
-	}
-
-	/* Spread symbols */
-	{
-		U32 position = 0;
-		U32 symbol;
-		for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
-			int nbOccurences;
-			for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
-				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
-				position = (position + step) & tableMask;
-				while (position > highThreshold)
-					position = (position + step) & tableMask; /* Low proba area */
-			}
-		}
-
-		if (position != 0)
-			return ERROR(GENERIC); /* Must have gone through all positions */
-	}
-
-	/* Build table */
-	{
-		U32 u;
-		for (u = 0; u < tableSize; u++) {
-			FSE_FUNCTION_TYPE s = tableSymbol[u];	/* note : static analyzer may not understand tableSymbol is properly initialized */
-			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
-		}
-	}
-
-	/* Build Symbol Transformation Table */
-	{
-		unsigned total = 0;
-		unsigned s;
-		for (s = 0; s <= maxSymbolValue; s++) {
-			switch (normalizedCounter[s]) {
-			case 0: break;
-
-			case -1:
-			case 1:
-				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
-				symbolTT[s].deltaFindState = total - 1;
-				total++;
-				break;
-			default: {
-				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
-				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
-				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-				symbolTT[s].deltaFindState = total - normalizedCounter[s];
-				total += normalizedCounter[s];
-			}
-			}
-		}
-	}
-
-	return 0;
-}
-
-/*-**************************************************************
-*  FSE NCount encoding-decoding
-****************************************************************/
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
-{
-	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
-	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
-}
-
-static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
-				      unsigned writeIsSafe)
-{
-	BYTE *const ostart = (BYTE *)header;
-	BYTE *out = ostart;
-	BYTE *const oend = ostart + headerBufferSize;
-	int nbBits;
-	const int tableSize = 1 << tableLog;
-	int remaining;
-	int threshold;
-	U32 bitStream;
-	int bitCount;
-	unsigned charnum = 0;
-	int previous0 = 0;
-
-	bitStream = 0;
-	bitCount = 0;
-	/* Table Size */
-	bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
-	bitCount += 4;
-
-	/* Init */
-	remaining = tableSize + 1; /* +1 for extra accuracy */
-	threshold = tableSize;
-	nbBits = tableLog + 1;
-
-	while (remaining > 1) { /* stops at 1 */
-		if (previous0) {
-			unsigned start = charnum;
-			while (!normalizedCounter[charnum])
-				charnum++;
-			while (charnum >= start + 24) {
-				start += 24;
-				bitStream += 0xFFFFU << bitCount;
-				if ((!writeIsSafe) && (out > oend - 2))
-					return ERROR(dstSize_tooSmall); /* Buffer overflow */
-				out[0] = (BYTE)bitStream;
-				out[1] = (BYTE)(bitStream >> 8);
-				out += 2;
-				bitStream >>= 16;
-			}
-			while (charnum >= start + 3) {
-				start += 3;
-				bitStream += 3 << bitCount;
-				bitCount += 2;
-			}
-			bitStream += (charnum - start) << bitCount;
-			bitCount += 2;
-			if (bitCount > 16) {
-				if ((!writeIsSafe) && (out > oend - 2))
-					return ERROR(dstSize_tooSmall); /* Buffer overflow */
-				out[0] = (BYTE)bitStream;
-				out[1] = (BYTE)(bitStream >> 8);
-				out += 2;
-				bitStream >>= 16;
-				bitCount -= 16;
-			}
-		}
-		{
-			int count = normalizedCounter[charnum++];
-			int const max = (2 * threshold - 1) - remaining;
-			remaining -= count < 0 ? -count : count;
-			count++; /* +1 for extra accuracy */
-			if (count >= threshold)
-				count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
-			bitStream += count << bitCount;
-			bitCount += nbBits;
-			bitCount -= (count < max);
-			previous0 = (count == 1);
-			if (remaining < 1)
-				return ERROR(GENERIC);
-			while (remaining < threshold)
-				nbBits--, threshold >>= 1;
-		}
-		if (bitCount > 16) {
-			if ((!writeIsSafe) && (out > oend - 2))
-				return ERROR(dstSize_tooSmall); /* Buffer overflow */
-			out[0] = (BYTE)bitStream;
-			out[1] = (BYTE)(bitStream >> 8);
-			out += 2;
-			bitStream >>= 16;
-			bitCount -= 16;
-		}
-	}
-
-	/* flush remaining bitStream */
-	if ((!writeIsSafe) && (out > oend - 2))
-		return ERROR(dstSize_tooSmall); /* Buffer overflow */
-	out[0] = (BYTE)bitStream;
-	out[1] = (BYTE)(bitStream >> 8);
-	out += (bitCount + 7) / 8;
-
-	if (charnum > maxSymbolValue + 1)
-		return ERROR(GENERIC);
-
-	return (out - ostart);
-}
-
-size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge); /* Unsupported */
-	if (tableLog < FSE_MIN_TABLELOG)
-		return ERROR(GENERIC); /* Unsupported */
-
-	if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
-		return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
-
-	return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
-}
-
-/*-**************************************************************
-*  Counting histogram
-****************************************************************/
-/*! FSE_count_simple
-	This function counts byte values within `src`, and store the histogram into table `count`.
-	It doesn't use any additional memory.
-	But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
-	For this reason, prefer using a table `count` with 256 elements.
-	@return : count of most numerous element
-*/
-size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
-{
-	const BYTE *ip = (const BYTE *)src;
-	const BYTE *const end = ip + srcSize;
-	unsigned maxSymbolValue = *maxSymbolValuePtr;
-	unsigned max = 0;
-
-	memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
-	if (srcSize == 0) {
-		*maxSymbolValuePtr = 0;
-		return 0;
-	}
-
-	while (ip < end)
-		count[*ip++]++;
-
-	while (!count[maxSymbolValue])
-		maxSymbolValue--;
-	*maxSymbolValuePtr = maxSymbolValue;
-
-	{
-		U32 s;
-		for (s = 0; s <= maxSymbolValue; s++)
-			if (count[s] > max)
-				max = count[s];
-	}
-
-	return (size_t)max;
-}
-
-/* FSE_count_parallel_wksp() :
- * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
-static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
-				      unsigned *const workSpace)
-{
-	const BYTE *ip = (const BYTE *)source;
-	const BYTE *const iend = ip + sourceSize;
-	unsigned maxSymbolValue = *maxSymbolValuePtr;
-	unsigned max = 0;
-	U32 *const Counting1 = workSpace;
-	U32 *const Counting2 = Counting1 + 256;
-	U32 *const Counting3 = Counting2 + 256;
-	U32 *const Counting4 = Counting3 + 256;
-
-	memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
-
-	/* safety checks */
-	if (!sourceSize) {
-		memset(count, 0, maxSymbolValue + 1);
-		*maxSymbolValuePtr = 0;
-		return 0;
-	}
-	if (!maxSymbolValue)
-		maxSymbolValue = 255; /* 0 == default */
-
-	/* by stripes of 16 bytes */
-	{
-		U32 cached = ZSTD_read32(ip);
-		ip += 4;
-		while (ip < iend - 15) {
-			U32 c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-			c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-			c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-			c = cached;
-			cached = ZSTD_read32(ip);
-			ip += 4;
-			Counting1[(BYTE)c]++;
-			Counting2[(BYTE)(c >> 8)]++;
-			Counting3[(BYTE)(c >> 16)]++;
-			Counting4[c >> 24]++;
-		}
-		ip -= 4;
-	}
-
-	/* finish last symbols */
-	while (ip < iend)
-		Counting1[*ip++]++;
-
-	if (checkMax) { /* verify stats will fit into destination table */
-		U32 s;
-		for (s = 255; s > maxSymbolValue; s--) {
-			Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
-			if (Counting1[s])
-				return ERROR(maxSymbolValue_tooSmall);
-		}
-	}
-
-	{
-		U32 s;
-		for (s = 0; s <= maxSymbolValue; s++) {
-			count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
-			if (count[s] > max)
-				max = count[s];
-		}
-	}
-
-	while (!count[maxSymbolValue])
-		maxSymbolValue--;
-	*maxSymbolValuePtr = maxSymbolValue;
-	return (size_t)max;
-}
-
-/* FSE_countFast_wksp() :
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned */
-size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
-{
-	if (sourceSize < 1500)
-		return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
-	return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
-}
-
-/* FSE_count_wksp() :
- * Same as FSE_count(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned */
-size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
-{
-	if (*maxSymbolValuePtr < 255)
-		return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
-	*maxSymbolValuePtr = 255;
-	return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
-}
-
-/*-**************************************************************
-*  FSE Compression Code
-****************************************************************/
-/*! FSE_sizeof_CTable() :
-	FSE_CTable is a variable size structure which contains :
-	`U16 tableLog;`
-	`U16 maxSymbolValue;`
-	`U16 nextStateNumber[1 << tableLog];`                         // This size is variable
-	`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
-Allocation is manual (C standard does not support variable-size structures).
-*/
-size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
-{
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge);
-	return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
-}
-
-/* provides the minimum logSize to safely represent a distribution */
-static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
-{
-	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
-	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
-	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
-	return minBits;
-}
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
-{
-	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
-	U32 tableLog = maxTableLog;
-	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
-	if (tableLog == 0)
-		tableLog = FSE_DEFAULT_TABLELOG;
-	if (maxBitsSrc < tableLog)
-		tableLog = maxBitsSrc; /* Accuracy can be reduced */
-	if (minBits > tableLog)
-		tableLog = minBits; /* Need a minimum to safely represent all symbol values */
-	if (tableLog < FSE_MIN_TABLELOG)
-		tableLog = FSE_MIN_TABLELOG;
-	if (tableLog > FSE_MAX_TABLELOG)
-		tableLog = FSE_MAX_TABLELOG;
-	return tableLog;
-}
-
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
-}
-
-/* Secondary normalization method.
-   To be used when primary method fails. */
-
-static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
-{
-	short const NOT_YET_ASSIGNED = -2;
-	U32 s;
-	U32 distributed = 0;
-	U32 ToDistribute;
-
-	/* Init */
-	U32 const lowThreshold = (U32)(total >> tableLog);
-	U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
-
-	for (s = 0; s <= maxSymbolValue; s++) {
-		if (count[s] == 0) {
-			norm[s] = 0;
-			continue;
-		}
-		if (count[s] <= lowThreshold) {
-			norm[s] = -1;
-			distributed++;
-			total -= count[s];
-			continue;
-		}
-		if (count[s] <= lowOne) {
-			norm[s] = 1;
-			distributed++;
-			total -= count[s];
-			continue;
-		}
-
-		norm[s] = NOT_YET_ASSIGNED;
-	}
-	ToDistribute = (1 << tableLog) - distributed;
-
-	if ((total / ToDistribute) > lowOne) {
-		/* risk of rounding to zero */
-		lowOne = (U32)((total * 3) / (ToDistribute * 2));
-		for (s = 0; s <= maxSymbolValue; s++) {
-			if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
-				norm[s] = 1;
-				distributed++;
-				total -= count[s];
-				continue;
-			}
-		}
-		ToDistribute = (1 << tableLog) - distributed;
-	}
-
-	if (distributed == maxSymbolValue + 1) {
-		/* all values are pretty poor;
-		   probably incompressible data (should have already been detected);
-		   find max, then give all remaining points to max */
-		U32 maxV = 0, maxC = 0;
-		for (s = 0; s <= maxSymbolValue; s++)
-			if (count[s] > maxC)
-				maxV = s, maxC = count[s];
-		norm[maxV] += (short)ToDistribute;
-		return 0;
-	}
-
-	if (total == 0) {
-		/* all of the symbols were low enough for the lowOne or lowThreshold */
-		for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
-			if (norm[s] > 0)
-				ToDistribute--, norm[s]++;
-		return 0;
-	}
-
-	{
-		U64 const vStepLog = 62 - tableLog;
-		U64 const mid = (1ULL << (vStepLog - 1)) - 1;
-		U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
-		U64 tmpTotal = mid;
-		for (s = 0; s <= maxSymbolValue; s++) {
-			if (norm[s] == NOT_YET_ASSIGNED) {
-				U64 const end = tmpTotal + (count[s] * rStep);
-				U32 const sStart = (U32)(tmpTotal >> vStepLog);
-				U32 const sEnd = (U32)(end >> vStepLog);
-				U32 const weight = sEnd - sStart;
-				if (weight < 1)
-					return ERROR(GENERIC);
-				norm[s] = (short)weight;
-				tmpTotal = end;
-			}
-		}
-	}
-
-	return 0;
-}
-
-size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
-{
-	/* Sanity checks */
-	if (tableLog == 0)
-		tableLog = FSE_DEFAULT_TABLELOG;
-	if (tableLog < FSE_MIN_TABLELOG)
-		return ERROR(GENERIC); /* Unsupported size */
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge); /* Unsupported size */
-	if (tableLog < FSE_minTableLog(total, maxSymbolValue))
-		return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
-
-	{
-		U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
-		U64 const scale = 62 - tableLog;
-		U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
-		U64 const vStep = 1ULL << (scale - 20);
-		int stillToDistribute = 1 << tableLog;
-		unsigned s;
-		unsigned largest = 0;
-		short largestP = 0;
-		U32 lowThreshold = (U32)(total >> tableLog);
-
-		for (s = 0; s <= maxSymbolValue; s++) {
-			if (count[s] == total)
-				return 0; /* rle special case */
-			if (count[s] == 0) {
-				normalizedCounter[s] = 0;
-				continue;
-			}
-			if (count[s] <= lowThreshold) {
-				normalizedCounter[s] = -1;
-				stillToDistribute--;
-			} else {
-				short proba = (short)((count[s] * step) >> scale);
-				if (proba < 8) {
-					U64 restToBeat = vStep * rtbTable[proba];
-					proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
-				}
-				if (proba > largestP)
-					largestP = proba, largest = s;
-				normalizedCounter[s] = proba;
-				stillToDistribute -= proba;
-			}
-		}
-		if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
-			/* corner case, need another normalization method */
-			size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
-			if (FSE_isError(errorCode))
-				return errorCode;
-		} else
-			normalizedCounter[largest] += (short)stillToDistribute;
-	}
-
-	return tableLog;
-}
-
-/* fake FSE_CTable, for raw (uncompressed) input */
-size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
-{
-	const unsigned tableSize = 1 << nbBits;
-	const unsigned tableMask = tableSize - 1;
-	const unsigned maxSymbolValue = tableMask;
-	void *const ptr = ct;
-	U16 *const tableU16 = ((U16 *)ptr) + 2;
-	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
-	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
-	unsigned s;
-
-	/* Sanity checks */
-	if (nbBits < 1)
-		return ERROR(GENERIC); /* min size */
-
-	/* header */
-	tableU16[-2] = (U16)nbBits;
-	tableU16[-1] = (U16)maxSymbolValue;
-
-	/* Build table */
-	for (s = 0; s < tableSize; s++)
-		tableU16[s] = (U16)(tableSize + s);
-
-	/* Build Symbol Transformation Table */
-	{
-		const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
-		for (s = 0; s <= maxSymbolValue; s++) {
-			symbolTT[s].deltaNbBits = deltaNbBits;
-			symbolTT[s].deltaFindState = s - 1;
-		}
-	}
-
-	return 0;
-}
-
-/* fake FSE_CTable, for rle input (always same symbol) */
-size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
-{
-	void *ptr = ct;
-	U16 *tableU16 = ((U16 *)ptr) + 2;
-	void *FSCTptr = (U32 *)ptr + 2;
-	FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
-
-	/* header */
-	tableU16[-2] = (U16)0;
-	tableU16[-1] = (U16)symbolValue;
-
-	/* Build table */
-	tableU16[0] = 0;
-	tableU16[1] = 0; /* just in case */
-
-	/* Build Symbol Transformation Table */
-	symbolTT[symbolValue].deltaNbBits = 0;
-	symbolTT[symbolValue].deltaFindState = 0;
-
-	return 0;
-}
-
-static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
-{
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *ip = iend;
-
-	BIT_CStream_t bitC;
-	FSE_CState_t CState1, CState2;
-
-	/* init */
-	if (srcSize <= 2)
-		return 0;
-	{
-		size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
-		if (FSE_isError(initError))
-			return 0; /* not enough space available to write a bitstream */
-	}
-
-#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
-
-	if (srcSize & 1) {
-		FSE_initCState2(&CState1, ct, *--ip);
-		FSE_initCState2(&CState2, ct, *--ip);
-		FSE_encodeSymbol(&bitC, &CState1, *--ip);
-		FSE_FLUSHBITS(&bitC);
-	} else {
-		FSE_initCState2(&CState2, ct, *--ip);
-		FSE_initCState2(&CState1, ct, *--ip);
-	}
-
-	/* join to mod 4 */
-	srcSize -= 2;
-	if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
-		FSE_encodeSymbol(&bitC, &CState2, *--ip);
-		FSE_encodeSymbol(&bitC, &CState1, *--ip);
-		FSE_FLUSHBITS(&bitC);
-	}
-
-	/* 2 or 4 encoding per loop */
-	while (ip > istart) {
-
-		FSE_encodeSymbol(&bitC, &CState2, *--ip);
-
-		if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
-			FSE_FLUSHBITS(&bitC);
-
-		FSE_encodeSymbol(&bitC, &CState1, *--ip);
-
-		if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
-			FSE_encodeSymbol(&bitC, &CState2, *--ip);
-			FSE_encodeSymbol(&bitC, &CState1, *--ip);
-		}
-
-		FSE_FLUSHBITS(&bitC);
-	}
-
-	FSE_flushCState(&bitC, &CState2);
-	FSE_flushCState(&bitC, &CState1);
-	return BIT_closeCStream(&bitC);
-}
-
-size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
-{
-	unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
-
-	if (fast)
-		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
-	else
-		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
-}
-
-size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c
deleted file mode 100644
index 0b353530fb3f..000000000000
--- a/lib/zstd/fse_decompress.c
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * FSE : Finite State Entropy decoder
- * Copyright (C) 2013-2015, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h"
-#include "zstd_internal.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_isError ERR_isError
-#define FSE_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X, Y) X##Y
-#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
-#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
-
-/* Function templates */
-
-size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
-{
-	void *const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
-	FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr);
-	U16 *symbolNext = (U16 *)workspace;
-
-	U32 const maxSV1 = maxSymbolValue + 1;
-	U32 const tableSize = 1 << tableLog;
-	U32 highThreshold = tableSize - 1;
-
-	/* Sanity Checks */
-	if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1))
-		return ERROR(tableLog_tooLarge);
-	if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE)
-		return ERROR(maxSymbolValue_tooLarge);
-	if (tableLog > FSE_MAX_TABLELOG)
-		return ERROR(tableLog_tooLarge);
-
-	/* Init, lay down lowprob symbols */
-	{
-		FSE_DTableHeader DTableH;
-		DTableH.tableLog = (U16)tableLog;
-		DTableH.fastMode = 1;
-		{
-			S16 const largeLimit = (S16)(1 << (tableLog - 1));
-			U32 s;
-			for (s = 0; s < maxSV1; s++) {
-				if (normalizedCounter[s] == -1) {
-					tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
-					symbolNext[s] = 1;
-				} else {
-					if (normalizedCounter[s] >= largeLimit)
-						DTableH.fastMode = 0;
-					symbolNext[s] = normalizedCounter[s];
-				}
-			}
-		}
-		memcpy(dt, &DTableH, sizeof(DTableH));
-	}
-
-	/* Spread symbols */
-	{
-		U32 const tableMask = tableSize - 1;
-		U32 const step = FSE_TABLESTEP(tableSize);
-		U32 s, position = 0;
-		for (s = 0; s < maxSV1; s++) {
-			int i;
-			for (i = 0; i < normalizedCounter[s]; i++) {
-				tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
-				position = (position + step) & tableMask;
-				while (position > highThreshold)
-					position = (position + step) & tableMask; /* lowprob area */
-			}
-		}
-		if (position != 0)
-			return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-	}
-
-	/* Build Decoding table */
-	{
-		U32 u;
-		for (u = 0; u < tableSize; u++) {
-			FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-			U16 nextState = symbolNext[symbol]++;
-			tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState));
-			tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize);
-		}
-	}
-
-	return 0;
-}
-
-/*-*******************************************************
-*  Decompression (Byte symbols)
-*********************************************************/
-size_t FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue)
-{
-	void *ptr = dt;
-	FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
-	void *dPtr = dt + 1;
-	FSE_decode_t *const cell = (FSE_decode_t *)dPtr;
-
-	DTableH->tableLog = 0;
-	DTableH->fastMode = 0;
-
-	cell->newState = 0;
-	cell->symbol = symbolValue;
-	cell->nbBits = 0;
-
-	return 0;
-}
-
-size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits)
-{
-	void *ptr = dt;
-	FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
-	void *dPtr = dt + 1;
-	FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr;
-	const unsigned tableSize = 1 << nbBits;
-	const unsigned tableMask = tableSize - 1;
-	const unsigned maxSV1 = tableMask + 1;
-	unsigned s;
-
-	/* Sanity checks */
-	if (nbBits < 1)
-		return ERROR(GENERIC); /* min size */
-
-	/* Build Decoding Table */
-	DTableH->tableLog = (U16)nbBits;
-	DTableH->fastMode = 1;
-	for (s = 0; s < maxSV1; s++) {
-		dinfo[s].newState = 0;
-		dinfo[s].symbol = (BYTE)s;
-		dinfo[s].nbBits = (BYTE)nbBits;
-	}
-
-	return 0;
-}
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt,
-						       const unsigned fast)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	BYTE *const omax = op + maxDstSize;
-	BYTE *const olimit = omax - 3;
-
-	BIT_DStream_t bitD;
-	FSE_DState_t state1;
-	FSE_DState_t state2;
-
-	/* Init */
-	CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
-
-	FSE_initDState(&state1, &bitD, dt);
-	FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
-	/* 4 symbols per loop */
-	for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) {
-		op[0] = FSE_GETSYMBOL(&state1);
-
-		if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-			BIT_reloadDStream(&bitD);
-
-		op[1] = FSE_GETSYMBOL(&state2);
-
-		if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-		{
-			if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) {
-				op += 2;
-				break;
-			}
-		}
-
-		op[2] = FSE_GETSYMBOL(&state1);
-
-		if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-			BIT_reloadDStream(&bitD);
-
-		op[3] = FSE_GETSYMBOL(&state2);
-	}
-
-	/* tail */
-	/* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
-	while (1) {
-		if (op > (omax - 2))
-			return ERROR(dstSize_tooSmall);
-		*op++ = FSE_GETSYMBOL(&state1);
-		if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
-			*op++ = FSE_GETSYMBOL(&state2);
-			break;
-		}
-
-		if (op > (omax - 2))
-			return ERROR(dstSize_tooSmall);
-		*op++ = FSE_GETSYMBOL(&state2);
-		if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
-			*op++ = FSE_GETSYMBOL(&state1);
-			break;
-		}
-	}
-
-	return op - ostart;
-}
-
-size_t FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt)
-{
-	const void *ptr = dt;
-	const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr;
-	const U32 fastMode = DTableH->fastMode;
-
-	/* select fast mode (static) */
-	if (fastMode)
-		return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
-	return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize)
-{
-	const BYTE *const istart = (const BYTE *)cSrc;
-	const BYTE *ip = istart;
-	unsigned tableLog;
-	unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-	size_t NCountLength;
-
-	FSE_DTable *dt;
-	short *counting;
-	size_t spaceUsed32 = 0;
-
-	FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32));
-
-	dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog);
-	counting = (short *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* normal FSE decoding mode */
-	NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
-	if (FSE_isError(NCountLength))
-		return NCountLength;
-	// if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size; supposed to be already checked in NCountLength, only remaining
-	// case : NCountLength==cSrcSize */
-	if (tableLog > maxLog)
-		return ERROR(tableLog_tooLarge);
-	ip += NCountLength;
-	cSrcSize -= NCountLength;
-
-	CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize));
-
-	return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */
-}
diff --git a/lib/zstd/huf.h b/lib/zstd/huf.h
deleted file mode 100644
index 923218d12e28..000000000000
--- a/lib/zstd/huf.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Huffman coder, part of New Generation Entropy library
- * header file
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef HUF_H_298734234
-#define HUF_H_298734234
-
-/* *** Dependencies *** */
-#include <linux/types.h> /* size_t */
-
-/* ***   Tool functions *** */
-#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
-size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
-
-/* Error Management */
-unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
-
-/* ***   Advanced function   *** */
-
-/** HUF_compress4X_wksp() :
-*   Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
-size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			   size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-/* *** Dependencies *** */
-#include "mem.h" /* U32 */
-
-/* *** Constants *** */
-#define HUF_TABLELOG_MAX 12     /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
-#define HUF_SYMBOLVALUE_MAX 255
-
-#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
-#error "HUF_TABLELOG_MAX is too large !"
-#endif
-
-/* ****************************************
-*  Static allocation
-******************************************/
-/* HUF buffer bounds */
-#define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8)			 /* only true if incompressible pre-filtered with fast heuristic */
-#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* static allocation of HUF's Compression Table */
-#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-	U32 name##hb[maxSymbolValue + 1];              \
-	void *name##hv = &(name##hb);                  \
-	HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */
-
-/* static allocation of HUF's DTable */
-typedef U32 HUF_DTable;
-#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog)))
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)}
-#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)}
-
-/* The workspace must have alignment at least 4 and be at least this large */
-#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10)
-#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32))
-
-/* The workspace must have alignment at least 4 and be at least this large */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10)
-#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
-
-/* ****************************************
-*  Advanced decompression functions
-******************************************/
-size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				size_t workspaceSize);							       /**< considers RLE and uncompressed as errors */
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< single-symbol decoder */
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< double-symbols decoder */
-
-/* ****************************************
-*  HUF detailed API
-******************************************/
-/*!
-HUF_compress() does the following:
-1. count symbol occurrence from source[] into table count[] using FSE_count()
-2. (optional) refine tableLog using HUF_optimalTableLog()
-3. build Huffman table from count using HUF_buildCTable()
-4. save Huffman table to memory buffer using HUF_writeCTable_wksp()
-5. encode the data stream using HUF_compress4X_usingCTable()
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and regenerate 'CTable' using external methods.
-*/
-/* FSE_count() : find it within "fse.h" */
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
-size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize);
-size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
-
-typedef enum {
-	HUF_repeat_none,  /**< Cannot use the previous table */
-	HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1,
-			     4}X_repeat */
-	HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
-} HUF_repeat;
-/** HUF_compress4X_repeat() :
-*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-*   If it uses hufTable it does not modify hufTable or repeat.
-*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-*   If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
-			     int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
- */
-size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize);
-
-/*! HUF_readStats() :
-	Read compact Huffman tree, saved by HUF_writeCTable().
-	`huffWeight` is destination buffer.
-	@return : size read from `src` , or an error Code .
-	Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize,
-			  void *workspace, size_t workspaceSize);
-
-/** HUF_readCTable() :
-*   Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-
-/*
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
-2. build Huffman table from save, using HUF_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
-*/
-
-/** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-determined metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
-U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize);
-
-size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-
-size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-
-/* single stream variants */
-
-size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			   size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
-/** HUF_compress1X_repeat() :
-*   Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-*   If it uses hufTable it does not modify hufTable or repeat.
-*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-*   If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
-			     int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize);
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< single-symbol decoder */
-size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-				   size_t workspaceSize); /**< double-symbols decoder */
-
-size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize,
-				    const HUF_DTable *DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
-size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-
-#endif /* HUF_H_298734234 */
diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c
deleted file mode 100644
index fd32838c185f..000000000000
--- a/lib/zstd/huf_compress.c
+++ /dev/null
@@ -1,773 +0,0 @@
-/*
- * Huffman encoder, part of New Generation Entropy library
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h" /* header compression */
-#include "huf.h"
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-#define CHECK_V_F(e, f)     \
-	size_t const e = f; \
-	if (ERR_isError(e)) \
-	return f
-#define CHECK_F(f)                        \
-	{                                 \
-		CHECK_V_F(_var_err__, f); \
-	}
-
-/* **************************************************************
-*  Utils
-****************************************************************/
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
-}
-
-/* *******************************************************
-*  HUF : Huffman block compression
-*********************************************************/
-/* HUF_compressWeights() :
- * Same as FSE_compress(), but dedicated to huff0's weights compression.
- * The use case needs much less stack memory.
- * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
- */
-#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *op = ostart;
-	BYTE *const oend = ostart + dstSize;
-
-	U32 maxSymbolValue = HUF_TABLELOG_MAX;
-	U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
-
-	FSE_CTable *CTable;
-	U32 *count;
-	S16 *norm;
-	size_t spaceUsed32 = 0;
-
-	HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));
-
-	CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
-	count = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_MAX + 1;
-	norm = (S16 *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* init conditions */
-	if (wtSize <= 1)
-		return 0; /* Not compressible */
-
-	/* Scan input and build symbol stats */
-	{
-		CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
-		if (maxCount == wtSize)
-			return 1; /* only a single symbol in src : rle */
-		if (maxCount == 1)
-			return 0; /* each symbol present maximum once => not compressible */
-	}
-
-	tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-	CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));
-
-	/* Write table description header */
-	{
-		CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
-		op += hSize;
-	}
-
-	/* Compress */
-	CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
-	{
-		CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
-		if (cSize == 0)
-			return 0; /* not enough space for compressed data */
-		op += cSize;
-	}
-
-	return op - ostart;
-}
-
-struct HUF_CElt_s {
-	U16 val;
-	BYTE nbBits;
-}; /* typedef'd to HUF_CElt within "huf.h" */
-
-/*! HUF_writeCTable_wksp() :
-	`CTable` : Huffman tree to save, using huf representation.
-	@return : size of saved CTable */
-size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
-{
-	BYTE *op = (BYTE *)dst;
-	U32 n;
-
-	BYTE *bitsToWeight;
-	BYTE *huffWeight;
-	size_t spaceUsed32 = 0;
-
-	bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
-	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* check conditions */
-	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-		return ERROR(maxSymbolValue_tooLarge);
-
-	/* convert to weight */
-	bitsToWeight[0] = 0;
-	for (n = 1; n < huffLog + 1; n++)
-		bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
-	for (n = 0; n < maxSymbolValue; n++)
-		huffWeight[n] = bitsToWeight[CTable[n].nbBits];
-
-	/* attempt weights compression by FSE */
-	{
-		CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
-		if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
-			op[0] = (BYTE)hSize;
-			return hSize + 1;
-		}
-	}
-
-	/* write raw values as 4-bits (max : 15) */
-	if (maxSymbolValue > (256 - 128))
-		return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
-	if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
-		return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
-	op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
-	huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
-	for (n = 0; n < maxSymbolValue; n += 2)
-		op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
-	return ((maxSymbolValue + 1) / 2) + 1;
-}
-
-size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 *rankVal;
-	BYTE *huffWeight;
-	U32 tableLog = 0;
-	U32 nbSymbols = 0;
-	size_t readSize;
-	size_t spaceUsed32 = 0;
-
-	rankVal = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
-	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	/* get symbol weights */
-	readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-	if (ERR_isError(readSize))
-		return readSize;
-
-	/* check result */
-	if (tableLog > HUF_TABLELOG_MAX)
-		return ERROR(tableLog_tooLarge);
-	if (nbSymbols > maxSymbolValue + 1)
-		return ERROR(maxSymbolValue_tooSmall);
-
-	/* Prepare base value per rank */
-	{
-		U32 n, nextRankStart = 0;
-		for (n = 1; n <= tableLog; n++) {
-			U32 curr = nextRankStart;
-			nextRankStart += (rankVal[n] << (n - 1));
-			rankVal[n] = curr;
-		}
-	}
-
-	/* fill nbBits */
-	{
-		U32 n;
-		for (n = 0; n < nbSymbols; n++) {
-			const U32 w = huffWeight[n];
-			CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
-		}
-	}
-
-	/* fill val */
-	{
-		U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
-		U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
-		{
-			U32 n;
-			for (n = 0; n < nbSymbols; n++)
-				nbPerRank[CTable[n].nbBits]++;
-		}
-		/* determine stating value per rank */
-		valPerRank[tableLog + 1] = 0; /* for w==0 */
-		{
-			U16 min = 0;
-			U32 n;
-			for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
-				valPerRank[n] = min;     /* get starting value within each rank */
-				min += nbPerRank[n];
-				min >>= 1;
-			}
-		}
-		/* assign value within rank, symbol order */
-		{
-			U32 n;
-			for (n = 0; n <= maxSymbolValue; n++)
-				CTable[n].val = valPerRank[CTable[n].nbBits]++;
-		}
-	}
-
-	return readSize;
-}
-
-typedef struct nodeElt_s {
-	U32 count;
-	U16 parent;
-	BYTE byte;
-	BYTE nbBits;
-} nodeElt;
-
-static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
-{
-	const U32 largestBits = huffNode[lastNonNull].nbBits;
-	if (largestBits <= maxNbBits)
-		return largestBits; /* early exit : no elt > maxNbBits */
-
-	/* there are several too large elements (at least >= 2) */
-	{
-		int totalCost = 0;
-		const U32 baseCost = 1 << (largestBits - maxNbBits);
-		U32 n = lastNonNull;
-
-		while (huffNode[n].nbBits > maxNbBits) {
-			totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
-			huffNode[n].nbBits = (BYTE)maxNbBits;
-			n--;
-		} /* n stops at huffNode[n].nbBits <= maxNbBits */
-		while (huffNode[n].nbBits == maxNbBits)
-			n--; /* n end at index of smallest symbol using < maxNbBits */
-
-		/* renorm totalCost */
-		totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
-
-		/* repay normalized cost */
-		{
-			U32 const noSymbol = 0xF0F0F0F0;
-			U32 rankLast[HUF_TABLELOG_MAX + 2];
-			int pos;
-
-			/* Get pos of last (smallest) symbol per rank */
-			memset(rankLast, 0xF0, sizeof(rankLast));
-			{
-				U32 currNbBits = maxNbBits;
-				for (pos = n; pos >= 0; pos--) {
-					if (huffNode[pos].nbBits >= currNbBits)
-						continue;
-					currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
-					rankLast[maxNbBits - currNbBits] = pos;
-				}
-			}
-
-			while (totalCost > 0) {
-				U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
-				for (; nBitsToDecrease > 1; nBitsToDecrease--) {
-					U32 highPos = rankLast[nBitsToDecrease];
-					U32 lowPos = rankLast[nBitsToDecrease - 1];
-					if (highPos == noSymbol)
-						continue;
-					if (lowPos == noSymbol)
-						break;
-					{
-						U32 const highTotal = huffNode[highPos].count;
-						U32 const lowTotal = 2 * huffNode[lowPos].count;
-						if (highTotal <= lowTotal)
-							break;
-					}
-				}
-				/* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
-				/* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
-				while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
-					nBitsToDecrease++;
-				totalCost -= 1 << (nBitsToDecrease - 1);
-				if (rankLast[nBitsToDecrease - 1] == noSymbol)
-					rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
-				huffNode[rankLast[nBitsToDecrease]].nbBits++;
-				if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
-					rankLast[nBitsToDecrease] = noSymbol;
-				else {
-					rankLast[nBitsToDecrease]--;
-					if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
-						rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
-				}
-			} /* while (totalCost > 0) */
-
-			while (totalCost < 0) {		       /* Sometimes, cost correction overshoot */
-				if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
-								  (using maxNbBits) */
-					while (huffNode[n].nbBits == maxNbBits)
-						n--;
-					huffNode[n + 1].nbBits--;
-					rankLast[1] = n + 1;
-					totalCost++;
-					continue;
-				}
-				huffNode[rankLast[1] + 1].nbBits--;
-				rankLast[1]++;
-				totalCost++;
-			}
-		}
-	} /* there are several too large elements (at least >= 2) */
-
-	return maxNbBits;
-}
-
-typedef struct {
-	U32 base;
-	U32 curr;
-} rankPos;
-
-static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
-{
-	rankPos rank[32];
-	U32 n;
-
-	memset(rank, 0, sizeof(rank));
-	for (n = 0; n <= maxSymbolValue; n++) {
-		U32 r = BIT_highbit32(count[n] + 1);
-		rank[r].base++;
-	}
-	for (n = 30; n > 0; n--)
-		rank[n - 1].base += rank[n].base;
-	for (n = 0; n < 32; n++)
-		rank[n].curr = rank[n].base;
-	for (n = 0; n <= maxSymbolValue; n++) {
-		U32 const c = count[n];
-		U32 const r = BIT_highbit32(c + 1) + 1;
-		U32 pos = rank[r].curr++;
-		while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
-			huffNode[pos] = huffNode[pos - 1], pos--;
-		huffNode[pos].count = c;
-		huffNode[pos].byte = (BYTE)n;
-	}
-}
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
- */
-#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
-typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
-size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
-{
-	nodeElt *const huffNode0 = (nodeElt *)workSpace;
-	nodeElt *const huffNode = huffNode0 + 1;
-	U32 n, nonNullRank;
-	int lowS, lowN;
-	U16 nodeNb = STARTNODE;
-	U32 nodeRoot;
-
-	/* safety checks */
-	if (wkspSize < sizeof(huffNodeTable))
-		return ERROR(GENERIC); /* workSpace is not large enough */
-	if (maxNbBits == 0)
-		maxNbBits = HUF_TABLELOG_DEFAULT;
-	if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-		return ERROR(GENERIC);
-	memset(huffNode0, 0, sizeof(huffNodeTable));
-
-	/* sort, decreasing order */
-	HUF_sort(huffNode, count, maxSymbolValue);
-
-	/* init for parents */
-	nonNullRank = maxSymbolValue;
-	while (huffNode[nonNullRank].count == 0)
-		nonNullRank--;
-	lowS = nonNullRank;
-	nodeRoot = nodeNb + lowS - 1;
-	lowN = nodeNb;
-	huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
-	huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
-	nodeNb++;
-	lowS -= 2;
-	for (n = nodeNb; n <= nodeRoot; n++)
-		huffNode[n].count = (U32)(1U << 30);
-	huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */
-
-	/* create parents */
-	while (nodeNb <= nodeRoot) {
-		U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-		U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-		huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
-		huffNode[n1].parent = huffNode[n2].parent = nodeNb;
-		nodeNb++;
-	}
-
-	/* distribute weights (unlimited tree height) */
-	huffNode[nodeRoot].nbBits = 0;
-	for (n = nodeRoot - 1; n >= STARTNODE; n--)
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
-	for (n = 0; n <= nonNullRank; n++)
-		huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
-
-	/* enforce maxTableLog */
-	maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
-
-	/* fill result into tree (val, nbBits) */
-	{
-		U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
-		U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
-		if (maxNbBits > HUF_TABLELOG_MAX)
-			return ERROR(GENERIC); /* check fit into table */
-		for (n = 0; n <= nonNullRank; n++)
-			nbPerRank[huffNode[n].nbBits]++;
-		/* determine stating value per rank */
-		{
-			U16 min = 0;
-			for (n = maxNbBits; n > 0; n--) {
-				valPerRank[n] = min; /* get starting value within each rank */
-				min += nbPerRank[n];
-				min >>= 1;
-			}
-		}
-		for (n = 0; n <= maxSymbolValue; n++)
-			tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
-		for (n = 0; n <= maxSymbolValue; n++)
-			tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
-	}
-
-	return maxNbBits;
-}
-
-static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
-{
-	size_t nbBits = 0;
-	int s;
-	for (s = 0; s <= (int)maxSymbolValue; ++s) {
-		nbBits += CTable[s].nbBits * count[s];
-	}
-	return nbBits >> 3;
-}
-
-static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
-{
-	int bad = 0;
-	int s;
-	for (s = 0; s <= (int)maxSymbolValue; ++s) {
-		bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
-	}
-	return !bad;
-}
-
-static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
-{
-	BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
-}
-
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
-#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
-
-#define HUF_FLUSHBITS_1(stream)                                            \
-	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
-	HUF_FLUSHBITS(stream)
-
-#define HUF_FLUSHBITS_2(stream)                                            \
-	if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
-	HUF_FLUSHBITS(stream)
-
-size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
-{
-	const BYTE *ip = (const BYTE *)src;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstSize;
-	BYTE *op = ostart;
-	size_t n;
-	BIT_CStream_t bitC;
-
-	/* init */
-	if (dstSize < 8)
-		return 0; /* not enough space to compress */
-	{
-		size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
-		if (HUF_isError(initErr))
-			return 0;
-	}
-
-	n = srcSize & ~3; /* join to mod 4 */
-	switch (srcSize & 3) {
-	case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
-		fallthrough;
-	case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
-		fallthrough;
-	case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
-		fallthrough;
-	case 0:
-	default:;
-	}
-
-	for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
-		HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
-		HUF_FLUSHBITS_1(&bitC);
-		HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
-		HUF_FLUSHBITS_2(&bitC);
-		HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
-		HUF_FLUSHBITS_1(&bitC);
-		HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
-		HUF_FLUSHBITS(&bitC);
-	}
-
-	return BIT_closeCStream(&bitC);
-}
-
-size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
-{
-	size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
-	const BYTE *ip = (const BYTE *)src;
-	const BYTE *const iend = ip + srcSize;
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstSize;
-	BYTE *op = ostart;
-
-	if (dstSize < 6 + 1 + 1 + 1 + 8)
-		return 0; /* minimum space to compress successfully */
-	if (srcSize < 12)
-		return 0; /* no saving possible : too small input */
-	op += 6;	  /* jumpTable */
-
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-		if (cSize == 0)
-			return 0;
-		ZSTD_writeLE16(ostart, (U16)cSize);
-		op += cSize;
-	}
-
-	ip += segmentSize;
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-		if (cSize == 0)
-			return 0;
-		ZSTD_writeLE16(ostart + 2, (U16)cSize);
-		op += cSize;
-	}
-
-	ip += segmentSize;
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-		if (cSize == 0)
-			return 0;
-		ZSTD_writeLE16(ostart + 4, (U16)cSize);
-		op += cSize;
-	}
-
-	ip += segmentSize;
-	{
-		CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
-		if (cSize == 0)
-			return 0;
-		op += cSize;
-	}
-
-	return op - ostart;
-}
-
-static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
-					  const HUF_CElt *CTable)
-{
-	size_t const cSize =
-	    singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
-	if (HUF_isError(cSize)) {
-		return cSize;
-	}
-	if (cSize == 0) {
-		return 0;
-	} /* uncompressible */
-	op += cSize;
-	/* check compressibility */
-	if ((size_t)(op - ostart) >= srcSize - 1) {
-		return 0;
-	}
-	return op - ostart;
-}
-
-/* `workSpace` must a table of at least 1024 unsigned */
-static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
-				    unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
-{
-	BYTE *const ostart = (BYTE *)dst;
-	BYTE *const oend = ostart + dstSize;
-	BYTE *op = ostart;
-
-	U32 *count;
-	size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
-	HUF_CElt *CTable;
-	size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
-
-	/* checks & inits */
-	if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
-		return ERROR(GENERIC);
-	if (!srcSize)
-		return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
-	if (!dstSize)
-		return 0; /* cannot fit within dst budget */
-	if (srcSize > HUF_BLOCKSIZE_MAX)
-		return ERROR(srcSize_wrong); /* curr block size limit */
-	if (huffLog > HUF_TABLELOG_MAX)
-		return ERROR(tableLog_tooLarge);
-	if (!maxSymbolValue)
-		maxSymbolValue = HUF_SYMBOLVALUE_MAX;
-	if (!huffLog)
-		huffLog = HUF_TABLELOG_DEFAULT;
-
-	count = (U32 *)workSpace;
-	workSpace = (BYTE *)workSpace + countSize;
-	wkspSize -= countSize;
-	CTable = (HUF_CElt *)workSpace;
-	workSpace = (BYTE *)workSpace + CTableSize;
-	wkspSize -= CTableSize;
-
-	/* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
-	if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
-		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-	}
-
-	/* Scan input and build symbol stats */
-	{
-		CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
-		if (largest == srcSize) {
-			*ostart = ((const BYTE *)src)[0];
-			return 1;
-		} /* single symbol, rle */
-		if (largest <= (srcSize >> 7) + 1)
-			return 0; /* Fast heuristic : not compressible enough */
-	}
-
-	/* Check validity of previous table */
-	if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
-		*repeat = HUF_repeat_none;
-	}
-	/* Heuristic : use existing table for small inputs */
-	if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
-		return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-	}
-
-	/* Build Huffman Tree */
-	huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-	{
-		CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
-		huffLog = (U32)maxBits;
-		/* Zero the unused symbols so we can check it for validity */
-		memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
-	}
-
-	/* Write table description header */
-	{
-		CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
-		/* Check if using the previous table will be beneficial */
-		if (repeat && *repeat != HUF_repeat_none) {
-			size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
-			size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
-			if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
-				return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-			}
-		}
-		/* Use the new table */
-		if (hSize + 12ul >= srcSize) {
-			return 0;
-		}
-		op += hSize;
-		if (repeat) {
-			*repeat = HUF_repeat_none;
-		}
-		if (oldHufTable) {
-			memcpy(oldHufTable, CTable, CTableSize);
-		} /* Save the new table */
-	}
-	return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
-}
-
-size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			   size_t wkspSize)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
-}
-
-size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
-				     preferRepeat);
-}
-
-size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			   size_t wkspSize)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
-}
-
-size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-			     size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
-{
-	return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
-				     preferRepeat);
-}
diff --git a/lib/zstd/huf_decompress.c b/lib/zstd/huf_decompress.c
deleted file mode 100644
index 6526482047dc..000000000000
--- a/lib/zstd/huf_decompress.c
+++ /dev/null
@@ -1,960 +0,0 @@
-/*
- * Huffman decoder, part of New Generation Entropy library
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Dependencies
-****************************************************************/
-#include "bitstream.h" /* BIT_* */
-#include "fse.h"       /* header compression */
-#include "huf.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c)                                   \
-	{                                                      \
-		enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
-	} /* use only *after* variable declarations */
-
-/*-***************************/
-/*  generic DTableDesc       */
-/*-***************************/
-
-typedef struct {
-	BYTE maxTableLog;
-	BYTE tableType;
-	BYTE tableLog;
-	BYTE reserved;
-} DTableDesc;
-
-static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
-{
-	DTableDesc dtd;
-	memcpy(&dtd, table, sizeof(dtd));
-	return dtd;
-}
-
-/*-***************************/
-/*  single-symbol decoding   */
-/*-***************************/
-
-typedef struct {
-	BYTE byte;
-	BYTE nbBits;
-} HUF_DEltX2; /* single-symbol decoding */
-
-size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 tableLog = 0;
-	U32 nbSymbols = 0;
-	size_t iSize;
-	void *const dtPtr = DTable + 1;
-	HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;
-
-	U32 *rankVal;
-	BYTE *huffWeight;
-	size_t spaceUsed32 = 0;
-
-	rankVal = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
-	huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
-	/* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
-
-	iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-	if (HUF_isError(iSize))
-		return iSize;
-
-	/* Table header */
-	{
-		DTableDesc dtd = HUF_getDTableDesc(DTable);
-		if (tableLog > (U32)(dtd.maxTableLog + 1))
-			return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
-		dtd.tableType = 0;
-		dtd.tableLog = (BYTE)tableLog;
-		memcpy(DTable, &dtd, sizeof(dtd));
-	}
-
-	/* Calculate starting value for each rank */
-	{
-		U32 n, nextRankStart = 0;
-		for (n = 1; n < tableLog + 1; n++) {
-			U32 const curr = nextRankStart;
-			nextRankStart += (rankVal[n] << (n - 1));
-			rankVal[n] = curr;
-		}
-	}
-
-	/* fill DTable */
-	{
-		U32 n;
-		for (n = 0; n < nbSymbols; n++) {
-			U32 const w = huffWeight[n];
-			U32 const length = (1 << w) >> 1;
-			U32 u;
-			HUF_DEltX2 D;
-			D.byte = (BYTE)n;
-			D.nbBits = (BYTE)(tableLog + 1 - w);
-			for (u = rankVal[w]; u < rankVal[w] + length; u++)
-				dt[u] = D;
-			rankVal[w] += length;
-		}
-	}
-
-	return iSize;
-}
-
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
-{
-	size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
-	BYTE const c = dt[val].byte;
-	BIT_skipBits(Dstream, dt[val].nbBits);
-	return c;
-}
-
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)         \
-	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
-	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
-	if (ZSTD_64bits())                     \
-	HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
-{
-	BYTE *const pStart = p;
-
-	/* up to 4 symbols at a time */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
-		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-		HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-	}
-
-	/* closer to the end */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
-		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
-	/* no more data to retrieve from bitstream, hence no need to reload */
-	while (p < pEnd)
-		HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
-	return pEnd - pStart;
-}
-
-static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	BYTE *op = (BYTE *)dst;
-	BYTE *const oend = op + dstSize;
-	const void *dtPtr = DTable + 1;
-	const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
-	BIT_DStream_t bitD;
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	U32 const dtLog = dtd.tableLog;
-
-	{
-		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
-		if (HUF_isError(errorCode))
-			return errorCode;
-	}
-
-	HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
-
-	/* check */
-	if (!BIT_endOfDStream(&bitD))
-		return ERROR(corruption_detected);
-
-	return dstSize;
-}
-
-size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 0)
-		return ERROR(GENERIC);
-	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	/* Check */
-	if (cSrcSize < 10)
-		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
-	{
-		const BYTE *const istart = (const BYTE *)cSrc;
-		BYTE *const ostart = (BYTE *)dst;
-		BYTE *const oend = ostart + dstSize;
-		const void *const dtPtr = DTable + 1;
-		const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
-
-		/* Init */
-		BIT_DStream_t bitD1;
-		BIT_DStream_t bitD2;
-		BIT_DStream_t bitD3;
-		BIT_DStream_t bitD4;
-		size_t const length1 = ZSTD_readLE16(istart);
-		size_t const length2 = ZSTD_readLE16(istart + 2);
-		size_t const length3 = ZSTD_readLE16(istart + 4);
-		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-		const BYTE *const istart1 = istart + 6; /* jumpTable */
-		const BYTE *const istart2 = istart1 + length1;
-		const BYTE *const istart3 = istart2 + length2;
-		const BYTE *const istart4 = istart3 + length3;
-		const size_t segmentSize = (dstSize + 3) / 4;
-		BYTE *const opStart2 = ostart + segmentSize;
-		BYTE *const opStart3 = opStart2 + segmentSize;
-		BYTE *const opStart4 = opStart3 + segmentSize;
-		BYTE *op1 = ostart;
-		BYTE *op2 = opStart2;
-		BYTE *op3 = opStart3;
-		BYTE *op4 = opStart4;
-		U32 endSignal;
-		DTableDesc const dtd = HUF_getDTableDesc(DTable);
-		U32 const dtLog = dtd.tableLog;
-
-		if (length4 > cSrcSize)
-			return ERROR(corruption_detected); /* overflow */
-		{
-			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-
-		/* 16-32 symbols per loop (4-8 symbols per stream) */
-		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
-			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-			HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-			HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-			HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-			HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		}
-
-		/* check corruption */
-		if (op1 > opStart2)
-			return ERROR(corruption_detected);
-		if (op2 > opStart3)
-			return ERROR(corruption_detected);
-		if (op3 > opStart4)
-			return ERROR(corruption_detected);
-		/* note : op4 supposed already verified within main loop */
-
-		/* finish bitStreams one by one */
-		HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-		HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-		HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-		HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
-		/* check */
-		endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-		if (!endSignal)
-			return ERROR(corruption_detected);
-
-		/* decoded size */
-		return dstSize;
-	}
-}
-
-size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 0)
-		return ERROR(GENERIC);
-	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
-}
-
-/* *************************/
-/* double-symbols decoding */
-/* *************************/
-typedef struct {
-	U16 sequence;
-	BYTE nbBits;
-	BYTE length;
-} HUF_DEltX4; /* double-symbols decoding */
-
-typedef struct {
-	BYTE symbol;
-	BYTE weight;
-} sortedSymbol_t;
-
-/* HUF_fillDTableX4Level2() :
- * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
-static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
-				   const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
-{
-	HUF_DEltX4 DElt;
-	U32 rankVal[HUF_TABLELOG_MAX + 1];
-
-	/* get pre-calculated rankVal */
-	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
-	/* fill skipped values */
-	if (minWeight > 1) {
-		U32 i, skipSize = rankVal[minWeight];
-		ZSTD_writeLE16(&(DElt.sequence), baseSeq);
-		DElt.nbBits = (BYTE)(consumed);
-		DElt.length = 1;
-		for (i = 0; i < skipSize; i++)
-			DTable[i] = DElt;
-	}
-
-	/* fill DTable */
-	{
-		U32 s;
-		for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
-			const U32 symbol = sortedSymbols[s].symbol;
-			const U32 weight = sortedSymbols[s].weight;
-			const U32 nbBits = nbBitsBaseline - weight;
-			const U32 length = 1 << (sizeLog - nbBits);
-			const U32 start = rankVal[weight];
-			U32 i = start;
-			const U32 end = start + length;
-
-			ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
-			DElt.nbBits = (BYTE)(nbBits + consumed);
-			DElt.length = 2;
-			do {
-				DTable[i++] = DElt;
-			} while (i < end); /* since length >= 1 */
-
-			rankVal[weight] += length;
-		}
-	}
-}
-
-typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
-typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
-
-static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
-			     rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
-{
-	U32 rankVal[HUF_TABLELOG_MAX + 1];
-	const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
-	const U32 minBits = nbBitsBaseline - maxWeight;
-	U32 s;
-
-	memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
-	/* fill DTable */
-	for (s = 0; s < sortedListSize; s++) {
-		const U16 symbol = sortedList[s].symbol;
-		const U32 weight = sortedList[s].weight;
-		const U32 nbBits = nbBitsBaseline - weight;
-		const U32 start = rankVal[weight];
-		const U32 length = 1 << (targetLog - nbBits);
-
-		if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
-			U32 sortedRank;
-			int minWeight = nbBits + scaleLog;
-			if (minWeight < 1)
-				minWeight = 1;
-			sortedRank = rankStart[minWeight];
-			HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
-					       sortedListSize - sortedRank, nbBitsBaseline, symbol);
-		} else {
-			HUF_DEltX4 DElt;
-			ZSTD_writeLE16(&(DElt.sequence), symbol);
-			DElt.nbBits = (BYTE)(nbBits);
-			DElt.length = 1;
-			{
-				U32 const end = start + length;
-				U32 u;
-				for (u = start; u < end; u++)
-					DTable[u] = DElt;
-			}
-		}
-		rankVal[weight] += length;
-	}
-}
-
-size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-	U32 tableLog, maxW, sizeOfSort, nbSymbols;
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	U32 const maxTableLog = dtd.maxTableLog;
-	size_t iSize;
-	void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
-	HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
-	U32 *rankStart;
-
-	rankValCol_t *rankVal;
-	U32 *rankStats;
-	U32 *rankStart0;
-	sortedSymbol_t *sortedSymbol;
-	BYTE *weightList;
-	size_t spaceUsed32 = 0;
-
-	HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
-
-	rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
-	rankStats = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_MAX + 1;
-	rankStart0 = (U32 *)workspace + spaceUsed32;
-	spaceUsed32 += HUF_TABLELOG_MAX + 2;
-	sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
-	weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
-	spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-	if ((spaceUsed32 << 2) > workspaceSize)
-		return ERROR(tableLog_tooLarge);
-	workspace = (U32 *)workspace + spaceUsed32;
-	workspaceSize -= (spaceUsed32 << 2);
-
-	rankStart = rankStart0 + 1;
-	memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
-
-	HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
-	if (maxTableLog > HUF_TABLELOG_MAX)
-		return ERROR(tableLog_tooLarge);
-	/* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
-
-	iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-	if (HUF_isError(iSize))
-		return iSize;
-
-	/* check result */
-	if (tableLog > maxTableLog)
-		return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
-
-	/* find maxWeight */
-	for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
-	} /* necessarily finds a solution before 0 */
-
-	/* Get start index of each weight */
-	{
-		U32 w, nextRankStart = 0;
-		for (w = 1; w < maxW + 1; w++) {
-			U32 curr = nextRankStart;
-			nextRankStart += rankStats[w];
-			rankStart[w] = curr;
-		}
-		rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
-		sizeOfSort = nextRankStart;
-	}
-
-	/* sort symbols by weight */
-	{
-		U32 s;
-		for (s = 0; s < nbSymbols; s++) {
-			U32 const w = weightList[s];
-			U32 const r = rankStart[w]++;
-			sortedSymbol[r].symbol = (BYTE)s;
-			sortedSymbol[r].weight = (BYTE)w;
-		}
-		rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
-	}
-
-	/* Build rankVal */
-	{
-		U32 *const rankVal0 = rankVal[0];
-		{
-			int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
-			U32 nextRankVal = 0;
-			U32 w;
-			for (w = 1; w < maxW + 1; w++) {
-				U32 curr = nextRankVal;
-				nextRankVal += rankStats[w] << (w + rescale);
-				rankVal0[w] = curr;
-			}
-		}
-		{
-			U32 const minBits = tableLog + 1 - maxW;
-			U32 consumed;
-			for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
-				U32 *const rankValPtr = rankVal[consumed];
-				U32 w;
-				for (w = 1; w < maxW + 1; w++) {
-					rankValPtr[w] = rankVal0[w] >> consumed;
-				}
-			}
-		}
-	}
-
-	HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
-
-	dtd.tableLog = (BYTE)maxTableLog;
-	dtd.tableType = 1;
-	memcpy(DTable, &dtd, sizeof(dtd));
-	return iSize;
-}
-
-static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
-{
-	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
-	memcpy(op, dt + val, 2);
-	BIT_skipBits(DStream, dt[val].nbBits);
-	return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
-{
-	size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
-	memcpy(op, dt + val, 1);
-	if (dt[val].length == 1)
-		BIT_skipBits(DStream, dt[val].nbBits);
-	else {
-		if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
-			BIT_skipBits(DStream, dt[val].nbBits);
-			if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
-				/* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
-				DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
-		}
-	}
-	return 1;
-}
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)         \
-	if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
-	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
-	if (ZSTD_64bits())                     \
-	ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
-{
-	BYTE *const pStart = p;
-
-	/* up to 8 symbols at a time */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
-		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
-		HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-	}
-
-	/* closer to end : up to 2 symbols at a time */
-	while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
-		HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
-	while (p <= pEnd - 2)
-		HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
-	if (p < pEnd)
-		p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
-	return p - pStart;
-}
-
-static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	BIT_DStream_t bitD;
-
-	/* Init */
-	{
-		size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
-		if (HUF_isError(errorCode))
-			return errorCode;
-	}
-
-	/* decode */
-	{
-		BYTE *const ostart = (BYTE *)dst;
-		BYTE *const oend = ostart + dstSize;
-		const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
-		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
-		DTableDesc const dtd = HUF_getDTableDesc(DTable);
-		HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
-	}
-
-	/* check */
-	if (!BIT_endOfDStream(&bitD))
-		return ERROR(corruption_detected);
-
-	/* decoded size */
-	return dstSize;
-}
-
-size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 1)
-		return ERROR(GENERIC);
-	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	if (cSrcSize < 10)
-		return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
-	{
-		const BYTE *const istart = (const BYTE *)cSrc;
-		BYTE *const ostart = (BYTE *)dst;
-		BYTE *const oend = ostart + dstSize;
-		const void *const dtPtr = DTable + 1;
-		const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
-
-		/* Init */
-		BIT_DStream_t bitD1;
-		BIT_DStream_t bitD2;
-		BIT_DStream_t bitD3;
-		BIT_DStream_t bitD4;
-		size_t const length1 = ZSTD_readLE16(istart);
-		size_t const length2 = ZSTD_readLE16(istart + 2);
-		size_t const length3 = ZSTD_readLE16(istart + 4);
-		size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-		const BYTE *const istart1 = istart + 6; /* jumpTable */
-		const BYTE *const istart2 = istart1 + length1;
-		const BYTE *const istart3 = istart2 + length2;
-		const BYTE *const istart4 = istart3 + length3;
-		size_t const segmentSize = (dstSize + 3) / 4;
-		BYTE *const opStart2 = ostart + segmentSize;
-		BYTE *const opStart3 = opStart2 + segmentSize;
-		BYTE *const opStart4 = opStart3 + segmentSize;
-		BYTE *op1 = ostart;
-		BYTE *op2 = opStart2;
-		BYTE *op3 = opStart3;
-		BYTE *op4 = opStart4;
-		U32 endSignal;
-		DTableDesc const dtd = HUF_getDTableDesc(DTable);
-		U32 const dtLog = dtd.tableLog;
-
-		if (length4 > cSrcSize)
-			return ERROR(corruption_detected); /* overflow */
-		{
-			size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-		{
-			size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
-			if (HUF_isError(errorCode))
-				return errorCode;
-		}
-
-		/* 16-32 symbols per loop (4-8 symbols per stream) */
-		endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
-			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
-			HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-			HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
-			HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
-			HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
-			HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
-			endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-		}
-
-		/* check corruption */
-		if (op1 > opStart2)
-			return ERROR(corruption_detected);
-		if (op2 > opStart3)
-			return ERROR(corruption_detected);
-		if (op3 > opStart4)
-			return ERROR(corruption_detected);
-		/* note : op4 already verified within main loop */
-
-		/* finish bitStreams one by one */
-		HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
-		HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
-		HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
-		HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
-		/* check */
-		{
-			U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-			if (!endCheck)
-				return ERROR(corruption_detected);
-		}
-
-		/* decoded size */
-		return dstSize;
-	}
-}
-
-size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc dtd = HUF_getDTableDesc(DTable);
-	if (dtd.tableType != 1)
-		return ERROR(GENERIC);
-	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	const BYTE *ip = (const BYTE *)cSrc;
-
-	size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
-	if (HUF_isError(hSize))
-		return hSize;
-	if (hSize >= cSrcSize)
-		return ERROR(srcSize_wrong);
-	ip += hSize;
-	cSrcSize -= hSize;
-
-	return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
-}
-
-/* ********************************/
-/* Generic decompression selector */
-/* ********************************/
-
-size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-			     : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-	DTableDesc const dtd = HUF_getDTableDesc(DTable);
-	return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-			     : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-typedef struct {
-	U32 tableTime;
-	U32 decode256Time;
-} algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
-    /* single, double, quad */
-    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==0 : impossible */
-    {{0, 0}, {1, 1}, {2, 2}},		     /* Q==1 : impossible */
-    {{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
-    {{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
-    {{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
-    {{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
-    {{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
-    {{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
-    {{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
-    {{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
-    {{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
-    {{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
-    {{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
-    {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
-    {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
-    {{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
-};
-
-/** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-determined metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
-U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
-{
-	/* decoder timing evaluation */
-	U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
-	U32 const D256 = (U32)(dstSize >> 8);
-	U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
-	U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
-	DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
-
-	return DTime1 < DTime0;
-}
-
-typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
-
-size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	/* validation checks */
-	if (dstSize == 0)
-		return ERROR(dstSize_tooSmall);
-	if (cSrcSize > dstSize)
-		return ERROR(corruption_detected); /* invalid */
-	if (cSrcSize == dstSize) {
-		memcpy(dst, cSrc, dstSize);
-		return dstSize;
-	} /* not compressed */
-	if (cSrcSize == 1) {
-		memset(dst, *(const BYTE *)cSrc, dstSize);
-		return dstSize;
-	} /* RLE */
-
-	{
-		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-	}
-}
-
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	/* validation checks */
-	if (dstSize == 0)
-		return ERROR(dstSize_tooSmall);
-	if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
-		return ERROR(corruption_detected); /* invalid */
-
-	{
-		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-		return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-			      : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-	}
-}
-
-size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-	/* validation checks */
-	if (dstSize == 0)
-		return ERROR(dstSize_tooSmall);
-	if (cSrcSize > dstSize)
-		return ERROR(corruption_detected); /* invalid */
-	if (cSrcSize == dstSize) {
-		memcpy(dst, cSrc, dstSize);
-		return dstSize;
-	} /* not compressed */
-	if (cSrcSize == 1) {
-		memset(dst, *(const BYTE *)cSrc, dstSize);
-		return dstSize;
-	} /* RLE */
-
-	{
-		U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-		return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-			      : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-	}
-}
diff --git a/lib/zstd/mem.h b/lib/zstd/mem.h
deleted file mode 100644
index 93d7a2c377fe..000000000000
--- a/lib/zstd/mem.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include <asm/unaligned.h>
-#include <linux/string.h> /* memcpy */
-#include <linux/types.h>  /* size_t, ptrdiff_t */
-
-/*-****************************************
-*  Compiler specifics
-******************************************/
-#define ZSTD_STATIC static inline
-
-/*-**************************************************************
-*  Basic Types
-*****************************************************************/
-typedef uint8_t BYTE;
-typedef uint16_t U16;
-typedef int16_t S16;
-typedef uint32_t U32;
-typedef int32_t S32;
-typedef uint64_t U64;
-typedef int64_t S64;
-typedef ptrdiff_t iPtrDiff;
-typedef uintptr_t uPtrDiff;
-
-/*-**************************************************************
-*  Memory I/O
-*****************************************************************/
-ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; }
-ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; }
-
-#if defined(__LITTLE_ENDIAN)
-#define ZSTD_LITTLE_ENDIAN 1
-#else
-#define ZSTD_LITTLE_ENDIAN 0
-#endif
-
-ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; }
-
-ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); }
-
-ZSTD_STATIC U32 ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); }
-
-ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); }
-
-/*=== Little endian r/w ===*/
-
-ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); }
-
-ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); }
-
-ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val)
-{
-	ZSTD_writeLE16(memPtr, (U16)val);
-	((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
-}
-
-ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); }
-
-ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr)
-{
-	if (ZSTD_32bits())
-		return (size_t)ZSTD_readLE32(memPtr);
-	else
-		return (size_t)ZSTD_readLE64(memPtr);
-}
-
-ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val)
-{
-	if (ZSTD_32bits())
-		ZSTD_writeLE32(memPtr, (U32)val);
-	else
-		ZSTD_writeLE64(memPtr, (U64)val);
-}
-
-/*=== Big endian r/w ===*/
-
-ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return get_unaligned_be32(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeBE32(void *memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); }
-
-ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr)
-{
-	if (ZSTD_32bits())
-		return (size_t)ZSTD_readBE32(memPtr);
-	else
-		return (size_t)ZSTD_readBE64(memPtr);
-}
-
-ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val)
-{
-	if (ZSTD_32bits())
-		ZSTD_writeBE32(memPtr, (U32)val);
-	else
-		ZSTD_writeBE64(memPtr, (U64)val);
-}
-
-/* function safe only for comparisons */
-ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length)
-{
-	switch (length) {
-	default:
-	case 4: return ZSTD_read32(memPtr);
-	case 3:
-		if (ZSTD_isLittleEndian())
-			return ZSTD_read32(memPtr) << 8;
-		else
-			return ZSTD_read32(memPtr) >> 8;
-	}
-}
-
-#endif /* MEM_H_MODULE */
diff --git a/lib/zstd/zstd_common.c b/lib/zstd/zstd_common.c
deleted file mode 100644
index a282624ee155..000000000000
--- a/lib/zstd/zstd_common.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "error_private.h"
-#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
-#include <linux/kernel.h>
-
-/*=**************************************************************
-*  Custom allocator
-****************************************************************/
-
-#define stack_push(stack, size)                                 \
-	({                                                      \
-		void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \
-		(stack)->ptr = (char *)ptr + (size);            \
-		(stack)->ptr <= (stack)->end ? ptr : NULL;      \
-	})
-
-ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize)
-{
-	ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace};
-	ZSTD_stack *stack = (ZSTD_stack *)workspace;
-	/* Verify preconditions */
-	if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) {
-		ZSTD_customMem error = {NULL, NULL, NULL};
-		return error;
-	}
-	/* Initialize the stack */
-	stack->ptr = workspace;
-	stack->end = (char *)workspace + workspaceSize;
-	stack_push(stack, sizeof(ZSTD_stack));
-	return stackMem;
-}
-
-void *ZSTD_stackAllocAll(void *opaque, size_t *size)
-{
-	ZSTD_stack *stack = (ZSTD_stack *)opaque;
-	*size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr);
-	return stack_push(stack, *size);
-}
-
-void *ZSTD_stackAlloc(void *opaque, size_t size)
-{
-	ZSTD_stack *stack = (ZSTD_stack *)opaque;
-	return stack_push(stack, size);
-}
-void ZSTD_stackFree(void *opaque, void *address)
-{
-	(void)opaque;
-	(void)address;
-}
-
-void *ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); }
-
-void ZSTD_free(void *ptr, ZSTD_customMem customMem)
-{
-	if (ptr != NULL)
-		customMem.customFree(customMem.opaque, ptr);
-}
diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c
new file mode 100644
index 000000000000..65548a4bb934
--- /dev/null
+++ b/lib/zstd/zstd_compress_module.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#include "common/zstd_deps.h"
+#include "common/zstd_internal.h"
+
+#define ZSTD_FORWARD_IF_ERR(ret)            \
+	do {                                \
+		size_t const __ret = (ret); \
+		if (ZSTD_isError(__ret))    \
+			return __ret;       \
+	} while (0)
+
+static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
+	unsigned long long pledged_src_size)
+{
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
+		cctx, ZSTD_reset_session_and_parameters));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
+		cctx, pledged_src_size));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_strategy, parameters->cParams.strategy));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
+	ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+		cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
+	return 0;
+}
+
+int zstd_min_clevel(void)
+{
+	return ZSTD_minCLevel();
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+	return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+size_t zstd_compress_bound(size_t src_size)
+{
+	return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+zstd_parameters zstd_get_params(int level,
+	unsigned long long estimated_src_size)
+{
+	return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+	return ZSTD_estimateCCtxSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+	if (workspace == NULL)
+		return NULL;
+	return ZSTD_initStaticCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+	ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
+	return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+	return ZSTD_estimateCStreamSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+	unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+	zstd_cstream *cstream;
+
+	if (workspace == NULL)
+		return NULL;
+
+	cstream = ZSTD_initStaticCStream(workspace, workspace_size);
+	if (cstream == NULL)
+		return NULL;
+
+	/* 0 means unknown in linux zstd API but means 0 in new zstd API */
+	if (pledged_src_size == 0)
+		pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+
+	if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
+		return NULL;
+
+	return cstream;
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+	unsigned long long pledged_src_size)
+{
+	return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
+{
+	return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+	return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+	return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c
new file mode 100644
index 000000000000..f4ed952ed485
--- /dev/null
+++ b/lib/zstd/zstd_decompress_module.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#include "common/zstd_deps.h"
+
+/* Common symbols. zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+	return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+	return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+	return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+	return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+	if (workspace == NULL)
+		return NULL;
+	return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+	const void *src, size_t src_size)
+{
+	return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+	return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+	size_t workspace_size)
+{
+	if (workspace == NULL)
+		return NULL;
+	(void)max_window_size;
+	return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+	return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+	zstd_in_buffer *input)
+{
+	return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+	return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+	size_t src_size)
+{
+	return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h
deleted file mode 100644
index dac753397f86..000000000000
--- a/lib/zstd/zstd_internal.h
+++ /dev/null
@@ -1,273 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-#ifndef ZSTD_CCOMMON_H_MODULE
-#define ZSTD_CCOMMON_H_MODULE
-
-/*-*******************************************************
-*  Compiler specifics
-*********************************************************/
-#define FORCE_INLINE static __always_inline
-#define FORCE_NOINLINE static noinline
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "error_private.h"
-#include "mem.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/xxhash.h>
-#include <linux/zstd.h>
-
-/*-*************************************
-*  shared macros
-***************************************/
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define CHECK_F(f)                       \
-	{                                \
-		size_t const errcod = f; \
-		if (ERR_isError(errcod)) \
-			return errcod;   \
-	} /* check and Forward error code */
-#define CHECK_E(f, e)                    \
-	{                                \
-		size_t const errcod = f; \
-		if (ERR_isError(errcod)) \
-			return ERROR(e); \
-	} /* check and send Error code */
-#define ZSTD_STATIC_ASSERT(c)                                   \
-	{                                                       \
-		enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \
-	}
-
-/*-*************************************
-*  Common constants
-***************************************/
-#define ZSTD_OPT_NUM (1 << 12)
-#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */
-
-#define ZSTD_REP_NUM 3		      /* number of repcodes */
-#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */
-#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1)
-#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
-static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8};
-
-#define KB *(1 << 10)
-#define MB *(1 << 20)
-#define GB *(1U << 30)
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
-
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
-static const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8};
-static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4};
-
-#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
-static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
-
-#define MIN_SEQUENCES_SIZE 1									  /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
-
-#define HufLog 12
-typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
-
-#define LONGNBSEQ 0x7F00
-
-#define MINMATCH 3
-#define EQUAL_READ32 4
-
-#define Litbits 8
-#define MaxLit ((1 << Litbits) - 1)
-#define MaxML 52
-#define MaxLL 35
-#define MaxOff 28
-#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
-#define MLFSELog 9
-#define LLFSELog 9
-#define OffFSELog 8
-
-static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
-#define LL_DEFAULTNORMLOG 6 /* for static allocation */
-static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
-
-static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0, 0,
-				       0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  1,  1,  1,  1,  1,  1, 1,
-					      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
-#define ML_DEFAULTNORMLOG 6 /* for static allocation */
-static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
-
-static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
-#define OF_DEFAULTNORMLOG 5 /* for static allocation */
-static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
-
-/*-*******************************************
-*  Shared functions to include for inlining
-*********************************************/
-ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
-	/*
-	 * zstd relies heavily on gcc being able to analyze and inline this
-	 * memcpy() call, since it is called in a tight loop. Preboot mode
-	 * is compiled in freestanding mode, which stops gcc from analyzing
-	 * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a
-	 * regular memcpy().
-	 */
-	__builtin_memcpy(dst, src, 8);
-}
-/*! ZSTD_wildcopy() :
-*   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length)
-{
-	const BYTE* ip = (const BYTE*)src;
-	BYTE* op = (BYTE*)dst;
-	BYTE* const oend = op + length;
-#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200
-	/*
-	 * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
-	 * Avoid the bad case where the loop only runs once by handling the
-	 * special case separately. This doesn't trigger the bug because it
-	 * doesn't involve pointer/integer overflow.
-	 */
-	if (length <= 8)
-		return ZSTD_copy8(dst, src);
-#endif
-	do {
-		ZSTD_copy8(op, ip);
-		op += 8;
-		ip += 8;
-	} while (op < oend);
-}
-
-/*-*******************************************
-*  Private interfaces
-*********************************************/
-typedef struct ZSTD_stats_s ZSTD_stats_t;
-
-typedef struct {
-	U32 off;
-	U32 len;
-} ZSTD_match_t;
-
-typedef struct {
-	U32 price;
-	U32 off;
-	U32 mlen;
-	U32 litlen;
-	U32 rep[ZSTD_REP_NUM];
-} ZSTD_optimal_t;
-
-typedef struct seqDef_s {
-	U32 offset;
-	U16 litLength;
-	U16 matchLength;
-} seqDef;
-
-typedef struct {
-	seqDef *sequencesStart;
-	seqDef *sequences;
-	BYTE *litStart;
-	BYTE *lit;
-	BYTE *llCode;
-	BYTE *mlCode;
-	BYTE *ofCode;
-	U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
-	U32 longLengthPos;
-	/* opt */
-	ZSTD_optimal_t *priceTable;
-	ZSTD_match_t *matchTable;
-	U32 *matchLengthFreq;
-	U32 *litLengthFreq;
-	U32 *litFreq;
-	U32 *offCodeFreq;
-	U32 matchLengthSum;
-	U32 matchSum;
-	U32 litLengthSum;
-	U32 litSum;
-	U32 offCodeSum;
-	U32 log2matchLengthSum;
-	U32 log2matchSum;
-	U32 log2litLengthSum;
-	U32 log2litSum;
-	U32 log2offCodeSum;
-	U32 factor;
-	U32 staticPrices;
-	U32 cachedPrice;
-	U32 cachedLitLength;
-	const BYTE *cachedLiterals;
-} seqStore_t;
-
-const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx);
-void ZSTD_seqToCodes(const seqStore_t *seqStorePtr);
-int ZSTD_isSkipFrame(ZSTD_DCtx *dctx);
-
-/*= Custom memory allocation functions */
-typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size);
-typedef void (*ZSTD_freeFunction)(void *opaque, void *address);
-typedef struct {
-	ZSTD_allocFunction customAlloc;
-	ZSTD_freeFunction customFree;
-	void *opaque;
-} ZSTD_customMem;
-
-void *ZSTD_malloc(size_t size, ZSTD_customMem customMem);
-void ZSTD_free(void *ptr, ZSTD_customMem customMem);
-
-/*====== stack allocation  ======*/
-
-typedef struct {
-	void *ptr;
-	const void *end;
-} ZSTD_stack;
-
-#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t))
-#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t))
-
-ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize);
-
-void *ZSTD_stackAllocAll(void *opaque, size_t *size);
-void *ZSTD_stackAlloc(void *opaque, size_t size);
-void ZSTD_stackFree(void *opaque, void *address);
-
-/*======  common function  ======*/
-
-ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); }
-
-/* hidden functions */
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx);
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx);
-size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx);
-size_t ZSTD_freeCDict(ZSTD_CDict *cdict);
-size_t ZSTD_freeDDict(ZSTD_DDict *cdict);
-size_t ZSTD_freeCStream(ZSTD_CStream *zcs);
-size_t ZSTD_freeDStream(ZSTD_DStream *zds);
-
-#endif /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/zstd_opt.h b/lib/zstd/zstd_opt.h
deleted file mode 100644
index 55e1b4cba808..000000000000
--- a/lib/zstd/zstd_opt.h
+++ /dev/null
@@ -1,1014 +0,0 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* Note : this file is intended to be included within zstd_compress.c */
-
-#ifndef ZSTD_OPT_H_91842398743
-#define ZSTD_OPT_H_91842398743
-
-#define ZSTD_LITFREQ_ADD 2
-#define ZSTD_FREQ_DIV 4
-#define ZSTD_MAX_PRICE (1 << 30)
-
-/*-*************************************
-*  Price functions for optimal parser
-***************************************/
-FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t *ssPtr)
-{
-	ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum + 1);
-	ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum + 1);
-	ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum + 1);
-	ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum + 1);
-	ssPtr->factor = 1 + ((ssPtr->litSum >> 5) / ssPtr->litLengthSum) + ((ssPtr->litSum << 1) / (ssPtr->litSum + ssPtr->matchSum));
-}
-
-ZSTD_STATIC void ZSTD_rescaleFreqs(seqStore_t *ssPtr, const BYTE *src, size_t srcSize)
-{
-	unsigned u;
-
-	ssPtr->cachedLiterals = NULL;
-	ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
-	ssPtr->staticPrices = 0;
-
-	if (ssPtr->litLengthSum == 0) {
-		if (srcSize <= 1024)
-			ssPtr->staticPrices = 1;
-
-		for (u = 0; u <= MaxLit; u++)
-			ssPtr->litFreq[u] = 0;
-		for (u = 0; u < srcSize; u++)
-			ssPtr->litFreq[src[u]]++;
-
-		ssPtr->litSum = 0;
-		ssPtr->litLengthSum = MaxLL + 1;
-		ssPtr->matchLengthSum = MaxML + 1;
-		ssPtr->offCodeSum = (MaxOff + 1);
-		ssPtr->matchSum = (ZSTD_LITFREQ_ADD << Litbits);
-
-		for (u = 0; u <= MaxLit; u++) {
-			ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> ZSTD_FREQ_DIV);
-			ssPtr->litSum += ssPtr->litFreq[u];
-		}
-		for (u = 0; u <= MaxLL; u++)
-			ssPtr->litLengthFreq[u] = 1;
-		for (u = 0; u <= MaxML; u++)
-			ssPtr->matchLengthFreq[u] = 1;
-		for (u = 0; u <= MaxOff; u++)
-			ssPtr->offCodeFreq[u] = 1;
-	} else {
-		ssPtr->matchLengthSum = 0;
-		ssPtr->litLengthSum = 0;
-		ssPtr->offCodeSum = 0;
-		ssPtr->matchSum = 0;
-		ssPtr->litSum = 0;
-
-		for (u = 0; u <= MaxLit; u++) {
-			ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> (ZSTD_FREQ_DIV + 1));
-			ssPtr->litSum += ssPtr->litFreq[u];
-		}
-		for (u = 0; u <= MaxLL; u++) {
-			ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u] >> (ZSTD_FREQ_DIV + 1));
-			ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
-		}
-		for (u = 0; u <= MaxML; u++) {
-			ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV);
-			ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
-			ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
-		}
-		ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
-		for (u = 0; u <= MaxOff; u++) {
-			ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV);
-			ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
-		}
-	}
-
-	ZSTD_setLog2Prices(ssPtr);
-}
-
-FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t *ssPtr, U32 litLength, const BYTE *literals)
-{
-	U32 price, u;
-
-	if (ssPtr->staticPrices)
-		return ZSTD_highbit32((U32)litLength + 1) + (litLength * 6);
-
-	if (litLength == 0)
-		return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0] + 1);
-
-	/* literals */
-	if (ssPtr->cachedLiterals == literals) {
-		U32 const additional = litLength - ssPtr->cachedLitLength;
-		const BYTE *literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
-		price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
-		for (u = 0; u < additional; u++)
-			price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]] + 1);
-		ssPtr->cachedPrice = price;
-		ssPtr->cachedLitLength = litLength;
-	} else {
-		price = litLength * ssPtr->log2litSum;
-		for (u = 0; u < litLength; u++)
-			price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]] + 1);
-
-		if (litLength >= 12) {
-			ssPtr->cachedLiterals = literals;
-			ssPtr->cachedPrice = price;
-			ssPtr->cachedLitLength = litLength;
-		}
-	}
-
-	/* literal Length */
-	{
-		const BYTE LL_deltaCode = 19;
-		const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-		price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode] + 1);
-	}
-
-	return price;
-}
-
-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength, const int ultra)
-{
-	/* offset */
-	U32 price;
-	BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
-
-	if (seqStorePtr->staticPrices)
-		return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength + 1) + 16 + offCode;
-
-	price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode] + 1);
-	if (!ultra && offCode >= 20)
-		price += (offCode - 19) * 2;
-
-	/* match Length */
-	{
-		const BYTE ML_deltaCode = 36;
-		const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-		price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode] + 1);
-	}
-
-	return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
-}
-
-ZSTD_STATIC void ZSTD_updatePrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength)
-{
-	U32 u;
-
-	/* literals */
-	seqStorePtr->litSum += litLength * ZSTD_LITFREQ_ADD;
-	for (u = 0; u < litLength; u++)
-		seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
-
-	/* literal Length */
-	{
-		const BYTE LL_deltaCode = 19;
-		const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-		seqStorePtr->litLengthFreq[llCode]++;
-		seqStorePtr->litLengthSum++;
-	}
-
-	/* match offset */
-	{
-		BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
-		seqStorePtr->offCodeSum++;
-		seqStorePtr->offCodeFreq[offCode]++;
-	}
-
-	/* match Length */
-	{
-		const BYTE ML_deltaCode = 36;
-		const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-		seqStorePtr->matchLengthFreq[mlCode]++;
-		seqStorePtr->matchLengthSum++;
-	}
-
-	ZSTD_setLog2Prices(seqStorePtr);
-}
-
-#define SET_PRICE(pos, mlen_, offset_, litlen_, price_)           \
-	{                                                         \
-		while (last_pos < pos) {                          \
-			opt[last_pos + 1].price = ZSTD_MAX_PRICE; \
-			last_pos++;                               \
-		}                                                 \
-		opt[pos].mlen = mlen_;                            \
-		opt[pos].off = offset_;                           \
-		opt[pos].litlen = litlen_;                        \
-		opt[pos].price = price_;                          \
-	}
-
-/* Update hashTable3 up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
-U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx *zc, const BYTE *ip)
-{
-	U32 *const hashTable3 = zc->hashTable3;
-	U32 const hashLog3 = zc->hashLog3;
-	const BYTE *const base = zc->base;
-	U32 idx = zc->nextToUpdate3;
-	const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
-	const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
-
-	while (idx < target) {
-		hashTable3[ZSTD_hash3Ptr(base + idx, hashLog3)] = idx;
-		idx++;
-	}
-
-	return hashTable3[hash3];
-}
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-static U32 ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, U32 nbCompares, const U32 mls, U32 extDict,
-					 ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	const BYTE *const base = zc->base;
-	const U32 curr = (U32)(ip - base);
-	const U32 hashLog = zc->params.cParams.hashLog;
-	const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
-	U32 *const hashTable = zc->hashTable;
-	U32 matchIndex = hashTable[h];
-	U32 *const bt = zc->chainTable;
-	const U32 btLog = zc->params.cParams.chainLog - 1;
-	const U32 btMask = (1U << btLog) - 1;
-	size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-	const BYTE *const dictBase = zc->dictBase;
-	const U32 dictLimit = zc->dictLimit;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-	const U32 windowLow = zc->lowLimit;
-	U32 *smallerPtr = bt + 2 * (curr & btMask);
-	U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
-	U32 matchEndIdx = curr + 8;
-	U32 dummy32; /* to be nullified at the end */
-	U32 mnum = 0;
-
-	const U32 minMatch = (mls == 3) ? 3 : 4;
-	size_t bestLength = minMatchLen - 1;
-
-	if (minMatch == 3) { /* HC3 match finder */
-		U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(zc, ip);
-		if (matchIndex3 > windowLow && (curr - matchIndex3 < (1 << 18))) {
-			const BYTE *match;
-			size_t currMl = 0;
-			if ((!extDict) || matchIndex3 >= dictLimit) {
-				match = base + matchIndex3;
-				if (match[bestLength] == ip[bestLength])
-					currMl = ZSTD_count(ip, match, iLimit);
-			} else {
-				match = dictBase + matchIndex3;
-				if (ZSTD_readMINMATCH(match, MINMATCH) ==
-				    ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
-					currMl = ZSTD_count_2segments(ip + MINMATCH, match + MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
-			}
-
-			/* save best solution */
-			if (currMl > bestLength) {
-				bestLength = currMl;
-				matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex3;
-				matches[mnum].len = (U32)currMl;
-				mnum++;
-				if (currMl > ZSTD_OPT_NUM)
-					goto update;
-				if (ip + currMl == iLimit)
-					goto update; /* best possible, and avoid read overflow*/
-			}
-		}
-	}
-
-	hashTable[h] = curr; /* Update Hash Table */
-
-	while (nbCompares-- && (matchIndex > windowLow)) {
-		U32 *nextPtr = bt + 2 * (matchIndex & btMask);
-		size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-		const BYTE *match;
-
-		if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-			match = base + matchIndex;
-			if (match[matchLength] == ip[matchLength]) {
-				matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iLimit) + 1;
-			}
-		} else {
-			match = dictBase + matchIndex;
-			matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart);
-			if (matchIndex + matchLength >= dictLimit)
-				match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-		}
-
-		if (matchLength > bestLength) {
-			if (matchLength > matchEndIdx - matchIndex)
-				matchEndIdx = matchIndex + (U32)matchLength;
-			bestLength = matchLength;
-			matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex;
-			matches[mnum].len = (U32)matchLength;
-			mnum++;
-			if (matchLength > ZSTD_OPT_NUM)
-				break;
-			if (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */
-				break;			/* drop, to guarantee consistency (miss a little bit of compression) */
-		}
-
-		if (match[matchLength] < ip[matchLength]) {
-			/* match is smaller than curr */
-			*smallerPtr = matchIndex;	  /* update smaller idx */
-			commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-			if (matchIndex <= btLow) {
-				smallerPtr = &dummy32;
-				break;
-			}			  /* beyond tree size, stop the search */
-			smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-			matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-		} else {
-			/* match is larger than curr */
-			*largerPtr = matchIndex;
-			commonLengthLarger = matchLength;
-			if (matchIndex <= btLow) {
-				largerPtr = &dummy32;
-				break;
-			} /* beyond tree size, stop the search */
-			largerPtr = nextPtr;
-			matchIndex = nextPtr[0];
-		}
-	}
-
-	*smallerPtr = *largerPtr = 0;
-
-update:
-	zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
-	return mnum;
-}
-
-/** Tree updater, providing best match */
-static U32 ZSTD_BtGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t *matches,
-				const U32 minMatchLen)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
-}
-
-static U32 ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
-					  const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
-					  ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	switch (matchLengthSearch) {
-	case 3: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
-	default:
-	case 4: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
-	case 5: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
-	case 7:
-	case 6: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
-	}
-}
-
-/** Tree updater, providing best match */
-static U32 ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls,
-					ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	if (ip < zc->base + zc->nextToUpdate)
-		return 0; /* skipped area */
-	ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-	return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
-}
-
-static U32 ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
-						  const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
-						  ZSTD_match_t *matches, const U32 minMatchLen)
-{
-	switch (matchLengthSearch) {
-	case 3: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
-	default:
-	case 4: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
-	case 5: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
-	case 7:
-	case 6: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
-	}
-}
-
-/*-*******************************
-*  Optimal parser
-*********************************/
-FORCE_INLINE
-void ZSTD_compressBlock_opt_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base;
-	const BYTE *const prefixStart = base + ctx->dictLimit;
-
-	const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-	const U32 sufficient_len = ctx->params.cParams.targetLength;
-	const U32 mls = ctx->params.cParams.searchLength;
-	const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
-
-	ZSTD_optimal_t *opt = seqStorePtr->priceTable;
-	ZSTD_match_t *matches = seqStorePtr->matchTable;
-	const BYTE *inr;
-	U32 offset, rep[ZSTD_REP_NUM];
-
-	/* init */
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
-	ip += (ip == prefixStart);
-	{
-		U32 i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			rep[i] = ctx->rep[i];
-	}
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		U32 cur, match_num, last_pos, litlen, price;
-		U32 u, mlen, best_mlen, best_off, litLength;
-		memset(opt, 0, sizeof(ZSTD_optimal_t));
-		last_pos = 0;
-		litlen = (U32)(ip - anchor);
-
-		/* check repCode */
-		{
-			U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
-			for (i = (ip == anchor); i < last_i; i++) {
-				const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
-				if ((repCur > 0) && (repCur < (S32)(ip - prefixStart)) &&
-				    (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
-					mlen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repCur, iend) + minMatch;
-					if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-						best_mlen = mlen;
-						best_off = i;
-						cur = 0;
-						last_pos = 1;
-						goto _storeSequence;
-					}
-					best_off = i - (ip == anchor);
-					do {
-						price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-						if (mlen > last_pos || price < opt[mlen].price)
-							SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
-						mlen--;
-					} while (mlen >= minMatch);
-				}
-			}
-		}
-
-		match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
-
-		if (!last_pos && !match_num) {
-			ip++;
-			continue;
-		}
-
-		if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-			best_mlen = matches[match_num - 1].len;
-			best_off = matches[match_num - 1].off;
-			cur = 0;
-			last_pos = 1;
-			goto _storeSequence;
-		}
-
-		/* set prices using matches at position = 0 */
-		best_mlen = (last_pos) ? last_pos : minMatch;
-		for (u = 0; u < match_num; u++) {
-			mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-			best_mlen = matches[u].len;
-			while (mlen <= best_mlen) {
-				price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-				if (mlen > last_pos || price < opt[mlen].price)
-					SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
-				mlen++;
-			}
-		}
-
-		if (last_pos < minMatch) {
-			ip++;
-			continue;
-		}
-
-		/* initialize opt[0] */
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				opt[0].rep[i] = rep[i];
-		}
-		opt[0].mlen = 1;
-		opt[0].litlen = litlen;
-
-		/* check further positions */
-		for (cur = 1; cur <= last_pos; cur++) {
-			inr = ip + cur;
-
-			if (opt[cur - 1].mlen == 1) {
-				litlen = opt[cur - 1].litlen + 1;
-				if (cur > litlen) {
-					price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
-				} else
-					price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
-			} else {
-				litlen = 1;
-				price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
-			}
-
-			if (cur > last_pos || price <= opt[cur].price)
-				SET_PRICE(cur, 1, 0, litlen, price);
-
-			if (cur == last_pos)
-				break;
-
-			if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
-				continue;
-
-			mlen = opt[cur].mlen;
-			if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
-				opt[cur].rep[2] = opt[cur - mlen].rep[1];
-				opt[cur].rep[1] = opt[cur - mlen].rep[0];
-				opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
-			} else {
-				opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
-				opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
-				opt[cur].rep[0] =
-				    ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
-			}
-
-			best_mlen = minMatch;
-			{
-				U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
-				for (i = (opt[cur].mlen != 1); i < last_i; i++) { /* check rep */
-					const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
-					if ((repCur > 0) && (repCur < (S32)(inr - prefixStart)) &&
-					    (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
-						mlen = (U32)ZSTD_count(inr + minMatch, inr + minMatch - repCur, iend) + minMatch;
-
-						if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-							best_mlen = mlen;
-							best_off = i;
-							last_pos = cur + 1;
-							goto _storeSequence;
-						}
-
-						best_off = i - (opt[cur].mlen != 1);
-						if (mlen > best_mlen)
-							best_mlen = mlen;
-
-						do {
-							if (opt[cur].mlen == 1) {
-								litlen = opt[cur].litlen;
-								if (cur > litlen) {
-									price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
-															best_off, mlen - MINMATCH, ultra);
-								} else
-									price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-							} else {
-								litlen = 0;
-								price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
-							}
-
-							if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
-								SET_PRICE(cur + mlen, mlen, i, litlen, price);
-							mlen--;
-						} while (mlen >= minMatch);
-					}
-				}
-			}
-
-			match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
-
-			if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-				best_mlen = matches[match_num - 1].len;
-				best_off = matches[match_num - 1].off;
-				last_pos = cur + 1;
-				goto _storeSequence;
-			}
-
-			/* set prices using matches at position = cur */
-			for (u = 0; u < match_num; u++) {
-				mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-				best_mlen = matches[u].len;
-
-				while (mlen <= best_mlen) {
-					if (opt[cur].mlen == 1) {
-						litlen = opt[cur].litlen;
-						if (cur > litlen)
-							price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
-													matches[u].off - 1, mlen - MINMATCH, ultra);
-						else
-							price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-					} else {
-						litlen = 0;
-						price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
-					}
-
-					if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
-						SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
-
-					mlen++;
-				}
-			}
-		}
-
-		best_mlen = opt[last_pos].mlen;
-		best_off = opt[last_pos].off;
-		cur = last_pos - best_mlen;
-
-	/* store sequence */
-_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
-		opt[0].mlen = 1;
-
-		while (1) {
-			mlen = opt[cur].mlen;
-			offset = opt[cur].off;
-			opt[cur].mlen = best_mlen;
-			opt[cur].off = best_off;
-			best_mlen = mlen;
-			best_off = offset;
-			if (mlen > cur)
-				break;
-			cur -= mlen;
-		}
-
-		for (u = 0; u <= last_pos;) {
-			u += opt[u].mlen;
-		}
-
-		for (cur = 0; cur < last_pos;) {
-			mlen = opt[cur].mlen;
-			if (mlen == 1) {
-				ip++;
-				cur++;
-				continue;
-			}
-			offset = opt[cur].off;
-			cur += mlen;
-			litLength = (U32)(ip - anchor);
-
-			if (offset > ZSTD_REP_MOVE_OPT) {
-				rep[2] = rep[1];
-				rep[1] = rep[0];
-				rep[0] = offset - ZSTD_REP_MOVE_OPT;
-				offset--;
-			} else {
-				if (offset != 0) {
-					best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
-					if (offset != 1)
-						rep[2] = rep[1];
-					rep[1] = rep[0];
-					rep[0] = best_off;
-				}
-				if (litLength == 0)
-					offset--;
-			}
-
-			ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			anchor = ip = ip + mlen;
-		}
-	} /* for (cur=0; cur < last_pos; ) */
-
-	/* Save reps for next block */
-	{
-		int i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			ctx->repToConfirm[i] = rep[i];
-	}
-
-	/* Last Literals */
-	{
-		size_t const lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
-{
-	seqStore_t *seqStorePtr = &(ctx->seqStore);
-	const BYTE *const istart = (const BYTE *)src;
-	const BYTE *ip = istart;
-	const BYTE *anchor = istart;
-	const BYTE *const iend = istart + srcSize;
-	const BYTE *const ilimit = iend - 8;
-	const BYTE *const base = ctx->base;
-	const U32 lowestIndex = ctx->lowLimit;
-	const U32 dictLimit = ctx->dictLimit;
-	const BYTE *const prefixStart = base + dictLimit;
-	const BYTE *const dictBase = ctx->dictBase;
-	const BYTE *const dictEnd = dictBase + dictLimit;
-
-	const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-	const U32 sufficient_len = ctx->params.cParams.targetLength;
-	const U32 mls = ctx->params.cParams.searchLength;
-	const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
-
-	ZSTD_optimal_t *opt = seqStorePtr->priceTable;
-	ZSTD_match_t *matches = seqStorePtr->matchTable;
-	const BYTE *inr;
-
-	/* init */
-	U32 offset, rep[ZSTD_REP_NUM];
-	{
-		U32 i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			rep[i] = ctx->rep[i];
-	}
-
-	ctx->nextToUpdate3 = ctx->nextToUpdate;
-	ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
-	ip += (ip == prefixStart);
-
-	/* Match Loop */
-	while (ip < ilimit) {
-		U32 cur, match_num, last_pos, litlen, price;
-		U32 u, mlen, best_mlen, best_off, litLength;
-		U32 curr = (U32)(ip - base);
-		memset(opt, 0, sizeof(ZSTD_optimal_t));
-		last_pos = 0;
-		opt[0].litlen = (U32)(ip - anchor);
-
-		/* check repCode */
-		{
-			U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
-			for (i = (ip == anchor); i < last_i; i++) {
-				const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
-				const U32 repIndex = (U32)(curr - repCur);
-				const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-				const BYTE *const repMatch = repBase + repIndex;
-				if ((repCur > 0 && repCur <= (S32)curr) &&
-				    (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-				    && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
-					/* repcode detected we should take it */
-					const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-					mlen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
-
-					if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-						best_mlen = mlen;
-						best_off = i;
-						cur = 0;
-						last_pos = 1;
-						goto _storeSequence;
-					}
-
-					best_off = i - (ip == anchor);
-					litlen = opt[0].litlen;
-					do {
-						price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-						if (mlen > last_pos || price < opt[mlen].price)
-							SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
-						mlen--;
-					} while (mlen >= minMatch);
-				}
-			}
-		}
-
-		match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
-
-		if (!last_pos && !match_num) {
-			ip++;
-			continue;
-		}
-
-		{
-			U32 i;
-			for (i = 0; i < ZSTD_REP_NUM; i++)
-				opt[0].rep[i] = rep[i];
-		}
-		opt[0].mlen = 1;
-
-		if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-			best_mlen = matches[match_num - 1].len;
-			best_off = matches[match_num - 1].off;
-			cur = 0;
-			last_pos = 1;
-			goto _storeSequence;
-		}
-
-		best_mlen = (last_pos) ? last_pos : minMatch;
-
-		/* set prices using matches at position = 0 */
-		for (u = 0; u < match_num; u++) {
-			mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-			best_mlen = matches[u].len;
-			litlen = opt[0].litlen;
-			while (mlen <= best_mlen) {
-				price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-				if (mlen > last_pos || price < opt[mlen].price)
-					SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
-				mlen++;
-			}
-		}
-
-		if (last_pos < minMatch) {
-			ip++;
-			continue;
-		}
-
-		/* check further positions */
-		for (cur = 1; cur <= last_pos; cur++) {
-			inr = ip + cur;
-
-			if (opt[cur - 1].mlen == 1) {
-				litlen = opt[cur - 1].litlen + 1;
-				if (cur > litlen) {
-					price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
-				} else
-					price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
-			} else {
-				litlen = 1;
-				price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
-			}
-
-			if (cur > last_pos || price <= opt[cur].price)
-				SET_PRICE(cur, 1, 0, litlen, price);
-
-			if (cur == last_pos)
-				break;
-
-			if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
-				continue;
-
-			mlen = opt[cur].mlen;
-			if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
-				opt[cur].rep[2] = opt[cur - mlen].rep[1];
-				opt[cur].rep[1] = opt[cur - mlen].rep[0];
-				opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
-			} else {
-				opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
-				opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
-				opt[cur].rep[0] =
-				    ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
-			}
-
-			best_mlen = minMatch;
-			{
-				U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
-				for (i = (mlen != 1); i < last_i; i++) {
-					const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
-					const U32 repIndex = (U32)(curr + cur - repCur);
-					const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-					const BYTE *const repMatch = repBase + repIndex;
-					if ((repCur > 0 && repCur <= (S32)(curr + cur)) &&
-					    (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-					    && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
-						/* repcode detected */
-						const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-						mlen = (U32)ZSTD_count_2segments(inr + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
-
-						if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-							best_mlen = mlen;
-							best_off = i;
-							last_pos = cur + 1;
-							goto _storeSequence;
-						}
-
-						best_off = i - (opt[cur].mlen != 1);
-						if (mlen > best_mlen)
-							best_mlen = mlen;
-
-						do {
-							if (opt[cur].mlen == 1) {
-								litlen = opt[cur].litlen;
-								if (cur > litlen) {
-									price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
-															best_off, mlen - MINMATCH, ultra);
-								} else
-									price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-							} else {
-								litlen = 0;
-								price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
-							}
-
-							if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
-								SET_PRICE(cur + mlen, mlen, i, litlen, price);
-							mlen--;
-						} while (mlen >= minMatch);
-					}
-				}
-			}
-
-			match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
-
-			if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-				best_mlen = matches[match_num - 1].len;
-				best_off = matches[match_num - 1].off;
-				last_pos = cur + 1;
-				goto _storeSequence;
-			}
-
-			/* set prices using matches at position = cur */
-			for (u = 0; u < match_num; u++) {
-				mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-				best_mlen = matches[u].len;
-
-				while (mlen <= best_mlen) {
-					if (opt[cur].mlen == 1) {
-						litlen = opt[cur].litlen;
-						if (cur > litlen)
-							price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
-													matches[u].off - 1, mlen - MINMATCH, ultra);
-						else
-							price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-					} else {
-						litlen = 0;
-						price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
-					}
-
-					if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
-						SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
-
-					mlen++;
-				}
-			}
-		} /* for (cur = 1; cur <= last_pos; cur++) */
-
-		best_mlen = opt[last_pos].mlen;
-		best_off = opt[last_pos].off;
-		cur = last_pos - best_mlen;
-
-	/* store sequence */
-_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
-		opt[0].mlen = 1;
-
-		while (1) {
-			mlen = opt[cur].mlen;
-			offset = opt[cur].off;
-			opt[cur].mlen = best_mlen;
-			opt[cur].off = best_off;
-			best_mlen = mlen;
-			best_off = offset;
-			if (mlen > cur)
-				break;
-			cur -= mlen;
-		}
-
-		for (u = 0; u <= last_pos;) {
-			u += opt[u].mlen;
-		}
-
-		for (cur = 0; cur < last_pos;) {
-			mlen = opt[cur].mlen;
-			if (mlen == 1) {
-				ip++;
-				cur++;
-				continue;
-			}
-			offset = opt[cur].off;
-			cur += mlen;
-			litLength = (U32)(ip - anchor);
-
-			if (offset > ZSTD_REP_MOVE_OPT) {
-				rep[2] = rep[1];
-				rep[1] = rep[0];
-				rep[0] = offset - ZSTD_REP_MOVE_OPT;
-				offset--;
-			} else {
-				if (offset != 0) {
-					best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
-					if (offset != 1)
-						rep[2] = rep[1];
-					rep[1] = rep[0];
-					rep[0] = best_off;
-				}
-
-				if (litLength == 0)
-					offset--;
-			}
-
-			ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-			anchor = ip = ip + mlen;
-		}
-	} /* for (cur=0; cur < last_pos; ) */
-
-	/* Save reps for next block */
-	{
-		int i;
-		for (i = 0; i < ZSTD_REP_NUM; i++)
-			ctx->repToConfirm[i] = rep[i];
-	}
-
-	/* Last Literals */
-	{
-		size_t lastLLSize = iend - anchor;
-		memcpy(seqStorePtr->lit, anchor, lastLLSize);
-		seqStorePtr->lit += lastLLSize;
-	}
-}
-
-#endif /* ZSTD_OPT_H_91842398743 */
-- 
cgit v1.2.3


From fa443bc3c1e4b28d9315dea882e8358ba6e26f8b Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <linux@weissschuh.net>
Date: Fri, 29 Oct 2021 17:28:56 +0200
Subject: HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows to selectively autoload drivers for ISH devices.
Currently all ISH drivers are loaded for all systems having any ISH
device.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Acked-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/mod_devicetable.h   | 13 +++++++++++++
 scripts/mod/devicetable-offsets.c |  3 +++
 scripts/mod/file2alias.c          | 24 ++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index ae2e75d15b21..befbf53c4b7c 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -895,4 +895,17 @@ struct dfl_device_id {
 	kernel_ulong_t driver_data;
 };
 
+/* ISHTP (Integrated Sensor Hub Transport Protocol) */
+
+#define ISHTP_MODULE_PREFIX	"ishtp:"
+
+/**
+ * struct ishtp_device_id - ISHTP device identifier
+ * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
+ * @context: pointer to driver specific data
+ */
+struct ishtp_device_id {
+	guid_t guid;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index cc3625617a0e..c0d3bcb99138 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -259,5 +259,8 @@ int main(void)
 	DEVID_FIELD(dfl_device_id, type);
 	DEVID_FIELD(dfl_device_id, feature_id);
 
+	DEVID(ishtp_device_id);
+	DEVID_FIELD(ishtp_device_id, guid);
+
 	return 0;
 }
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 49aba862073e..5258247d78ac 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -115,6 +115,17 @@ static inline void add_uuid(char *str, uuid_le uuid)
 		uuid.b[12], uuid.b[13], uuid.b[14], uuid.b[15]);
 }
 
+static inline void add_guid(char *str, guid_t guid)
+{
+	int len = strlen(str);
+
+	sprintf(str + len, "%02X%02X%02X%02X-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
+		guid.b[3], guid.b[2], guid.b[1], guid.b[0],
+		guid.b[5], guid.b[4], guid.b[7], guid.b[6],
+		guid.b[8], guid.b[9], guid.b[10], guid.b[11],
+		guid.b[12], guid.b[13], guid.b[14], guid.b[15]);
+}
+
 /**
  * Check that sizeof(device_id type) are consistent with size of section
  * in .o file. If in-consistent then userspace and kernel does not agree
@@ -1380,6 +1391,18 @@ static int do_mhi_entry(const char *filename, void *symval, char *alias)
 	return 1;
 }
 
+/* Looks like: ishtp:{guid} */
+static int do_ishtp_entry(const char *filename, void *symval, char *alias)
+{
+	DEF_FIELD(symval, ishtp_device_id, guid);
+
+	strcpy(alias, ISHTP_MODULE_PREFIX "{");
+	add_guid(alias, guid);
+	strcat(alias, "}");
+
+	return 1;
+}
+
 static int do_auxiliary_entry(const char *filename, void *symval, char *alias)
 {
 	DEF_FIELD_ADDR(symval, auxiliary_device_id, name);
@@ -1499,6 +1522,7 @@ static const struct devtable devtable[] = {
 	{"auxiliary", SIZE_auxiliary_device_id, do_auxiliary_entry},
 	{"ssam", SIZE_ssam_device_id, do_ssam_entry},
 	{"dfl", SIZE_dfl_device_id, do_dfl_entry},
+	{"ishtp", SIZE_ishtp_device_id, do_ishtp_entry},
 };
 
 /* Create MODULE_ALIAS() statements.
-- 
cgit v1.2.3


From 9ef4d0209cbadb63656a7aa29fde49c27ab2b9bf Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 9 Nov 2021 15:11:41 +0800
Subject: blk-mq: add one API for waiting until quiesce is done

Some drivers(NVMe, SCSI) need to call quiesce and unquiesce in pair, but it
is hard to switch to this style, so these drivers need one atomic flag for
helping to balance quiesce and unquiesce.

When quiesce is in-progress, the driver still needs to wait until
the quiesce is done, so add API of blk_mq_wait_quiesce_done() for
these drivers.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Link: https://lore.kernel.org/r/20211109071144.181581-2-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 28 ++++++++++++++++++++--------
 include/linux/blk-mq.h |  1 +
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5a9cd9fe8da3..d3e5fcbc943b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -251,22 +251,18 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q)
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
 
 /**
- * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
+ * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
  * @q: request queue.
  *
- * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Once this function is returned, we make
- * sure no dispatch can happen until the queue is unquiesced via
- * blk_mq_unquiesce_queue().
+ * Note: it is driver's responsibility for making sure that quiesce has
+ * been started.
  */
-void blk_mq_quiesce_queue(struct request_queue *q)
+void blk_mq_wait_quiesce_done(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned int i;
 	bool rcu = false;
 
-	blk_mq_quiesce_queue_nowait(q);
-
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (hctx->flags & BLK_MQ_F_BLOCKING)
 			synchronize_srcu(hctx->srcu);
@@ -276,6 +272,22 @@ void blk_mq_quiesce_queue(struct request_queue *q)
 	if (rcu)
 		synchronize_rcu();
 }
+EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);
+
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+	blk_mq_quiesce_queue_nowait(q);
+	blk_mq_wait_quiesce_done(q);
+}
 EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
 
 /*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b4039fdf1b04..d53ee59ba131 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -803,6 +803,7 @@ void blk_mq_start_hw_queues(struct request_queue *q);
 void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_wait_quiesce_done(struct request_queue *q);
 void blk_mq_unquiesce_queue(struct request_queue *q);
 void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
-- 
cgit v1.2.3


From 51b8c1fe250d1bd70c1722dc3c414f5cff2d7cca Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 8 Nov 2021 18:31:24 -0800
Subject: vfs: keep inodes with page cache off the inode shrinker LRU

Historically (pre-2.5), the inode shrinker used to reclaim only empty
inodes and skip over those that still contained page cache.  This caused
problems on highmem hosts: struct inode could put fill lowmem zones
before the cache was getting reclaimed in the highmem zones.

To address this, the inode shrinker started to strip page cache to
facilitate reclaiming lowmem.  However, this comes with its own set of
problems: the shrinkers may drop actively used page cache just because
the inodes are not currently open or dirty - think working with a large
git tree.  It further doesn't respect cgroup memory protection settings
and can cause priority inversions between containers.

Nowadays, the page cache also holds non-resident info for evicted cache
pages in order to detect refaults.  We've come to rely heavily on this
data inside reclaim for protecting the cache workingset and driving swap
behavior.  We also use it to quantify and report workload health through
psi.  The latter in turn is used for fleet health monitoring, as well as
driving automated memory sizing of workloads and containers, proactive
reclaim and memory offloading schemes.

The consequences of dropping page cache prematurely is that we're seeing
subtle and not-so-subtle failures in all of the above-mentioned
scenarios, with the workload generally entering unexpected thrashing
states while losing the ability to reliably detect it.

To fix this on non-highmem systems at least, going back to rotating
inodes on the LRU isn't feasible.  We've tried (commit a76cf1a474d7
("mm: don't reclaim inodes with many attached pages")) and failed
(commit 69056ee6a8a3 ("Revert "mm: don't reclaim inodes with many
attached pages"")).

The issue is mostly that shrinker pools attract pressure based on their
size, and when objects get skipped the shrinkers remember this as
deferred reclaim work.  This accumulates excessive pressure on the
remaining inodes, and we can quickly eat into heavily used ones, or
dirty ones that require IO to reclaim, when there potentially is plenty
of cold, clean cache around still.

Instead, this patch keeps populated inodes off the inode LRU in the
first place - just like an open file or dirty state would.  An otherwise
clean and unused inode then gets queued when the last cache entry
disappears.  This solves the problem without reintroducing the reclaim
issues, and generally is a bit more scalable than having to wade through
potentially hundreds of thousands of busy inodes.

Locking is a bit tricky because the locks protecting the inode state
(i_lock) and the inode LRU (lru_list.lock) don't nest inside the
irq-safe page cache lock (i_pages.xa_lock).  Page cache deletions are
serialized through i_lock, taken before the i_pages lock, to make sure
depopulated inodes are queued reliably.  Additions may race with
deletions, but we'll check again in the shrinker.  If additions race
with the shrinker itself, we're protected by the i_lock: if find_inode()
or iput() win, the shrinker will bail on the elevated i_count or
I_REFERENCED; if the shrinker wins and goes ahead with the inode, it
will set I_FREEING and inhibit further igets(), which will cause the
other side to create a new instance of the inode instead.

Link: https://lkml.kernel.org/r/20210614211904.14420-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Roman Gushchin <guro@fb.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/inode.c              | 46 ++++++++++++++++++++++++++-------------------
 fs/internal.h           |  1 -
 include/linux/fs.h      |  1 +
 include/linux/pagemap.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
 mm/filemap.c            |  8 ++++++++
 mm/truncate.c           | 19 +++++++++++++++++--
 mm/vmscan.c             |  7 +++++++
 mm/workingset.c         | 10 ++++++++++
 8 files changed, 120 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index ed0cab8a32db..a49695f57e1e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -428,11 +428,20 @@ void ihold(struct inode *inode)
 }
 EXPORT_SYMBOL(ihold);
 
-static void inode_lru_list_add(struct inode *inode)
+static void __inode_add_lru(struct inode *inode, bool rotate)
 {
+	if (inode->i_state & (I_DIRTY_ALL | I_SYNC | I_FREEING | I_WILL_FREE))
+		return;
+	if (atomic_read(&inode->i_count))
+		return;
+	if (!(inode->i_sb->s_flags & SB_ACTIVE))
+		return;
+	if (!mapping_shrinkable(&inode->i_data))
+		return;
+
 	if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
 		this_cpu_inc(nr_unused);
-	else
+	else if (rotate)
 		inode->i_state |= I_REFERENCED;
 }
 
@@ -443,16 +452,11 @@ static void inode_lru_list_add(struct inode *inode)
  */
 void inode_add_lru(struct inode *inode)
 {
-	if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
-				I_FREEING | I_WILL_FREE)) &&
-	    !atomic_read(&inode->i_count) && inode->i_sb->s_flags & SB_ACTIVE)
-		inode_lru_list_add(inode);
+	__inode_add_lru(inode, false);
 }
 
-
 static void inode_lru_list_del(struct inode *inode)
 {
-
 	if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
 		this_cpu_dec(nr_unused);
 }
@@ -728,10 +732,6 @@ again:
 /*
  * Isolate the inode from the LRU in preparation for freeing it.
  *
- * Any inodes which are pinned purely because of attached pagecache have their
- * pagecache removed.  If the inode has metadata buffers attached to
- * mapping->private_list then try to remove them.
- *
  * If the inode has the I_REFERENCED flag set, then it means that it has been
  * used recently - the flag is set in iput_final(). When we encounter such an
  * inode, clear the flag and move it to the back of the LRU so it gets another
@@ -747,31 +747,39 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 	struct inode	*inode = container_of(item, struct inode, i_lru);
 
 	/*
-	 * we are inverting the lru lock/inode->i_lock here, so use a trylock.
-	 * If we fail to get the lock, just skip it.
+	 * We are inverting the lru lock/inode->i_lock here, so use a
+	 * trylock. If we fail to get the lock, just skip it.
 	 */
 	if (!spin_trylock(&inode->i_lock))
 		return LRU_SKIP;
 
 	/*
-	 * Referenced or dirty inodes are still in use. Give them another pass
-	 * through the LRU as we canot reclaim them now.
+	 * Inodes can get referenced, redirtied, or repopulated while
+	 * they're already on the LRU, and this can make them
+	 * unreclaimable for a while. Remove them lazily here; iput,
+	 * sync, or the last page cache deletion will requeue them.
 	 */
 	if (atomic_read(&inode->i_count) ||
-	    (inode->i_state & ~I_REFERENCED)) {
+	    (inode->i_state & ~I_REFERENCED) ||
+	    !mapping_shrinkable(&inode->i_data)) {
 		list_lru_isolate(lru, &inode->i_lru);
 		spin_unlock(&inode->i_lock);
 		this_cpu_dec(nr_unused);
 		return LRU_REMOVED;
 	}
 
-	/* recently referenced inodes get one more pass */
+	/* Recently referenced inodes get one more pass */
 	if (inode->i_state & I_REFERENCED) {
 		inode->i_state &= ~I_REFERENCED;
 		spin_unlock(&inode->i_lock);
 		return LRU_ROTATE;
 	}
 
+	/*
+	 * On highmem systems, mapping_shrinkable() permits dropping
+	 * page cache in order to free up struct inodes: lowmem might
+	 * be under pressure before the cache inside the highmem zone.
+	 */
 	if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
 		__iget(inode);
 		spin_unlock(&inode->i_lock);
@@ -1638,7 +1646,7 @@ static void iput_final(struct inode *inode)
 	if (!drop &&
 	    !(inode->i_state & I_DONTCACHE) &&
 	    (sb->s_flags & SB_ACTIVE)) {
-		inode_add_lru(inode);
+		__inode_add_lru(inode, true);
 		spin_unlock(&inode->i_lock);
 		return;
 	}
diff --git a/fs/internal.h b/fs/internal.h
index 3cd065c8a66b..2854ff29f116 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -149,7 +149,6 @@ extern int vfs_open(const struct path *, struct file *);
  * inode.c
  */
 extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
-extern void inode_add_lru(struct inode *inode);
 extern int dentry_needs_remove_privs(struct dentry *dentry);
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 226de651f52e..de35a2640e70 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3193,6 +3193,7 @@ static inline void remove_inode_hash(struct inode *inode)
 }
 
 extern void inode_sb_list_add(struct inode *inode);
+extern void inode_add_lru(struct inode *inode);
 
 extern int sb_set_blocksize(struct super_block *, int);
 extern int sb_min_blocksize(struct super_block *, int);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 62db6b0176b9..5c74a45ff97a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -23,6 +23,56 @@ static inline bool mapping_empty(struct address_space *mapping)
 	return xa_empty(&mapping->i_pages);
 }
 
+/*
+ * mapping_shrinkable - test if page cache state allows inode reclaim
+ * @mapping: the page cache mapping
+ *
+ * This checks the mapping's cache state for the pupose of inode
+ * reclaim and LRU management.
+ *
+ * The caller is expected to hold the i_lock, but is not required to
+ * hold the i_pages lock, which usually protects cache state. That's
+ * because the i_lock and the list_lru lock that protect the inode and
+ * its LRU state don't nest inside the irq-safe i_pages lock.
+ *
+ * Cache deletions are performed under the i_lock, which ensures that
+ * when an inode goes empty, it will reliably get queued on the LRU.
+ *
+ * Cache additions do not acquire the i_lock and may race with this
+ * check, in which case we'll report the inode as shrinkable when it
+ * has cache pages. This is okay: the shrinker also checks the
+ * refcount and the referenced bit, which will be elevated or set in
+ * the process of adding new cache pages to an inode.
+ */
+static inline bool mapping_shrinkable(struct address_space *mapping)
+{
+	void *head;
+
+	/*
+	 * On highmem systems, there could be lowmem pressure from the
+	 * inodes before there is highmem pressure from the page
+	 * cache. Make inodes shrinkable regardless of cache state.
+	 */
+	if (IS_ENABLED(CONFIG_HIGHMEM))
+		return true;
+
+	/* Cache completely empty? Shrink away. */
+	head = rcu_access_pointer(mapping->i_pages.xa_head);
+	if (!head)
+		return true;
+
+	/*
+	 * The xarray stores single offset-0 entries directly in the
+	 * head pointer, which allows non-resident page cache entries
+	 * to escape the shadow shrinker's list of xarray nodes. The
+	 * inode shrinker needs to pick them up under memory pressure.
+	 */
+	if (!xa_is_node(head) && xa_is_value(head))
+		return true;
+
+	return false;
+}
+
 /*
  * Bits in mapping->flags.
  */
diff --git a/mm/filemap.c b/mm/filemap.c
index b6140debc2da..06de9a17499f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -262,9 +262,13 @@ void delete_from_page_cache(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	BUG_ON(!PageLocked(page));
+	spin_lock(&mapping->host->i_lock);
 	xa_lock_irq(&mapping->i_pages);
 	__delete_from_page_cache(page, NULL);
 	xa_unlock_irq(&mapping->i_pages);
+	if (mapping_shrinkable(mapping))
+		inode_add_lru(mapping->host);
+	spin_unlock(&mapping->host->i_lock);
 
 	page_cache_free_page(mapping, page);
 }
@@ -340,6 +344,7 @@ void delete_from_page_cache_batch(struct address_space *mapping,
 	if (!pagevec_count(pvec))
 		return;
 
+	spin_lock(&mapping->host->i_lock);
 	xa_lock_irq(&mapping->i_pages);
 	for (i = 0; i < pagevec_count(pvec); i++) {
 		trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
@@ -348,6 +353,9 @@ void delete_from_page_cache_batch(struct address_space *mapping,
 	}
 	page_cache_delete_batch(mapping, pvec);
 	xa_unlock_irq(&mapping->i_pages);
+	if (mapping_shrinkable(mapping))
+		inode_add_lru(mapping->host);
+	spin_unlock(&mapping->host->i_lock);
 
 	for (i = 0; i < pagevec_count(pvec); i++)
 		page_cache_free_page(mapping, pvec->pages[i]);
diff --git a/mm/truncate.c b/mm/truncate.c
index 714eaf19821d..cc83a3f7c1ad 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -45,9 +45,13 @@ static inline void __clear_shadow_entry(struct address_space *mapping,
 static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
 			       void *entry)
 {
+	spin_lock(&mapping->host->i_lock);
 	xa_lock_irq(&mapping->i_pages);
 	__clear_shadow_entry(mapping, index, entry);
 	xa_unlock_irq(&mapping->i_pages);
+	if (mapping_shrinkable(mapping))
+		inode_add_lru(mapping->host);
+	spin_unlock(&mapping->host->i_lock);
 }
 
 /*
@@ -73,8 +77,10 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 		return;
 
 	dax = dax_mapping(mapping);
-	if (!dax)
+	if (!dax) {
+		spin_lock(&mapping->host->i_lock);
 		xa_lock_irq(&mapping->i_pages);
+	}
 
 	for (i = j; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
@@ -93,8 +99,12 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 		__clear_shadow_entry(mapping, index, page);
 	}
 
-	if (!dax)
+	if (!dax) {
 		xa_unlock_irq(&mapping->i_pages);
+		if (mapping_shrinkable(mapping))
+			inode_add_lru(mapping->host);
+		spin_unlock(&mapping->host->i_lock);
+	}
 	pvec->nr = j;
 }
 
@@ -567,6 +577,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
 		return 0;
 
+	spin_lock(&mapping->host->i_lock);
 	xa_lock_irq(&mapping->i_pages);
 	if (PageDirty(page))
 		goto failed;
@@ -574,6 +585,9 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	BUG_ON(page_has_private(page));
 	__delete_from_page_cache(page, NULL);
 	xa_unlock_irq(&mapping->i_pages);
+	if (mapping_shrinkable(mapping))
+		inode_add_lru(mapping->host);
+	spin_unlock(&mapping->host->i_lock);
 
 	if (mapping->a_ops->freepage)
 		mapping->a_ops->freepage(page);
@@ -582,6 +596,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	return 1;
 failed:
 	xa_unlock_irq(&mapping->i_pages);
+	spin_unlock(&mapping->host->i_lock);
 	return 0;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 41f5f6007c30..26f07518d7a4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1190,6 +1190,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(mapping != page_mapping(page));
 
+	if (!PageSwapCache(page))
+		spin_lock(&mapping->host->i_lock);
 	xa_lock_irq(&mapping->i_pages);
 	/*
 	 * The non racy check for a busy page.
@@ -1258,6 +1260,9 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 			shadow = workingset_eviction(page, target_memcg);
 		__delete_from_page_cache(page, shadow);
 		xa_unlock_irq(&mapping->i_pages);
+		if (mapping_shrinkable(mapping))
+			inode_add_lru(mapping->host);
+		spin_unlock(&mapping->host->i_lock);
 
 		if (freepage != NULL)
 			freepage(page);
@@ -1267,6 +1272,8 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 
 cannot_free:
 	xa_unlock_irq(&mapping->i_pages);
+	if (!PageSwapCache(page))
+		spin_unlock(&mapping->host->i_lock);
 	return 0;
 }
 
diff --git a/mm/workingset.c b/mm/workingset.c
index d5b81e4f4cbe..23df60ce2e10 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -543,6 +543,13 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 		goto out;
 	}
 
+	if (!spin_trylock(&mapping->host->i_lock)) {
+		xa_unlock(&mapping->i_pages);
+		spin_unlock_irq(lru_lock);
+		ret = LRU_RETRY;
+		goto out;
+	}
+
 	list_lru_isolate(lru, item);
 	__dec_lruvec_kmem_state(node, WORKINGSET_NODES);
 
@@ -562,6 +569,9 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 
 out_invalid:
 	xa_unlock_irq(&mapping->i_pages);
+	if (mapping_shrinkable(mapping))
+		inode_add_lru(mapping->host);
+	spin_unlock(&mapping->host->i_lock);
 	ret = LRU_REMOVED_RETRY;
 out:
 	cond_resched();
-- 
cgit v1.2.3


From 83c1fd763b3220458652f7d656d5047292ebcde4 Mon Sep 17 00:00:00 2001
From: zhangyiru <zhangyiru3@huawei.com>
Date: Mon, 8 Nov 2021 18:31:27 -0800
Subject: mm,hugetlb: remove mlock ulimit for SHM_HUGETLB

Commit 21a3c273f88c ("mm, hugetlb: add thread name and pid to
SHM_HUGETLB mlock rlimit warning") marked this as deprecated in 2012,
but it is not deleted yet.

Mike says he still sees that message in log files on occasion, so maybe we
should preserve this warning.

Also remove hugetlbfs related user_shm_unlock in ipc/shm.c and remove the
user_shm_unlock after out.

Link: https://lkml.kernel.org/r/20211103105857.25041-1-zhangyiru3@huawei.com
Signed-off-by: zhangyiru <zhangyiru3@huawei.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Liu Zixian <liuzixian4@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: wuxu.wu <wuxu.wu@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    | 23 ++++++++---------------
 include/linux/hugetlb.h |  6 ++----
 ipc/shm.c               |  8 +-------
 mm/memfd.c              |  4 +---
 mm/mmap.c               |  3 +--
 5 files changed, 13 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cdfb1ae78a3f..49d2e686be74 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1446,8 +1446,8 @@ static int get_hstate_idx(int page_size_log)
  * otherwise hugetlb_reserve_pages reserves one less hugepages than intended.
  */
 struct file *hugetlb_file_setup(const char *name, size_t size,
-				vm_flags_t acctflag, struct ucounts **ucounts,
-				int creat_flags, int page_size_log)
+				vm_flags_t acctflag, int creat_flags,
+				int page_size_log)
 {
 	struct inode *inode;
 	struct vfsmount *mnt;
@@ -1458,22 +1458,19 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	if (hstate_idx < 0)
 		return ERR_PTR(-ENODEV);
 
-	*ucounts = NULL;
 	mnt = hugetlbfs_vfsmount[hstate_idx];
 	if (!mnt)
 		return ERR_PTR(-ENOENT);
 
 	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
-		*ucounts = current_ucounts();
-		if (user_shm_lock(size, *ucounts)) {
-			task_lock(current);
-			pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n",
+		struct ucounts *ucounts = current_ucounts();
+
+		if (user_shm_lock(size, ucounts)) {
+			pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is obsolete\n",
 				current->comm, current->pid);
-			task_unlock(current);
-		} else {
-			*ucounts = NULL;
-			return ERR_PTR(-EPERM);
+			user_shm_unlock(size, ucounts);
 		}
+		return ERR_PTR(-EPERM);
 	}
 
 	file = ERR_PTR(-ENOSPC);
@@ -1498,10 +1495,6 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 
 	iput(inode);
 out:
-	if (*ucounts) {
-		user_shm_unlock(size, *ucounts);
-		*ucounts = NULL;
-	}
 	return file;
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 44c2ab0dfa59..00351ccb49a3 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -477,8 +477,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
 extern const struct file_operations hugetlbfs_file_operations;
 extern const struct vm_operations_struct hugetlb_vm_ops;
 struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
-				struct ucounts **ucounts, int creat_flags,
-				int page_size_log);
+				int creat_flags, int page_size_log);
 
 static inline bool is_file_hugepages(struct file *file)
 {
@@ -497,8 +496,7 @@ static inline struct hstate *hstate_inode(struct inode *i)
 #define is_file_hugepages(file)			false
 static inline struct file *
 hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
-		struct ucounts **ucounts, int creat_flags,
-		int page_size_log)
+		int creat_flags, int page_size_log)
 {
 	return ERR_PTR(-ENOSYS);
 }
diff --git a/ipc/shm.c b/ipc/shm.c
index ab749be6d8b7..4942bdd65748 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -287,9 +287,6 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	shm_unlock(shp);
 	if (!is_file_hugepages(shm_file))
 		shmem_lock(shm_file, 0, shp->mlock_ucounts);
-	else if (shp->mlock_ucounts)
-		user_shm_unlock(i_size_read(file_inode(shm_file)),
-				shp->mlock_ucounts);
 	fput(shm_file);
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
@@ -650,8 +647,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		if (shmflg & SHM_NORESERVE)
 			acctflag = VM_NORESERVE;
 		file = hugetlb_file_setup(name, hugesize, acctflag,
-				  &shp->mlock_ucounts, HUGETLB_SHMFS_INODE,
-				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
+				HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
 	} else {
 		/*
 		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
@@ -698,8 +694,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 no_id:
 	ipc_update_pid(&shp->shm_cprid, NULL);
 	ipc_update_pid(&shp->shm_lprid, NULL);
-	if (is_file_hugepages(file) && shp->mlock_ucounts)
-		user_shm_unlock(size, shp->mlock_ucounts);
 	fput(file);
 	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
 	return error;
diff --git a/mm/memfd.c b/mm/memfd.c
index 081dd33e6a61..9f80f162791a 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -297,9 +297,7 @@ SYSCALL_DEFINE2(memfd_create,
 	}
 
 	if (flags & MFD_HUGETLB) {
-		struct ucounts *ucounts = NULL;
-
-		file = hugetlb_file_setup(name, 0, VM_NORESERVE, &ucounts,
+		file = hugetlb_file_setup(name, 0, VM_NORESERVE,
 					HUGETLB_ANONHUGE_INODE,
 					(flags >> MFD_HUGE_SHIFT) &
 					MFD_HUGE_MASK);
diff --git a/mm/mmap.c b/mm/mmap.c
index b22a07f5e761..bfb0ea164a90 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1599,7 +1599,6 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 			goto out_fput;
 		}
 	} else if (flags & MAP_HUGETLB) {
-		struct ucounts *ucounts = NULL;
 		struct hstate *hs;
 
 		hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
@@ -1615,7 +1614,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
 		 */
 		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len,
 				VM_NORESERVE,
-				&ucounts, HUGETLB_ANONHUGE_INODE,
+				HUGETLB_ANONHUGE_INODE,
 				(flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK);
 		if (IS_ERR(file))
 			return PTR_ERR(file);
-- 
cgit v1.2.3


From cc5f2704c93456e6f1c671c5cf7b582a55df5484 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 8 Nov 2021 18:31:48 -0800
Subject: proc/vmcore: convert oldmem_pfn_is_ram callback to more generic
 vmcore callbacks

Let's support multiple registered callbacks, making sure that
registering vmcore callbacks cannot fail.  Make the callback return a
bool instead of an int, handling how to deal with errors internally.
Drop unused HAVE_OLDMEM_PFN_IS_RAM.

We soon want to make use of this infrastructure from other drivers:
virtio-mem, registering one callback for each virtio-mem device, to
prevent reading unplugged virtio-mem memory.

Handle it via a generic vmcore_cb structure, prepared for future
extensions: for example, once we support virtio-mem on s390x where the
vmcore is completely constructed in the second kernel, we want to detect
and add plugged virtio-mem memory ranges to the vmcore in order for them
to get dumped properly.

Handle corner cases that are unexpected and shouldn't happen in sane
setups: registering a callback after the vmcore has already been opened
(warn only) and unregistering a callback after the vmcore has already been
opened (warn and essentially read only zeroes from that point on).

Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/aperture_64.c | 13 +++++-
 arch/x86/xen/mmu_hvm.c        | 11 +++--
 fs/proc/vmcore.c              | 99 ++++++++++++++++++++++++++++++-------------
 include/linux/crash_dump.h    | 26 ++++++++++--
 4 files changed, 111 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 10562885f5fc..af3ba08b684b 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn)
 		      (pfn >= aperture_pfn_start + aperture_page_count));
 }
 
+#ifdef CONFIG_PROC_VMCORE
+static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
+{
+	return !!gart_mem_pfn_is_ram(pfn);
+}
+
+static struct vmcore_cb gart_vmcore_cb = {
+	.pfn_is_ram = gart_oldmem_pfn_is_ram,
+};
+#endif
+
 static void __init exclude_from_core(u64 aper_base, u32 aper_order)
 {
 	aperture_pfn_start = aper_base >> PAGE_SHIFT;
 	aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
 #ifdef CONFIG_PROC_VMCORE
-	WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+	register_vmcore_cb(&gart_vmcore_cb);
 #endif
 #ifdef CONFIG_PROC_KCORE
 	WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c
index 6ba8826dcdcc..509bdee3ab90 100644
--- a/arch/x86/xen/mmu_hvm.c
+++ b/arch/x86/xen/mmu_hvm.c
@@ -12,10 +12,10 @@
  * The kdump kernel has to check whether a pfn of the crashed kernel
  * was a ballooned page. vmcore is using this function to decide
  * whether to access a pfn of the crashed kernel.
- * Returns 0 if the pfn is not backed by a RAM page, the caller may
+ * Returns "false" if the pfn is not backed by a RAM page, the caller may
  * handle the pfn special in this case.
  */
-static int xen_oldmem_pfn_is_ram(unsigned long pfn)
+static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
 {
 	struct xen_hvm_get_mem_type a = {
 		.domid = DOMID_SELF,
@@ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn)
 
 	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
 		pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
-		return -ENXIO;
+		return true;
 	}
 	return a.mem_type != HVMMEM_mmio_dm;
 }
+static struct vmcore_cb xen_vmcore_cb = {
+	.pfn_is_ram = xen_vmcore_pfn_is_ram,
+};
 #endif
 
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
@@ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void)
 	if (is_pagetable_dying_supported())
 		pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
 #ifdef CONFIG_PROC_VMCORE
-	WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
+	register_vmcore_cb(&xen_vmcore_cb);
 #endif
 }
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index a9bd80ab670e..7a04b2eca287 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
 /* Device Dump Size */
 static size_t vmcoredd_orig_sz;
 
-/*
- * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
- * The called function has to take care of module refcounting.
- */
-static int (*oldmem_pfn_is_ram)(unsigned long pfn);
-
-int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
+static DECLARE_RWSEM(vmcore_cb_rwsem);
+/* List of registered vmcore callbacks. */
+static LIST_HEAD(vmcore_cb_list);
+/* Whether we had a surprise unregistration of a callback. */
+static bool vmcore_cb_unstable;
+/* Whether the vmcore has been opened once. */
+static bool vmcore_opened;
+
+void register_vmcore_cb(struct vmcore_cb *cb)
 {
-	if (oldmem_pfn_is_ram)
-		return -EBUSY;
-	oldmem_pfn_is_ram = fn;
-	return 0;
+	down_write(&vmcore_cb_rwsem);
+	INIT_LIST_HEAD(&cb->next);
+	list_add_tail(&cb->next, &vmcore_cb_list);
+	/*
+	 * Registering a vmcore callback after the vmcore was opened is
+	 * very unusual (e.g., manual driver loading).
+	 */
+	if (vmcore_opened)
+		pr_warn_once("Unexpected vmcore callback registration\n");
+	up_write(&vmcore_cb_rwsem);
 }
-EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
+EXPORT_SYMBOL_GPL(register_vmcore_cb);
 
-void unregister_oldmem_pfn_is_ram(void)
+void unregister_vmcore_cb(struct vmcore_cb *cb)
 {
-	oldmem_pfn_is_ram = NULL;
-	wmb();
+	down_write(&vmcore_cb_rwsem);
+	list_del(&cb->next);
+	/*
+	 * Unregistering a vmcore callback after the vmcore was opened is
+	 * very unusual (e.g., forced driver removal), but we cannot stop
+	 * unregistering.
+	 */
+	if (vmcore_opened) {
+		pr_warn_once("Unexpected vmcore callback unregistration\n");
+		vmcore_cb_unstable = true;
+	}
+	up_write(&vmcore_cb_rwsem);
 }
-EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
+EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
 
 static bool pfn_is_ram(unsigned long pfn)
 {
-	int (*fn)(unsigned long pfn);
-	/* pfn is ram unless fn() checks pagetype */
+	struct vmcore_cb *cb;
 	bool ret = true;
 
-	/*
-	 * Ask hypervisor if the pfn is really ram.
-	 * A ballooned page contains no data and reading from such a page
-	 * will cause high load in the hypervisor.
-	 */
-	fn = oldmem_pfn_is_ram;
-	if (fn)
-		ret = !!fn(pfn);
+	lockdep_assert_held_read(&vmcore_cb_rwsem);
+	if (unlikely(vmcore_cb_unstable))
+		return false;
+
+	list_for_each_entry(cb, &vmcore_cb_list, next) {
+		if (unlikely(!cb->pfn_is_ram))
+			continue;
+		ret = cb->pfn_is_ram(cb, pfn);
+		if (!ret)
+			break;
+	}
 
 	return ret;
 }
 
+static int open_vmcore(struct inode *inode, struct file *file)
+{
+	down_read(&vmcore_cb_rwsem);
+	vmcore_opened = true;
+	up_read(&vmcore_cb_rwsem);
+
+	return 0;
+}
+
 /* Reads a page from the oldmem device from given offset. */
 ssize_t read_from_oldmem(char *buf, size_t count,
 			 u64 *ppos, int userbuf,
@@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
 	offset = (unsigned long)(*ppos % PAGE_SIZE);
 	pfn = (unsigned long)(*ppos / PAGE_SIZE);
 
+	down_read(&vmcore_cb_rwsem);
 	do {
 		if (count > (PAGE_SIZE - offset))
 			nr_bytes = PAGE_SIZE - offset;
@@ -136,8 +166,10 @@ ssize_t read_from_oldmem(char *buf, size_t count,
 				tmp = copy_oldmem_page(pfn, buf, nr_bytes,
 						       offset, userbuf);
 
-			if (tmp < 0)
+			if (tmp < 0) {
+				up_read(&vmcore_cb_rwsem);
 				return tmp;
+			}
 		}
 		*ppos += nr_bytes;
 		count -= nr_bytes;
@@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
 		offset = 0;
 	} while (count);
 
+	up_read(&vmcore_cb_rwsem);
 	return read;
 }
 
@@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
 			    unsigned long from, unsigned long pfn,
 			    unsigned long size, pgprot_t prot)
 {
+	int ret;
+
 	/*
 	 * Check if oldmem_pfn_is_ram was registered to avoid
 	 * looping over all pages without a reason.
 	 */
-	if (oldmem_pfn_is_ram)
-		return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
+	down_read(&vmcore_cb_rwsem);
+	if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
+		ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
 	else
-		return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+		ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
+	up_read(&vmcore_cb_rwsem);
+	return ret;
 }
 
 static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
@@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 #endif
 
 static const struct proc_ops vmcore_proc_ops = {
+	.proc_open	= open_vmcore,
 	.proc_read	= read_vmcore,
 	.proc_lseek	= default_llseek,
 	.proc_mmap	= mmap_vmcore,
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 2618577a4d6d..0c547d866f1e 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -91,9 +91,29 @@ static inline void vmcore_unusable(void)
 		elfcorehdr_addr = ELFCORE_ADDR_ERR;
 }
 
-#define HAVE_OLDMEM_PFN_IS_RAM 1
-extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn));
-extern void unregister_oldmem_pfn_is_ram(void);
+/**
+ * struct vmcore_cb - driver callbacks for /proc/vmcore handling
+ * @pfn_is_ram: check whether a PFN really is RAM and should be accessed when
+ *              reading the vmcore. Will return "true" if it is RAM or if the
+ *              callback cannot tell. If any callback returns "false", it's not
+ *              RAM and the page must not be accessed; zeroes should be
+ *              indicated in the vmcore instead. For example, a ballooned page
+ *              contains no data and reading from such a page will cause high
+ *              load in the hypervisor.
+ * @next: List head to manage registered callbacks internally; initialized by
+ *        register_vmcore_cb().
+ *
+ * vmcore callbacks allow drivers managing physical memory ranges to
+ * coordinate with vmcore handling code, for example, to prevent accessing
+ * physical memory ranges that should not be accessed when reading the vmcore,
+ * although included in the vmcore header as memory ranges to dump.
+ */
+struct vmcore_cb {
+	bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
+	struct list_head next;
+};
+extern void register_vmcore_cb(struct vmcore_cb *cb);
+extern void unregister_vmcore_cb(struct vmcore_cb *cb);
 
 #else /* !CONFIG_CRASH_DUMP */
 static inline bool is_kdump_kernel(void) { return 0; }
-- 
cgit v1.2.3


From f5d80614844a725cbf66fd6524e01c218ede2141 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:08 -0800
Subject: kernel.h: drop unneeded <linux/kernel.h> inclusion from other headers

Patch series "kernel.h further split", v5.

kernel.h is a set of something which is not related to each other and
often used in non-crossed compilation units, especially when drivers
need only one or two macro definitions from it.

This patch (of 7):

There is no evidence we need kernel.h inclusion in certain headers.
Drop unneeded <linux/kernel.h> inclusion from other headers.

[sfr@canb.auug.org.au: bottom_half.h needs kernel]
  Link: https://lkml.kernel.org/r/20211015202908.1c417ae2@canb.auug.org.au

Link: https://lkml.kernel.org/r/20211013170417.87909-1-andriy.shevchenko@linux.intel.com
Link: https://lkml.kernel.org/r/20211013170417.87909-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bottom_half.h | 1 +
 include/linux/rwsem.h       | 1 -
 include/linux/smp.h         | 1 -
 include/linux/spinlock.h    | 1 -
 4 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index eed86eb0a1de..11d107d88d03 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_BH_H
 #define _LINUX_BH_H
 
+#include <linux/kernel.h>
 #include <linux/preempt.h>
 
 #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS)
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 352c6127cb90..f9348769e558 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -11,7 +11,6 @@
 #include <linux/linkage.h>
 
 #include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/atomic.h>
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 510519e8a1eb..a80ab58ae3f1 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -108,7 +108,6 @@ static inline void on_each_cpu_cond(smp_cond_func_t cond_func,
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
-#include <linux/kernel.h>
 #include <linux/compiler.h>
 #include <linux/thread_info.h>
 #include <asm/smp.h>
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 45310ea1b1d7..22d672f81705 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -57,7 +57,6 @@
 #include <linux/compiler.h>
 #include <linux/irqflags.h>
 #include <linux/thread_info.h>
-#include <linux/kernel.h>
 #include <linux/stringify.h>
 #include <linux/bottom_half.h>
 #include <linux/lockdep.h>
-- 
cgit v1.2.3


From d2a8ebbf8192b84b11f1b204c4f7c602df32aeac Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:12 -0800
Subject: kernel.h: split out container_of() and typeof_member() macros

kernel.h is being used as a dump for all kinds of stuff for a long time.
Here is the attempt cleaning it up by splitting out container_of() and
typeof_member() macros.

For time being include new header back to kernel.h to avoid twisted
indirected includes for existing users.

Note, there are _a lot_ of headers and modules that include kernel.h
solely for one of these macros and this allows to unburden compiler for
the twisted inclusion paths and to make new code cleaner in the future.

Link: https://lkml.kernel.org/r/20211013170417.87909-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/container_of.h | 40 ++++++++++++++++++++++++++++++++++++++++
 include/linux/kernel.h       | 33 +--------------------------------
 2 files changed, 41 insertions(+), 32 deletions(-)
 create mode 100644 include/linux/container_of.h

(limited to 'include/linux')

diff --git a/include/linux/container_of.h b/include/linux/container_of.h
new file mode 100644
index 000000000000..dd56019838c6
--- /dev/null
+++ b/include/linux/container_of.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CONTAINER_OF_H
+#define _LINUX_CONTAINER_OF_H
+
+#include <linux/build_bug.h>
+#include <linux/err.h>
+
+#define typeof_member(T, m)	typeof(((T*)0)->m)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({				\
+	void *__mptr = (void *)(ptr);					\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	((type *)(__mptr - offsetof(type, member))); })
+
+/**
+ * container_of_safe - cast a member of a structure out to the containing structure
+ * @ptr:	the pointer to the member.
+ * @type:	the type of the container struct this is embedded in.
+ * @member:	the name of the member within the struct.
+ *
+ * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged.
+ */
+#define container_of_safe(ptr, type, member) ({				\
+	void *__mptr = (void *)(ptr);					\
+	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
+			 !__same_type(*(ptr), void),			\
+			 "pointer type mismatch in container_of()");	\
+	IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) :			\
+		((type *)(__mptr - offsetof(type, member))); })
+
+#endif	/* _LINUX_CONTAINER_OF_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 471bc0593679..ed4465757cec 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -9,6 +9,7 @@
 #include <linux/stddef.h>
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <linux/container_of.h>
 #include <linux/bitops.h>
 #include <linux/kstrtox.h>
 #include <linux/log2.h>
@@ -52,8 +53,6 @@
 }					\
 )
 
-#define typeof_member(T, m)	typeof(((T*)0)->m)
-
 #define _RET_IP_		(unsigned long)__builtin_return_address(0)
 #define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
 
@@ -484,36 +483,6 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #define __CONCAT(a, b) a ## b
 #define CONCATENATE(a, b) __CONCAT(a, b)
 
-/**
- * container_of - cast a member of a structure out to the containing structure
- * @ptr:	the pointer to the member.
- * @type:	the type of the container struct this is embedded in.
- * @member:	the name of the member within the struct.
- *
- */
-#define container_of(ptr, type, member) ({				\
-	void *__mptr = (void *)(ptr);					\
-	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
-			 !__same_type(*(ptr), void),			\
-			 "pointer type mismatch in container_of()");	\
-	((type *)(__mptr - offsetof(type, member))); })
-
-/**
- * container_of_safe - cast a member of a structure out to the containing structure
- * @ptr:	the pointer to the member.
- * @type:	the type of the container struct this is embedded in.
- * @member:	the name of the member within the struct.
- *
- * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged.
- */
-#define container_of_safe(ptr, type, member) ({				\
-	void *__mptr = (void *)(ptr);					\
-	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
-			 !__same_type(*(ptr), void),			\
-			 "pointer type mismatch in container_of()");	\
-	IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) :			\
-		((type *)(__mptr - offsetof(type, member))); })
-
 /* Rebuild everything on CONFIG_FTRACE_MCOUNT_RECORD */
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD
-- 
cgit v1.2.3


From cd7187e112c91186185c0553b1f9d033ed399d3a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:19 -0800
Subject: include/linux/list.h: replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Link: https://lkml.kernel.org/r/20211013170417.87909-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index f2af4b4aa4e9..6636fc07f918 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -2,11 +2,13 @@
 #ifndef _LINUX_LIST_H
 #define _LINUX_LIST_H
 
+#include <linux/container_of.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/poison.h>
 #include <linux/const.h>
-#include <linux/kernel.h>
+
+#include <asm/barrier.h>
 
 /*
  * Circular doubly linked list implementation.
-- 
cgit v1.2.3


From 50b09d6145da04e19fa448670e4918ce496f1c77 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:22 -0800
Subject: include/linux/llist.h: replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Link: https://lkml.kernel.org/r/20211013170417.87909-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/llist.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/llist.h b/include/linux/llist.h
index 24f207b0190b..85bda2d02d65 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -49,7 +49,9 @@
  */
 
 #include <linux/atomic.h>
-#include <linux/kernel.h>
+#include <linux/container_of.h>
+#include <linux/stddef.h>
+#include <linux/types.h>
 
 struct llist_head {
 	struct llist_node *first;
-- 
cgit v1.2.3


From c540f9595956ff3c4b2cca1145c893941f921bcd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:25 -0800
Subject: include/linux/plist.h: replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Link: https://lkml.kernel.org/r/20211013170417.87909-7-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: Brendan Higgins <brendanhiggins@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: Waiman Long <longman@redhat.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/plist.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index 66bab1bca35c..0f352c1d3c80 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -73,8 +73,11 @@
 #ifndef _LINUX_PLIST_H_
 #define _LINUX_PLIST_H_
 
-#include <linux/kernel.h>
+#include <linux/container_of.h>
 #include <linux/list.h>
+#include <linux/types.h>
+
+#include <asm/bug.h>
 
 struct plist_head {
 	struct list_head node_list;
-- 
cgit v1.2.3


From 5f6286a608107a68646241b57d2bd2a4fc139ef9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:32 -0800
Subject: include/linux/delay.h: replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

[akpm@linux-foundation.org: cxd2880_common.h needs bits.h for GENMASK()]
[andriy.shevchenko@linux.intel.com: delay.h: fix for removed kernel.h]
  Link: https://lkml.kernel.org/r/20211028170143.56523-1-andriy.shevchenko@linux.intel.com
[akpm@linux-foundation.org: include/linux/fwnode.h needs bits.h for BIT()]

Link: https://lkml.kernel.org/r/20211027150324.79827-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/riscv/lib/delay.c                               | 4 ++++
 arch/s390/include/asm/facility.h                     | 4 ++++
 drivers/media/dvb-frontends/cxd2880/cxd2880_common.h | 1 +
 include/linux/delay.h                                | 2 +-
 include/linux/fwnode.h                               | 1 +
 5 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c
index f51c9a03bca1..49d510ba75fd 100644
--- a/arch/riscv/lib/delay.c
+++ b/arch/riscv/lib/delay.c
@@ -4,10 +4,14 @@
  */
 
 #include <linux/delay.h>
+#include <linux/math.h>
 #include <linux/param.h>
 #include <linux/timex.h>
+#include <linux/types.h>
 #include <linux/export.h>
 
+#include <asm/processor.h>
+
 /*
  * This is copies from arch/arm/include/asm/delay.h
  *
diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h
index e3aa354ab9f4..94b6919026df 100644
--- a/arch/s390/include/asm/facility.h
+++ b/arch/s390/include/asm/facility.h
@@ -9,8 +9,12 @@
 #define __ASM_FACILITY_H
 
 #include <asm/facility-defs.h>
+
+#include <linux/minmax.h>
 #include <linux/string.h>
+#include <linux/types.h>
 #include <linux/preempt.h>
+
 #include <asm/lowcore.h>
 
 #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)
diff --git a/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h b/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h
index b05bce71ab35..9dc15a5a9683 100644
--- a/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h
+++ b/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
+#include <linux/bits.h>
 #include <linux/string.h>
 
 int cxd2880_convert2s_complement(u32 value, u32 bitlen);
diff --git a/include/linux/delay.h b/include/linux/delay.h
index 1d0e2ce6b6d9..8eacf67eb212 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -19,7 +19,7 @@
  *   https://lists.openwall.net/linux-kernel/2011/01/09/56
  */
 
-#include <linux/kernel.h>
+#include <linux/math.h>
 
 extern unsigned long loops_per_jiffy;
 
diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h
index 9f4ad719bfe3..3a532ba66f6c 100644
--- a/include/linux/fwnode.h
+++ b/include/linux/fwnode.h
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/bits.h>
 #include <linux/err.h>
 
 struct fwnode_operations;
-- 
cgit v1.2.3


From 1fcbd5deac51f30a7ed3f10bea422d08a2b3aba6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:35 -0800
Subject: include/linux/sbitmap.h: replace kernel.h with the necessary
 inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Link: https://lkml.kernel.org/r/20211027150437.79921-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sbitmap.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 2713e689ad66..8121034d1f13 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -9,8 +9,17 @@
 #ifndef __LINUX_SCALE_BITMAP_H
 #define __LINUX_SCALE_BITMAP_H
 
-#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/cache.h>
+#include <linux/list.h>
+#include <linux/log2.h>
+#include <linux/minmax.h>
+#include <linux/percpu.h>
 #include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/types.h>
+#include <linux/wait.h>
 
 struct seq_file;
 
-- 
cgit v1.2.3


From 98e1385ef24bd607586fbf44e73decd5112d3d4a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:38 -0800
Subject: include/linux/radix-tree.h: replace kernel.h with the necessary
 inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Link: https://lkml.kernel.org/r/20211027150528.80003-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 64ad900ac742..f7c1d21c2f39 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -9,8 +9,10 @@
 #define _LINUX_RADIX_TREE_H
 
 #include <linux/bitops.h>
-#include <linux/kernel.h>
+#include <linux/gfp.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/math.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/rcupdate.h>
-- 
cgit v1.2.3


From b4b87651104dd32ab258d097d47b97e243a95222 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:32:41 -0800
Subject: include/linux/generic-radix-tree.h: replace kernel.h with the
 necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

[akpm@linux-foundation.org: include math.h for round_up()]

Link: https://lkml.kernel.org/r/20211027150548.80042-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/generic-radix-tree.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h
index bfd00320c7f3..107613f7d792 100644
--- a/include/linux/generic-radix-tree.h
+++ b/include/linux/generic-radix-tree.h
@@ -38,8 +38,9 @@
 
 #include <asm/page.h>
 #include <linux/bug.h>
-#include <linux/kernel.h>
 #include <linux/log2.h>
+#include <linux/math.h>
+#include <linux/types.h>
 
 struct genradix_root;
 
-- 
cgit v1.2.3


From e52340de11d8bca3ba35e5a72fea4f2a5b6abbbb Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 8 Nov 2021 18:32:43 -0800
Subject: kernel.h: split out instruction pointer accessors

bottom_half.h needs _THIS_IP_ to be standalone, so split that and
_RET_IP_ out from kernel.h into the new instruction_pointer.h.  kernel.h
directly needs them, so include it there and replace the include of
kernel.h with this new file in bottom_half.h.

Link: https://lkml.kernel.org/r/20211028161248.45232-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bottom_half.h         | 2 +-
 include/linux/instruction_pointer.h | 8 ++++++++
 include/linux/kernel.h              | 4 +---
 3 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/instruction_pointer.h

(limited to 'include/linux')

diff --git a/include/linux/bottom_half.h b/include/linux/bottom_half.h
index 11d107d88d03..fc53e0ad56d9 100644
--- a/include/linux/bottom_half.h
+++ b/include/linux/bottom_half.h
@@ -2,7 +2,7 @@
 #ifndef _LINUX_BH_H
 #define _LINUX_BH_H
 
-#include <linux/kernel.h>
+#include <linux/instruction_pointer.h>
 #include <linux/preempt.h>
 
 #if defined(CONFIG_PREEMPT_RT) || defined(CONFIG_TRACE_IRQFLAGS)
diff --git a/include/linux/instruction_pointer.h b/include/linux/instruction_pointer.h
new file mode 100644
index 000000000000..cda1f706eaeb
--- /dev/null
+++ b/include/linux/instruction_pointer.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_INSTRUCTION_POINTER_H
+#define _LINUX_INSTRUCTION_POINTER_H
+
+#define _RET_IP_		(unsigned long)__builtin_return_address(0)
+#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
+
+#endif /* _LINUX_INSTRUCTION_POINTER_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ed4465757cec..46ca4404fb93 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -20,6 +20,7 @@
 #include <linux/printk.h>
 #include <linux/build_bug.h>
 #include <linux/static_call_types.h>
+#include <linux/instruction_pointer.h>
 #include <asm/byteorder.h>
 
 #include <uapi/linux/kernel.h>
@@ -53,9 +54,6 @@
 }					\
 )
 
-#define _RET_IP_		(unsigned long)__builtin_return_address(0)
-#define _THIS_IP_  ({ __label__ __here; __here: (unsigned long)&&__here; })
-
 /**
  * upper_32_bits - return bits 32-63 of a number
  * @n: the number we're accessing
-- 
cgit v1.2.3


From e1edc277e6f6dfb372216522dfc57f9381c39e35 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Mon, 8 Nov 2021 18:32:46 -0800
Subject: linux/container_of.h: switch to static_assert

_Static_assert() is evaluated already in the compiler's frontend, and
gives a somehat more to-the-point error, compared to the BUILD_BUG_ON
macro, which only fires after the optimizer has had a chance to
eliminate calls to functions marked with __attribute__((error)).  In
theory, this might make builds a tiny bit faster.

There's also a little less gunk in the error message emitted:

  lib/sort.c: In function `foo':
  include/linux/build_bug.h:78:41: error: static assertion failed: "pointer type mismatch in container_of()"
     78 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)

compared to

  lib/sort.c: In function `foo':
  include/linux/compiler_types.h:322:38: error: call to `__compiletime_assert_2' declared with attribute error: pointer type mismatch in container_of()
    322 |  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)

While at it, fix the copy-pasto in container_of_safe().

Link: https://lkml.kernel.org/r/20211015090530.2774079-1-linux@rasmusvillemoes.dk
Link: https://lore.kernel.org/lkml/20211014132331.GA4811@kernel.org/T/
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/container_of.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/container_of.h b/include/linux/container_of.h
index dd56019838c6..2f4944b791b8 100644
--- a/include/linux/container_of.h
+++ b/include/linux/container_of.h
@@ -16,9 +16,9 @@
  */
 #define container_of(ptr, type, member) ({				\
 	void *__mptr = (void *)(ptr);					\
-	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
-			 !__same_type(*(ptr), void),			\
-			 "pointer type mismatch in container_of()");	\
+	static_assert(__same_type(*(ptr), ((type *)0)->member) ||	\
+		      __same_type(*(ptr), void),			\
+		      "pointer type mismatch in container_of()");	\
 	((type *)(__mptr - offsetof(type, member))); })
 
 /**
@@ -31,9 +31,9 @@
  */
 #define container_of_safe(ptr, type, member) ({				\
 	void *__mptr = (void *)(ptr);					\
-	BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) &&	\
-			 !__same_type(*(ptr), void),			\
-			 "pointer type mismatch in container_of()");	\
+	static_assert(__same_type(*(ptr), ((type *)0)->member) ||	\
+		      __same_type(*(ptr), void),			\
+		      "pointer type mismatch in container_of_safe()");	\
 	IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) :			\
 		((type *)(__mptr - offsetof(type, member))); })
 
-- 
cgit v1.2.3


From 505be48165fa2f2cb795659b4fc61f35563d105e Mon Sep 17 00:00:00 2001
From: Imran Khan <imran.f.khan@oracle.com>
Date: Mon, 8 Nov 2021 18:33:12 -0800
Subject: lib, stackdepot: add helper to print stack entries

To print a stack entries, users of stackdepot, first use stack_depot_fetch
to get a list of stack entries and then use stack_trace_print to print
this list.  Provide a helper in stackdepot to print stack entries based on
stackdepot handle.  Also change above mentioned users to use this helper.

Link: https://lkml.kernel.org/r/20210915014806.3206938-3-imran.f.khan@oracle.com
Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/stackdepot.h |  2 ++
 lib/stackdepot.c           | 18 ++++++++++++++++++
 mm/kasan/report.c          | 15 +++------------
 mm/page_owner.c            | 13 ++++---------
 4 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index d29860966bc9..8ab37500fcba 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -25,6 +25,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 			       unsigned long **entries);
 
+void stack_depot_print(depot_stack_handle_t stack);
+
 #ifdef CONFIG_STACKDEPOT
 int stack_depot_init(void);
 #else
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index f034f095c627..4e1f2982d0fa 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -213,6 +213,24 @@ static inline struct stack_record *find_stack(struct stack_record *bucket,
 	return NULL;
 }
 
+/**
+ * stack_depot_print - print stack entries from a depot
+ *
+ * @stack:		Stack depot handle which was returned from
+ *			stack_depot_save().
+ *
+ */
+void stack_depot_print(depot_stack_handle_t stack)
+{
+	unsigned long *entries;
+	unsigned int nr_entries;
+
+	nr_entries = stack_depot_fetch(stack, &entries);
+	if (nr_entries > 0)
+		stack_trace_print(entries, nr_entries, 0);
+}
+EXPORT_SYMBOL_GPL(stack_depot_print);
+
 /**
  * stack_depot_fetch - Fetch stack entries from a depot
  *
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 884a950c7026..3239fd8f8747 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -132,20 +132,11 @@ static void end_report(unsigned long *flags, unsigned long addr)
 	kasan_enable_current();
 }
 
-static void print_stack(depot_stack_handle_t stack)
-{
-	unsigned long *entries;
-	unsigned int nr_entries;
-
-	nr_entries = stack_depot_fetch(stack, &entries);
-	stack_trace_print(entries, nr_entries, 0);
-}
-
 static void print_track(struct kasan_track *track, const char *prefix)
 {
 	pr_err("%s by task %u:\n", prefix, track->pid);
 	if (track->stack) {
-		print_stack(track->stack);
+		stack_depot_print(track->stack);
 	} else {
 		pr_err("(stack is not available)\n");
 	}
@@ -214,12 +205,12 @@ static void describe_object_stacks(struct kmem_cache *cache, void *object,
 		return;
 	if (alloc_meta->aux_stack[0]) {
 		pr_err("Last potentially related work creation:\n");
-		print_stack(alloc_meta->aux_stack[0]);
+		stack_depot_print(alloc_meta->aux_stack[0]);
 		pr_err("\n");
 	}
 	if (alloc_meta->aux_stack[1]) {
 		pr_err("Second to last potentially related work creation:\n");
-		print_stack(alloc_meta->aux_stack[1]);
+		stack_depot_print(alloc_meta->aux_stack[1]);
 		pr_err("\n");
 	}
 #endif
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 62402d22539b..eff29be1218b 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -394,8 +394,6 @@ void __dump_page_owner(const struct page *page)
 	struct page_ext *page_ext = lookup_page_ext(page);
 	struct page_owner *page_owner;
 	depot_stack_handle_t handle;
-	unsigned long *entries;
-	unsigned int nr_entries;
 	gfp_t gfp_mask;
 	int mt;
 
@@ -423,20 +421,17 @@ void __dump_page_owner(const struct page *page)
 		 page_owner->pid, page_owner->ts_nsec, page_owner->free_ts_nsec);
 
 	handle = READ_ONCE(page_owner->handle);
-	if (!handle) {
+	if (!handle)
 		pr_alert("page_owner allocation stack trace missing\n");
-	} else {
-		nr_entries = stack_depot_fetch(handle, &entries);
-		stack_trace_print(entries, nr_entries, 0);
-	}
+	else
+		stack_depot_print(handle);
 
 	handle = READ_ONCE(page_owner->free_handle);
 	if (!handle) {
 		pr_alert("page_owner free stack trace missing\n");
 	} else {
-		nr_entries = stack_depot_fetch(handle, &entries);
 		pr_alert("page last free stack trace:\n");
-		stack_trace_print(entries, nr_entries, 0);
+		stack_depot_print(handle);
 	}
 
 	if (page_owner->last_migrate_reason != -1)
-- 
cgit v1.2.3


From 0f68d45ef41abb618a9ca33996348ae73800a106 Mon Sep 17 00:00:00 2001
From: Imran Khan <imran.f.khan@oracle.com>
Date: Mon, 8 Nov 2021 18:33:16 -0800
Subject: lib, stackdepot: add helper to print stack entries into buffer

To print stack entries into a buffer, users of stackdepot, first get a
list of stack entries using stack_depot_fetch and then print this list
into a buffer using stack_trace_snprint.  Provide a helper in stackdepot
for this purpose.  Also change above mentioned users to use this helper.

[imran.f.khan@oracle.com: fix build error]
  Link: https://lkml.kernel.org/r/20210915175321.3472770-4-imran.f.khan@oracle.com
[imran.f.khan@oracle.com: export stack_depot_snprint() to modules]
  Link: https://lkml.kernel.org/r/20210916133535.3592491-4-imran.f.khan@oracle.com

Link: https://lkml.kernel.org/r/20210915014806.3206938-4-imran.f.khan@oracle.com
Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Jani Nikula <jani.nikula@intel.com>	[i915]
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/gpu/drm/drm_dp_mst_topology.c   |  5 +----
 drivers/gpu/drm/drm_mm.c                |  5 +----
 drivers/gpu/drm/i915/i915_vma.c         |  5 +----
 drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +++++---------------
 include/linux/stackdepot.h              |  3 +++
 lib/stackdepot.c                        | 25 +++++++++++++++++++++++++
 mm/page_owner.c                         |  5 +----
 7 files changed, 37 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 86d13d6bc463..2d1adab9e360 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1668,13 +1668,10 @@ __dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history,
 	for (i = 0; i < history->len; i++) {
 		const struct drm_dp_mst_topology_ref_entry *entry =
 			&history->entries[i];
-		ulong *entries;
-		uint nr_entries;
 		u64 ts_nsec = entry->ts_nsec;
 		u32 rem_nsec = do_div(ts_nsec, 1000000000);
 
-		nr_entries = stack_depot_fetch(entry->backtrace, &entries);
-		stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4);
+		stack_depot_snprint(entry->backtrace, buf, PAGE_SIZE, 4);
 
 		drm_printf(&p, "  %d %ss (last at %5llu.%06u):\n%s",
 			   entry->count,
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 93d48a6f04ab..7d1c578388d3 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -118,8 +118,6 @@ static noinline void save_stack(struct drm_mm_node *node)
 static void show_leaks(struct drm_mm *mm)
 {
 	struct drm_mm_node *node;
-	unsigned long *entries;
-	unsigned int nr_entries;
 	char *buf;
 
 	buf = kmalloc(BUFSZ, GFP_KERNEL);
@@ -133,8 +131,7 @@ static void show_leaks(struct drm_mm *mm)
 			continue;
 		}
 
-		nr_entries = stack_depot_fetch(node->stack, &entries);
-		stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0);
+		stack_depot_snprint(node->stack, buf, BUFSZ, 0);
 		DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s",
 			  node->start, node->size, buf);
 	}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4b7fc4647e46..f2d9ed375109 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -56,8 +56,6 @@ void i915_vma_free(struct i915_vma *vma)
 
 static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 {
-	unsigned long *entries;
-	unsigned int nr_entries;
 	char buf[512];
 
 	if (!vma->node.stack) {
@@ -66,8 +64,7 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 		return;
 	}
 
-	nr_entries = stack_depot_fetch(vma->node.stack, &entries);
-	stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0);
+	stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0);
 	DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n",
 			 vma->node.start, vma->node.size, reason, buf);
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index eaf7688f517d..0d85f3c5c526 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -65,16 +65,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
 	return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
 }
 
-static void __print_depot_stack(depot_stack_handle_t stack,
-				char *buf, int sz, int indent)
-{
-	unsigned long *entries;
-	unsigned int nr_entries;
-
-	nr_entries = stack_depot_fetch(stack, &entries);
-	stack_trace_snprint(buf, sz, entries, nr_entries, indent);
-}
-
 static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
 	spin_lock_init(&rpm->debug.lock);
@@ -146,12 +136,12 @@ static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
 		if (!buf)
 			return;
 
-		__print_depot_stack(stack, buf, PAGE_SIZE, 2);
+		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
 		DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
 
 		stack = READ_ONCE(rpm->debug.last_release);
 		if (stack) {
-			__print_depot_stack(stack, buf, PAGE_SIZE, 2);
+			stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
 			DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
 		}
 
@@ -183,12 +173,12 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p,
 		return;
 
 	if (dbg->last_acquire) {
-		__print_depot_stack(dbg->last_acquire, buf, PAGE_SIZE, 2);
+		stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
 		drm_printf(p, "Wakeref last acquired:\n%s", buf);
 	}
 
 	if (dbg->last_release) {
-		__print_depot_stack(dbg->last_release, buf, PAGE_SIZE, 2);
+		stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
 		drm_printf(p, "Wakeref last released:\n%s", buf);
 	}
 
@@ -203,7 +193,7 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p,
 		rep = 1;
 		while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
 			rep++, i++;
-		__print_depot_stack(stack, buf, PAGE_SIZE, 2);
+		stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
 		drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
 	}
 
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 8ab37500fcba..c34b55a6e554 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -25,6 +25,9 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
 unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 			       unsigned long **entries);
 
+int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
+		       int spaces);
+
 void stack_depot_print(depot_stack_handle_t stack);
 
 #ifdef CONFIG_STACKDEPOT
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 4e1f2982d0fa..b437ae79aca1 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -213,6 +213,31 @@ static inline struct stack_record *find_stack(struct stack_record *bucket,
 	return NULL;
 }
 
+/**
+ * stack_depot_snprint - print stack entries from a depot into a buffer
+ *
+ * @handle:	Stack depot handle which was returned from
+ *		stack_depot_save().
+ * @buf:	Pointer to the print buffer
+ *
+ * @size:	Size of the print buffer
+ *
+ * @spaces:	Number of leading spaces to print
+ *
+ * Return:	Number of bytes printed.
+ */
+int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
+		       int spaces)
+{
+	unsigned long *entries;
+	unsigned int nr_entries;
+
+	nr_entries = stack_depot_fetch(handle, &entries);
+	return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries,
+						spaces) : 0;
+}
+EXPORT_SYMBOL_GPL(stack_depot_snprint);
+
 /**
  * stack_depot_print - print stack entries from a depot
  *
diff --git a/mm/page_owner.c b/mm/page_owner.c
index eff29be1218b..1653040d1133 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -329,8 +329,6 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 		depot_stack_handle_t handle)
 {
 	int ret, pageblock_mt, page_mt;
-	unsigned long *entries;
-	unsigned int nr_entries;
 	char *kbuf;
 
 	count = min_t(size_t, count, PAGE_SIZE);
@@ -361,8 +359,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn,
 	if (ret >= count)
 		goto err;
 
-	nr_entries = stack_depot_fetch(handle, &entries);
-	ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0);
+	ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0);
 	if (ret >= count)
 		goto err;
 
-- 
cgit v1.2.3


From bfb3ba32061da1a9217ef6d02fbcffb528e4c8df Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.demarchi@intel.com>
Date: Mon, 8 Nov 2021 18:33:19 -0800
Subject: include/linux/string_helpers.h: add linux/string.h for strlen()

linux/string_helpers.h uses strlen(), so include the correponding header.
Otherwise we get a compilation error if it's not also included by whoever
included the helper.

Link: https://lkml.kernel.org/r/20211005212634.3223113-1-lucas.demarchi@intel.com
Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string_helpers.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 68189c4a2eb1..4ba39e1403b2 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -4,6 +4,7 @@
 
 #include <linux/bits.h>
 #include <linux/ctype.h>
+#include <linux/string.h>
 #include <linux/types.h>
 
 struct file;
-- 
cgit v1.2.3


From 1b1ad288b8f1b11f83396e537003722897ecc12b Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 8 Nov 2021 18:33:43 -0800
Subject: kallsyms: remove arch specific text and data check

Patch series "sections: Unify kernel sections range check and use", v4.

There are three head files(kallsyms.h, kernel.h and sections.h) which
include the kernel sections range check, let's make some cleanup and unify
them.

1. cleanup arch specific text/data check and fix address boundary check
   in kallsyms.h

2. make all the basic/core kernel range check function into sections.h

3. update all the callers, and use the helper in sections.h to simplify
   the code

After this series, we have 5 APIs about kernel sections range check in
sections.h

 * is_kernel_rodata()		--- already in sections.h
 * is_kernel_core_data()	--- come from core_kernel_data() in kernel.h
 * is_kernel_inittext()		--- come from kernel.h and kallsyms.h
 * __is_kernel_text()		--- add new internal helper
 * __is_kernel()		--- add new internal helper

Note: For the last two helpers, people should not use directly, consider to
      use corresponding function in kallsyms.h.

This patch (of 11):

Remove arch specific text and data check after commit 4ba66a976072 ("arch:
remove blackfin port"), no need arch-specific text/data check.

Link: https://lkml.kernel.org/r/20210930071143.63410-1-wangkefeng.wang@huawei.com
Link: https://lkml.kernel.org/r/20210930071143.63410-2-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/sections.h | 16 ----------------
 include/linux/kallsyms.h       |  3 +--
 kernel/locking/lockdep.c       |  3 ---
 3 files changed, 1 insertion(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 596ab2092289..614fc809de34 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -64,22 +64,6 @@ extern __visible const void __nosave_begin, __nosave_end;
 #define dereference_kernel_function_descriptor(p) ((void *)(p))
 #endif
 
-/* random extra sections (if any).  Override
- * in asm/sections.h */
-#ifndef arch_is_kernel_text
-static inline int arch_is_kernel_text(unsigned long addr)
-{
-	return 0;
-}
-#endif
-
-#ifndef arch_is_kernel_data
-static inline int arch_is_kernel_data(unsigned long addr)
-{
-	return 0;
-}
-#endif
-
 /**
  * memory_contains - checks if an object is contained within a memory region
  * @begin: virtual address of the beginning of the memory region
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index a1d6fc82d7f0..bac340972670 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -34,8 +34,7 @@ static inline int is_kernel_inittext(unsigned long addr)
 
 static inline int is_kernel_text(unsigned long addr)
 {
-	if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
-	    arch_is_kernel_text(addr))
+	if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext))
 		return 1;
 	return in_gate_area_no_mm(addr);
 }
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 8e118caf835e..f2d3bf4960f4 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -818,9 +818,6 @@ static int static_obj(const void *obj)
 	if ((addr >= start) && (addr < end))
 		return 1;
 
-	if (arch_is_kernel_data(addr))
-		return 1;
-
 	/*
 	 * in-kernel percpu var?
 	 */
-- 
cgit v1.2.3


From e7d5c4b0eb9be6ea5c2cfb510a9d6f56925118eb Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 8 Nov 2021 18:33:47 -0800
Subject: kallsyms: fix address-checks for kernel related range

The is_kernel_inittext/is_kernel_text/is_kernel function should not
include the end address(the labels _einittext, _etext and _end) when check
the address range, the issue exists since Linux v2.6.12.

Link: https://lkml.kernel.org/r/20210930071143.63410-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Acked-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kallsyms.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index bac340972670..62ce22b27ea8 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -27,21 +27,21 @@ struct module;
 static inline int is_kernel_inittext(unsigned long addr)
 {
 	if (addr >= (unsigned long)_sinittext
-	    && addr <= (unsigned long)_einittext)
+	    && addr < (unsigned long)_einittext)
 		return 1;
 	return 0;
 }
 
 static inline int is_kernel_text(unsigned long addr)
 {
-	if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext))
+	if ((addr >= (unsigned long)_stext && addr < (unsigned long)_etext))
 		return 1;
 	return in_gate_area_no_mm(addr);
 }
 
 static inline int is_kernel(unsigned long addr)
 {
-	if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
+	if (addr >= (unsigned long)_stext && addr < (unsigned long)_end)
 		return 1;
 	return in_gate_area_no_mm(addr);
 }
-- 
cgit v1.2.3


From a20deb3a348719adaf8c12e1bf4b599bfc51836e Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 8 Nov 2021 18:33:51 -0800
Subject: sections: move and rename core_kernel_data() to is_kernel_core_data()

Move core_kernel_data() into sections.h and rename it to
is_kernel_core_data(), also make it return bool value, then update all the
callers.

Link: https://lkml.kernel.org/r/20210930071143.63410-4-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexander Potapenko <glider@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/sections.h | 16 ++++++++++++++++
 include/linux/kernel.h         |  1 -
 kernel/extable.c               | 18 ------------------
 kernel/trace/ftrace.c          |  2 +-
 net/sysctl_net.c               |  2 +-
 5 files changed, 18 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 614fc809de34..7cba8ff10d3a 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -128,6 +128,22 @@ static inline bool init_section_intersects(void *virt, size_t size)
 	return memory_intersects(__init_begin, __init_end, virt, size);
 }
 
+/**
+ * is_kernel_core_data - checks if the pointer address is located in the
+ *			 .data section
+ *
+ * @addr: address to check
+ *
+ * Returns: true if the address is located in .data, false otherwise.
+ * Note: On some archs it may return true for core RODATA, and false
+ *       for others. But will always be true for core RW data.
+ */
+static inline bool is_kernel_core_data(unsigned long addr)
+{
+	return addr >= (unsigned long)_sdata &&
+	       addr < (unsigned long)_edata;
+}
+
 /**
  * is_kernel_rodata - checks if the pointer address is located in the
  *                    .rodata section
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 46ca4404fb93..23f57a2d5a13 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -227,7 +227,6 @@ extern char *next_arg(char *args, char **param, char **val);
 
 extern int core_kernel_text(unsigned long addr);
 extern int init_kernel_text(unsigned long addr);
-extern int core_kernel_data(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
 extern int func_ptr_is_kernel_text(void *ptr);
diff --git a/kernel/extable.c b/kernel/extable.c
index 290661f68e6b..0e3412b48bba 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -82,24 +82,6 @@ int notrace core_kernel_text(unsigned long addr)
 	return 0;
 }
 
-/**
- * core_kernel_data - tell if addr points to kernel data
- * @addr: address to test
- *
- * Returns true if @addr passed in is from the core kernel data
- * section.
- *
- * Note: On some archs it may return true for core RODATA, and false
- *  for others. But will always be true for core RW data.
- */
-int core_kernel_data(unsigned long addr)
-{
-	if (addr >= (unsigned long)_sdata &&
-	    addr < (unsigned long)_edata)
-		return 1;
-	return 0;
-}
-
 int __kernel_text_address(unsigned long addr)
 {
 	if (kernel_text_address(addr))
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index feebf57c6458..7aaef2b78760 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -323,7 +323,7 @@ int __register_ftrace_function(struct ftrace_ops *ops)
 	if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT))
 		return -EBUSY;
 
-	if (!core_kernel_data((unsigned long)ops))
+	if (!is_kernel_core_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
 	add_ftrace_ops(&ftrace_ops_list, ops);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index f6cb0d4d114c..4b45ed631eb8 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -144,7 +144,7 @@ static void ensure_safe_net_sysctl(struct net *net, const char *path,
 		addr = (unsigned long)ent->data;
 		if (is_module_address(addr))
 			where = "module";
-		else if (core_kernel_data(addr))
+		else if (is_kernel_core_data(addr))
 			where = "kernel";
 		else
 			continue;
-- 
cgit v1.2.3


From b9ad8fe7b8cab3814d1de41e8ddca602b2646f4b Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 8 Nov 2021 18:33:54 -0800
Subject: sections: move is_kernel_inittext() into sections.h

The is_kernel_inittext() and init_kernel_text() are with same
functionality, let's just keep is_kernel_inittext() and move it into
sections.h, then update all the callers.

Link: https://lkml.kernel.org/r/20210930071143.63410-5-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/unwind_orc.c   |  2 +-
 include/asm-generic/sections.h | 14 ++++++++++++++
 include/linux/kallsyms.h       |  8 --------
 include/linux/kernel.h         |  1 -
 kernel/extable.c               | 12 ++----------
 5 files changed, 17 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index a1202536fc57..d92ec2ced059 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -175,7 +175,7 @@ static struct orc_entry *orc_find(unsigned long ip)
 	}
 
 	/* vmlinux .init slow lookup: */
-	if (init_kernel_text(ip))
+	if (is_kernel_inittext(ip))
 		return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
 				  __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);
 
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 7cba8ff10d3a..f82806ca8756 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -158,4 +158,18 @@ static inline bool is_kernel_rodata(unsigned long addr)
 	       addr < (unsigned long)__end_rodata;
 }
 
+/**
+ * is_kernel_inittext - checks if the pointer address is located in the
+ *                      .init.text section
+ *
+ * @addr: address to check
+ *
+ * Returns: true if the address is located in .init.text, false otherwise.
+ */
+static inline bool is_kernel_inittext(unsigned long addr)
+{
+	return addr >= (unsigned long)_sinittext &&
+	       addr < (unsigned long)_einittext;
+}
+
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index 62ce22b27ea8..bb5d9ec78e13 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -24,14 +24,6 @@
 struct cred;
 struct module;
 
-static inline int is_kernel_inittext(unsigned long addr)
-{
-	if (addr >= (unsigned long)_sinittext
-	    && addr < (unsigned long)_einittext)
-		return 1;
-	return 0;
-}
-
 static inline int is_kernel_text(unsigned long addr)
 {
 	if ((addr >= (unsigned long)_stext && addr < (unsigned long)_etext))
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 23f57a2d5a13..be84ab369650 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -226,7 +226,6 @@ extern bool parse_option_str(const char *str, const char *option);
 extern char *next_arg(char *args, char **param, char **val);
 
 extern int core_kernel_text(unsigned long addr);
-extern int init_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
 extern int kernel_text_address(unsigned long addr);
 extern int func_ptr_is_kernel_text(void *ptr);
diff --git a/kernel/extable.c b/kernel/extable.c
index 0e3412b48bba..6505207aa7e6 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -62,14 +62,6 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 	return e;
 }
 
-int init_kernel_text(unsigned long addr)
-{
-	if (addr >= (unsigned long)_sinittext &&
-	    addr < (unsigned long)_einittext)
-		return 1;
-	return 0;
-}
-
 int notrace core_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext &&
@@ -77,7 +69,7 @@ int notrace core_kernel_text(unsigned long addr)
 		return 1;
 
 	if (system_state < SYSTEM_FREEING_INITMEM &&
-	    init_kernel_text(addr))
+	    is_kernel_inittext(addr))
 		return 1;
 	return 0;
 }
@@ -94,7 +86,7 @@ int __kernel_text_address(unsigned long addr)
 	 * Since we are after the module-symbols check, there's
 	 * no danger of address overlap:
 	 */
-	if (init_kernel_text(addr))
+	if (is_kernel_inittext(addr))
 		return 1;
 	return 0;
 }
-- 
cgit v1.2.3


From 8f6e42e83362650007ed298070a69d084f2c8baf Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 8 Nov 2021 18:34:02 -0800
Subject: sections: provide internal __is_kernel() and __is_kernel_text()
 helper

An internal __is_kernel() helper which only check the kernel address
ranges, and an internal __is_kernel_text() helper which only check text
section ranges.

Link: https://lkml.kernel.org/r/20210930071143.63410-7-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Richard Henderson <rth@twiddle.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/sections.h | 29 +++++++++++++++++++++++++++++
 include/linux/kallsyms.h       |  4 ++--
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index f82806ca8756..1dfadb2e878d 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -172,4 +172,33 @@ static inline bool is_kernel_inittext(unsigned long addr)
 	       addr < (unsigned long)_einittext;
 }
 
+/**
+ * __is_kernel_text - checks if the pointer address is located in the
+ *                    .text section
+ *
+ * @addr: address to check
+ *
+ * Returns: true if the address is located in .text, false otherwise.
+ * Note: an internal helper, only check the range of _stext to _etext.
+ */
+static inline bool __is_kernel_text(unsigned long addr)
+{
+	return addr >= (unsigned long)_stext &&
+	       addr < (unsigned long)_etext;
+}
+
+/**
+ * __is_kernel - checks if the pointer address is located in the kernel range
+ *
+ * @addr: address to check
+ *
+ * Returns: true if the address is located in the kernel range, false otherwise.
+ * Note: an internal helper, only check the range of _stext to _end.
+ */
+static inline bool __is_kernel(unsigned long addr)
+{
+	return addr >= (unsigned long)_stext &&
+	       addr < (unsigned long)_end;
+}
+
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h
index bb5d9ec78e13..4176c7eca7b5 100644
--- a/include/linux/kallsyms.h
+++ b/include/linux/kallsyms.h
@@ -26,14 +26,14 @@ struct module;
 
 static inline int is_kernel_text(unsigned long addr)
 {
-	if ((addr >= (unsigned long)_stext && addr < (unsigned long)_etext))
+	if (__is_kernel_text(addr))
 		return 1;
 	return in_gate_area_no_mm(addr);
 }
 
 static inline int is_kernel(unsigned long addr)
 {
-	if (addr >= (unsigned long)_stext && addr < (unsigned long)_end)
+	if (__is_kernel(addr))
 		return 1;
 	return in_gate_area_no_mm(addr);
 }
-- 
cgit v1.2.3


From 5605f41917c6ad766814a8e417a4481e9157c991 Mon Sep 17 00:00:00 2001
From: Changcheng Deng <deng.changcheng@zte.com.cn>
Date: Mon, 8 Nov 2021 18:35:07 -0800
Subject: crash_dump: fix boolreturn.cocci warning

./include/linux/crash_dump.h: 119: 50-51: WARNING: return of 0/1 in
function 'is_kdump_kernel' with return type bool

Return statements in functions returning bool should use true/false
instead of 1/0.

Link: https://lkml.kernel.org/r/20211020083905.1037952-1-deng.changcheng@zte.com.cn
Signed-off-by: Changcheng Deng <deng.changcheng@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Reviewed-by: Simon Horman <horms@kernel.org>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Ye Guojin <ye.guojin@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crash_dump.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 0c547d866f1e..979c26176c1d 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -116,7 +116,7 @@ extern void register_vmcore_cb(struct vmcore_cb *cb);
 extern void unregister_vmcore_cb(struct vmcore_cb *cb);
 
 #else /* !CONFIG_CRASH_DUMP */
-static inline bool is_kdump_kernel(void) { return 0; }
+static inline bool is_kdump_kernel(void) { return false; }
 #endif /* CONFIG_CRASH_DUMP */
 
 /* Device Dump information to be filled by drivers */
-- 
cgit v1.2.3


From a10677a028b853c25054a4b8be04013ccb55682f Mon Sep 17 00:00:00 2001
From: Ye Guojin <ye.guojin@zte.com.cn>
Date: Mon, 8 Nov 2021 18:35:10 -0800
Subject: crash_dump: remove duplicate include in crash_dump.h

In crash_dump.h, header file <linux/pgtable.h> is included twice.  This
duplication was introduced in commit 65fddcfca8ad("mm: reorder includes
after introduction of linux/pgtable.h") where the order of the header
files is adjusted, while the old one was not removed.

Clean it up here.

Link: https://lkml.kernel.org/r/20211020090659.1038877-1-ye.guojin@zte.com.cn
Signed-off-by: Ye Guojin <ye.guojin@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Changcheng Deng <deng.changcheng@zte.com.cn>
Cc: Simon Horman <horms@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/crash_dump.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 979c26176c1d..620821549b23 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -8,8 +8,6 @@
 #include <linux/pgtable.h>
 #include <uapi/linux/vmcore.h>
 
-#include <linux/pgtable.h> /* for pgprot_t */
-
 /* For IS_ENABLED(CONFIG_CRASH_DUMP) */
 #define ELFCORE_ADDR_MAX	(-1ULL)
 #define ELFCORE_ADDR_ERR	(-2ULL)
-- 
cgit v1.2.3


From f26663684e76773ea86e2df13fb18f9d66c91151 Mon Sep 17 00:00:00 2001
From: Ye Guojin <ye.guojin@zte.com.cn>
Date: Mon, 8 Nov 2021 18:35:13 -0800
Subject: signal: remove duplicate include in signal.h

'linux/string.h' included in 'signal.h' is duplicated.
it's also included at line 7.

Link: https://lkml.kernel.org/r/20211019024934.973008-1-ye.guojin@zte.com.cn
Signed-off-by: Ye Guojin <ye.guojin@zte.com.cn>
Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3f96a6374e4f..37f5080c070a 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -126,7 +126,6 @@ static inline int sigequalsets(const sigset_t *set1, const sigset_t *set2)
 #define sigmask(sig)	(1UL << ((sig) - 1))
 
 #ifndef __HAVE_ARCH_SIG_SETOPS
-#include <linux/string.h>
 
 #define _SIG_SET_BINOP(name, op)					\
 static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \
-- 
cgit v1.2.3


From 372904c080be44629d84bb15ed5e12eed44b5f9f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 8 Nov 2021 18:35:16 -0800
Subject: seq_file: move seq_escape() to a header

Move seq_escape() to the header as inliner, for a small kernel text size
reduction.

Link: https://lkml.kernel.org/r/20211001122917.67228-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/seq_file.c            | 16 ----------------
 include/linux/seq_file.h | 17 ++++++++++++++++-
 2 files changed, 16 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 4a2cda04d3e2..f8e1f4ee87ff 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -383,22 +383,6 @@ void seq_escape_mem(struct seq_file *m, const char *src, size_t len,
 }
 EXPORT_SYMBOL(seq_escape_mem);
 
-/**
- *	seq_escape -	print string into buffer, escaping some characters
- *	@m:	target buffer
- *	@s:	string
- *	@esc:	set of characters that need escaping
- *
- *	Puts string into buffer, replacing each occurrence of character from
- *	@esc with usual octal escape.
- *	Use seq_has_overflowed() to check for errors.
- */
-void seq_escape(struct seq_file *m, const char *s, const char *esc)
-{
-	seq_escape_str(m, s, ESCAPE_OCTAL, esc);
-}
-EXPORT_SYMBOL(seq_escape);
-
 void seq_vprintf(struct seq_file *m, const char *f, va_list args)
 {
 	int len;
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index dd99569595fd..103776e18555 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -4,6 +4,7 @@
 
 #include <linux/types.h>
 #include <linux/string.h>
+#include <linux/string_helpers.h>
 #include <linux/bug.h>
 #include <linux/mutex.h>
 #include <linux/cpumask.h>
@@ -135,7 +136,21 @@ static inline void seq_escape_str(struct seq_file *m, const char *src,
 	seq_escape_mem(m, src, strlen(src), flags, esc);
 }
 
-void seq_escape(struct seq_file *m, const char *s, const char *esc);
+/**
+ * seq_escape - print string into buffer, escaping some characters
+ * @m: target buffer
+ * @s: NULL-terminated string
+ * @esc: set of characters that need escaping
+ *
+ * Puts string into buffer, replacing each occurrence of character from
+ * @esc with usual octal escape.
+ *
+ * Use seq_has_overflowed() to check for errors.
+ */
+static inline void seq_escape(struct seq_file *m, const char *s, const char *esc)
+{
+	seq_escape_str(m, s, ESCAPE_OCTAL, esc);
+}
 
 void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
 		  int rowsize, int groupsize, const void *buf, size_t len,
-- 
cgit v1.2.3


From 10a6de19cad6efb9b49883513afb810dc265fca2 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Mon, 8 Nov 2021 18:35:19 -0800
Subject: seq_file: fix passing wrong private data

DEFINE_PROC_SHOW_ATTRIBUTE() is supposed to be used to define a series
of functions and variables to register proc file easily. And the users
can use proc_create_data() to pass their own private data and get it
via seq->private in the callback. Unfortunately, the proc file system
use PDE_DATA() to get private data instead of inode->i_private. So fix
it. Fortunately, there only one user of it which does not pass any
private data, so this bug does not break any in-tree codes.

Link: https://lkml.kernel.org/r/20211029032638.84884-1-songmuchun@bytedance.com
Fixes: 97a32539b956 ("proc: convert everything to "struct proc_ops"")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Florent Revest <revest@chromium.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/seq_file.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 103776e18555..72dbb44a4573 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -209,7 +209,7 @@ static const struct file_operations __name ## _fops = {			\
 #define DEFINE_PROC_SHOW_ATTRIBUTE(__name)				\
 static int __name ## _open(struct inode *inode, struct file *file)	\
 {									\
-	return single_open(file, __name ## _show, inode->i_private);	\
+	return single_open(file, __name ## _show, PDE_DATA(inode));	\
 }									\
 									\
 static const struct proc_ops __name ## _proc_ops = {			\
-- 
cgit v1.2.3


From 278167fd2f8ffe679351605fe03e29ff3ab8db18 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Tue, 9 Nov 2021 16:29:49 -0800
Subject: block: add __must_check for *add_disk*() callers

Now that we have done a spring cleaning on all drivers and added
error checking / handling, let's keep it that way and ensure
no new drivers fail to stick with it.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20211110002949.999380-1-mcgrof@kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c         | 6 +++---
 include/linux/genhd.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index ca2fbab1d425..c5392cc24d37 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -394,8 +394,8 @@ static void disk_scan_partitions(struct gendisk *disk)
  * This function registers the partitioning information in @disk
  * with the kernel.
  */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-		     const struct attribute_group **groups)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups)
 
 {
 	struct device *ddev = disk_to_dev(disk);
@@ -544,7 +544,7 @@ out_disk_release_events:
 out_free_ext_minor:
 	if (disk->major == BLOCK_EXT_MAJOR)
 		blk_free_ext_minor(disk->first_minor);
-	return WARN_ON_ONCE(ret); /* keep until all callers handle errors */
+	return ret;
 }
 EXPORT_SYMBOL(device_add_disk);
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 462634b4b48f..74c410263113 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -205,9 +205,9 @@ static inline dev_t disk_devt(struct gendisk *disk)
 void disk_uevent(struct gendisk *disk, enum kobject_action action);
 
 /* block/genhd.c */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-		const struct attribute_group **groups);
-static inline int add_disk(struct gendisk *disk)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+				 const struct attribute_group **groups);
+static inline int __must_check add_disk(struct gendisk *disk)
 {
 	return device_add_disk(NULL, disk, NULL);
 }
-- 
cgit v1.2.3


From 64355db3caf6468dc711995239efe0cbcd7d0091 Mon Sep 17 00:00:00 2001
From: Thomas Weißschuh <linux@weissschuh.net>
Date: Wed, 10 Nov 2021 13:16:55 +0100
Subject: mod_devicetable: fix kdocs for ishtp_device_id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kdocs were copied from another device_id struct and not adapted.

Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/mod_devicetable.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index befbf53c4b7c..c70abe7aaef2 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -901,8 +901,7 @@ struct dfl_device_id {
 
 /**
  * struct ishtp_device_id - ISHTP device identifier
- * @guid_string: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba
- * @context: pointer to driver specific data
+ * @guid: GUID of the device.
  */
 struct ishtp_device_id {
 	guid_t guid;
-- 
cgit v1.2.3


From 5d5e4522a7f404d1a96fd6c703989d32a9c9568d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 7 Nov 2021 14:51:16 +1000
Subject: printk: restore flushing of NMI buffers on remote CPUs after NMI
 backtraces

printk from NMI context relies on irq work being raised on the local CPU
to print to console. This can be a problem if the NMI was raised by a
lockup detector to print lockup stack and regs, because the CPU may not
enable irqs (because it is locked up).

Introduce printk_trigger_flush() that can be called another CPU to try
to get those messages to the console, call that where printk_safe_flush
was previously called.

Fixes: 93d102f094be ("printk: remove safe buffers")
Cc: stable@vger.kernel.org # 5.15
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Reviewed-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20211107045116.1754411-1-npiggin@gmail.com
---
 arch/powerpc/kernel/watchdog.c | 6 ++++++
 include/linux/printk.h         | 4 ++++
 kernel/printk/printk.c         | 5 +++++
 lib/nmi_backtrace.c            | 6 ++++++
 4 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index dc17d8903d4f..6b7a83d5e03e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -186,6 +186,12 @@ static void watchdog_smp_panic(int cpu, u64 tb)
 	if (sysctl_hardlockup_all_cpu_backtrace)
 		trigger_allbutself_cpu_backtrace();
 
+	/*
+	 * Force flush any remote buffers that might be stuck in IRQ context
+	 * and therefore could not run their irq_work.
+	 */
+	printk_trigger_flush();
+
 	if (hardlockup_panic)
 		nmi_panic(NULL, "Hard LOCKUP");
 
diff --git a/include/linux/printk.h b/include/linux/printk.h
index a1379df43251..596ad6fa0336 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -206,6 +206,7 @@ void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
 extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
 extern asmlinkage void dump_stack(void) __cold;
+void printk_trigger_flush(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -282,6 +283,9 @@ static inline void dump_stack_lvl(const char *log_lvl)
 static inline void dump_stack(void)
 {
 }
+static inline void printk_trigger_flush(void)
+{
+}
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 65fffa6368c9..eabe23b0a982 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -3261,6 +3261,11 @@ void defer_console_output(void)
 	preempt_enable();
 }
 
+void printk_trigger_flush(void)
+{
+	defer_console_output();
+}
+
 int vprintk_deferred(const char *fmt, va_list args)
 {
 	int r;
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index f9e89001b52e..199ab201d501 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -75,6 +75,12 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 		touch_softlockup_watchdog();
 	}
 
+	/*
+	 * Force flush any remote buffers that might be stuck in IRQ context
+	 * and therefore could not run their irq_work.
+	 */
+	printk_trigger_flush();
+
 	clear_bit_unlock(0, &backtrace_flag);
 	put_cpu();
 }
-- 
cgit v1.2.3


From a19672f6b9718df247a9087a266150dbe833022e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 12 Aug 2021 21:54:58 +0100
Subject: folio: Add a function to change the private data attached to a folio

Add a function, folio_change_private(), that will change the private data
attached to a folio, without the need to twiddle the private bit or the
refcount.  It assumes that folio_add_private() has already been called on
the page.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dominique Martinet <asmadeus@codewreck.org>
Tested-by: kafs-testing@auristor.com
Link: https://lore.kernel.org/r/162981149911.1901565.17776700811659843340.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/163005743485.2472992.5100702469503007023.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163584180781.4023316.5037526301198034310.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/163649324326.309189.17817587229450840783.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/163657848531.834781.14269656212269187893.stgit@warthog.procyon.org.uk/ # v5
---
 include/linux/pagemap.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 6a30916b76e5..1f560aecd9b5 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -279,6 +279,25 @@ static inline void folio_attach_private(struct folio *folio, void *data)
 	folio_set_private(folio);
 }
 
+/**
+ * folio_change_private - Change private data on a folio.
+ * @folio: Folio to change the data on.
+ * @data: Data to set on the folio.
+ *
+ * Change the private data attached to a folio and return the old
+ * data.  The page must previously have had data attached and the data
+ * must be detached before the folio will be freed.
+ *
+ * Return: Data that was previously attached to the folio.
+ */
+static inline void *folio_change_private(struct folio *folio, void *data)
+{
+	void *old = folio_get_private(folio);
+
+	folio->private = data;
+	return old;
+}
+
 /**
  * folio_detach_private - Detach private data from a folio.
  * @folio: Folio to detach data from.
-- 
cgit v1.2.3


From 452c472e26348df1e7052544130aa98eebbd2331 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 12 Aug 2021 22:09:57 +0100
Subject: folio: Add a function to get the host inode for a folio

Add a convenience function, folio_inode() that will get the host inode from
a folio's mapping.

Changes:
 ver #3:
  - Fix mistake in function description[2].
 ver #2:
  - Fix contradiction between doc and implementation by disallowing use
    with swap caches[1].

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dominique Martinet <asmadeus@codewreck.org>
Tested-by: kafs-testing@auristor.com
Link: https://lore.kernel.org/r/YST8OcVNy02Rivbm@casper.infradead.org/ [1]
Link: https://lore.kernel.org/r/YYKLkBwQdtn4ja+i@casper.infradead.org/ [2]
Link: https://lore.kernel.org/r/162880453171.3369675.3704943108660112470.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/162981151155.1901565.7010079316994382707.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/163005744370.2472992.18324470937328925723.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163584184628.4023316.9386282630968981869.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/163649325519.309189.15072332908703129455.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/163657850401.834781.1031963517399283294.stgit@warthog.procyon.org.uk/ # v5
---
 include/linux/pagemap.h | 14 ++++++++++++++
 mm/page-writeback.c     |  2 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1f560aecd9b5..1a0c646eb6ff 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -253,6 +253,20 @@ static inline struct address_space *page_mapping_file(struct page *page)
 	return folio_mapping(folio);
 }
 
+/**
+ * folio_inode - Get the host inode for this folio.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the inode that this folio
+ * belongs to.
+ *
+ * Do not call this for folios which aren't in the page cache.
+ */
+static inline struct inode *folio_inode(struct folio *folio)
+{
+	return folio->mapping->host;
+}
+
 static inline bool page_cache_add_speculative(struct page *page, int count)
 {
 	VM_BUG_ON_PAGE(PageTail(page), page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2d498bb62248..a613f8ef6a02 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2967,7 +2967,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
  */
 void folio_wait_stable(struct folio *folio)
 {
-	if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
+	if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
 		folio_wait_writeback(folio);
 }
 EXPORT_SYMBOL_GPL(folio_wait_stable);
-- 
cgit v1.2.3


From 78525c74d9e7d1a6ce69bd4388f045f6e474a20b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 11 Aug 2021 09:49:13 +0100
Subject: netfs, 9p, afs, ceph: Use folios

Convert the netfs helper library to use folios throughout, convert the 9p
and afs filesystems to use folios in their file I/O paths and convert the
ceph filesystem to use just enough folios to compile.

With these changes, afs passes -g quick xfstests.

Changes
=======
ver #5:
 - Got rid of folio_end{io,_read,_write}() and inlined the stuff it does
   instead (Willy decided he didn't want this after all).

ver #4:
 - Fixed a bug in afs_redirty_page() whereby it didn't set the next page
   index in the loop and returned too early.
 - Simplified a check in v9fs_vfs_write_folio_locked()[1].
 - Undid a change to afs_symlink_readpage()[1].
 - Used offset_in_folio() in afs_write_end()[1].
 - Changed from using page_endio() to folio_end{io,_read,_write}()[1].

ver #2:
 - Add 9p foliation.

Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Jeff Layton <jlayton@kernel.org>
Tested-by: Dominique Martinet <asmadeus@codewreck.org>
Tested-by: kafs-testing@auristor.com
cc: Matthew Wilcox (Oracle) <willy@infradead.org>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Ilya Dryomov <idryomov@gmail.com>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: v9fs-developer@lists.sourceforge.net
cc: linux-afs@lists.infradead.org
cc: ceph-devel@vger.kernel.org
cc: linux-cachefs@redhat.com
Link: https://lore.kernel.org/r/YYKa3bfQZxK5/wDN@casper.infradead.org/ [1]
Link: https://lore.kernel.org/r/2408234.1628687271@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/162877311459.3085614.10601478228012245108.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/162981153551.1901565.3124454657133703341.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/163005745264.2472992.9852048135392188995.stgit@warthog.procyon.org.uk/ # v2
Link: https://lore.kernel.org/r/163584187452.4023316.500389675405550116.stgit@warthog.procyon.org.uk/ # v3
Link: https://lore.kernel.org/r/163649328026.309189.1124218109373941936.stgit@warthog.procyon.org.uk/ # v4
Link: https://lore.kernel.org/r/163657852454.834781.9265101983152100556.stgit@warthog.procyon.org.uk/ # v5
---
 fs/9p/vfs_addr.c           |  83 ++++++-----
 fs/9p/vfs_file.c           |  20 +--
 fs/afs/file.c              |  70 +++++----
 fs/afs/internal.h          |  46 +++---
 fs/afs/write.c             | 347 ++++++++++++++++++++++-----------------------
 fs/ceph/addr.c             |  80 ++++++-----
 fs/netfs/read_helper.c     | 165 ++++++++++-----------
 include/linux/netfs.h      |  12 +-
 include/trace/events/afs.h |  21 +--
 9 files changed, 428 insertions(+), 416 deletions(-)

(limited to 'include/linux')

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index adafdf86f42f..fac918ccb305 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -108,7 +108,9 @@ static const struct netfs_read_request_ops v9fs_req_ops = {
  */
 static int v9fs_vfs_readpage(struct file *file, struct page *page)
 {
-	return netfs_readpage(file, page, &v9fs_req_ops, NULL);
+	struct folio *folio = page_folio(page);
+
+	return netfs_readpage(file, folio, &v9fs_req_ops, NULL);
 }
 
 /**
@@ -130,13 +132,15 @@ static void v9fs_vfs_readahead(struct readahead_control *ractl)
 
 static int v9fs_release_page(struct page *page, gfp_t gfp)
 {
-	if (PagePrivate(page))
+	struct folio *folio = page_folio(page);
+
+	if (folio_test_private(folio))
 		return 0;
 #ifdef CONFIG_9P_FSCACHE
-	if (PageFsCache(page)) {
+	if (folio_test_fscache(folio)) {
 		if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
 			return 0;
-		wait_on_page_fscache(page);
+		folio_wait_fscache(folio);
 	}
 #endif
 	return 1;
@@ -152,55 +156,58 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
 static void v9fs_invalidate_page(struct page *page, unsigned int offset,
 				 unsigned int length)
 {
-	wait_on_page_fscache(page);
+	struct folio *folio = page_folio(page);
+
+	folio_wait_fscache(folio);
 }
 
-static int v9fs_vfs_writepage_locked(struct page *page)
+static int v9fs_vfs_write_folio_locked(struct folio *folio)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = folio_inode(folio);
 	struct v9fs_inode *v9inode = V9FS_I(inode);
-	loff_t start = page_offset(page);
-	loff_t size = i_size_read(inode);
+	loff_t start = folio_pos(folio);
+	loff_t i_size = i_size_read(inode);
 	struct iov_iter from;
-	int err, len;
+	size_t len = folio_size(folio);
+	int err;
+
+	if (start >= i_size)
+		return 0; /* Simultaneous truncation occurred */
 
-	if (page->index == size >> PAGE_SHIFT)
-		len = size & ~PAGE_MASK;
-	else
-		len = PAGE_SIZE;
+	len = min_t(loff_t, i_size - start, len);
 
-	iov_iter_xarray(&from, WRITE, &page->mapping->i_pages, start, len);
+	iov_iter_xarray(&from, WRITE, &folio_mapping(folio)->i_pages, start, len);
 
 	/* We should have writeback_fid always set */
 	BUG_ON(!v9inode->writeback_fid);
 
-	set_page_writeback(page);
+	folio_start_writeback(folio);
 
 	p9_client_write(v9inode->writeback_fid, start, &from, &err);
 
-	end_page_writeback(page);
+	folio_end_writeback(folio);
 	return err;
 }
 
 static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
 {
+	struct folio *folio = page_folio(page);
 	int retval;
 
-	p9_debug(P9_DEBUG_VFS, "page %p\n", page);
+	p9_debug(P9_DEBUG_VFS, "folio %p\n", folio);
 
-	retval = v9fs_vfs_writepage_locked(page);
+	retval = v9fs_vfs_write_folio_locked(folio);
 	if (retval < 0) {
 		if (retval == -EAGAIN) {
-			redirty_page_for_writepage(wbc, page);
+			folio_redirty_for_writepage(wbc, folio);
 			retval = 0;
 		} else {
-			SetPageError(page);
-			mapping_set_error(page->mapping, retval);
+			mapping_set_error(folio_mapping(folio), retval);
 		}
 	} else
 		retval = 0;
 
-	unlock_page(page);
+	folio_unlock(folio);
 	return retval;
 }
 
@@ -213,14 +220,15 @@ static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
 
 static int v9fs_launder_page(struct page *page)
 {
+	struct folio *folio = page_folio(page);
 	int retval;
 
-	if (clear_page_dirty_for_io(page)) {
-		retval = v9fs_vfs_writepage_locked(page);
+	if (folio_clear_dirty_for_io(folio)) {
+		retval = v9fs_vfs_write_folio_locked(folio);
 		if (retval)
 			return retval;
 	}
-	wait_on_page_fscache(page);
+	folio_wait_fscache(folio);
 	return 0;
 }
 
@@ -265,10 +273,10 @@ v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
 			    loff_t pos, unsigned int len, unsigned int flags,
-			    struct page **pagep, void **fsdata)
+			    struct page **subpagep, void **fsdata)
 {
 	int retval;
-	struct page *page;
+	struct folio *folio;
 	struct v9fs_inode *v9inode = V9FS_I(mapping->host);
 
 	p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
@@ -279,31 +287,32 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
 	 * file.  We need to do this before we get a lock on the page in case
 	 * there's more than one writer competing for the same cache block.
 	 */
-	retval = netfs_write_begin(filp, mapping, pos, len, flags, &page, fsdata,
+	retval = netfs_write_begin(filp, mapping, pos, len, flags, &folio, fsdata,
 				   &v9fs_req_ops, NULL);
 	if (retval < 0)
 		return retval;
 
-	*pagep = find_subpage(page, pos / PAGE_SIZE);
+	*subpagep = &folio->page;
 	return retval;
 }
 
 static int v9fs_write_end(struct file *filp, struct address_space *mapping,
 			  loff_t pos, unsigned int len, unsigned int copied,
-			  struct page *page, void *fsdata)
+			  struct page *subpage, void *fsdata)
 {
 	loff_t last_pos = pos + copied;
-	struct inode *inode = page->mapping->host;
+	struct folio *folio = page_folio(subpage);
+	struct inode *inode = mapping->host;
 
 	p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		if (unlikely(copied < len)) {
 			copied = 0;
 			goto out;
 		}
 
-		SetPageUptodate(page);
+		folio_mark_uptodate(folio);
 	}
 
 	/*
@@ -314,10 +323,10 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
 		inode_add_bytes(inode, last_pos - inode->i_size);
 		i_size_write(inode, last_pos);
 	}
-	set_page_dirty(page);
+	folio_mark_dirty(folio);
 out:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 
 	return copied;
 }
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 4244d48398ef..612e297f3763 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -528,13 +528,13 @@ static vm_fault_t
 v9fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
 	struct v9fs_inode *v9inode;
-	struct page *page = vmf->page;
+	struct folio *folio = page_folio(vmf->page);
 	struct file *filp = vmf->vma->vm_file;
 	struct inode *inode = file_inode(filp);
 
 
-	p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
-		 page, (unsigned long)filp->private_data);
+	p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
+		 folio, (unsigned long)filp->private_data);
 
 	v9inode = V9FS_I(inode);
 
@@ -542,24 +542,24 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
 	 * be modified.  We then assume the entire page will need writing back.
 	 */
 #ifdef CONFIG_9P_FSCACHE
-	if (PageFsCache(page) &&
-	    wait_on_page_fscache_killable(page) < 0)
-		return VM_FAULT_RETRY;
+	if (folio_test_fscache(folio) &&
+	    folio_wait_fscache_killable(folio) < 0)
+		return VM_FAULT_NOPAGE;
 #endif
 
 	/* Update file times before taking page lock */
 	file_update_time(filp);
 
 	BUG_ON(!v9inode->writeback_fid);
-	if (lock_page_killable(page) < 0)
+	if (folio_lock_killable(folio) < 0)
 		return VM_FAULT_RETRY;
-	if (page->mapping != inode->i_mapping)
+	if (folio_mapping(folio) != inode->i_mapping)
 		goto out_unlock;
-	wait_for_stable_page(page);
+	folio_wait_stable(folio);
 
 	return VM_FAULT_LOCKED;
 out_unlock:
-	unlock_page(page);
+	folio_unlock(folio);
 	return VM_FAULT_NOPAGE;
 }
 
diff --git a/fs/afs/file.c b/fs/afs/file.c
index eb11d047c0ae..cb6ad61eec3b 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -324,21 +324,24 @@ static int afs_symlink_readpage(struct file *file, struct page *page)
 {
 	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
 	struct afs_read *fsreq;
+	struct folio *folio = page_folio(page);
 	int ret;
 
 	fsreq = afs_alloc_read(GFP_NOFS);
 	if (!fsreq)
 		return -ENOMEM;
 
-	fsreq->pos	= page->index * PAGE_SIZE;
-	fsreq->len	= PAGE_SIZE;
+	fsreq->pos	= folio_pos(folio);
+	fsreq->len	= folio_size(folio);
 	fsreq->vnode	= vnode;
 	fsreq->iter	= &fsreq->def_iter;
 	iov_iter_xarray(&fsreq->def_iter, READ, &page->mapping->i_pages,
 			fsreq->pos, fsreq->len);
 
 	ret = afs_fetch_data(fsreq->vnode, fsreq);
-	page_endio(page, false, ret);
+	if (ret == 0)
+		SetPageUptodate(page);
+	unlock_page(page);
 	return ret;
 }
 
@@ -362,7 +365,7 @@ static int afs_begin_cache_operation(struct netfs_read_request *rreq)
 }
 
 static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
-				 struct page *page, void **_fsdata)
+				 struct folio *folio, void **_fsdata)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 
@@ -385,7 +388,9 @@ const struct netfs_read_request_ops afs_req_ops = {
 
 static int afs_readpage(struct file *file, struct page *page)
 {
-	return netfs_readpage(file, page, &afs_req_ops, NULL);
+	struct folio *folio = page_folio(page);
+
+	return netfs_readpage(file, folio, &afs_req_ops, NULL);
 }
 
 static void afs_readahead(struct readahead_control *ractl)
@@ -397,29 +402,29 @@ static void afs_readahead(struct readahead_control *ractl)
  * Adjust the dirty region of the page on truncation or full invalidation,
  * getting rid of the markers altogether if the region is entirely invalidated.
  */
-static void afs_invalidate_dirty(struct page *page, unsigned int offset,
+static void afs_invalidate_dirty(struct folio *folio, unsigned int offset,
 				 unsigned int length)
 {
-	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
 	unsigned long priv;
 	unsigned int f, t, end = offset + length;
 
-	priv = page_private(page);
+	priv = (unsigned long)folio_get_private(folio);
 
 	/* we clean up only if the entire page is being invalidated */
-	if (offset == 0 && length == thp_size(page))
+	if (offset == 0 && length == folio_size(folio))
 		goto full_invalidate;
 
 	 /* If the page was dirtied by page_mkwrite(), the PTE stays writable
 	  * and we don't get another notification to tell us to expand it
 	  * again.
 	  */
-	if (afs_is_page_dirty_mmapped(priv))
+	if (afs_is_folio_dirty_mmapped(priv))
 		return;
 
 	/* We may need to shorten the dirty region */
-	f = afs_page_dirty_from(page, priv);
-	t = afs_page_dirty_to(page, priv);
+	f = afs_folio_dirty_from(folio, priv);
+	t = afs_folio_dirty_to(folio, priv);
 
 	if (t <= offset || f >= end)
 		return; /* Doesn't overlap */
@@ -437,17 +442,17 @@ static void afs_invalidate_dirty(struct page *page, unsigned int offset,
 	if (f == t)
 		goto undirty;
 
-	priv = afs_page_dirty(page, f, t);
-	set_page_private(page, priv);
-	trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page);
+	priv = afs_folio_dirty(folio, f, t);
+	folio_change_private(folio, (void *)priv);
+	trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
 	return;
 
 undirty:
-	trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page);
-	clear_page_dirty_for_io(page);
+	trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
+	folio_clear_dirty_for_io(folio);
 full_invalidate:
-	trace_afs_page_dirty(vnode, tracepoint_string("inval"), page);
-	detach_page_private(page);
+	trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
+	folio_detach_private(folio);
 }
 
 /*
@@ -458,14 +463,16 @@ full_invalidate:
 static void afs_invalidatepage(struct page *page, unsigned int offset,
 			       unsigned int length)
 {
-	_enter("{%lu},%u,%u", page->index, offset, length);
+	struct folio *folio = page_folio(page);
+
+	_enter("{%lu},%u,%u", folio_index(folio), offset, length);
 
 	BUG_ON(!PageLocked(page));
 
 	if (PagePrivate(page))
-		afs_invalidate_dirty(page, offset, length);
+		afs_invalidate_dirty(folio, offset, length);
 
-	wait_on_page_fscache(page);
+	folio_wait_fscache(folio);
 	_leave("");
 }
 
@@ -475,30 +482,31 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
  */
 static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 {
-	struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+	struct folio *folio = page_folio(page);
+	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
 
 	_enter("{{%llx:%llu}[%lu],%lx},%x",
-	       vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
+	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
 	       gfp_flags);
 
 	/* deny if page is being written to the cache and the caller hasn't
 	 * elected to wait */
 #ifdef CONFIG_AFS_FSCACHE
-	if (PageFsCache(page)) {
+	if (folio_test_fscache(folio)) {
 		if (!(gfp_flags & __GFP_DIRECT_RECLAIM) || !(gfp_flags & __GFP_FS))
 			return false;
-		wait_on_page_fscache(page);
+		folio_wait_fscache(folio);
 	}
 #endif
 
-	if (PagePrivate(page)) {
-		trace_afs_page_dirty(vnode, tracepoint_string("rel"), page);
-		detach_page_private(page);
+	if (folio_test_private(folio)) {
+		trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
+		folio_detach_private(folio);
 	}
 
-	/* indicate that the page can be released */
+	/* Indicate that the folio can be released */
 	_leave(" = T");
-	return 1;
+	return true;
 }
 
 static void afs_add_open_mmap(struct afs_vnode *vnode)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 9357c53faa69..aa4c0d6c9780 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -876,59 +876,59 @@ struct afs_vnode_cache_aux {
 } __packed;
 
 /*
- * We use page->private to hold the amount of the page that we've written to,
+ * We use folio->private to hold the amount of the folio that we've written to,
  * splitting the field into two parts.  However, we need to represent a range
- * 0...PAGE_SIZE, so we reduce the resolution if the size of the page
+ * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
  * exceeds what we can encode.
  */
 #ifdef CONFIG_64BIT
-#define __AFS_PAGE_PRIV_MASK	0x7fffffffUL
-#define __AFS_PAGE_PRIV_SHIFT	32
-#define __AFS_PAGE_PRIV_MMAPPED	0x80000000UL
+#define __AFS_FOLIO_PRIV_MASK		0x7fffffffUL
+#define __AFS_FOLIO_PRIV_SHIFT		32
+#define __AFS_FOLIO_PRIV_MMAPPED	0x80000000UL
 #else
-#define __AFS_PAGE_PRIV_MASK	0x7fffUL
-#define __AFS_PAGE_PRIV_SHIFT	16
-#define __AFS_PAGE_PRIV_MMAPPED	0x8000UL
+#define __AFS_FOLIO_PRIV_MASK		0x7fffUL
+#define __AFS_FOLIO_PRIV_SHIFT		16
+#define __AFS_FOLIO_PRIV_MMAPPED	0x8000UL
 #endif
 
-static inline unsigned int afs_page_dirty_resolution(struct page *page)
+static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
 {
-	int shift = thp_order(page) + PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
+	int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
 	return (shift > 0) ? shift : 0;
 }
 
-static inline size_t afs_page_dirty_from(struct page *page, unsigned long priv)
+static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
 {
-	unsigned long x = priv & __AFS_PAGE_PRIV_MASK;
+	unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
 
 	/* The lower bound is inclusive */
-	return x << afs_page_dirty_resolution(page);
+	return x << afs_folio_dirty_resolution(folio);
 }
 
-static inline size_t afs_page_dirty_to(struct page *page, unsigned long priv)
+static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
 {
-	unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
+	unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
 
 	/* The upper bound is immediately beyond the region */
-	return (x + 1) << afs_page_dirty_resolution(page);
+	return (x + 1) << afs_folio_dirty_resolution(folio);
 }
 
-static inline unsigned long afs_page_dirty(struct page *page, size_t from, size_t to)
+static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
 {
-	unsigned int res = afs_page_dirty_resolution(page);
+	unsigned int res = afs_folio_dirty_resolution(folio);
 	from >>= res;
 	to = (to - 1) >> res;
-	return (to << __AFS_PAGE_PRIV_SHIFT) | from;
+	return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
 }
 
-static inline unsigned long afs_page_dirty_mmapped(unsigned long priv)
+static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
 {
-	return priv | __AFS_PAGE_PRIV_MMAPPED;
+	return priv | __AFS_FOLIO_PRIV_MMAPPED;
 }
 
-static inline bool afs_is_page_dirty_mmapped(unsigned long priv)
+static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
 {
-	return priv & __AFS_PAGE_PRIV_MMAPPED;
+	return priv & __AFS_FOLIO_PRIV_MMAPPED;
 }
 
 #include <trace/events/afs.h>
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 8b1d9c2f6bec..ca4909baf5e6 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -32,7 +32,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 		    struct page **_page, void **fsdata)
 {
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-	struct page *page;
+	struct folio *folio;
 	unsigned long priv;
 	unsigned f, from;
 	unsigned t, to;
@@ -46,12 +46,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	 * file.  We need to do this before we get a lock on the page in case
 	 * there's more than one writer competing for the same cache block.
 	 */
-	ret = netfs_write_begin(file, mapping, pos, len, flags, &page, fsdata,
+	ret = netfs_write_begin(file, mapping, pos, len, flags, &folio, fsdata,
 				&afs_req_ops, NULL);
 	if (ret < 0)
 		return ret;
 
-	index = page->index;
+	index = folio_index(folio);
 	from = pos - index * PAGE_SIZE;
 	to = from + len;
 
@@ -59,14 +59,14 @@ try_again:
 	/* See if this page is already partially written in a way that we can
 	 * merge the new write with.
 	 */
-	if (PagePrivate(page)) {
-		priv = page_private(page);
-		f = afs_page_dirty_from(page, priv);
-		t = afs_page_dirty_to(page, priv);
+	if (folio_test_private(folio)) {
+		priv = (unsigned long)folio_get_private(folio);
+		f = afs_folio_dirty_from(folio, priv);
+		t = afs_folio_dirty_to(folio, priv);
 		ASSERTCMP(f, <=, t);
 
-		if (PageWriteback(page)) {
-			trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), page);
+		if (folio_test_writeback(folio)) {
+			trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
 			goto flush_conflicting_write;
 		}
 		/* If the file is being filled locally, allow inter-write
@@ -78,7 +78,7 @@ try_again:
 			goto flush_conflicting_write;
 	}
 
-	*_page = page;
+	*_page = &folio->page;
 	_leave(" = 0");
 	return 0;
 
@@ -87,17 +87,17 @@ try_again:
 	 */
 flush_conflicting_write:
 	_debug("flush conflict");
-	ret = write_one_page(page);
+	ret = folio_write_one(folio);
 	if (ret < 0)
 		goto error;
 
-	ret = lock_page_killable(page);
+	ret = folio_lock_killable(folio);
 	if (ret < 0)
 		goto error;
 	goto try_again;
 
 error:
-	put_page(page);
+	folio_put(folio);
 	_leave(" = %d", ret);
 	return ret;
 }
@@ -107,24 +107,25 @@ error:
  */
 int afs_write_end(struct file *file, struct address_space *mapping,
 		  loff_t pos, unsigned len, unsigned copied,
-		  struct page *page, void *fsdata)
+		  struct page *subpage, void *fsdata)
 {
+	struct folio *folio = page_folio(subpage);
 	struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 	unsigned long priv;
-	unsigned int f, from = pos & (thp_size(page) - 1);
+	unsigned int f, from = offset_in_folio(folio, pos);
 	unsigned int t, to = from + copied;
 	loff_t i_size, maybe_i_size;
 
 	_enter("{%llx:%llu},{%lx}",
-	       vnode->fid.vid, vnode->fid.vnode, page->index);
+	       vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		if (copied < len) {
 			copied = 0;
 			goto out;
 		}
 
-		SetPageUptodate(page);
+		folio_mark_uptodate(folio);
 	}
 
 	if (copied == 0)
@@ -141,29 +142,29 @@ int afs_write_end(struct file *file, struct address_space *mapping,
 		write_sequnlock(&vnode->cb_lock);
 	}
 
-	if (PagePrivate(page)) {
-		priv = page_private(page);
-		f = afs_page_dirty_from(page, priv);
-		t = afs_page_dirty_to(page, priv);
+	if (folio_test_private(folio)) {
+		priv = (unsigned long)folio_get_private(folio);
+		f = afs_folio_dirty_from(folio, priv);
+		t = afs_folio_dirty_to(folio, priv);
 		if (from < f)
 			f = from;
 		if (to > t)
 			t = to;
-		priv = afs_page_dirty(page, f, t);
-		set_page_private(page, priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), page);
+		priv = afs_folio_dirty(folio, f, t);
+		folio_change_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
 	} else {
-		priv = afs_page_dirty(page, from, to);
-		attach_page_private(page, (void *)priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("dirty"), page);
+		priv = afs_folio_dirty(folio, from, to);
+		folio_attach_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
 	}
 
-	if (set_page_dirty(page))
-		_debug("dirtied %lx", page->index);
+	if (folio_mark_dirty(folio))
+		_debug("dirtied %lx", folio_index(folio));
 
 out:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 	return copied;
 }
 
@@ -174,40 +175,32 @@ static void afs_kill_pages(struct address_space *mapping,
 			   loff_t start, loff_t len)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct pagevec pv;
-	unsigned int loop, psize;
+	struct folio *folio;
+	pgoff_t index = start / PAGE_SIZE;
+	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
 
 	_enter("{%llx:%llu},%llx @%llx",
 	       vnode->fid.vid, vnode->fid.vnode, len, start);
 
-	pagevec_init(&pv);
-
 	do {
-		_debug("kill %llx @%llx", len, start);
-
-		pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
-					      PAGEVEC_SIZE, pv.pages);
-		if (pv.nr == 0)
-			break;
+		_debug("kill %lx (to %lx)", index, last);
 
-		for (loop = 0; loop < pv.nr; loop++) {
-			struct page *page = pv.pages[loop];
+		folio = filemap_get_folio(mapping, index);
+		if (!folio) {
+			next = index + 1;
+			continue;
+		}
 
-			if (page->index * PAGE_SIZE >= start + len)
-				break;
+		next = folio_next_index(folio);
 
-			psize = thp_size(page);
-			start += psize;
-			len -= psize;
-			ClearPageUptodate(page);
-			end_page_writeback(page);
-			lock_page(page);
-			generic_error_remove_page(mapping, page);
-			unlock_page(page);
-		}
+		folio_clear_uptodate(folio);
+		folio_end_writeback(folio);
+		folio_lock(folio);
+		generic_error_remove_page(mapping, &folio->page);
+		folio_unlock(folio);
+		folio_put(folio);
 
-		__pagevec_release(&pv);
-	} while (len > 0);
+	} while (index = next, index <= last);
 
 	_leave("");
 }
@@ -220,37 +213,27 @@ static void afs_redirty_pages(struct writeback_control *wbc,
 			      loff_t start, loff_t len)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	struct pagevec pv;
-	unsigned int loop, psize;
+	struct folio *folio;
+	pgoff_t index = start / PAGE_SIZE;
+	pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
 
 	_enter("{%llx:%llu},%llx @%llx",
 	       vnode->fid.vid, vnode->fid.vnode, len, start);
 
-	pagevec_init(&pv);
-
 	do {
 		_debug("redirty %llx @%llx", len, start);
 
-		pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
-					      PAGEVEC_SIZE, pv.pages);
-		if (pv.nr == 0)
-			break;
-
-		for (loop = 0; loop < pv.nr; loop++) {
-			struct page *page = pv.pages[loop];
-
-			if (page->index * PAGE_SIZE >= start + len)
-				break;
-
-			psize = thp_size(page);
-			start += psize;
-			len -= psize;
-			redirty_page_for_writepage(wbc, page);
-			end_page_writeback(page);
+		folio = filemap_get_folio(mapping, index);
+		if (!folio) {
+			next = index + 1;
+			continue;
 		}
 
-		__pagevec_release(&pv);
-	} while (len > 0);
+		next = index + folio_nr_pages(folio);
+		folio_redirty_for_writepage(wbc, folio);
+		folio_end_writeback(folio);
+		folio_put(folio);
+	} while (index = next, index <= last);
 
 	_leave("");
 }
@@ -261,7 +244,7 @@ static void afs_redirty_pages(struct writeback_control *wbc,
 static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
 {
 	struct address_space *mapping = vnode->vfs_inode.i_mapping;
-	struct page *page;
+	struct folio *folio;
 	pgoff_t end;
 
 	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
@@ -272,15 +255,16 @@ static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsign
 	rcu_read_lock();
 
 	end = (start + len - 1) / PAGE_SIZE;
-	xas_for_each(&xas, page, end) {
-		if (!PageWriteback(page)) {
-			kdebug("bad %x @%llx page %lx %lx", len, start, page->index, end);
-			ASSERT(PageWriteback(page));
+	xas_for_each(&xas, folio, end) {
+		if (!folio_test_writeback(folio)) {
+			kdebug("bad %x @%llx page %lx %lx",
+			       len, start, folio_index(folio), end);
+			ASSERT(folio_test_writeback(folio));
 		}
 
-		trace_afs_page_dirty(vnode, tracepoint_string("clear"), page);
-		detach_page_private(page);
-		page_endio(page, true, 0);
+		trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
+		folio_detach_private(folio);
+		folio_end_writeback(folio);
 	}
 
 	rcu_read_unlock();
@@ -437,7 +421,7 @@ static void afs_extend_writeback(struct address_space *mapping,
 				 unsigned int *_len)
 {
 	struct pagevec pvec;
-	struct page *page;
+	struct folio *folio;
 	unsigned long priv;
 	unsigned int psize, filler = 0;
 	unsigned int f, t;
@@ -456,43 +440,43 @@ static void afs_extend_writeback(struct address_space *mapping,
 		 */
 		rcu_read_lock();
 
-		xas_for_each(&xas, page, ULONG_MAX) {
+		xas_for_each(&xas, folio, ULONG_MAX) {
 			stop = true;
-			if (xas_retry(&xas, page))
+			if (xas_retry(&xas, folio))
 				continue;
-			if (xa_is_value(page))
+			if (xa_is_value(folio))
 				break;
-			if (page->index != index)
+			if (folio_index(folio) != index)
 				break;
 
-			if (!page_cache_get_speculative(page)) {
+			if (!folio_try_get_rcu(folio)) {
 				xas_reset(&xas);
 				continue;
 			}
 
 			/* Has the page moved or been split? */
-			if (unlikely(page != xas_reload(&xas))) {
-				put_page(page);
+			if (unlikely(folio != xas_reload(&xas))) {
+				folio_put(folio);
 				break;
 			}
 
-			if (!trylock_page(page)) {
-				put_page(page);
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
 				break;
 			}
-			if (!PageDirty(page) || PageWriteback(page)) {
-				unlock_page(page);
-				put_page(page);
+			if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
+				folio_unlock(folio);
+				folio_put(folio);
 				break;
 			}
 
-			psize = thp_size(page);
-			priv = page_private(page);
-			f = afs_page_dirty_from(page, priv);
-			t = afs_page_dirty_to(page, priv);
+			psize = folio_size(folio);
+			priv = (unsigned long)folio_get_private(folio);
+			f = afs_folio_dirty_from(folio, priv);
+			t = afs_folio_dirty_to(folio, priv);
 			if (f != 0 && !new_content) {
-				unlock_page(page);
-				put_page(page);
+				folio_unlock(folio);
+				folio_put(folio);
 				break;
 			}
 
@@ -503,8 +487,8 @@ static void afs_extend_writeback(struct address_space *mapping,
 			else if (t == psize || new_content)
 				stop = false;
 
-			index += thp_nr_pages(page);
-			if (!pagevec_add(&pvec, page))
+			index += folio_nr_pages(folio);
+			if (!pagevec_add(&pvec, &folio->page))
 				break;
 			if (stop)
 				break;
@@ -521,16 +505,16 @@ static void afs_extend_writeback(struct address_space *mapping,
 			break;
 
 		for (i = 0; i < pagevec_count(&pvec); i++) {
-			page = pvec.pages[i];
-			trace_afs_page_dirty(vnode, tracepoint_string("store+"), page);
+			folio = page_folio(pvec.pages[i]);
+			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
 
-			if (!clear_page_dirty_for_io(page))
+			if (!folio_clear_dirty_for_io(folio))
 				BUG();
-			if (test_set_page_writeback(page))
+			if (folio_start_writeback(folio))
 				BUG();
 
-			*_count -= thp_nr_pages(page);
-			unlock_page(page);
+			*_count -= folio_nr_pages(folio);
+			folio_unlock(folio);
 		}
 
 		pagevec_release(&pvec);
@@ -544,10 +528,10 @@ static void afs_extend_writeback(struct address_space *mapping,
  * Synchronously write back the locked page and any subsequent non-locked dirty
  * pages.
  */
-static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
-					       struct writeback_control *wbc,
-					       struct page *page,
-					       loff_t start, loff_t end)
+static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
+						struct writeback_control *wbc,
+						struct folio *folio,
+						loff_t start, loff_t end)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
 	struct iov_iter iter;
@@ -558,22 +542,22 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
 	long count = wbc->nr_to_write;
 	int ret;
 
-	_enter(",%lx,%llx-%llx", page->index, start, end);
+	_enter(",%lx,%llx-%llx", folio_index(folio), start, end);
 
-	if (test_set_page_writeback(page))
+	if (folio_start_writeback(folio))
 		BUG();
 
-	count -= thp_nr_pages(page);
+	count -= folio_nr_pages(folio);
 
 	/* Find all consecutive lockable dirty pages that have contiguous
 	 * written regions, stopping when we find a page that is not
 	 * immediately lockable, is not dirty or is missing, or we reach the
 	 * end of the range.
 	 */
-	priv = page_private(page);
-	offset = afs_page_dirty_from(page, priv);
-	to = afs_page_dirty_to(page, priv);
-	trace_afs_page_dirty(vnode, tracepoint_string("store"), page);
+	priv = (unsigned long)folio_get_private(folio);
+	offset = afs_folio_dirty_from(folio, priv);
+	to = afs_folio_dirty_to(folio, priv);
+	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
 
 	len = to - offset;
 	start += offset;
@@ -586,7 +570,7 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
 		max_len = min_t(unsigned long long, max_len, i_size - start);
 
 		if (len < max_len &&
-		    (to == thp_size(page) || new_content))
+		    (to == folio_size(folio) || new_content))
 			afs_extend_writeback(mapping, vnode, &count,
 					     start, max_len, new_content, &len);
 		len = min_t(loff_t, len, max_len);
@@ -596,7 +580,7 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
 	 * set; the first page is still locked at this point, but all the rest
 	 * have been unlocked.
 	 */
-	unlock_page(page);
+	folio_unlock(folio);
 
 	if (start < i_size) {
 		_debug("write back %x @%llx [%llx]", len, start, i_size);
@@ -657,16 +641,17 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
  * write a page back to the server
  * - the caller locked the page for us
  */
-int afs_writepage(struct page *page, struct writeback_control *wbc)
+int afs_writepage(struct page *subpage, struct writeback_control *wbc)
 {
+	struct folio *folio = page_folio(subpage);
 	ssize_t ret;
 	loff_t start;
 
-	_enter("{%lx},", page->index);
+	_enter("{%lx},", folio_index(folio));
 
-	start = page->index * PAGE_SIZE;
-	ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
-					      start, LLONG_MAX - start);
+	start = folio_index(folio) * PAGE_SIZE;
+	ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc,
+					       folio, start, LLONG_MAX - start);
 	if (ret < 0) {
 		_leave(" = %zd", ret);
 		return ret;
@@ -683,7 +668,8 @@ static int afs_writepages_region(struct address_space *mapping,
 				 struct writeback_control *wbc,
 				 loff_t start, loff_t end, loff_t *_next)
 {
-	struct page *page;
+	struct folio *folio;
+	struct page *head_page;
 	ssize_t ret;
 	int n;
 
@@ -693,13 +679,14 @@ static int afs_writepages_region(struct address_space *mapping,
 		pgoff_t index = start / PAGE_SIZE;
 
 		n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
-					     PAGECACHE_TAG_DIRTY, 1, &page);
+					     PAGECACHE_TAG_DIRTY, 1, &head_page);
 		if (!n)
 			break;
 
-		start = (loff_t)page->index * PAGE_SIZE; /* May regress with THPs */
+		folio = page_folio(head_page);
+		start = folio_pos(folio); /* May regress with THPs */
 
-		_debug("wback %lx", page->index);
+		_debug("wback %lx", folio_index(folio));
 
 		/* At this point we hold neither the i_pages lock nor the
 		 * page lock: the page may be truncated or invalidated
@@ -707,37 +694,38 @@ static int afs_writepages_region(struct address_space *mapping,
 		 * back from swapper_space to tmpfs file mapping
 		 */
 		if (wbc->sync_mode != WB_SYNC_NONE) {
-			ret = lock_page_killable(page);
+			ret = folio_lock_killable(folio);
 			if (ret < 0) {
-				put_page(page);
+				folio_put(folio);
 				return ret;
 			}
 		} else {
-			if (!trylock_page(page)) {
-				put_page(page);
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
 				return 0;
 			}
 		}
 
-		if (page->mapping != mapping || !PageDirty(page)) {
-			start += thp_size(page);
-			unlock_page(page);
-			put_page(page);
+		if (folio_mapping(folio) != mapping ||
+		    !folio_test_dirty(folio)) {
+			start += folio_size(folio);
+			folio_unlock(folio);
+			folio_put(folio);
 			continue;
 		}
 
-		if (PageWriteback(page)) {
-			unlock_page(page);
+		if (folio_test_writeback(folio)) {
+			folio_unlock(folio);
 			if (wbc->sync_mode != WB_SYNC_NONE)
-				wait_on_page_writeback(page);
-			put_page(page);
+				folio_wait_writeback(folio);
+			folio_put(folio);
 			continue;
 		}
 
-		if (!clear_page_dirty_for_io(page))
+		if (!folio_clear_dirty_for_io(folio))
 			BUG();
-		ret = afs_write_back_from_locked_page(mapping, wbc, page, start, end);
-		put_page(page);
+		ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+		folio_put(folio);
 		if (ret < 0) {
 			_leave(" = %zd", ret);
 			return ret;
@@ -862,7 +850,6 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 {
 	struct folio *folio = page_folio(vmf->page);
-	struct page *page = &folio->page;
 	struct file *file = vmf->vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct afs_vnode *vnode = AFS_FS_I(inode);
@@ -870,7 +857,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 	unsigned long priv;
 	vm_fault_t ret = VM_FAULT_RETRY;
 
-	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
+	_enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
 
 	afs_validate(vnode, af->key);
 
@@ -880,18 +867,18 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 	 * be modified.  We then assume the entire page will need writing back.
 	 */
 #ifdef CONFIG_AFS_FSCACHE
-	if (PageFsCache(page) &&
-	    wait_on_page_fscache_killable(page) < 0)
+	if (folio_test_fscache(folio) &&
+	    folio_wait_fscache_killable(folio) < 0)
 		goto out;
 #endif
 
 	if (folio_wait_writeback_killable(folio))
 		goto out;
 
-	if (lock_page_killable(page) < 0)
+	if (folio_lock_killable(folio) < 0)
 		goto out;
 
-	/* We mustn't change page->private until writeback is complete as that
+	/* We mustn't change folio->private until writeback is complete as that
 	 * details the portion of the page we need to write back and we might
 	 * need to redirty the page if there's a problem.
 	 */
@@ -900,14 +887,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 		goto out;
 	}
 
-	priv = afs_page_dirty(page, 0, thp_size(page));
-	priv = afs_page_dirty_mmapped(priv);
-	if (PagePrivate(page)) {
-		set_page_private(page, priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("mkwrite+"), page);
+	priv = afs_folio_dirty(folio, 0, folio_size(folio));
+	priv = afs_folio_dirty_mmapped(priv);
+	if (folio_test_private(folio)) {
+		folio_change_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
 	} else {
-		attach_page_private(page, (void *)priv);
-		trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), page);
+		folio_attach_private(folio, (void *)priv);
+		trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
 	}
 	file_update_time(file);
 
@@ -948,38 +935,38 @@ void afs_prune_wb_keys(struct afs_vnode *vnode)
 /*
  * Clean up a page during invalidation.
  */
-int afs_launder_page(struct page *page)
+int afs_launder_page(struct page *subpage)
 {
-	struct address_space *mapping = page->mapping;
-	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+	struct folio *folio = page_folio(subpage);
+	struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
 	struct iov_iter iter;
 	struct bio_vec bv[1];
 	unsigned long priv;
 	unsigned int f, t;
 	int ret = 0;
 
-	_enter("{%lx}", page->index);
+	_enter("{%lx}", folio_index(folio));
 
-	priv = page_private(page);
-	if (clear_page_dirty_for_io(page)) {
+	priv = (unsigned long)folio_get_private(folio);
+	if (folio_clear_dirty_for_io(folio)) {
 		f = 0;
-		t = thp_size(page);
-		if (PagePrivate(page)) {
-			f = afs_page_dirty_from(page, priv);
-			t = afs_page_dirty_to(page, priv);
+		t = folio_size(folio);
+		if (folio_test_private(folio)) {
+			f = afs_folio_dirty_from(folio, priv);
+			t = afs_folio_dirty_to(folio, priv);
 		}
 
-		bv[0].bv_page = page;
+		bv[0].bv_page = &folio->page;
 		bv[0].bv_offset = f;
 		bv[0].bv_len = t - f;
 		iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
 
-		trace_afs_page_dirty(vnode, tracepoint_string("launder"), page);
-		ret = afs_store_data(vnode, &iter, page_offset(page) + f, true);
+		trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
+		ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
 	}
 
-	trace_afs_page_dirty(vnode, tracepoint_string("laundered"), page);
-	detach_page_private(page);
-	wait_on_page_fscache(page);
+	trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
+	folio_detach_private(folio);
+	folio_wait_fscache(folio);
 	return ret;
 }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 99b80b5c7a93..04bbe853bcb1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -63,7 +63,7 @@
 	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
 
 static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
-					struct page *page, void **_fsdata);
+					struct folio *folio, void **_fsdata);
 
 static inline struct ceph_snap_context *page_snap_context(struct page *page)
 {
@@ -317,13 +317,14 @@ static const struct netfs_read_request_ops ceph_netfs_read_ops = {
 };
 
 /* read a single page, without unlocking it. */
-static int ceph_readpage(struct file *file, struct page *page)
+static int ceph_readpage(struct file *file, struct page *subpage)
 {
+	struct folio *folio = page_folio(subpage);
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_vino vino = ceph_vino(inode);
-	u64 off = page_offset(page);
-	u64 len = thp_size(page);
+	size_t len = folio_size(folio);
+	u64 off = folio_file_pos(folio);
 
 	if (ci->i_inline_version != CEPH_INLINE_NONE) {
 		/*
@@ -331,19 +332,19 @@ static int ceph_readpage(struct file *file, struct page *page)
 		 * into page cache while getting Fcr caps.
 		 */
 		if (off == 0) {
-			unlock_page(page);
+			folio_unlock(folio);
 			return -EINVAL;
 		}
-		zero_user_segment(page, 0, thp_size(page));
-		SetPageUptodate(page);
-		unlock_page(page);
+		zero_user_segment(&folio->page, 0, folio_size(folio));
+		folio_mark_uptodate(folio);
+		folio_unlock(folio);
 		return 0;
 	}
 
-	dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
-	     vino.ino, vino.snap, file, off, len, page, page->index);
+	dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n",
+	     vino.ino, vino.snap, file, off, len, folio, folio_index(folio));
 
-	return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
+	return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL);
 }
 
 static void ceph_readahead(struct readahead_control *ractl)
@@ -1187,18 +1188,18 @@ ceph_find_incompatible(struct page *page)
 }
 
 static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
-					struct page *page, void **_fsdata)
+					struct folio *folio, void **_fsdata)
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_snap_context *snapc;
 
-	snapc = ceph_find_incompatible(page);
+	snapc = ceph_find_incompatible(folio_page(folio, 0));
 	if (snapc) {
 		int r;
 
-		unlock_page(page);
-		put_page(page);
+		folio_unlock(folio);
+		folio_put(folio);
 		if (IS_ERR(snapc))
 			return PTR_ERR(snapc);
 
@@ -1216,12 +1217,12 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned
  * clean, or already dirty within the same snap context.
  */
 static int ceph_write_begin(struct file *file, struct address_space *mapping,
-			    loff_t pos, unsigned len, unsigned flags,
+			    loff_t pos, unsigned len, unsigned aop_flags,
 			    struct page **pagep, void **fsdata)
 {
 	struct inode *inode = file_inode(file);
 	struct ceph_inode_info *ci = ceph_inode(inode);
-	struct page *page = NULL;
+	struct folio *folio = NULL;
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int r;
 
@@ -1230,39 +1231,43 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 	 * for inline_version sent to the MDS.
 	 */
 	if (ci->i_inline_version != CEPH_INLINE_NONE) {
-		page = grab_cache_page_write_begin(mapping, index, flags);
-		if (!page)
+		unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+		if (aop_flags & AOP_FLAG_NOFS)
+			fgp_flags |= FGP_NOFS;
+		folio = __filemap_get_folio(mapping, index, fgp_flags,
+					    mapping_gfp_mask(mapping));
+		if (!folio)
 			return -ENOMEM;
 
 		/*
 		 * The inline_version on a new inode is set to 1. If that's the
-		 * case, then the page is brand new and isn't yet Uptodate.
+		 * case, then the folio is brand new and isn't yet Uptodate.
 		 */
 		r = 0;
 		if (index == 0 && ci->i_inline_version != 1) {
-			if (!PageUptodate(page)) {
+			if (!folio_test_uptodate(folio)) {
 				WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
 					  ci->i_inline_version);
 				r = -EINVAL;
 			}
 			goto out;
 		}
-		zero_user_segment(page, 0, thp_size(page));
-		SetPageUptodate(page);
+		zero_user_segment(&folio->page, 0, folio_size(folio));
+		folio_mark_uptodate(folio);
 		goto out;
 	}
 
-	r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
+	r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
 			      &ceph_netfs_read_ops, NULL);
 out:
 	if (r == 0)
-		wait_on_page_fscache(page);
+		folio_wait_fscache(folio);
 	if (r < 0) {
-		if (page)
-			put_page(page);
+		if (folio)
+			folio_put(folio);
 	} else {
-		WARN_ON_ONCE(!PageLocked(page));
-		*pagep = page;
+		WARN_ON_ONCE(!folio_test_locked(folio));
+		*pagep = &folio->page;
 	}
 	return r;
 }
@@ -1273,32 +1278,33 @@ out:
  */
 static int ceph_write_end(struct file *file, struct address_space *mapping,
 			  loff_t pos, unsigned len, unsigned copied,
-			  struct page *page, void *fsdata)
+			  struct page *subpage, void *fsdata)
 {
+	struct folio *folio = page_folio(subpage);
 	struct inode *inode = file_inode(file);
 	bool check_cap = false;
 
-	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
-	     inode, page, (int)pos, (int)copied, (int)len);
+	dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file,
+	     inode, folio, (int)pos, (int)copied, (int)len);
 
-	if (!PageUptodate(page)) {
+	if (!folio_test_uptodate(folio)) {
 		/* just return that nothing was copied on a short copy */
 		if (copied < len) {
 			copied = 0;
 			goto out;
 		}
-		SetPageUptodate(page);
+		folio_mark_uptodate(folio);
 	}
 
 	/* did file size increase? */
 	if (pos+copied > i_size_read(inode))
 		check_cap = ceph_inode_set_size(inode, pos+copied);
 
-	set_page_dirty(page);
+	folio_mark_dirty(folio);
 
 out:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 
 	if (check_cap)
 		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 994ec22d4040..9320a42dfaf9 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -230,7 +230,7 @@ static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async
 
 /*
  * Deal with the completion of writing the data to the cache.  We have to clear
- * the PG_fscache bits on the pages involved and release the caller's ref.
+ * the PG_fscache bits on the folios involved and release the caller's ref.
  *
  * May be called in softirq mode and we inherit a ref from the caller.
  */
@@ -238,7 +238,7 @@ static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
 					  bool was_async)
 {
 	struct netfs_read_subrequest *subreq;
-	struct page *page;
+	struct folio *folio;
 	pgoff_t unlocked = 0;
 	bool have_unlocked = false;
 
@@ -247,14 +247,14 @@ static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
 	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
 		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
 
-		xas_for_each(&xas, page, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
 			/* We might have multiple writes from the same huge
-			 * page, but we mustn't unlock a page more than once.
+			 * folio, but we mustn't unlock a folio more than once.
 			 */
-			if (have_unlocked && page->index <= unlocked)
+			if (have_unlocked && folio_index(folio) <= unlocked)
 				continue;
-			unlocked = page->index;
-			end_page_fscache(page);
+			unlocked = folio_index(folio);
+			folio_end_fscache(folio);
 			have_unlocked = true;
 		}
 	}
@@ -367,18 +367,17 @@ static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
 }
 
 /*
- * Unlock the pages in a read operation.  We need to set PG_fscache on any
- * pages we're going to write back before we unlock them.
+ * Unlock the folios in a read operation.  We need to set PG_fscache on any
+ * folios we're going to write back before we unlock them.
  */
 static void netfs_rreq_unlock(struct netfs_read_request *rreq)
 {
 	struct netfs_read_subrequest *subreq;
-	struct page *page;
+	struct folio *folio;
 	unsigned int iopos, account = 0;
 	pgoff_t start_page = rreq->start / PAGE_SIZE;
 	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
 	bool subreq_failed = false;
-	int i;
 
 	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
 
@@ -403,9 +402,9 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
 	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
 
 	rcu_read_lock();
-	xas_for_each(&xas, page, last_page) {
-		unsigned int pgpos = (page->index - start_page) * PAGE_SIZE;
-		unsigned int pgend = pgpos + thp_size(page);
+	xas_for_each(&xas, folio, last_page) {
+		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
+		unsigned int pgend = pgpos + folio_size(folio);
 		bool pg_failed = false;
 
 		for (;;) {
@@ -414,7 +413,7 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
 				break;
 			}
 			if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
-				set_page_fscache(page);
+				folio_start_fscache(folio);
 			pg_failed |= subreq_failed;
 			if (pgend < iopos + subreq->len)
 				break;
@@ -433,17 +432,16 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
 		}
 
 		if (!pg_failed) {
-			for (i = 0; i < thp_nr_pages(page); i++)
-				flush_dcache_page(page);
-			SetPageUptodate(page);
+			flush_dcache_folio(folio);
+			folio_mark_uptodate(folio);
 		}
 
-		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_PAGES, &rreq->flags)) {
-			if (page->index == rreq->no_unlock_page &&
-			    test_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags))
+		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+			if (folio_index(folio) == rreq->no_unlock_folio &&
+			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
 				_debug("no unlock");
 			else
-				unlock_page(page);
+				folio_unlock(folio);
 		}
 	}
 	rcu_read_unlock();
@@ -876,7 +874,6 @@ void netfs_readahead(struct readahead_control *ractl,
 		     void *netfs_priv)
 {
 	struct netfs_read_request *rreq;
-	struct page *page;
 	unsigned int debug_index = 0;
 	int ret;
 
@@ -911,11 +908,11 @@ void netfs_readahead(struct readahead_control *ractl,
 
 	} while (rreq->submitted < rreq->len);
 
-	/* Drop the refs on the pages here rather than in the cache or
+	/* Drop the refs on the folios here rather than in the cache or
 	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
 	 */
-	while ((page = readahead_page(ractl)))
-		put_page(page);
+	while (readahead_folio(ractl))
+		;
 
 	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
 	if (atomic_dec_and_test(&rreq->nr_rd_ops))
@@ -935,7 +932,7 @@ EXPORT_SYMBOL(netfs_readahead);
 /**
  * netfs_readpage - Helper to manage a readpage request
  * @file: The file to read from
- * @page: The page to read
+ * @folio: The folio to read
  * @ops: The network filesystem's operations for the helper to use
  * @netfs_priv: Private netfs data to be retained in the request
  *
@@ -950,7 +947,7 @@ EXPORT_SYMBOL(netfs_readahead);
  * This is usable whether or not caching is enabled.
  */
 int netfs_readpage(struct file *file,
-		   struct page *page,
+		   struct folio *folio,
 		   const struct netfs_read_request_ops *ops,
 		   void *netfs_priv)
 {
@@ -958,23 +955,23 @@ int netfs_readpage(struct file *file,
 	unsigned int debug_index = 0;
 	int ret;
 
-	_enter("%lx", page_index(page));
+	_enter("%lx", folio_index(folio));
 
 	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
 	if (!rreq) {
 		if (netfs_priv)
-			ops->cleanup(netfs_priv, page_file_mapping(page));
-		unlock_page(page);
+			ops->cleanup(netfs_priv, folio_file_mapping(folio));
+		folio_unlock(folio);
 		return -ENOMEM;
 	}
-	rreq->mapping	= page_file_mapping(page);
-	rreq->start	= page_file_offset(page);
-	rreq->len	= thp_size(page);
+	rreq->mapping	= folio_file_mapping(folio);
+	rreq->start	= folio_file_pos(folio);
+	rreq->len	= folio_size(folio);
 
 	if (ops->begin_cache_operation) {
 		ret = ops->begin_cache_operation(rreq);
 		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
-			unlock_page(page);
+			folio_unlock(folio);
 			goto out;
 		}
 	}
@@ -1012,40 +1009,40 @@ out:
 EXPORT_SYMBOL(netfs_readpage);
 
 /**
- * netfs_skip_page_read - prep a page for writing without reading first
- * @page: page being prepared
+ * netfs_skip_folio_read - prep a folio for writing without reading first
+ * @folio: The folio being prepared
  * @pos: starting position for the write
  * @len: length of write
  *
  * In some cases, write_begin doesn't need to read at all:
- * - full page write
- * - write that lies in a page that is completely beyond EOF
- * - write that covers the the page from start to EOF or beyond it
+ * - full folio write
+ * - write that lies in a folio that is completely beyond EOF
+ * - write that covers the folio from start to EOF or beyond it
  *
  * If any of these criteria are met, then zero out the unwritten parts
- * of the page and return true. Otherwise, return false.
+ * of the folio and return true. Otherwise, return false.
  */
-static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
+static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
 {
-	struct inode *inode = page->mapping->host;
+	struct inode *inode = folio_inode(folio);
 	loff_t i_size = i_size_read(inode);
-	size_t offset = offset_in_thp(page, pos);
+	size_t offset = offset_in_folio(folio, pos);
 
-	/* Full page write */
-	if (offset == 0 && len >= thp_size(page))
+	/* Full folio write */
+	if (offset == 0 && len >= folio_size(folio))
 		return true;
 
-	/* pos beyond last page in the file */
+	/* pos beyond last folio in the file */
 	if (pos - offset >= i_size)
 		goto zero_out;
 
-	/* Write that covers from the start of the page to EOF or beyond */
+	/* Write that covers from the start of the folio to EOF or beyond */
 	if (offset == 0 && (pos + len) >= i_size)
 		goto zero_out;
 
 	return false;
 zero_out:
-	zero_user_segments(page, 0, offset, offset + len, thp_size(page));
+	zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
 	return true;
 }
 
@@ -1054,9 +1051,9 @@ zero_out:
  * @file: The file to read from
  * @mapping: The mapping to read from
  * @pos: File position at which the write will begin
- * @len: The length of the write (may extend beyond the end of the page chosen)
- * @flags: AOP_* flags
- * @_page: Where to put the resultant page
+ * @len: The length of the write (may extend beyond the end of the folio chosen)
+ * @aop_flags: AOP_* flags
+ * @_folio: Where to put the resultant folio
  * @_fsdata: Place for the netfs to store a cookie
  * @ops: The network filesystem's operations for the helper to use
  * @netfs_priv: Private netfs data to be retained in the request
@@ -1072,37 +1069,41 @@ zero_out:
  * issue_op, is mandatory.
  *
  * The check_write_begin() operation can be provided to check for and flush
- * conflicting writes once the page is grabbed and locked.  It is passed a
+ * conflicting writes once the folio is grabbed and locked.  It is passed a
  * pointer to the fsdata cookie that gets returned to the VM to be passed to
  * write_end.  It is permitted to sleep.  It should return 0 if the request
- * should go ahead; unlock the page and return -EAGAIN to cause the page to be
- * regot; or return an error.
+ * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
+ * be regot; or return an error.
  *
  * This is usable whether or not caching is enabled.
  */
 int netfs_write_begin(struct file *file, struct address_space *mapping,
-		      loff_t pos, unsigned int len, unsigned int flags,
-		      struct page **_page, void **_fsdata,
+		      loff_t pos, unsigned int len, unsigned int aop_flags,
+		      struct folio **_folio, void **_fsdata,
 		      const struct netfs_read_request_ops *ops,
 		      void *netfs_priv)
 {
 	struct netfs_read_request *rreq;
-	struct page *page, *xpage;
+	struct folio *folio;
 	struct inode *inode = file_inode(file);
-	unsigned int debug_index = 0;
+	unsigned int debug_index = 0, fgp_flags;
 	pgoff_t index = pos >> PAGE_SHIFT;
 	int ret;
 
 	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
 
 retry:
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
+	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+	if (aop_flags & AOP_FLAG_NOFS)
+		fgp_flags |= FGP_NOFS;
+	folio = __filemap_get_folio(mapping, index, fgp_flags,
+				    mapping_gfp_mask(mapping));
+	if (!folio)
 		return -ENOMEM;
 
 	if (ops->check_write_begin) {
 		/* Allow the netfs (eg. ceph) to flush conflicts. */
-		ret = ops->check_write_begin(file, pos, len, page, _fsdata);
+		ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
 		if (ret < 0) {
 			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
 			if (ret == -EAGAIN)
@@ -1111,28 +1112,28 @@ retry:
 		}
 	}
 
-	if (PageUptodate(page))
-		goto have_page;
+	if (folio_test_uptodate(folio))
+		goto have_folio;
 
 	/* If the page is beyond the EOF, we want to clear it - unless it's
 	 * within the cache granule containing the EOF, in which case we need
 	 * to preload the granule.
 	 */
 	if (!ops->is_cache_enabled(inode) &&
-	    netfs_skip_page_read(page, pos, len)) {
+	    netfs_skip_folio_read(folio, pos, len)) {
 		netfs_stat(&netfs_n_rh_write_zskip);
-		goto have_page_no_wait;
+		goto have_folio_no_wait;
 	}
 
 	ret = -ENOMEM;
 	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
 	if (!rreq)
 		goto error;
-	rreq->mapping		= page->mapping;
-	rreq->start		= page_offset(page);
-	rreq->len		= thp_size(page);
-	rreq->no_unlock_page	= page->index;
-	__set_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags);
+	rreq->mapping		= folio_file_mapping(folio);
+	rreq->start		= folio_file_pos(folio);
+	rreq->len		= folio_size(folio);
+	rreq->no_unlock_folio	= folio_index(folio);
+	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
 	netfs_priv = NULL;
 
 	if (ops->begin_cache_operation) {
@@ -1147,14 +1148,14 @@ retry:
 	/* Expand the request to meet caching requirements and download
 	 * preferences.
 	 */
-	ractl._nr_pages = thp_nr_pages(page);
+	ractl._nr_pages = folio_nr_pages(folio);
 	netfs_rreq_expand(rreq, &ractl);
 	netfs_get_read_request(rreq);
 
-	/* We hold the page locks, so we can drop the references */
-	while ((xpage = readahead_page(&ractl)))
-		if (xpage != page)
-			put_page(xpage);
+	/* We hold the folio locks, so we can drop the references */
+	folio_get(folio);
+	while (readahead_folio(&ractl))
+		;
 
 	atomic_set(&rreq->nr_rd_ops, 1);
 	do {
@@ -1184,22 +1185,22 @@ retry:
 	if (ret < 0)
 		goto error;
 
-have_page:
-	ret = wait_on_page_fscache_killable(page);
+have_folio:
+	ret = folio_wait_fscache_killable(folio);
 	if (ret < 0)
 		goto error;
-have_page_no_wait:
+have_folio_no_wait:
 	if (netfs_priv)
 		ops->cleanup(netfs_priv, mapping);
-	*_page = page;
+	*_folio = folio;
 	_leave(" = 0");
 	return 0;
 
 error_put:
 	netfs_put_read_request(rreq, false);
 error:
-	unlock_page(page);
-	put_page(page);
+	folio_unlock(folio);
+	folio_put(folio);
 	if (netfs_priv)
 		ops->cleanup(netfs_priv, mapping);
 	_leave(" = %d", ret);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 12c4177f7703..ca0683b9e3d1 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -166,13 +166,13 @@ struct netfs_read_request {
 	short			error;		/* 0 or error that occurred */
 	loff_t			i_size;		/* Size of the file */
 	loff_t			start;		/* Start position */
-	pgoff_t			no_unlock_page;	/* Don't unlock this page after read */
+	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
 	refcount_t		usage;
 	unsigned long		flags;
 #define NETFS_RREQ_INCOMPLETE_IO	0	/* Some ioreqs terminated short or with error */
 #define NETFS_RREQ_WRITE_TO_CACHE	1	/* Need to write to the cache */
-#define NETFS_RREQ_NO_UNLOCK_PAGE	2	/* Don't unlock no_unlock_page on completion */
-#define NETFS_RREQ_DONT_UNLOCK_PAGES	3	/* Don't unlock the pages on completion */
+#define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
+#define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED		4	/* The request failed */
 #define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
 	const struct netfs_read_request_ops *netfs_ops;
@@ -190,7 +190,7 @@ struct netfs_read_request_ops {
 	void (*issue_op)(struct netfs_read_subrequest *subreq);
 	bool (*is_still_valid)(struct netfs_read_request *rreq);
 	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
-				 struct page *page, void **_fsdata);
+				 struct folio *folio, void **_fsdata);
 	void (*done)(struct netfs_read_request *rreq);
 	void (*cleanup)(struct address_space *mapping, void *netfs_priv);
 };
@@ -240,11 +240,11 @@ extern void netfs_readahead(struct readahead_control *,
 			    const struct netfs_read_request_ops *,
 			    void *);
 extern int netfs_readpage(struct file *,
-			  struct page *,
+			  struct folio *,
 			  const struct netfs_read_request_ops *,
 			  void *);
 extern int netfs_write_begin(struct file *, struct address_space *,
-			     loff_t, unsigned int, unsigned int, struct page **,
+			     loff_t, unsigned int, unsigned int, struct folio **,
 			     void **,
 			     const struct netfs_read_request_ops *,
 			     void *);
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index bca73e8c8cde..499f5fabd20f 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -1016,31 +1016,32 @@ TRACE_EVENT(afs_dir_check_failed,
 		      __entry->vnode, __entry->off, __entry->i_size)
 	    );
 
-TRACE_EVENT(afs_page_dirty,
-	    TP_PROTO(struct afs_vnode *vnode, const char *where, struct page *page),
+TRACE_EVENT(afs_folio_dirty,
+	    TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
 
-	    TP_ARGS(vnode, where, page),
+	    TP_ARGS(vnode, where, folio),
 
 	    TP_STRUCT__entry(
 		    __field(struct afs_vnode *,		vnode		)
 		    __field(const char *,		where		)
-		    __field(pgoff_t,			page		)
+		    __field(pgoff_t,			index		)
 		    __field(unsigned long,		from		)
 		    __field(unsigned long,		to		)
 			     ),
 
 	    TP_fast_assign(
+		    unsigned long priv = (unsigned long)folio_get_private(folio);
 		    __entry->vnode = vnode;
 		    __entry->where = where;
-		    __entry->page = page->index;
-		    __entry->from = afs_page_dirty_from(page, page->private);
-		    __entry->to = afs_page_dirty_to(page, page->private);
-		    __entry->to |= (afs_is_page_dirty_mmapped(page->private) ?
-				    (1UL << (BITS_PER_LONG - 1)) : 0);
+		    __entry->index = folio_index(folio);
+		    __entry->from  = afs_folio_dirty_from(folio, priv);
+		    __entry->to    = afs_folio_dirty_to(folio, priv);
+		    __entry->to   |= (afs_is_folio_dirty_mmapped(priv) ?
+				      (1UL << (BITS_PER_LONG - 1)) : 0);
 			   ),
 
 	    TP_printk("vn=%p %lx %s %lx-%lx%s",
-		      __entry->vnode, __entry->page, __entry->where,
+		      __entry->vnode, __entry->index, __entry->where,
 		      __entry->from,
 		      __entry->to & ~(1UL << (BITS_PER_LONG - 1)),
 		      __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
-- 
cgit v1.2.3


From 68dbbe7d5b4fde736d104cbbc9a2fce875562012 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Thu, 4 Nov 2021 17:31:58 +0900
Subject: libata: fix read log timeout value

Some ATA drives are very slow to respond to READ_LOG_EXT and
READ_LOG_DMA_EXT commands issued from ata_dev_configure() when the
device is revalidated right after resuming a system or inserting the
ATA adapter driver (e.g. ahci). The default 5s timeout
(ATA_EH_CMD_DFL_TIMEOUT) used for these commands is too short, causing
errors during the device configuration. Ex:

...
ata9: SATA max UDMA/133 abar m524288@0x9d200000 port 0x9d200400 irq 209
ata9: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
ata9.00: ATA-9: XXX  XXXXXXXXXXXXXXX, XXXXXXXX, max UDMA/133
ata9.00: qc timeout (cmd 0x2f)
ata9.00: Read log page 0x00 failed, Emask 0x4
ata9.00: Read log page 0x00 failed, Emask 0x40
ata9.00: NCQ Send/Recv Log not supported
ata9.00: Read log page 0x08 failed, Emask 0x40
ata9.00: 27344764928 sectors, multi 16: LBA48 NCQ (depth 32), AA
ata9.00: Read log page 0x00 failed, Emask 0x40
ata9.00: ATA Identify Device Log not supported
ata9.00: failed to set xfermode (err_mask=0x40)
ata9: SATA link up 6.0 Gbps (SStatus 133 SControl 300)
ata9.00: configured for UDMA/133
...

The timeout error causes a soft reset of the drive link, followed in
most cases by a successful revalidation as that give enough time to the
drive to become fully ready to quickly process the read log commands.
However, in some cases, this also fails resulting in the device being
dropped.

Fix this by using adding the ata_eh_revalidate_timeouts entries for the
READ_LOG_EXT and READ_LOG_DMA_EXT commands. This defines a timeout
increased to 15s, retriable one time.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 drivers/ata/libata-eh.c | 8 ++++++++
 include/linux/libata.h  | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index bf9c4b6c5c3d..1d4a6f1e88cd 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -93,6 +93,12 @@ static const unsigned long ata_eh_identify_timeouts[] = {
 	ULONG_MAX,
 };
 
+static const unsigned long ata_eh_revalidate_timeouts[] = {
+	15000,	/* Some drives are slow to read log pages when waking-up */
+	15000,  /* combined time till here is enough even for media access */
+	ULONG_MAX,
+};
+
 static const unsigned long ata_eh_flush_timeouts[] = {
 	15000,	/* be generous with flush */
 	15000,  /* ditto */
@@ -129,6 +135,8 @@ static const struct ata_eh_cmd_timeout_ent
 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
 	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
 	  .timeouts = ata_eh_identify_timeouts, },
+	{ .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
+	  .timeouts = ata_eh_revalidate_timeouts, },
 	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
 	  .timeouts = ata_eh_other_timeouts, },
 	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 2884383f1718..5331557316e8 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -394,7 +394,7 @@ enum {
 	/* This should match the actual table size of
 	 * ata_eh_cmd_timeout_table in libata-eh.c.
 	 */
-	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6,
+	ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7,
 
 	/* Horkage types. May be set by libata or controller on drives
 	   (some horkage may be drive/controller pair dependent */
-- 
cgit v1.2.3


From a25efb3863d068929f0bbeb87a995df11507e691 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 23 Sep 2021 13:57:42 +0200
Subject: dma-buf: add dma_fence_describe and dma_resv_describe v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add functions to dump dma_fence and dma_resv objects into a seq_file and
use them for printing the debugfs information.

v2: fix missing include reported by test robot.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20211103081231.18578-2-christian.koenig@amd.com
---
 drivers/dma-buf/dma-buf.c   | 11 +----------
 drivers/dma-buf/dma-fence.c | 17 +++++++++++++++++
 drivers/dma-buf/dma-resv.c  | 23 +++++++++++++++++++++++
 include/linux/dma-fence.h   |  1 +
 include/linux/dma-resv.h    |  1 +
 5 files changed, 43 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 3f63d58bf68a..385cd037325e 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -1321,8 +1321,6 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
 	struct dma_buf *buf_obj;
 	struct dma_buf_attachment *attach_obj;
-	struct dma_resv_iter cursor;
-	struct dma_fence *fence;
 	int count = 0, attach_count;
 	size_t size = 0;
 	int ret;
@@ -1350,14 +1348,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
 				file_inode(buf_obj->file)->i_ino,
 				buf_obj->name ?: "");
 
-		dma_resv_for_each_fence(&cursor, buf_obj->resv, true, fence) {
-			seq_printf(s, "\t%s fence: %s %s %ssignalled\n",
-				   dma_resv_iter_is_exclusive(&cursor) ?
-					"Exclusive" : "Shared",
-				   fence->ops->get_driver_name(fence),
-				   fence->ops->get_timeline_name(fence),
-				   dma_fence_is_signaled(fence) ? "" : "un");
-		}
+		dma_resv_describe(buf_obj->resv, s);
 
 		seq_puts(s, "\tAttached Devices:\n");
 		attach_count = 0;
diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c
index 1e82ecd443fa..066400ed8841 100644
--- a/drivers/dma-buf/dma-fence.c
+++ b/drivers/dma-buf/dma-fence.c
@@ -15,6 +15,7 @@
 #include <linux/atomic.h>
 #include <linux/dma-fence.h>
 #include <linux/sched/signal.h>
+#include <linux/seq_file.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/dma_fence.h>
@@ -907,6 +908,22 @@ err_free_cb:
 }
 EXPORT_SYMBOL(dma_fence_wait_any_timeout);
 
+/**
+ * dma_fence_describe - Dump fence describtion into seq_file
+ * @fence: the 6fence to describe
+ * @seq: the seq_file to put the textual description into
+ *
+ * Dump a textual description of the fence and it's state into the seq_file.
+ */
+void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq)
+{
+	seq_printf(seq, "%s %s seq %llu %ssignalled\n",
+		   fence->ops->get_driver_name(fence),
+		   fence->ops->get_timeline_name(fence), fence->seqno,
+		   dma_fence_is_signaled(fence) ? "" : "un");
+}
+EXPORT_SYMBOL(dma_fence_describe);
+
 /**
  * dma_fence_init - Initialize a custom fence.
  * @fence: the fence to initialize
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 9eb2baa387d4..ff3c0558b3b8 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -38,6 +38,7 @@
 #include <linux/mm.h>
 #include <linux/sched/mm.h>
 #include <linux/mmu_notifier.h>
+#include <linux/seq_file.h>
 
 /**
  * DOC: Reservation Object Overview
@@ -666,6 +667,28 @@ bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all)
 }
 EXPORT_SYMBOL_GPL(dma_resv_test_signaled);
 
+/**
+ * dma_resv_describe - Dump description of the resv object into seq_file
+ * @obj: the reservation object
+ * @seq: the seq_file to dump the description into
+ *
+ * Dump a textual description of the fences inside an dma_resv object into the
+ * seq_file.
+ */
+void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq)
+{
+	struct dma_resv_iter cursor;
+	struct dma_fence *fence;
+
+	dma_resv_for_each_fence(&cursor, obj, true, fence) {
+		seq_printf(seq, "\t%s fence:",
+			   dma_resv_iter_is_exclusive(&cursor) ?
+				"Exclusive" : "Shared");
+		dma_fence_describe(fence, seq);
+	}
+}
+EXPORT_SYMBOL_GPL(dma_resv_describe);
+
 #if IS_ENABLED(CONFIG_LOCKDEP)
 static int __init dma_resv_lockdep(void)
 {
diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h
index a706b7bf51d7..1ea691753bd3 100644
--- a/include/linux/dma-fence.h
+++ b/include/linux/dma-fence.h
@@ -264,6 +264,7 @@ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
 
 void dma_fence_release(struct kref *kref);
 void dma_fence_free(struct dma_fence *fence);
+void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq);
 
 /**
  * dma_fence_put - decreases refcount of the fence
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index dbd235ab447f..09c6063b199a 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -490,5 +490,6 @@ int dma_resv_copy_fences(struct dma_resv *dst, struct dma_resv *src);
 long dma_resv_wait_timeout(struct dma_resv *obj, bool wait_all, bool intr,
 			   unsigned long timeout);
 bool dma_resv_test_signaled(struct dma_resv *obj, bool test_all);
+void dma_resv_describe(struct dma_resv *obj, struct seq_file *seq);
 
 #endif /* _LINUX_RESERVATION_H */
-- 
cgit v1.2.3


From 9c8e9c9681a0f3f1ae90a90230d059c7a1dece5a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 4 Nov 2021 00:27:29 +0100
Subject: PCI/MSI: Move non-mask check back into low level accessors

The recent rework of PCI/MSI[X] masking moved the non-mask checks from the
low level accessors into the higher level mask/unmask functions.

This missed the fact that these accessors can be invoked from other places
as well. The missing checks break XEN-PV which sets pci_msi_ignore_mask and
also violates the virtual MSIX and the msi_attrib.maskbit protections.

Instead of sprinkling checks all over the place, lift them back into the
low level accessor functions. To avoid checking three different conditions
combine them into one property of msi_desc::msi_attrib.

[ josef: Fixed the missed conversion in the core code ]

Fixes: fcacdfbef5a1 ("PCI/MSI: Provide a new set of mask and unmask functions")
Reported-by: Josef Johansson <josef@oderland.se>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Josef Johansson <josef@oderland.se>
Cc: Bjorn Helgaas <helgaas@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/pci/msi.c   | 26 ++++++++++++++------------
 include/linux/msi.h |  2 +-
 kernel/irq/msi.c    |  4 ++--
 3 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 12e296d634eb..6da791022d2e 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -148,6 +148,9 @@ static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 s
 	raw_spinlock_t *lock = &desc->dev->msi_lock;
 	unsigned long flags;
 
+	if (!desc->msi_attrib.can_mask)
+		return;
+
 	raw_spin_lock_irqsave(lock, flags);
 	desc->msi_mask &= ~clear;
 	desc->msi_mask |= set;
@@ -181,7 +184,8 @@ static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
 {
 	void __iomem *desc_addr = pci_msix_desc_addr(desc);
 
-	writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+	if (desc->msi_attrib.can_mask)
+		writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
 }
 
 static inline void pci_msix_mask(struct msi_desc *desc)
@@ -200,23 +204,17 @@ static inline void pci_msix_unmask(struct msi_desc *desc)
 
 static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
 {
-	if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
-		return;
-
 	if (desc->msi_attrib.is_msix)
 		pci_msix_mask(desc);
-	else if (desc->msi_attrib.maskbit)
+	else
 		pci_msi_mask(desc, mask);
 }
 
 static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
 {
-	if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
-		return;
-
 	if (desc->msi_attrib.is_msix)
 		pci_msix_unmask(desc);
-	else if (desc->msi_attrib.maskbit)
+	else
 		pci_msi_unmask(desc, mask);
 }
 
@@ -484,7 +482,8 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
 	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
 	entry->msi_attrib.is_virtual    = 0;
 	entry->msi_attrib.entry_nr	= 0;
-	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
+	entry->msi_attrib.can_mask	= !pci_msi_ignore_mask &&
+					  !!(control & PCI_MSI_FLAGS_MASKBIT);
 	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
 	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;
 	entry->msi_attrib.multiple	= ilog2(__roundup_pow_of_two(nvec));
@@ -495,7 +494,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
 		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
 
 	/* Save the initial mask status */
-	if (entry->msi_attrib.maskbit)
+	if (entry->msi_attrib.can_mask)
 		pci_read_config_dword(dev, entry->mask_pos, &entry->msi_mask);
 
 out:
@@ -639,10 +638,13 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
 		entry->msi_attrib.is_virtual =
 			entry->msi_attrib.entry_nr >= vec_count;
 
+		entry->msi_attrib.can_mask	= !pci_msi_ignore_mask &&
+						  !entry->msi_attrib.is_virtual;
+
 		entry->msi_attrib.default_irq	= dev->irq;
 		entry->mask_base		= base;
 
-		if (!entry->msi_attrib.is_virtual) {
+		if (entry->msi_attrib.can_mask) {
 			addr = pci_msix_desc_addr(entry);
 			entry->msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
 		}
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 49cf6eb222e7..e616f94c7c58 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -148,7 +148,7 @@ struct msi_desc {
 				u8	is_msix		: 1;
 				u8	multiple	: 3;
 				u8	multi_cap	: 3;
-				u8	maskbit		: 1;
+				u8	can_mask	: 1;
 				u8	is_64		: 1;
 				u8	is_virtual	: 1;
 				u16	entry_nr;
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 6a5ecee6e567..7f350ae59c5f 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -529,10 +529,10 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
 
 	/*
 	 * Checking the first MSI descriptor is sufficient. MSIX supports
-	 * masking and MSI does so when the maskbit is set.
+	 * masking and MSI does so when the can_mask attribute is set.
 	 */
 	desc = first_msi_entry(dev);
-	return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
+	return desc->msi_attrib.is_msix || desc->msi_attrib.can_mask;
 }
 
 int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
-- 
cgit v1.2.3


From 2226667a145db2e1f314d7f57fd644fe69863ab9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 4 Nov 2021 18:01:29 +0000
Subject: PCI/MSI: Deal with devices lying about their MSI mask capability

It appears that some devices are lying about their mask capability,
pretending that they don't have it, while they actually do.
The net result is that now that we don't enable MSIs on such
endpoint.

Add a new per-device flag to deal with this. Further patches will
make use of it, sadly.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20211104180130.3825416-2-maz@kernel.org
Cc: Bjorn Helgaas <helgaas@kernel.org>
---
 drivers/pci/msi.c   | 3 +++
 include/linux/pci.h | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 6da791022d2e..70433013897b 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -477,6 +477,9 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
 		goto out;
 
 	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
+	/* Lies, damned lies, and MSIs */
+	if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
+		control |= PCI_MSI_FLAGS_MASKBIT;
 
 	entry->msi_attrib.is_msix	= 0;
 	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index c8afbee5da4b..d0dba7fd9848 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -233,6 +233,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
 	/* Don't use Relaxed Ordering for TLPs directed at this device */
 	PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11),
+	/* Device does honor MSI masking despite saying otherwise */
+	PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
-- 
cgit v1.2.3


From a8b76910e465d718effce0cad306a21fa4f3526b Mon Sep 17 00:00:00 2001
From: Valentin Schneider <valentin.schneider@arm.com>
Date: Wed, 10 Nov 2021 20:24:44 +0000
Subject: preempt: Restore preemption model selection configs

Commit c597bfddc9e9 ("sched: Provide Kconfig support for default dynamic
preempt mode") changed the selectable config names for the preemption
model. This means a config file must now select

  CONFIG_PREEMPT_BEHAVIOUR=y

rather than

  CONFIG_PREEMPT=y

to get a preemptible kernel. This means all arch config files would need to
be updated - right now they'll all end up with the default
CONFIG_PREEMPT_NONE_BEHAVIOUR.

Rather than touch a good hundred of config files, restore usage of
CONFIG_PREEMPT{_NONE, _VOLUNTARY}. Make them configure:
o The build-time preemption model when !PREEMPT_DYNAMIC
o The default boot-time preemption model when PREEMPT_DYNAMIC

Add siblings of those configs with the _BUILD suffix to unconditionally
designate the build-time preemption model (PREEMPT_DYNAMIC is built with
the "highest" preemption model it supports, aka PREEMPT). Downstream
configs should by now all be depending / selected by CONFIG_PREEMPTION
rather than CONFIG_PREEMPT, so only a few sites need patching up.

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20211110202448.4054153-2-valentin.schneider@arm.com
---
 include/linux/kernel.h   |  2 +-
 include/linux/vermagic.h |  2 +-
 init/Makefile            |  2 +-
 kernel/Kconfig.preempt   | 42 +++++++++++++++++++++---------------------
 kernel/sched/core.c      |  6 +++---
 5 files changed, 27 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 968b4c4fe65b..77755ac3e189 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -85,7 +85,7 @@
 struct completion;
 struct user;
 
-#ifdef CONFIG_PREEMPT_VOLUNTARY
+#ifdef CONFIG_PREEMPT_VOLUNTARY_BUILD
 
 extern int __cond_resched(void);
 # define might_resched() __cond_resched()
diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 1eaaa93c37bf..329d63babaeb 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -15,7 +15,7 @@
 #else
 #define MODULE_VERMAGIC_SMP ""
 #endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_BUILD
 #define MODULE_VERMAGIC_PREEMPT "preempt "
 #elif defined(CONFIG_PREEMPT_RT)
 #define MODULE_VERMAGIC_PREEMPT "preempt_rt "
diff --git a/init/Makefile b/init/Makefile
index 2846113677ee..04eeee12c076 100644
--- a/init/Makefile
+++ b/init/Makefile
@@ -30,7 +30,7 @@ $(obj)/version.o: include/generated/compile.h
 quiet_cmd_compile.h = CHK     $@
       cmd_compile.h = \
 	$(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@	\
-	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)"	\
+	"$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)"	\
 	"$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)"
 
 include/generated/compile.h: FORCE
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 60f1bfc3c7b2..ce77f0265660 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -1,12 +1,23 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+config PREEMPT_NONE_BUILD
+	bool
+
+config PREEMPT_VOLUNTARY_BUILD
+	bool
+
+config PREEMPT_BUILD
+	bool
+	select PREEMPTION
+	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
+
 choice
 	prompt "Preemption Model"
-	default PREEMPT_NONE_BEHAVIOUR
+	default PREEMPT_NONE
 
-config PREEMPT_NONE_BEHAVIOUR
+config PREEMPT_NONE
 	bool "No Forced Preemption (Server)"
-	select PREEMPT_NONE if !PREEMPT_DYNAMIC
+	select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This is the traditional Linux preemption model, geared towards
 	  throughput. It will still provide good latencies most of the
@@ -18,10 +29,10 @@ config PREEMPT_NONE_BEHAVIOUR
 	  raw processing power of the kernel, irrespective of scheduling
 	  latencies.
 
-config PREEMPT_VOLUNTARY_BEHAVIOUR
+config PREEMPT_VOLUNTARY
 	bool "Voluntary Kernel Preemption (Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT_VOLUNTARY if !PREEMPT_DYNAMIC
+	select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC
 	help
 	  This option reduces the latency of the kernel by adding more
 	  "explicit preemption points" to the kernel code. These new
@@ -37,10 +48,10 @@ config PREEMPT_VOLUNTARY_BEHAVIOUR
 
 	  Select this if you are building a kernel for a desktop system.
 
-config PREEMPT_BEHAVIOUR
+config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
 	depends on !ARCH_NO_PREEMPT
-	select PREEMPT
+	select PREEMPT_BUILD
 	help
 	  This option reduces the latency of the kernel by making
 	  all kernel code (that is not executing in a critical section)
@@ -58,7 +69,7 @@ config PREEMPT_BEHAVIOUR
 
 config PREEMPT_RT
 	bool "Fully Preemptible Kernel (Real-Time)"
-	depends on EXPERT && ARCH_SUPPORTS_RT && !PREEMPT_DYNAMIC
+	depends on EXPERT && ARCH_SUPPORTS_RT
 	select PREEMPTION
 	help
 	  This option turns the kernel into a real-time kernel by replacing
@@ -75,17 +86,6 @@ config PREEMPT_RT
 
 endchoice
 
-config PREEMPT_NONE
-	bool
-
-config PREEMPT_VOLUNTARY
-	bool
-
-config PREEMPT
-	bool
-	select PREEMPTION
-	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
-
 config PREEMPT_COUNT
        bool
 
@@ -95,8 +95,8 @@ config PREEMPTION
 
 config PREEMPT_DYNAMIC
 	bool "Preemption behaviour defined on boot"
-	depends on HAVE_PREEMPT_DYNAMIC
-	select PREEMPT
+	depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
+	select PREEMPT_BUILD
 	default y
 	help
 	  This option allows to define the preemption model on the kernel
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 862af1db22ab..3c9b0fda64ac 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6628,13 +6628,13 @@ __setup("preempt=", setup_preempt_mode);
 static void __init preempt_dynamic_init(void)
 {
 	if (preempt_dynamic_mode == preempt_dynamic_undefined) {
-		if (IS_ENABLED(CONFIG_PREEMPT_NONE_BEHAVIOUR)) {
+		if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
 			sched_dynamic_update(preempt_dynamic_none);
-		} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY_BEHAVIOUR)) {
+		} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
 			sched_dynamic_update(preempt_dynamic_voluntary);
 		} else {
 			/* Default static call setting, nothing to do */
-			WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_BEHAVIOUR));
+			WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
 			preempt_dynamic_mode = preempt_dynamic_full;
 			pr_info("Dynamic Preempt: full\n");
 		}
-- 
cgit v1.2.3


From 2f70ddb1f71814aae525c58086fcb2f6974e6591 Mon Sep 17 00:00:00 2001
From: Ashish Kalra <ashish.kalra@amd.com>
Date: Tue, 24 Aug 2021 11:06:40 +0000
Subject: EFI: Introduce the new AMD Memory Encryption GUID.

Introduce a new AMD Memory Encryption GUID which is currently
used for defining a new UEFI environment variable which indicates
UEFI/OVMF support for the SEV live migration feature. This variable
is setup when UEFI/OVMF detects host/hypervisor support for SEV
live migration and later this variable is read by the kernel using
EFI runtime services to verify if OVMF supports the live migration
feature.

Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Message-Id: <1cea22976d2208f34d47e0c1ce0ecac816c13111.1629726117.git.ashish.kalra@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/efi.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6b5d36babfcc..dbd39b20e034 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -362,6 +362,7 @@ void efi_native_runtime_setup(void);
 
 /* OEM GUIDs */
 #define DELLEMC_EFI_RCI2_TABLE_GUID		EFI_GUID(0x2d9f28a2, 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
+#define AMD_SEV_MEM_ENCRYPT_GUID		EFI_GUID(0x0cf29b71, 0x9e51, 0x433a,  0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
 
 typedef struct {
 	efi_guid_t guid;
-- 
cgit v1.2.3


From f4d316537059b274452727e86f46ff3bdefdde4d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 11 Nov 2021 10:13:38 -0500
Subject: KVM: generalize "bugged" VM to "dead" VM

Generalize KVM_REQ_VM_BUGGED so that it can be called even in cases
where it is by design that the VM cannot be operated upon.  In this
case any KVM_BUG_ON should still warn, so introduce a new flag
kvm->vm_dead that is separate from kvm->vm_bugged.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c       |  2 +-
 include/linux/kvm_host.h | 12 ++++++++++--
 virt/kvm/kvm_main.c      | 10 +++++-----
 3 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ac83d873d65b..622cb75f5e75 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9654,7 +9654,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	}
 
 	if (kvm_request_pending(vcpu)) {
-		if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+		if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
 			r = -EIO;
 			goto out;
 		}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 60a35d9fe259..9e0667e3723e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -150,7 +150,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
-#define KVM_REQ_VM_BUGGED         (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VM_DEAD           (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQUEST_ARCH_BASE     8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -617,6 +617,7 @@ struct kvm {
 	unsigned int max_halt_poll_ns;
 	u32 dirty_ring_size;
 	bool vm_bugged;
+	bool vm_dead;
 
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
 	struct notifier_block pm_notifier;
@@ -650,12 +651,19 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)					\
 	kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline void kvm_vm_dead(struct kvm *kvm)
+{
+	kvm->vm_dead = true;
+	kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD);
+}
+
 static inline void kvm_vm_bugged(struct kvm *kvm)
 {
 	kvm->vm_bugged = true;
-	kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
+	kvm_vm_dead(kvm);
 }
 
+
 #define KVM_BUG(cond, kvm, fmt...)				\
 ({								\
 	int __ret = (cond);					\
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3f6d450355f0..d31724500501 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3747,7 +3747,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	struct kvm_fpu *fpu = NULL;
 	struct kvm_sregs *kvm_sregs = NULL;
 
-	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
 		return -EIO;
 
 	if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
@@ -3957,7 +3957,7 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
 	void __user *argp = compat_ptr(arg);
 	int r;
 
-	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+	if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
 		return -EIO;
 
 	switch (ioctl) {
@@ -4023,7 +4023,7 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
 {
 	struct kvm_device *dev = filp->private_data;
 
-	if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
+	if (dev->kvm->mm != current->mm || dev->kvm->vm_dead)
 		return -EIO;
 
 	switch (ioctl) {
@@ -4345,7 +4345,7 @@ static long kvm_vm_ioctl(struct file *filp,
 	void __user *argp = (void __user *)arg;
 	int r;
 
-	if (kvm->mm != current->mm || kvm->vm_bugged)
+	if (kvm->mm != current->mm || kvm->vm_dead)
 		return -EIO;
 	switch (ioctl) {
 	case KVM_CREATE_VCPU:
@@ -4556,7 +4556,7 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 	struct kvm *kvm = filp->private_data;
 	int r;
 
-	if (kvm->mm != current->mm || kvm->vm_bugged)
+	if (kvm->mm != current->mm || kvm->vm_dead)
 		return -EIO;
 	switch (ioctl) {
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-- 
cgit v1.2.3


From 0093de693fe7baf31641d1863793e6db93a43b6e Mon Sep 17 00:00:00 2001
From: Yixuan Cao <caoyixuan2019@email.szu.edu.cn>
Date: Wed, 10 Nov 2021 20:32:30 -0800
Subject: mm/page_owner.c: modify the type of argument "order" in some
 functions

The type of "order" in struct page_owner is unsigned short.
However, it is unsigned int in the following 3 functions:

  __reset_page_owner
  __set_page_owner_handle
  __set_page_owner_handle

The type of "order" in argument list is unsigned int, which is
inconsistent.

[akpm@linux-foundation.org: update include/linux/page_owner.h]

Link: https://lkml.kernel.org/r/20211020125945.47792-1-caoyixuan2019@email.szu.edu.cn
Signed-off-by: Yixuan Cao <caoyixuan2019@email.szu.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page_owner.h | 12 ++++++------
 mm/page_owner.c            |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 43c638c51c1f..119a0c9d2a8b 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -8,9 +8,9 @@
 extern struct static_key_false page_owner_inited;
 extern struct page_ext_operations page_owner_ops;
 
-extern void __reset_page_owner(struct page *page, unsigned int order);
+extern void __reset_page_owner(struct page *page, unsigned short order);
 extern void __set_page_owner(struct page *page,
-			unsigned int order, gfp_t gfp_mask);
+			unsigned short order, gfp_t gfp_mask);
 extern void __split_page_owner(struct page *page, unsigned int nr);
 extern void __folio_copy_owner(struct folio *newfolio, struct folio *old);
 extern void __set_page_owner_migrate_reason(struct page *page, int reason);
@@ -18,14 +18,14 @@ extern void __dump_page_owner(const struct page *page);
 extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
 					pg_data_t *pgdat, struct zone *zone);
 
-static inline void reset_page_owner(struct page *page, unsigned int order)
+static inline void reset_page_owner(struct page *page, unsigned short order)
 {
 	if (static_branch_unlikely(&page_owner_inited))
 		__reset_page_owner(page, order);
 }
 
 static inline void set_page_owner(struct page *page,
-			unsigned int order, gfp_t gfp_mask)
+			unsigned short order, gfp_t gfp_mask)
 {
 	if (static_branch_unlikely(&page_owner_inited))
 		__set_page_owner(page, order, gfp_mask);
@@ -52,7 +52,7 @@ static inline void dump_page_owner(const struct page *page)
 		__dump_page_owner(page);
 }
 #else
-static inline void reset_page_owner(struct page *page, unsigned int order)
+static inline void reset_page_owner(struct page *page, unsigned short order)
 {
 }
 static inline void set_page_owner(struct page *page,
@@ -60,7 +60,7 @@ static inline void set_page_owner(struct page *page,
 {
 }
 static inline void split_page_owner(struct page *page,
-			unsigned int order)
+			unsigned short order)
 {
 }
 static inline void folio_copy_owner(struct folio *newfolio, struct folio *folio)
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 79936db59859..4f924957ce7a 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -125,7 +125,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags)
 	return handle;
 }
 
-void __reset_page_owner(struct page *page, unsigned int order)
+void __reset_page_owner(struct page *page, unsigned short order)
 {
 	int i;
 	struct page_ext *page_ext;
@@ -149,7 +149,7 @@ void __reset_page_owner(struct page *page, unsigned int order)
 
 static inline void __set_page_owner_handle(struct page_ext *page_ext,
 					depot_stack_handle_t handle,
-					unsigned int order, gfp_t gfp_mask)
+					unsigned short order, gfp_t gfp_mask)
 {
 	struct page_owner *page_owner;
 	int i;
@@ -169,7 +169,7 @@ static inline void __set_page_owner_handle(struct page_ext *page_ext,
 	}
 }
 
-noinline void __set_page_owner(struct page *page, unsigned int order,
+noinline void __set_page_owner(struct page *page, unsigned short order,
 					gfp_t gfp_mask)
 {
 	struct page_ext *page_ext = lookup_page_ext(page);
-- 
cgit v1.2.3


From ab09243aa95a72bac5c71e852773de34116f8d0f Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple@nvidia.com>
Date: Wed, 10 Nov 2021 20:32:40 -0800
Subject: mm/migrate.c: remove MIGRATE_PFN_LOCKED

MIGRATE_PFN_LOCKED is used to indicate to migrate_vma_prepare() that a
source page was already locked during migrate_vma_collect().  If it
wasn't then the a second attempt is made to lock the page.  However if
the first attempt failed it's unlikely a second attempt will succeed,
and the retry adds complexity.  So clean this up by removing the retry
and MIGRATE_PFN_LOCKED flag.

Destination pages are also meant to have the MIGRATE_PFN_LOCKED flag
set, but nothing actually checks that.

Link: https://lkml.kernel.org/r/20211025041608.289017-1-apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/hmm.rst                 |   2 +-
 arch/powerpc/kvm/book3s_hv_uvmem.c       |   4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |   2 -
 drivers/gpu/drm/nouveau/nouveau_dmem.c   |   4 +-
 include/linux/migrate.h                  |   1 -
 lib/test_hmm.c                           |   5 +-
 mm/migrate.c                             | 145 ++++++-------------------------
 7 files changed, 35 insertions(+), 128 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index a14c2938e7af..f2a59ed82ed3 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -360,7 +360,7 @@ between device driver specific code and shared common code:
    system memory page, locks the page with ``lock_page()``, and fills in the
    ``dst`` array entry with::
 
-     dst[i] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+     dst[i] = migrate_pfn(page_to_pfn(dpage));
 
    Now that the driver knows that this page is being migrated, it can
    invalidate device private MMU mappings and copy device private memory
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index a7061ee3b157..28c436df9935 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -560,7 +560,7 @@ static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
 				  gpa, 0, page_shift);
 
 	if (ret == U_SUCCESS)
-		*mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
+		*mig.dst = migrate_pfn(pfn);
 	else {
 		unlock_page(dpage);
 		__free_page(dpage);
@@ -774,7 +774,7 @@ static int kvmppc_svm_page_in(struct vm_area_struct *vma,
 		}
 	}
 
-	*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	*mig.dst = migrate_pfn(page_to_pfn(dpage));
 	migrate_vma_pages(&mig);
 out_finalize:
 	migrate_vma_finalize(&mig);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6d8634e40b3b..d43bfd8b35ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -317,7 +317,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 			migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
 			svm_migrate_get_vram_page(prange, migrate->dst[i]);
 			migrate->dst[i] = migrate_pfn(migrate->dst[i]);
-			migrate->dst[i] |= MIGRATE_PFN_LOCKED;
 			src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
 					      DMA_TO_DEVICE);
 			r = dma_mapping_error(dev, src[i]);
@@ -610,7 +609,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 				     dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
 
 		migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
-		migrate->dst[i] |= MIGRATE_PFN_LOCKED;
 		j++;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 92987daa5e17..3828aafd3ac4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -166,7 +166,7 @@ static vm_fault_t nouveau_dmem_fault_copy_one(struct nouveau_drm *drm,
 		goto error_dma_unmap;
 	mutex_unlock(&svmm->mutex);
 
-	args->dst[0] = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	args->dst[0] = migrate_pfn(page_to_pfn(dpage));
 	return 0;
 
 error_dma_unmap:
@@ -602,7 +602,7 @@ static unsigned long nouveau_dmem_migrate_copy_one(struct nouveau_drm *drm,
 		((paddr >> PAGE_SHIFT) << NVIF_VMM_PFNMAP_V0_ADDR_SHIFT);
 	if (src & MIGRATE_PFN_WRITE)
 		*pfn |= NVIF_VMM_PFNMAP_V0_W;
-	return migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+	return migrate_pfn(page_to_pfn(dpage));
 
 out_dma_unmap:
 	dma_unmap_page(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index eeb818c4fc78..4850cc5bf813 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -110,7 +110,6 @@ static inline int migrate_misplaced_page(struct page *page,
  */
 #define MIGRATE_PFN_VALID	(1UL << 0)
 #define MIGRATE_PFN_MIGRATE	(1UL << 1)
-#define MIGRATE_PFN_LOCKED	(1UL << 2)
 #define MIGRATE_PFN_WRITE	(1UL << 3)
 #define MIGRATE_PFN_SHIFT	6
 
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index c259842f6d44..e2ce8f9b7605 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -613,8 +613,7 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
 		 */
 		rpage->zone_device_data = dmirror;
 
-		*dst = migrate_pfn(page_to_pfn(dpage)) |
-			    MIGRATE_PFN_LOCKED;
+		*dst = migrate_pfn(page_to_pfn(dpage));
 		if ((*src & MIGRATE_PFN_WRITE) ||
 		    (!spage && args->vma->vm_flags & VM_WRITE))
 			*dst |= MIGRATE_PFN_WRITE;
@@ -1137,7 +1136,7 @@ static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
 		lock_page(dpage);
 		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
 		copy_highpage(dpage, spage);
-		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+		*dst = migrate_pfn(page_to_pfn(dpage));
 		if (*src & MIGRATE_PFN_WRITE)
 			*dst |= MIGRATE_PFN_WRITE;
 	}
diff --git a/mm/migrate.c b/mm/migrate.c
index 43dd88c7fcdc..cf25b00f03c8 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2362,7 +2362,6 @@ again:
 		 * can't be dropped from it).
 		 */
 		get_page(page);
-		migrate->cpages++;
 
 		/*
 		 * Optimize for the common case where page is only mapped once
@@ -2372,7 +2371,7 @@ again:
 		if (trylock_page(page)) {
 			pte_t swp_pte;
 
-			mpfn |= MIGRATE_PFN_LOCKED;
+			migrate->cpages++;
 			ptep_get_and_clear(mm, addr, ptep);
 
 			/* Setup special migration page table entry */
@@ -2406,6 +2405,9 @@ again:
 
 			if (pte_present(pte))
 				unmapped++;
+		} else {
+			put_page(page);
+			mpfn = 0;
 		}
 
 next:
@@ -2510,15 +2512,17 @@ static bool migrate_vma_check_page(struct page *page)
 }
 
 /*
- * migrate_vma_prepare() - lock pages and isolate them from the lru
+ * migrate_vma_unmap() - replace page mapping with special migration pte entry
  * @migrate: migrate struct containing all migration information
  *
- * This locks pages that have been collected by migrate_vma_collect(). Once each
- * page is locked it is isolated from the lru (for non-device pages). Finally,
- * the ref taken by migrate_vma_collect() is dropped, as locked pages cannot be
- * migrated by concurrent kernel threads.
+ * Isolate pages from the LRU and replace mappings (CPU page table pte) with a
+ * special migration pte entry and check if it has been pinned. Pinned pages are
+ * restored because we cannot migrate them.
+ *
+ * This is the last step before we call the device driver callback to allocate
+ * destination memory and copy contents of original page over to new page.
  */
-static void migrate_vma_prepare(struct migrate_vma *migrate)
+static void migrate_vma_unmap(struct migrate_vma *migrate)
 {
 	const unsigned long npages = migrate->npages;
 	const unsigned long start = migrate->start;
@@ -2527,32 +2531,12 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 
 	lru_add_drain();
 
-	for (i = 0; (i < npages) && migrate->cpages; i++) {
+	for (i = 0; i < npages; i++) {
 		struct page *page = migrate_pfn_to_page(migrate->src[i]);
-		bool remap = true;
 
 		if (!page)
 			continue;
 
-		if (!(migrate->src[i] & MIGRATE_PFN_LOCKED)) {
-			/*
-			 * Because we are migrating several pages there can be
-			 * a deadlock between 2 concurrent migration where each
-			 * are waiting on each other page lock.
-			 *
-			 * Make migrate_vma() a best effort thing and backoff
-			 * for any page we can not lock right away.
-			 */
-			if (!trylock_page(page)) {
-				migrate->src[i] = 0;
-				migrate->cpages--;
-				put_page(page);
-				continue;
-			}
-			remap = false;
-			migrate->src[i] |= MIGRATE_PFN_LOCKED;
-		}
-
 		/* ZONE_DEVICE pages are not on LRU */
 		if (!is_zone_device_page(page)) {
 			if (!PageLRU(page) && allow_drain) {
@@ -2562,16 +2546,9 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 			}
 
 			if (isolate_lru_page(page)) {
-				if (remap) {
-					migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-					migrate->cpages--;
-					restore++;
-				} else {
-					migrate->src[i] = 0;
-					unlock_page(page);
-					migrate->cpages--;
-					put_page(page);
-				}
+				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+				migrate->cpages--;
+				restore++;
 				continue;
 			}
 
@@ -2579,80 +2556,20 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
 			put_page(page);
 		}
 
-		if (!migrate_vma_check_page(page)) {
-			if (remap) {
-				migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-				migrate->cpages--;
-				restore++;
-
-				if (!is_zone_device_page(page)) {
-					get_page(page);
-					putback_lru_page(page);
-				}
-			} else {
-				migrate->src[i] = 0;
-				unlock_page(page);
-				migrate->cpages--;
+		if (page_mapped(page))
+			try_to_migrate(page, 0);
 
-				if (!is_zone_device_page(page))
-					putback_lru_page(page);
-				else
-					put_page(page);
+		if (page_mapped(page) || !migrate_vma_check_page(page)) {
+			if (!is_zone_device_page(page)) {
+				get_page(page);
+				putback_lru_page(page);
 			}
-		}
-	}
-
-	for (i = 0, addr = start; i < npages && restore; i++, addr += PAGE_SIZE) {
-		struct page *page = migrate_pfn_to_page(migrate->src[i]);
-
-		if (!page || (migrate->src[i] & MIGRATE_PFN_MIGRATE))
-			continue;
 
-		remove_migration_pte(page, migrate->vma, addr, page);
-
-		migrate->src[i] = 0;
-		unlock_page(page);
-		put_page(page);
-		restore--;
-	}
-}
-
-/*
- * migrate_vma_unmap() - replace page mapping with special migration pte entry
- * @migrate: migrate struct containing all migration information
- *
- * Replace page mapping (CPU page table pte) with a special migration pte entry
- * and check again if it has been pinned. Pinned pages are restored because we
- * cannot migrate them.
- *
- * This is the last step before we call the device driver callback to allocate
- * destination memory and copy contents of original page over to new page.
- */
-static void migrate_vma_unmap(struct migrate_vma *migrate)
-{
-	const unsigned long npages = migrate->npages;
-	const unsigned long start = migrate->start;
-	unsigned long addr, i, restore = 0;
-
-	for (i = 0; i < npages; i++) {
-		struct page *page = migrate_pfn_to_page(migrate->src[i]);
-
-		if (!page || !(migrate->src[i] & MIGRATE_PFN_MIGRATE))
+			migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
+			migrate->cpages--;
+			restore++;
 			continue;
-
-		if (page_mapped(page)) {
-			try_to_migrate(page, 0);
-			if (page_mapped(page))
-				goto restore;
 		}
-
-		if (migrate_vma_check_page(page))
-			continue;
-
-restore:
-		migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
-		migrate->cpages--;
-		restore++;
 	}
 
 	for (addr = start, i = 0; i < npages && restore; addr += PAGE_SIZE, i++) {
@@ -2665,12 +2582,8 @@ restore:
 
 		migrate->src[i] = 0;
 		unlock_page(page);
+		put_page(page);
 		restore--;
-
-		if (is_zone_device_page(page))
-			put_page(page);
-		else
-			putback_lru_page(page);
 	}
 }
 
@@ -2693,8 +2606,8 @@ restore:
  * it for all those entries (ie with MIGRATE_PFN_VALID and MIGRATE_PFN_MIGRATE
  * flag set).  Once these are allocated and copied, the caller must update each
  * corresponding entry in the dst array with the pfn value of the destination
- * page and with the MIGRATE_PFN_VALID and MIGRATE_PFN_LOCKED flags set
- * (destination pages must have their struct pages locked, via lock_page()).
+ * page and with MIGRATE_PFN_VALID. Destination pages must be locked via
+ * lock_page().
  *
  * Note that the caller does not have to migrate all the pages that are marked
  * with MIGRATE_PFN_MIGRATE flag in src array unless this is a migration from
@@ -2763,8 +2676,6 @@ int migrate_vma_setup(struct migrate_vma *args)
 
 	migrate_vma_collect(args);
 
-	if (args->cpages)
-		migrate_vma_prepare(args);
 	if (args->cpages)
 		migrate_vma_unmap(args);
 
-- 
cgit v1.2.3


From 68da4e0eaaab421f228eac57cbe7505b136464af Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 10 Nov 2021 12:01:14 -0600
Subject: Revert "PCI: Remove struct pci_dev->driver"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit b5f9c644eb1baafcd349ad134e2110773f8d0a38.

Revert b5f9c644eb1b ("PCI: Remove struct pci_dev->driver"), which is needed
to revert 2a4d9408c9e8 ("PCI: Use to_pci_driver() instead of
pci_dev->driver").

2a4d9408c9e8 caused a NULL pointer dereference reported by Robert Święcki.
Details in the revert of that commit.

Fixes: 2a4d9408c9e8 ("PCI: Use to_pci_driver() instead of pci_dev->driver")
Link: https://lore.kernel.org/linux-i2c/CAP145pgdrdiMAT7=-iB1DMgA7t_bMqTcJL4N0=6u8kNY3EU0dw@mail.gmail.com/
Reported-by: Robert Święcki <robert@swiecki.net>
Tested-by: Robert Święcki <robert@swiecki.net>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-driver.c | 4 ++++
 include/linux/pci.h      | 1 +
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 1d98c974381c..4c1f46dbfa87 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -319,10 +319,12 @@ static long local_pci_probe(void *_ddi)
 	 * its remove routine.
 	 */
 	pm_runtime_get_sync(dev);
+	pci_dev->driver = pci_drv;
 	rc = pci_drv->probe(pci_dev, ddi->id);
 	if (!rc)
 		return rc;
 	if (rc < 0) {
+		pci_dev->driver = NULL;
 		pm_runtime_put_sync(dev);
 		return rc;
 	}
@@ -388,6 +390,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
  * @pci_dev: PCI device being probed
  *
  * returns 0 on success, else error.
+ * side-effect: pci_dev->driver is set to drv when drv claims pci_dev.
  */
 static int __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev)
 {
@@ -462,6 +465,7 @@ static void pci_device_remove(struct device *dev)
 		pm_runtime_put_noidle(dev);
 	}
 	pcibios_free_irq(pci_dev);
+	pci_dev->driver = NULL;
 	pci_iov_remove(pci_dev);
 
 	/* Undo the runtime PM settings in local_pci_probe() */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index b4dbcc86b3f1..e58888e21ab7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -342,6 +342,7 @@ struct pci_dev {
 	u16		pcie_flags_reg;	/* Cached PCIe Capabilities Register */
 	unsigned long	*dma_alias_mask;/* Mask of enabled devfn aliases */
 
+	struct pci_driver *driver;	/* Driver bound to this device */
 	u64		dma_mask;	/* Mask of the bits of bus address this
 					   device implements.  Normally this is
 					   0xffffffff.  You only need to change
-- 
cgit v1.2.3


From bf9167a8b40c9cf463521da05342db81808c1b6e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 11 Nov 2021 09:56:33 +0100
Subject: HID: intel-ish-hid: fix module device-id handling

A late addititon to the intel-ish-hid framework caused a build failure
with clang, and introduced an ABI to the module loader that stops working
if any driver ever needs to bind to more than one UUID:

drivers/hid/intel-ish-hid/ishtp-fw-loader.c:1067:4: error: initializer element is not a compile-time constant

Change the ishtp_device_id to have correct documentation and a driver_data
field like all the other ones, and change the drivers to use the ID table
as the primary identification in a way that works with all compilers
and avoids duplciating the identifiers.

Fixes: f155dfeaa4ee ("platform/x86: isthp_eclite: only load for matching devices")
Fixes: facfe0a4fdce ("platform/chrome: chros_ec_ishtp: only load for matching devices")
Fixes: 0d0cccc0fd83 ("HID: intel-ish-hid: hid-client: only load for matching devices")
Fixes: 44e2a58cb880 ("HID: intel-ish-hid: fw-loader: only load for matching devices")
Fixes: cb1a2c6847f7 ("HID: intel-ish-hid: use constants for modaliases")
Fixes: fa443bc3c1e4 ("HID: intel-ish-hid: add support for MODULE_DEVICE_TABLE()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
[jkosina@suse.cz: fix ecl_ishtp_cl_driver.id initialization]
[jkosina@suse.cz: fix conflict with already fixed kerneldoc]
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/intel-ish-hid/ishtp-fw-loader.c  | 19 ++++++++-----------
 drivers/hid/intel-ish-hid/ishtp-hid-client.c | 19 ++++++++-----------
 drivers/hid/intel-ish-hid/ishtp/bus.c        |  2 +-
 drivers/platform/chrome/cros_ec_ishtp.c      | 19 ++++++++-----------
 drivers/platform/x86/intel/ishtp_eclite.c    | 19 ++++++++-----------
 include/linux/intel-ish-client-if.h          |  4 ++--
 include/linux/mod_devicetable.h              |  2 ++
 7 files changed, 37 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
index 945a9d0b68cd..0e1183e96147 100644
--- a/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
+++ b/drivers/hid/intel-ish-hid/ishtp-fw-loader.c
@@ -76,9 +76,12 @@ enum ish_loader_commands {
 #define LOADER_XFER_MODE_ISHTP			BIT(1)
 
 /* ISH Transport Loader client unique GUID */
-static const guid_t loader_ishtp_guid =
-	GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7,
-		  0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc);
+static const struct ishtp_device_id loader_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0xc804d06a, 0x55bd, 0x4ea7,
+		  0xad, 0xed, 0x1e, 0x31, 0x22, 0x8c, 0x76, 0xdc) },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, loader_ishtp_id_table);
 
 #define FILENAME_SIZE				256
 
@@ -880,7 +883,7 @@ static int loader_init(struct ishtp_cl *loader_ishtp_cl, int reset)
 
 	fw_client =
 		ishtp_fw_cl_get_client(ishtp_get_ishtp_device(loader_ishtp_cl),
-				       &loader_ishtp_guid);
+				       &loader_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(client_data),
 			"ISH client uuid not found\n");
@@ -1057,18 +1060,12 @@ static int loader_ishtp_cl_reset(struct ishtp_cl_device *cl_device)
 
 static struct ishtp_cl_driver	loader_ishtp_cl_driver = {
 	.name = "ish-loader",
-	.guid = &loader_ishtp_guid,
+	.id = loader_ishtp_id_table,
 	.probe = loader_ishtp_cl_probe,
 	.remove = loader_ishtp_cl_remove,
 	.reset = loader_ishtp_cl_reset,
 };
 
-static const struct ishtp_device_id loader_ishtp_id_table[] = {
-	{ loader_ishtp_guid },
-	{ }
-};
-MODULE_DEVICE_TABLE(ishtp, loader_ishtp_id_table);
-
 static int __init ish_loader_init(void)
 {
 	return ishtp_cl_driver_register(&loader_ishtp_cl_driver, THIS_MODULE);
diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
index fb47d38d1e87..4338c9b68a43 100644
--- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c
+++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c
@@ -12,9 +12,12 @@
 #include "ishtp-hid.h"
 
 /* ISH Transport protocol (ISHTP in short) GUID */
-static const guid_t hid_ishtp_guid =
-	GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
-		  0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26);
+static const struct ishtp_device_id hid_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0x33AECD58, 0xB679, 0x4E54,
+		  0x9B, 0xD9, 0xA0, 0x4D, 0x34, 0xF0, 0xC2, 0x26), },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, hid_ishtp_id_table);
 
 /* Rx ring buffer pool size */
 #define HID_CL_RX_RING_SIZE	32
@@ -662,7 +665,7 @@ static int hid_ishtp_cl_init(struct ishtp_cl *hid_ishtp_cl, int reset)
 	ishtp_set_tx_ring_size(hid_ishtp_cl, HID_CL_TX_RING_SIZE);
 	ishtp_set_rx_ring_size(hid_ishtp_cl, HID_CL_RX_RING_SIZE);
 
-	fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_guid);
+	fw_client = ishtp_fw_cl_get_client(dev, &hid_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(client_data),
 			"ish client uuid not found\n");
@@ -945,19 +948,13 @@ static const struct dev_pm_ops hid_ishtp_pm_ops = {
 
 static struct ishtp_cl_driver	hid_ishtp_cl_driver = {
 	.name = "ish-hid",
-	.guid = &hid_ishtp_guid,
+	.id = hid_ishtp_id_table,
 	.probe = hid_ishtp_cl_probe,
 	.remove = hid_ishtp_cl_remove,
 	.reset = hid_ishtp_cl_reset,
 	.driver.pm = &hid_ishtp_pm_ops,
 };
 
-static const struct ishtp_device_id hid_ishtp_id_table[] = {
-	{ hid_ishtp_guid },
-	{ }
-};
-MODULE_DEVICE_TABLE(ishtp, hid_ishtp_id_table);
-
 static int __init ish_hid_init(void)
 {
 	int	rv;
diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c
index e159cd1c5f37..f68aba8794fe 100644
--- a/drivers/hid/intel-ish-hid/ishtp/bus.c
+++ b/drivers/hid/intel-ish-hid/ishtp/bus.c
@@ -241,7 +241,7 @@ static int ishtp_cl_bus_match(struct device *dev, struct device_driver *drv)
 	struct ishtp_cl_device *device = to_ishtp_cl_device(dev);
 	struct ishtp_cl_driver *driver = to_ishtp_cl_driver(drv);
 
-	return guid_equal(driver->guid,
+	return guid_equal(&driver->id[0].guid,
 			  &device->fw_client->props.protocol_name);
 }
 
diff --git a/drivers/platform/chrome/cros_ec_ishtp.c b/drivers/platform/chrome/cros_ec_ishtp.c
index 8c17358e84c1..4020b8354bae 100644
--- a/drivers/platform/chrome/cros_ec_ishtp.c
+++ b/drivers/platform/chrome/cros_ec_ishtp.c
@@ -41,9 +41,12 @@ enum cros_ec_ish_channel {
 #define ISHTP_SEND_TIMEOUT			(3 * HZ)
 
 /* ISH Transport CrOS EC ISH client unique GUID */
-static const guid_t cros_ish_guid =
-	GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc,
-		  0xb0, 0xd8, 0x9e, 0x7c, 0xda,	0xe0, 0xd6, 0xa0);
+static const struct ishtp_device_id cros_ec_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0x7b7154d0, 0x56f4, 0x4bdc,
+		  0xb0, 0xd8, 0x9e, 0x7c, 0xda,	0xe0, 0xd6, 0xa0), },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, cros_ec_ishtp_id_table);
 
 struct header {
 	u8 channel;
@@ -389,7 +392,7 @@ static int cros_ish_init(struct ishtp_cl *cros_ish_cl)
 	ishtp_set_tx_ring_size(cros_ish_cl, CROS_ISH_CL_TX_RING_SIZE);
 	ishtp_set_rx_ring_size(cros_ish_cl, CROS_ISH_CL_RX_RING_SIZE);
 
-	fw_client = ishtp_fw_cl_get_client(dev, &cros_ish_guid);
+	fw_client = ishtp_fw_cl_get_client(dev, &cros_ec_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(client_data),
 			"ish client uuid not found\n");
@@ -765,7 +768,7 @@ static SIMPLE_DEV_PM_OPS(cros_ec_ishtp_pm_ops, cros_ec_ishtp_suspend,
 
 static struct ishtp_cl_driver	cros_ec_ishtp_driver = {
 	.name = "cros_ec_ishtp",
-	.guid = &cros_ish_guid,
+	.id = cros_ec_ishtp_id_table,
 	.probe = cros_ec_ishtp_probe,
 	.remove = cros_ec_ishtp_remove,
 	.reset = cros_ec_ishtp_reset,
@@ -774,12 +777,6 @@ static struct ishtp_cl_driver	cros_ec_ishtp_driver = {
 	},
 };
 
-static const struct ishtp_device_id cros_ec_ishtp_id_table[] = {
-	{ cros_ish_guid },
-	{ }
-};
-MODULE_DEVICE_TABLE(ishtp, cros_ec_ishtp_id_table);
-
 static int __init cros_ec_ishtp_mod_init(void)
 {
 	return ishtp_cl_driver_register(&cros_ec_ishtp_driver, THIS_MODULE);
diff --git a/drivers/platform/x86/intel/ishtp_eclite.c b/drivers/platform/x86/intel/ishtp_eclite.c
index b9fb8f28fd63..93ac8b2dbf38 100644
--- a/drivers/platform/x86/intel/ishtp_eclite.c
+++ b/drivers/platform/x86/intel/ishtp_eclite.c
@@ -93,9 +93,12 @@ struct ishtp_opregion_dev {
 };
 
 /* eclite ishtp client UUID: 6a19cc4b-d760-4de3-b14d-f25ebd0fbcd9 */
-static const guid_t ecl_ishtp_guid =
-	GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
-		  0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9);
+static const struct ishtp_device_id ecl_ishtp_id_table[] = {
+	{ .guid = GUID_INIT(0x6a19cc4b, 0xd760, 0x4de3,
+		  0xb1, 0x4d, 0xf2, 0x5e, 0xbd, 0xf, 0xbc, 0xd9), },
+	{ }
+};
+MODULE_DEVICE_TABLE(ishtp, ecl_ishtp_id_table);
 
 /* ACPI DSM UUID: 91d936a7-1f01-49c6-a6b4-72f00ad8d8a5 */
 static const guid_t ecl_acpi_guid =
@@ -462,7 +465,7 @@ static int ecl_ishtp_cl_init(struct ishtp_cl *ecl_ishtp_cl)
 	ishtp_set_tx_ring_size(ecl_ishtp_cl, ECL_CL_TX_RING_SIZE);
 	ishtp_set_rx_ring_size(ecl_ishtp_cl, ECL_CL_RX_RING_SIZE);
 
-	fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_guid);
+	fw_client = ishtp_fw_cl_get_client(dev, &ecl_ishtp_id_table[0].guid);
 	if (!fw_client) {
 		dev_err(cl_data_to_dev(opr_dev), "fw client not found\n");
 		return -ENOENT;
@@ -674,19 +677,13 @@ static const struct dev_pm_ops ecl_ishtp_pm_ops = {
 
 static struct ishtp_cl_driver ecl_ishtp_cl_driver = {
 	.name = "ishtp-eclite",
-	.guid = &ecl_ishtp_guid,
+	.id = ecl_ishtp_id_table,
 	.probe = ecl_ishtp_cl_probe,
 	.remove = ecl_ishtp_cl_remove,
 	.reset = ecl_ishtp_cl_reset,
 	.driver.pm = &ecl_ishtp_pm_ops,
 };
 
-static const struct ishtp_device_id ecl_ishtp_id_table[] = {
-	{ ecl_ishtp_guid },
-	{ }
-};
-MODULE_DEVICE_TABLE(ishtp, ecl_ishtp_id_table);
-
 static int __init ecl_ishtp_init(void)
 {
 	return ishtp_cl_driver_register(&ecl_ishtp_cl_driver, THIS_MODULE);
diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h
index aee8ff4739b1..f45f13304add 100644
--- a/include/linux/intel-ish-client-if.h
+++ b/include/linux/intel-ish-client-if.h
@@ -9,7 +9,7 @@
 #define _INTEL_ISH_CLIENT_IF_H_
 
 #include <linux/device.h>
-#include <linux/uuid.h>
+#include <linux/mod_devicetable.h>
 
 struct ishtp_cl_device;
 struct ishtp_device;
@@ -40,7 +40,7 @@ enum cl_state {
 struct ishtp_cl_driver {
 	struct device_driver driver;
 	const char *name;
-	const guid_t *guid;
+	const struct ishtp_device_id *id;
 	int (*probe)(struct ishtp_cl_device *dev);
 	void (*remove)(struct ishtp_cl_device *dev);
 	int (*reset)(struct ishtp_cl_device *dev);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index c70abe7aaef2..4bb71979a8fd 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -902,9 +902,11 @@ struct dfl_device_id {
 /**
  * struct ishtp_device_id - ISHTP device identifier
  * @guid: GUID of the device.
+ * @driver_data: pointer to driver specific data
  */
 struct ishtp_device_id {
 	guid_t guid;
+	kernel_ulong_t driver_data;
 };
 
 #endif /* LINUX_MOD_DEVICETABLE_H */
-- 
cgit v1.2.3


From 636f6e2af4fb916b9f1b432964294b6979c34002 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Tue, 9 Nov 2021 08:45:25 +0900
Subject: libata: add horkage for missing Identify Device log

ACS-3 introduced the ATA Identify Device Data log as mandatory. A
warning message currently signals to the user if a device does not
report supporting this log page in the log directory page, regardless
of the ATA version of the device. Furthermore, this warning will appear
for all attempts at accessing this missing log page during device
revalidation.

Since it is useless to constantly access the log directory and warn
about this lack of support once we have discovered that the device
does not support this log page, introduce the horkage flag
ATA_HORKAGE_NO_ID_DEV_LOG to mark a device as lacking support for
the Identify Device Data log page. Set this flag when
ata_log_supported() returns false in ata_identify_page_supported().
The warning is printed only if the device ATA level is 10 or above
(ACS-3 or above), and only once on device scan. With this flag set, the
log directory page is not accessed again to test for Identify Device
Data log page support.

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ata/libata-core.c | 13 ++++++++++++-
 include/linux/libata.h    |  1 +
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3018ca84a3d8..8a0ccb190d76 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2052,8 +2052,19 @@ static bool ata_identify_page_supported(struct ata_device *dev, u8 page)
 	struct ata_port *ap = dev->link->ap;
 	unsigned int err, i;
 
+	if (dev->horkage & ATA_HORKAGE_NO_ID_DEV_LOG)
+		return false;
+
 	if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE)) {
-		ata_dev_warn(dev, "ATA Identify Device Log not supported\n");
+		/*
+		 * IDENTIFY DEVICE data log is defined as mandatory starting
+		 * with ACS-3 (ATA version 10). Warn about the missing log
+		 * for drives which implement this ATA level or above.
+		 */
+		if (ata_id_major_version(dev->id) >= 10)
+			ata_dev_warn(dev,
+				"ATA Identify Device Log not supported\n");
+		dev->horkage |= ATA_HORKAGE_NO_ID_DEV_LOG;
 		return false;
 	}
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 5331557316e8..2a8404b26083 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -427,6 +427,7 @@ enum {
 	ATA_HORKAGE_MAX_SEC_1024 = (1 << 25),	/* Limit max sects to 1024 */
 	ATA_HORKAGE_MAX_TRIM_128M = (1 << 26),	/* Limit max trim size to 128M */
 	ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27),	/* Disable NCQ on ATI chipset */
+	ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28),	/* Identify device log missing */
 
 	 /* DMA mask for user DMA control: User visible values; DO NOT
 	    renumber */
-- 
cgit v1.2.3


From 32a370abf12f82c8383e430c21365f5355d8b288 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 12 Nov 2021 12:07:02 -0500
Subject: net,lsm,selinux: revert the security_sctp_assoc_established() hook

This patch reverts two prior patches, e7310c94024c
("security: implement sctp_assoc_established hook in selinux") and
7c2ef0240e6a ("security: add sctp_assoc_established hook"), which
create the security_sctp_assoc_established() LSM hook and provide a
SELinux implementation.  Unfortunately these two patches were merged
without proper review (the Reviewed-by and Tested-by tags from
Richard Haines were for previous revisions of these patches that
were significantly different) and there are outstanding objections
from the SELinux maintainers regarding these patches.

Work is currently ongoing to correct the problems identified in the
reverted patches, as well as others that have come up during review,
but it is unclear at this point in time when that work will be ready
for inclusion in the mainline kernel.  In the interest of not keeping
objectionable code in the kernel for multiple weeks, and potentially
a kernel release, we are reverting the two problematic patches.

Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 Documentation/security/SCTP.rst | 22 ++++++++++++----------
 include/linux/lsm_hook_defs.h   |  2 --
 include/linux/lsm_hooks.h       |  5 -----
 include/linux/security.h        |  7 -------
 net/sctp/sm_statefuns.c         |  2 +-
 security/security.c             |  7 -------
 security/selinux/hooks.c        | 14 +-------------
 7 files changed, 14 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index 406cc68b8808..d5fd6ccc3dcb 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -15,7 +15,10 @@ For security module support, three SCTP specific hooks have been implemented::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_sctp_assoc_established()
+
+Also the following security hook has been utilised::
+
+    security_inet_conn_established()
 
 The usage of these hooks are described below with the SELinux implementation
 described in the `SCTP SELinux Support`_ chapter.
@@ -119,12 +122,11 @@ calls **sctp_peeloff**\(3).
     @newsk - pointer to new sock structure.
 
 
-security_sctp_assoc_established()
+security_inet_conn_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Called when a COOKIE ACK is received, and the peer secid will be
-saved into ``@asoc->peer_secid`` for client::
+Called when a COOKIE ACK is received::
 
-    @asoc - pointer to sctp association structure.
+    @sk  - pointer to sock structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
@@ -132,7 +134,7 @@ Security Hooks used for Association Establishment
 -------------------------------------------------
 
 The following diagram shows the use of ``security_sctp_bind_connect()``,
-``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when
+``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when
 establishing an association.
 ::
 
@@ -170,7 +172,7 @@ establishing an association.
           <------------------------------------------- COOKIE ACK
           |                                               |
     sctp_sf_do_5_1E_ca                                    |
- Call security_sctp_assoc_established()                   |
+ Call security_inet_conn_established()                    |
  to set the peer label.                                   |
           |                                               |
           |                               If SCTP_SOCKET_TCP or peeled off
@@ -196,7 +198,7 @@ hooks with the SELinux specifics expanded below::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_sctp_assoc_established()
+    security_inet_conn_established()
 
 
 security_sctp_assoc_request()
@@ -269,12 +271,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and
     @newsk - pointer to new sock structure.
 
 
-security_sctp_assoc_established()
+security_inet_conn_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Called when a COOKIE ACK is received where it sets the connection's peer sid
 to that in ``@skb``::
 
-    @asoc - pointer to sctp association structure.
+    @sk  - pointer to sock structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 442a611fa0fb..df8de62f4710 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -335,8 +335,6 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname,
 	 struct sockaddr *address, int addrlen)
 LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
-LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc,
-	 struct sk_buff *skb)
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d6823214d5c1..d45b6f6e27fd 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1050,11 +1050,6 @@
  *	@asoc pointer to current sctp association structure.
  *	@sk pointer to current sock structure.
  *	@newsk pointer to new sock structure.
- * @sctp_assoc_established:
- *	Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet
- *	to the security module.
- *	@asoc pointer to sctp association structure.
- *	@skb pointer to skbuff of association packet.
  *
  * Security hooks for Infiniband
  *
diff --git a/include/linux/security.h b/include/linux/security.h
index 06eac4e61a13..bbf44a466832 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1430,8 +1430,6 @@ int security_sctp_bind_connect(struct sock *sk, int optname,
 			       struct sockaddr *address, int addrlen);
 void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
-void security_sctp_assoc_established(struct sctp_association *asoc,
-				     struct sk_buff *skb);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -1651,11 +1649,6 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc,
 					  struct sock *newsk)
 {
 }
-
-static inline void security_sctp_assoc_established(struct sctp_association *asoc,
-						   struct sk_buff *skb)
-{
-}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 39ba82ee87ce..354c1c4de19b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -946,7 +946,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
 
 	/* Set peer label for connection. */
-	security_sctp_assoc_established((struct sctp_association *)asoc, chunk->skb);
+	security_inet_conn_established(ep->base.sk, chunk->skb);
 
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
diff --git a/security/security.c b/security/security.c
index 779a9edea0a0..c88167a414b4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2388,13 +2388,6 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 }
 EXPORT_SYMBOL(security_sctp_sk_clone);
 
-void security_sctp_assoc_established(struct sctp_association *asoc,
-				     struct sk_buff *skb)
-{
-	call_void_hook(sctp_assoc_established, asoc, skb);
-}
-EXPORT_SYMBOL(security_sctp_assoc_established);
-
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5e5215fe2e83..62d30c0a30c2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5502,8 +5502,7 @@ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk
 	if (!selinux_policycap_extsockclass())
 		return selinux_sk_clone_security(sk, newsk);
 
-	if (asoc->secid != SECSID_WILD)
-		newsksec->sid = asoc->secid;
+	newsksec->sid = asoc->secid;
 	newsksec->peer_sid = asoc->peer_secid;
 	newsksec->sclass = sksec->sclass;
 	selinux_netlbl_sctp_sk_clone(sk, newsk);
@@ -5559,16 +5558,6 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
 	selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
 }
 
-static void selinux_sctp_assoc_established(struct sctp_association *asoc,
-					   struct sk_buff *skb)
-{
-	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
-
-	selinux_inet_conn_established(asoc->base.sk, skb);
-	asoc->peer_secid = sksec->peer_sid;
-	asoc->secid = SECSID_WILD;
-}
-
 static int selinux_secmark_relabel_packet(u32 sid)
 {
 	const struct task_security_struct *__tsec;
@@ -7239,7 +7228,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request),
 	LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone),
 	LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect),
-	LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established),
 	LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request),
 	LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone),
 	LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established),
-- 
cgit v1.2.3


From 1aa3b2207e889a948049c9a8016cedb0218c2389 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul@paul-moore.com>
Date: Fri, 12 Nov 2021 18:18:10 -0500
Subject: net,lsm,selinux: revert the security_sctp_assoc_established() hook

This patch reverts two prior patches, e7310c94024c
("security: implement sctp_assoc_established hook in selinux") and
7c2ef0240e6a ("security: add sctp_assoc_established hook"), which
create the security_sctp_assoc_established() LSM hook and provide a
SELinux implementation.  Unfortunately these two patches were merged
without proper review (the Reviewed-by and Tested-by tags from
Richard Haines were for previous revisions of these patches that
were significantly different) and there are outstanding objections
from the SELinux maintainers regarding these patches.

Work is currently ongoing to correct the problems identified in the
reverted patches, as well as others that have come up during review,
but it is unclear at this point in time when that work will be ready
for inclusion in the mainline kernel.  In the interest of not keeping
objectionable code in the kernel for multiple weeks, and potentially
a kernel release, we are reverting the two problematic patches.

Signed-off-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/security/SCTP.rst | 22 ++++++++++++----------
 include/linux/lsm_hook_defs.h   |  2 --
 include/linux/lsm_hooks.h       |  5 -----
 include/linux/security.h        |  7 -------
 net/sctp/sm_statefuns.c         |  2 +-
 security/security.c             |  7 -------
 security/selinux/hooks.c        | 14 +-------------
 7 files changed, 14 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/security/SCTP.rst b/Documentation/security/SCTP.rst
index 406cc68b8808..d5fd6ccc3dcb 100644
--- a/Documentation/security/SCTP.rst
+++ b/Documentation/security/SCTP.rst
@@ -15,7 +15,10 @@ For security module support, three SCTP specific hooks have been implemented::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_sctp_assoc_established()
+
+Also the following security hook has been utilised::
+
+    security_inet_conn_established()
 
 The usage of these hooks are described below with the SELinux implementation
 described in the `SCTP SELinux Support`_ chapter.
@@ -119,12 +122,11 @@ calls **sctp_peeloff**\(3).
     @newsk - pointer to new sock structure.
 
 
-security_sctp_assoc_established()
+security_inet_conn_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-Called when a COOKIE ACK is received, and the peer secid will be
-saved into ``@asoc->peer_secid`` for client::
+Called when a COOKIE ACK is received::
 
-    @asoc - pointer to sctp association structure.
+    @sk  - pointer to sock structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
@@ -132,7 +134,7 @@ Security Hooks used for Association Establishment
 -------------------------------------------------
 
 The following diagram shows the use of ``security_sctp_bind_connect()``,
-``security_sctp_assoc_request()``, ``security_sctp_assoc_established()`` when
+``security_sctp_assoc_request()``, ``security_inet_conn_established()`` when
 establishing an association.
 ::
 
@@ -170,7 +172,7 @@ establishing an association.
           <------------------------------------------- COOKIE ACK
           |                                               |
     sctp_sf_do_5_1E_ca                                    |
- Call security_sctp_assoc_established()                   |
+ Call security_inet_conn_established()                    |
  to set the peer label.                                   |
           |                                               |
           |                               If SCTP_SOCKET_TCP or peeled off
@@ -196,7 +198,7 @@ hooks with the SELinux specifics expanded below::
     security_sctp_assoc_request()
     security_sctp_bind_connect()
     security_sctp_sk_clone()
-    security_sctp_assoc_established()
+    security_inet_conn_established()
 
 
 security_sctp_assoc_request()
@@ -269,12 +271,12 @@ sockets sid and peer sid to that contained in the ``@asoc sid`` and
     @newsk - pointer to new sock structure.
 
 
-security_sctp_assoc_established()
+security_inet_conn_established()
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 Called when a COOKIE ACK is received where it sets the connection's peer sid
 to that in ``@skb``::
 
-    @asoc - pointer to sctp association structure.
+    @sk  - pointer to sock structure.
     @skb - pointer to skbuff of the COOKIE ACK packet.
 
 
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 442a611fa0fb..df8de62f4710 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -335,8 +335,6 @@ LSM_HOOK(int, 0, sctp_bind_connect, struct sock *sk, int optname,
 	 struct sockaddr *address, int addrlen)
 LSM_HOOK(void, LSM_RET_VOID, sctp_sk_clone, struct sctp_association *asoc,
 	 struct sock *sk, struct sock *newsk)
-LSM_HOOK(void, LSM_RET_VOID, sctp_assoc_established, struct sctp_association *asoc,
-	 struct sk_buff *skb)
 #endif /* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index d6823214d5c1..d45b6f6e27fd 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1050,11 +1050,6 @@
  *	@asoc pointer to current sctp association structure.
  *	@sk pointer to current sock structure.
  *	@newsk pointer to new sock structure.
- * @sctp_assoc_established:
- *	Passes the @asoc and @chunk->skb of the association COOKIE_ACK packet
- *	to the security module.
- *	@asoc pointer to sctp association structure.
- *	@skb pointer to skbuff of association packet.
  *
  * Security hooks for Infiniband
  *
diff --git a/include/linux/security.h b/include/linux/security.h
index 06eac4e61a13..bbf44a466832 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1430,8 +1430,6 @@ int security_sctp_bind_connect(struct sock *sk, int optname,
 			       struct sockaddr *address, int addrlen);
 void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 			    struct sock *newsk);
-void security_sctp_assoc_established(struct sctp_association *asoc,
-				     struct sk_buff *skb);
 
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct sock *sock,
@@ -1651,11 +1649,6 @@ static inline void security_sctp_sk_clone(struct sctp_association *asoc,
 					  struct sock *newsk)
 {
 }
-
-static inline void security_sctp_assoc_established(struct sctp_association *asoc,
-						   struct sk_buff *skb)
-{
-}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 39ba82ee87ce..354c1c4de19b 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -946,7 +946,7 @@ enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net,
 	sctp_add_cmd_sf(commands, SCTP_CMD_INIT_COUNTER_RESET, SCTP_NULL());
 
 	/* Set peer label for connection. */
-	security_sctp_assoc_established((struct sctp_association *)asoc, chunk->skb);
+	security_inet_conn_established(ep->base.sk, chunk->skb);
 
 	/* RFC 2960 5.1 Normal Establishment of an Association
 	 *
diff --git a/security/security.c b/security/security.c
index 779a9edea0a0..c88167a414b4 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2388,13 +2388,6 @@ void security_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk,
 }
 EXPORT_SYMBOL(security_sctp_sk_clone);
 
-void security_sctp_assoc_established(struct sctp_association *asoc,
-				     struct sk_buff *skb)
-{
-	call_void_hook(sctp_assoc_established, asoc, skb);
-}
-EXPORT_SYMBOL(security_sctp_assoc_established);
-
 #endif	/* CONFIG_SECURITY_NETWORK */
 
 #ifdef CONFIG_SECURITY_INFINIBAND
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5e5215fe2e83..62d30c0a30c2 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -5502,8 +5502,7 @@ static void selinux_sctp_sk_clone(struct sctp_association *asoc, struct sock *sk
 	if (!selinux_policycap_extsockclass())
 		return selinux_sk_clone_security(sk, newsk);
 
-	if (asoc->secid != SECSID_WILD)
-		newsksec->sid = asoc->secid;
+	newsksec->sid = asoc->secid;
 	newsksec->peer_sid = asoc->peer_secid;
 	newsksec->sclass = sksec->sclass;
 	selinux_netlbl_sctp_sk_clone(sk, newsk);
@@ -5559,16 +5558,6 @@ static void selinux_inet_conn_established(struct sock *sk, struct sk_buff *skb)
 	selinux_skb_peerlbl_sid(skb, family, &sksec->peer_sid);
 }
 
-static void selinux_sctp_assoc_established(struct sctp_association *asoc,
-					   struct sk_buff *skb)
-{
-	struct sk_security_struct *sksec = asoc->base.sk->sk_security;
-
-	selinux_inet_conn_established(asoc->base.sk, skb);
-	asoc->peer_secid = sksec->peer_sid;
-	asoc->secid = SECSID_WILD;
-}
-
 static int selinux_secmark_relabel_packet(u32 sid)
 {
 	const struct task_security_struct *__tsec;
@@ -7239,7 +7228,6 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
 	LSM_HOOK_INIT(sctp_assoc_request, selinux_sctp_assoc_request),
 	LSM_HOOK_INIT(sctp_sk_clone, selinux_sctp_sk_clone),
 	LSM_HOOK_INIT(sctp_bind_connect, selinux_sctp_bind_connect),
-	LSM_HOOK_INIT(sctp_assoc_established, selinux_sctp_assoc_established),
 	LSM_HOOK_INIT(inet_conn_request, selinux_inet_conn_request),
 	LSM_HOOK_INIT(inet_csk_clone, selinux_inet_csk_clone),
 	LSM_HOOK_INIT(inet_conn_established, selinux_inet_conn_established),
-- 
cgit v1.2.3


From 938aa33f14657c9ed9deea348b7d6f14b6d69cb7 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Sun, 14 Nov 2021 13:28:34 -0500
Subject: tracing: Add length protection to histogram string copies

The string copies to the histogram storage has a max size of 256 bytes
(defined by MAX_FILTER_STR_VAL). Only the string size of the event field
needs to be copied to the event storage, but no more than what is in the
event storage. Although nothing should be bigger than 256 bytes, there's
no protection against overwriting of the storage if one day there is.

Copy no more than the destination size, and enforce it.

Also had to turn MAX_FILTER_STR_VAL into an unsigned int, to keep the
min() comparison of the string sizes of comparable types.

Link: https://lore.kernel.org/all/CAHk-=wjREUihCGrtRBwfX47y_KrLCGjiq3t6QtoNJpmVrAEb1w@mail.gmail.com/
Link: https://lkml.kernel.org/r/20211114132834.183429a4@rorschach.local.home

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Fixes: 63f84ae6b82b ("tracing/histogram: Do not copy the fixed-size char array field over the field size")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_events.h     | 2 +-
 kernel/trace/trace_events_hist.c | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 50453b287615..2d167ac3452c 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -673,7 +673,7 @@ struct trace_event_file {
 
 #define PERF_MAX_TRACE_SIZE	8192
 
-#define MAX_FILTER_STR_VAL	256	/* Should handle KSYM_SYMBOL_LEN */
+#define MAX_FILTER_STR_VAL	256U	/* Should handle KSYM_SYMBOL_LEN */
 
 enum event_trigger_type {
 	ETT_NONE		= (0),
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1475d7347fe0..34afcaebd0e5 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3026,8 +3026,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
 		if (val->flags & HIST_FIELD_FL_STRING) {
 			char *str = elt_data->field_var_str[j++];
 			char *val_str = (char *)(uintptr_t)var_val;
+			unsigned int size;
 
-			strscpy(str, val_str, val->size);
+			size = min(val->size, STR_VAR_LEN_MAX);
+			strscpy(str, val_str, size);
 			var_val = (u64)(uintptr_t)str;
 		}
 		tracing_map_set_var(elt, var_idx, var_val);
@@ -4914,6 +4916,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
 			if (hist_field->flags & HIST_FIELD_FL_STRING) {
 				unsigned int str_start, var_str_idx, idx;
 				char *str, *val_str;
+				unsigned int size;
 
 				str_start = hist_data->n_field_var_str +
 					hist_data->n_save_var_str;
@@ -4922,7 +4925,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
 
 				str = elt_data->field_var_str[idx];
 				val_str = (char *)(uintptr_t)hist_val;
-				strscpy(str, val_str, hist_field->size);
+
+				size = min(hist_field->size, STR_VAR_LEN_MAX);
+				strscpy(str, val_str, size);
 
 				hist_val = (u64)(uintptr_t)str;
 			}
-- 
cgit v1.2.3


From 10a2308ffb8cf262e473eb324fde42ae31b6da04 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Fri, 12 Nov 2021 18:16:34 +0800
Subject: net: Clean up some inconsistent indenting

Eliminate the follow smatch warning:

./include/linux/skbuff.h:4229 skb_remcsum_process() warn: inconsistent
indenting.

Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 686a666d073d..c8cb7e697d47 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4226,7 +4226,7 @@ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr,
 		return;
 	}
 
-	 if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
+	if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) {
 		__skb_checksum_complete(skb);
 		skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data);
 	}
-- 
cgit v1.2.3


From a0ddee65c527d877e798205c1391c6170e580c66 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 12 Nov 2021 16:07:49 +0200
Subject: printk: Remove printk.h inclusion in percpu.h

After the commit 42a0bb3f7138 ("printk/nmi: generic solution for safe
printk in NMI") the printk.h is not needed anymore in percpu.h.

Moreover `make headerdep` complains (an excerpt)

In file included from linux/printk.h,
                 from linux/dynamic_debug.h:188
                 from linux/printk.h:559 <-- here
                 from linux/percpu.h:9
                 from linux/idr.h:17
include/net/9p/client.h:13: warning: recursive header inclusion

Yeah, it's not a root cause of this, but removing will help to reduce
the noise.

Fixes: 42a0bb3f7138 ("printk/nmi: generic solution for safe printk in NMI")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Petr Mladek <pmladek@suse.com>
Link: https://lore.kernel.org/r/20211112140749.80042-1-andriy.shevchenko@linux.intel.com
---
 include/linux/percpu.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 5e76af742c80..4fa3000f9c22 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -6,7 +6,6 @@
 #include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
-#include <linux/printk.h>
 #include <linux/pfn.h>
 #include <linux/init.h>
 
-- 
cgit v1.2.3


From e9380df851878cee71df5a1c7611584421527f7e Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Sun, 31 Oct 2021 20:48:52 -0500
Subject: ACPI: Add stubs for wakeup handler functions

The commit ddfd9dcf270c ("ACPI: PM: Add acpi_[un]register_wakeup_handler()")
added new functions for drivers to use during the s2idle wakeup path, but
didn't add stubs for when CONFIG_ACPI wasn't set.

Add those stubs in for other drivers to be able to use.

Fixes: ddfd9dcf270c ("ACPI: PM: Add acpi_[un]register_wakeup_handler()")
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Link: https://lore.kernel.org/r/20211101014853.6177-1-mario.limonciello@amd.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/acpi.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 143ce7e0bee1..668d007f0917 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -974,6 +974,15 @@ static inline int acpi_get_local_address(acpi_handle handle, u32 *addr)
 	return -ENODEV;
 }
 
+static inline int acpi_register_wakeup_handler(int wake_irq,
+	bool (*wakeup)(void *context), void *context)
+{
+	return -ENXIO;
+}
+
+static inline void acpi_unregister_wakeup_handler(
+	bool (*wakeup)(void *context), void *context) { }
+
 #endif	/* !CONFIG_ACPI */
 
 #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
-- 
cgit v1.2.3


From a3143f7822a9eeb38f0e046080ae8f79f6c7122d Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Tue, 2 Nov 2021 16:01:58 -0600
Subject: Remove unused header <linux/sdb.h>

Commit 6a80b30086b8 ("fmc: Delete the FMC subsystem") removed the last user
of <linux/sdb.h>, but left the header file behind.  Nothing uses this file,
delete it now.

Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Alessandro Rubini <rubini@gnudd.com>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
Acked-by: Alessandro Rubini <rubini@gnudd.com>
Link: https://lore.kernel.org/r/20211102220203.940290-5-corbet@lwn.net
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/sdb.h | 160 ----------------------------------------------------
 1 file changed, 160 deletions(-)
 delete mode 100644 include/linux/sdb.h

(limited to 'include/linux')

diff --git a/include/linux/sdb.h b/include/linux/sdb.h
deleted file mode 100644
index a2404a2bbd10..000000000000
--- a/include/linux/sdb.h
+++ /dev/null
@@ -1,160 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the official version 1.1 of sdb.h
- */
-#ifndef __SDB_H__
-#define __SDB_H__
-#ifdef __KERNEL__
-#include <linux/types.h>
-#else
-#include <stdint.h>
-#endif
-
-/*
- * All structures are 64 bytes long and are expected
- * to live in an array, one for each interconnect.
- * Most fields of the structures are shared among the
- * various types, and most-specific fields are at the
- * beginning (for alignment reasons, and to keep the
- * magic number at the head of the interconnect record
- */
-
-/* Product, 40 bytes at offset 24, 8-byte aligned
- *
- * device_id is vendor-assigned; version is device-specific,
- * date is hex (e.g 0x20120501), name is UTF-8, blank-filled
- * and not terminated with a 0 byte.
- */
-struct sdb_product {
-	uint64_t		vendor_id;	/* 0x18..0x1f */
-	uint32_t		device_id;	/* 0x20..0x23 */
-	uint32_t		version;	/* 0x24..0x27 */
-	uint32_t		date;		/* 0x28..0x2b */
-	uint8_t			name[19];	/* 0x2c..0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/*
- * Component, 56 bytes at offset 8, 8-byte aligned
- *
- * The address range is first to last, inclusive
- * (for example 0x100000 - 0x10ffff)
- */
-struct sdb_component {
-	uint64_t		addr_first;	/* 0x08..0x0f */
-	uint64_t		addr_last;	/* 0x10..0x17 */
-	struct sdb_product	product;	/* 0x18..0x3f */
-};
-
-/* Type of the SDB record */
-enum sdb_record_type {
-	sdb_type_interconnect	= 0x00,
-	sdb_type_device		= 0x01,
-	sdb_type_bridge		= 0x02,
-	sdb_type_integration	= 0x80,
-	sdb_type_repo_url	= 0x81,
-	sdb_type_synthesis	= 0x82,
-	sdb_type_empty		= 0xFF,
-};
-
-/* Type 0: interconnect (first of the array)
- *
- * sdb_records is the length of the table including this first
- * record, version is 1. The bus type is enumerated later.
- */
-#define				SDB_MAGIC	0x5344422d /* "SDB-" */
-struct sdb_interconnect {
-	uint32_t		sdb_magic;	/* 0x00-0x03 */
-	uint16_t		sdb_records;	/* 0x04-0x05 */
-	uint8_t			sdb_version;	/* 0x06 */
-	uint8_t			sdb_bus_type;	/* 0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 1: device
- *
- * class is 0 for "custom device", other values are
- * to be standardized; ABI version is for the driver,
- * bus-specific bits are defined by each bus (see below)
- */
-struct sdb_device {
-	uint16_t		abi_class;	/* 0x00-0x01 */
-	uint8_t			abi_ver_major;	/* 0x02 */
-	uint8_t			abi_ver_minor;	/* 0x03 */
-	uint32_t		bus_specific;	/* 0x04-0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 2: bridge
- *
- * child is the address of the nested SDB table
- */
-struct sdb_bridge {
-	uint64_t		sdb_child;	/* 0x00-0x07 */
-	struct sdb_component	sdb_component;	/* 0x08-0x3f */
-};
-
-/* Type 0x80: integration
- *
- * all types with bit 7 set are meta-information, so
- * software can ignore the types it doesn't know. Here we
- * just provide product information for an aggregate device
- */
-struct sdb_integration {
-	uint8_t			reserved[24];	/* 0x00-0x17 */
-	struct sdb_product	product;	/* 0x08-0x3f */
-};
-
-/* Type 0x81: Top module repository url
- *
- * again, an informative field that software can ignore
- */
-struct sdb_repo_url {
-	uint8_t			repo_url[63];	/* 0x00-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* Type 0x82: Synthesis tool information
- *
- * this informative record
- */
-struct sdb_synthesis {
-	uint8_t			syn_name[16];	/* 0x00-0x0f */
-	uint8_t			commit_id[16];	/* 0x10-0x1f */
-	uint8_t			tool_name[8];	/* 0x20-0x27 */
-	uint32_t		tool_version;	/* 0x28-0x2b */
-	uint32_t		date;		/* 0x2c-0x2f */
-	uint8_t			user_name[15];	/* 0x30-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* Type 0xff: empty
- *
- * this allows keeping empty slots during development,
- * so they can be filled later with minimal efforts and
- * no misleading description is ever shipped -- hopefully.
- * It can also be used to pad a table to a desired length.
- */
-struct sdb_empty {
-	uint8_t			reserved[63];	/* 0x00-0x3e */
-	uint8_t			record_type;	/* 0x3f */
-};
-
-/* The type of bus, for bus-specific flags */
-enum sdb_bus_type {
-	sdb_wishbone = 0x00,
-	sdb_data     = 0x01,
-};
-
-#define SDB_WB_WIDTH_MASK	0x0f
-#define SDB_WB_ACCESS8			0x01
-#define SDB_WB_ACCESS16			0x02
-#define SDB_WB_ACCESS32			0x04
-#define SDB_WB_ACCESS64			0x08
-#define SDB_WB_LITTLE_ENDIAN	0x80
-
-#define SDB_DATA_READ		0x04
-#define SDB_DATA_WRITE		0x02
-#define SDB_DATA_EXEC		0x01
-
-#endif /* __SDB_H__ */
-- 
cgit v1.2.3


From 353050be4c19e102178ccc05988101887c25ae53 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 9 Nov 2021 18:48:08 +0000
Subject: bpf: Fix toctou on read-only map's constant scalar tracking

Commit a23740ec43ba ("bpf: Track contents of read-only maps as scalars") is
checking whether maps are read-only both from BPF program side and user space
side, and then, given their content is constant, reading out their data via
map->ops->map_direct_value_addr() which is then subsequently used as known
scalar value for the register, that is, it is marked as __mark_reg_known()
with the read value at verification time. Before a23740ec43ba, the register
content was marked as an unknown scalar so the verifier could not make any
assumptions about the map content.

The current implementation however is prone to a TOCTOU race, meaning, the
value read as known scalar for the register is not guaranteed to be exactly
the same at a later point when the program is executed, and as such, the
prior made assumptions of the verifier with regards to the program will be
invalid which can cause issues such as OOB access, etc.

While the BPF_F_RDONLY_PROG map flag is always fixed and required to be
specified at map creation time, the map->frozen property is initially set to
false for the map given the map value needs to be populated, e.g. for global
data sections. Once complete, the loader "freezes" the map from user space
such that no subsequent updates/deletes are possible anymore. For the rest
of the lifetime of the map, this freeze one-time trigger cannot be undone
anymore after a successful BPF_MAP_FREEZE cmd return. Meaning, any new BPF_*
cmd calls which would update/delete map entries will be rejected with -EPERM
since map_get_sys_perms() removes the FMODE_CAN_WRITE permission. This also
means that pending update/delete map entries must still complete before this
guarantee is given. This corner case is not an issue for loaders since they
create and prepare such program private map in successive steps.

However, a malicious user is able to trigger this TOCTOU race in two different
ways: i) via userfaultfd, and ii) via batched updates. For i) userfaultfd is
used to expand the competition interval, so that map_update_elem() can modify
the contents of the map after map_freeze() and bpf_prog_load() were executed.
This works, because userfaultfd halts the parallel thread which triggered a
map_update_elem() at the time where we copy key/value from the user buffer and
this already passed the FMODE_CAN_WRITE capability test given at that time the
map was not "frozen". Then, the main thread performs the map_freeze() and
bpf_prog_load(), and once that had completed successfully, the other thread
is woken up to complete the pending map_update_elem() which then changes the
map content. For ii) the idea of the batched update is similar, meaning, when
there are a large number of updates to be processed, it can increase the
competition interval between the two. It is therefore possible in practice to
modify the contents of the map after executing map_freeze() and bpf_prog_load().

One way to fix both i) and ii) at the same time is to expand the use of the
map's map->writecnt. The latter was introduced in fc9702273e2e ("bpf: Add mmap()
support for BPF_MAP_TYPE_ARRAY") and further refined in 1f6cb19be2e2 ("bpf:
Prevent re-mmap()'ing BPF map as writable for initially r/o mapping") with
the rationale to make a writable mmap()'ing of a map mutually exclusive with
read-only freezing. The counter indicates writable mmap() mappings and then
prevents/fails the freeze operation. Its semantics can be expanded beyond
just mmap() by generally indicating ongoing write phases. This would essentially
span any parallel regular and batched flavor of update/delete operation and
then also have map_freeze() fail with -EBUSY. For the check_mem_access() in
the verifier we expand upon the bpf_map_is_rdonly() check ensuring that all
last pending writes have completed via bpf_map_write_active() test. Once the
map->frozen is set and bpf_map_write_active() indicates a map->writecnt of 0
only then we are really guaranteed to use the map's data as known constants.
For map->frozen being set and pending writes in process of still being completed
we fall back to marking that register as unknown scalar so we don't end up
making assumptions about it. With this, both TOCTOU reproducers from i) and
ii) are fixed.

Note that the map->writecnt has been converted into a atomic64 in the fix in
order to avoid a double freeze_mutex mutex_{un,}lock() pair when updating
map->writecnt in the various map update/delete BPF_* cmd flavors. Spanning
the freeze_mutex over entire map update/delete operations in syscall side
would not be possible due to then causing everything to be serialized.
Similarly, something like synchronize_rcu() after setting map->frozen to wait
for update/deletes to complete is not possible either since it would also
have to span the user copy which can sleep. On the libbpf side, this won't
break d66562fba1ce ("libbpf: Add BPF object skeleton support") as the
anonymous mmap()-ed "map initialization image" is remapped as a BPF map-backed
mmap()-ed memory where for .rodata it's non-writable.

Fixes: a23740ec43ba ("bpf: Track contents of read-only maps as scalars")
Reported-by: w1tcher.bupt@gmail.com
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h   |  3 ++-
 kernel/bpf/syscall.c  | 57 ++++++++++++++++++++++++++++++++-------------------
 kernel/bpf/verifier.c | 17 ++++++++++++++-
 3 files changed, 54 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f715e8863f4d..e7a163a3146b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,7 @@ struct bpf_map {
 	atomic64_t usercnt;
 	struct work_struct work;
 	struct mutex freeze_mutex;
-	u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
+	atomic64_t writecnt;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -1419,6 +1419,7 @@ void bpf_map_put(struct bpf_map *map);
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
+bool bpf_map_write_active(const struct bpf_map *map);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 int  generic_map_lookup_batch(struct bpf_map *map,
 			      const union bpf_attr *attr,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 50f96ea4452a..1033ee8c0caf 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
 	return map;
 }
 
+static void bpf_map_write_active_inc(struct bpf_map *map)
+{
+	atomic64_inc(&map->writecnt);
+}
+
+static void bpf_map_write_active_dec(struct bpf_map *map)
+{
+	atomic64_dec(&map->writecnt);
+}
+
+bool bpf_map_write_active(const struct bpf_map *map)
+{
+	return atomic64_read(&map->writecnt) != 0;
+}
+
 static u32 bpf_map_value_size(const struct bpf_map *map)
 {
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -601,11 +616,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma)
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	if (vma->vm_flags & VM_MAYWRITE) {
-		mutex_lock(&map->freeze_mutex);
-		map->writecnt++;
-		mutex_unlock(&map->freeze_mutex);
-	}
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_inc(map);
 }
 
 /* called for all unmapped memory region (including initial) */
@@ -613,11 +625,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma)
 {
 	struct bpf_map *map = vma->vm_file->private_data;
 
-	if (vma->vm_flags & VM_MAYWRITE) {
-		mutex_lock(&map->freeze_mutex);
-		map->writecnt--;
-		mutex_unlock(&map->freeze_mutex);
-	}
+	if (vma->vm_flags & VM_MAYWRITE)
+		bpf_map_write_active_dec(map);
 }
 
 static const struct vm_operations_struct bpf_map_default_vmops = {
@@ -668,7 +677,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
 		goto out;
 
 	if (vma->vm_flags & VM_MAYWRITE)
-		map->writecnt++;
+		bpf_map_write_active_inc(map);
 out:
 	mutex_unlock(&map->freeze_mutex);
 	return err;
@@ -1139,6 +1148,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
@@ -1174,6 +1184,7 @@ free_value:
 free_key:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1196,6 +1207,7 @@ static int map_delete_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
@@ -1226,6 +1238,7 @@ static int map_delete_elem(union bpf_attr *attr)
 out:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1533,6 +1546,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
+	bpf_map_write_active_inc(map);
 	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
 	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
@@ -1597,6 +1611,7 @@ free_value:
 free_key:
 	kvfree(key);
 err_put:
+	bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
@@ -1624,8 +1639,7 @@ static int map_freeze(const union bpf_attr *attr)
 	}
 
 	mutex_lock(&map->freeze_mutex);
-
-	if (map->writecnt) {
+	if (bpf_map_write_active(map)) {
 		err = -EBUSY;
 		goto err_put;
 	}
@@ -4171,6 +4185,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
 			    union bpf_attr __user *uattr,
 			    int cmd)
 {
+	bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
+			 cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
+	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
 	struct bpf_map *map;
 	int err, ufd;
 	struct fd f;
@@ -4183,16 +4200,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-
-	if ((cmd == BPF_MAP_LOOKUP_BATCH ||
-	     cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
-	    !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+	if (has_write)
+		bpf_map_write_active_inc(map);
+	if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
 		err = -EPERM;
 		goto err_put;
 	}
-
-	if (cmd != BPF_MAP_LOOKUP_BATCH &&
-	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+	if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -4205,8 +4219,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
 		BPF_DO_BATCH(map->ops->map_update_batch);
 	else
 		BPF_DO_BATCH(map->ops->map_delete_batch);
-
 err_put:
+	if (has_write)
+		bpf_map_write_active_dec(map);
 	fdput(f);
 	return err;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 65d2f93b7030..50efda51515b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4056,7 +4056,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 
 static bool bpf_map_is_rdonly(const struct bpf_map *map)
 {
-	return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+	/* A map is considered read-only if the following condition are true:
+	 *
+	 * 1) BPF program side cannot change any of the map content. The
+	 *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
+	 *    and was set at map creation time.
+	 * 2) The map value(s) have been initialized from user space by a
+	 *    loader and then "frozen", such that no new map update/delete
+	 *    operations from syscall side are possible for the rest of
+	 *    the map's lifetime from that point onwards.
+	 * 3) Any parallel/pending map update/delete operations from syscall
+	 *    side have been completed. Only after that point, it's safe to
+	 *    assume that map value(s) are immutable.
+	 */
+	return (map->map_flags & BPF_F_RDONLY_PROG) &&
+	       READ_ONCE(map->frozen) &&
+	       !bpf_map_write_active(map);
 }
 
 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
-- 
cgit v1.2.3


From d7751d6476185ff754b9dad2cba0c0a6e43ecadc Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Thu, 20 May 2021 17:09:58 +0300
Subject: net/mlx5: E-Switch, Fix resetting of encap mode when entering
 switchdev

E-Switch encap mode is relevant only when in switchdev mode.
The RDMA driver can query the encap configuration via
mlx5_eswitch_get_encap_mode(). Make sure it returns the currently
used mode and not the set one.

This reverts the cited commit which reset the encap mode
on entering switchdev and fixes the original issue properly.

Fixes: 9a64144d683a ("net/mlx5: E-Switch, Fix default encap mode")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c          | 9 +++++++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 7 -------
 include/linux/mlx5/eswitch.h                               | 4 ++--
 3 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ec136b499204..5872cc8bf953 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1572,6 +1572,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	esw->enabled_vports = 0;
 	esw->mode = MLX5_ESWITCH_NONE;
 	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
+	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+	else
+		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 
 	dev->priv.eswitch = esw;
 	BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
@@ -1934,7 +1939,7 @@ free_out:
 	return err;
 }
 
-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
 {
 	struct mlx5_eswitch *esw = dev->priv.eswitch;
 
@@ -1948,7 +1953,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
 	struct mlx5_eswitch *esw;
 
 	esw = dev->priv.eswitch;
-	return mlx5_esw_allowed(esw) ? esw->offloads.encap :
+	return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS)  ? esw->offloads.encap :
 		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f4eaa5893886..80fa76f60e1e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3183,12 +3183,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
 	u64 mapping_id;
 	int err;
 
-	if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
-	    MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
-		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
-	else
-		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-
 	mutex_init(&esw->offloads.termtbl_mutex);
 	mlx5_rdma_enable_roce(esw->dev);
 
@@ -3286,7 +3280,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw)
 	esw_offloads_metadata_uninit(esw);
 	mlx5_rdma_disable_roce(esw->dev);
 	mutex_destroy(&esw->offloads.termtbl_mutex);
-	esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 
 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
index 97afcea39a7b..8b18fe9771f9 100644
--- a/include/linux/mlx5/eswitch.h
+++ b/include/linux/mlx5/eswitch.h
@@ -145,13 +145,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
 	GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
 		ESW_TUN_OPTS_OFFSET + 1)
 
-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
 struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
-static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
+static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
 {
 	return MLX5_ESWITCH_NONE;
 }
-- 
cgit v1.2.3


From 49c39ec4670a8f045729e3717af2e1a74caf89a5 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 13 Sep 2021 14:21:25 +0200
Subject: dma-buf: nuke dma_resv_get_excl_unlocked
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Heureka, that's finally not used any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210917123513.1106-27-christian.koenig@amd.com
---
 include/linux/dma-resv.h | 26 --------------------------
 1 file changed, 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 09c6063b199a..eebf04325b34 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -440,32 +440,6 @@ dma_resv_excl_fence(struct dma_resv *obj)
 	return rcu_dereference_check(obj->fence_excl, dma_resv_held(obj));
 }
 
-/**
- * dma_resv_get_excl_unlocked - get the reservation object's
- * exclusive fence, without lock held.
- * @obj: the reservation object
- *
- * If there is an exclusive fence, this atomically increments it's
- * reference count and returns it.
- *
- * RETURNS
- * The exclusive fence or NULL if none
- */
-static inline struct dma_fence *
-dma_resv_get_excl_unlocked(struct dma_resv *obj)
-{
-	struct dma_fence *fence;
-
-	if (!rcu_access_pointer(obj->fence_excl))
-		return NULL;
-
-	rcu_read_lock();
-	fence = dma_fence_get_rcu_safe(&obj->fence_excl);
-	rcu_read_unlock();
-
-	return fence;
-}
-
 /**
  * dma_resv_shared_list - get the reservation object's shared fence list
  * @obj: the reservation object
-- 
cgit v1.2.3


From f45b2974cc0ae959a4c503a071e38a56bd64372f Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn@kernel.org>
Date: Wed, 17 Nov 2021 13:57:08 +0100
Subject: bpf, x86: Fix "no previous prototype" warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The arch_prepare_bpf_dispatcher function does not have a prototype, and
yields the following warning when W=1 is enabled for the kernel build.

  >> arch/x86/net/bpf_jit_comp.c:2188:5: warning: no previous \
  prototype for 'arch_prepare_bpf_dispatcher' [-Wmissing-prototypes]
        2188 | int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, \
	int num_funcs)
             |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~

Remove the warning by adding a function declaration to include/linux/bpf.h.

Fixes: 75ccbef6369e ("bpf: Introduce BPF dispatcher")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Björn Töpel <bjorn@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211117125708.769168-1-bjorn@kernel.org
---
 include/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e7a163a3146b..84ff6ef49462 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -732,6 +732,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
 struct bpf_trampoline *bpf_trampoline_get(u64 key,
 					  struct bpf_attach_target_info *tgt_info);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs);
 #define BPF_DISPATCHER_INIT(_name) {				\
 	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
 	.func = &_name##_func,					\
-- 
cgit v1.2.3


From cf9acc90c80ecbee00334aa85d92f4e74014bcff Mon Sep 17 00:00:00 2001
From: Jonathan Davies <jonathan.davies@nutanix.com>
Date: Tue, 16 Nov 2021 17:42:42 +0000
Subject: net: virtio_net_hdr_to_skb: count transport header in UFO

virtio_net_hdr_to_skb does not set the skb's gso_size and gso_type
correctly for UFO packets received via virtio-net that are a little over
the GSO size. This can lead to problems elsewhere in the networking
stack, e.g. ovs_vport_send dropping over-sized packets if gso_size is
not set.

This is due to the comparison

  if (skb->len - p_off > gso_size)

not properly accounting for the transport layer header.

p_off includes the size of the transport layer header (thlen), so
skb->len - p_off is the size of the TCP/UDP payload.

gso_size is read from the virtio-net header. For UFO, fragmentation
happens at the IP level so does not need to include the UDP header.

Hence the calculation could be comparing a TCP/UDP payload length with
an IP payload length, causing legitimate virtio-net packets to have
lack gso_type/gso_size information.

Example: a UDP packet with payload size 1473 has IP payload size 1481.
If the guest used UFO, it is not fragmented and the virtio-net header's
flags indicate that it is a GSO frame (VIRTIO_NET_HDR_GSO_UDP), with
gso_size = 1480 for an MTU of 1500.  skb->len will be 1515 and p_off
will be 42, so skb->len - p_off = 1473.  Hence the comparison fails, and
shinfo->gso_size and gso_type are not set as they should be.

Instead, add the UDP header length before comparing to gso_size when
using UFO. In this way, it is the size of the IP payload that is
compared to gso_size.

Fixes: 6dd912f82680 ("net: check untrusted gso_size at kernel entry")
Signed-off-by: Jonathan Davies <jonathan.davies@nutanix.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index b465f8f3e554..04e87f4b9417 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -120,10 +120,15 @@ retry:
 
 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
 		u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
+		unsigned int nh_off = p_off;
 		struct skb_shared_info *shinfo = skb_shinfo(skb);
 
+		/* UFO may not include transport header in gso_size. */
+		if (gso_type & SKB_GSO_UDP)
+			nh_off -= thlen;
+
 		/* Too small packets are not really GSO ones. */
-		if (skb->len - p_off > gso_size) {
+		if (skb->len - nh_off > gso_size) {
 			shinfo->gso_size = gso_size;
 			shinfo->gso_type = gso_type;
 
-- 
cgit v1.2.3


From 522a0032af005502507f5f81ae64fdcc82b5d068 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sat, 6 Nov 2021 17:13:35 -0400
Subject: Add linux/cacheflush.h

Many architectures do not include asm-generic/cacheflush.h, so turn
the includes on their head and add linux/cacheflush.h which includes
asm/cacheflush.h.

Move the flush_dcache_folio() declaration from asm-generic/cacheflush.h
to linux/cacheflush.h and change linux/highmem.h to include
linux/cacheflush.h instead of asm/cacheflush.h so that all necessary
places will see flush_dcache_folio().

More functions should have their default implementations moved in the
future, but those are for follow-on patches.  This fixes csky, sparc and
sparc64 which were missed in the commit which added flush_dcache_folio().

Fixes: 08b0b0059bf1 ("mm: Add flush_dcache_folio()")
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/arc/include/asm/cacheflush.h     |  1 -
 arch/arm/include/asm/cacheflush.h     |  1 -
 arch/m68k/include/asm/cacheflush_mm.h |  1 -
 arch/mips/include/asm/cacheflush.h    |  2 --
 arch/nds32/include/asm/cacheflush.h   |  1 -
 arch/nios2/include/asm/cacheflush.h   |  1 -
 arch/parisc/include/asm/cacheflush.h  |  1 -
 arch/sh/include/asm/cacheflush.h      |  1 -
 arch/xtensa/include/asm/cacheflush.h  |  3 ---
 include/asm-generic/cacheflush.h      |  6 ------
 include/linux/cacheflush.h            | 18 ++++++++++++++++++
 include/linux/highmem.h               |  3 +--
 12 files changed, 19 insertions(+), 20 deletions(-)
 create mode 100644 include/linux/cacheflush.h

(limited to 'include/linux')

diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
index e8c2c7469e10..e201b4b1655a 100644
--- a/arch/arc/include/asm/cacheflush.h
+++ b/arch/arc/include/asm/cacheflush.h
@@ -36,7 +36,6 @@ void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
 void dma_cache_inv(phys_addr_t start, unsigned long sz);
diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h
index e68fb879e4f9..5e56288e343b 100644
--- a/arch/arm/include/asm/cacheflush.h
+++ b/arch/arm/include/asm/cacheflush.h
@@ -290,7 +290,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr
  */
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 extern void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *folio);
 
 #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1
 static inline void flush_kernel_vmap_range(void *addr, int size)
diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h
index 8ab46625ddd3..1ac55e7b47f0 100644
--- a/arch/m68k/include/asm/cacheflush_mm.h
+++ b/arch/m68k/include/asm/cacheflush_mm.h
@@ -250,7 +250,6 @@ static inline void __flush_page_to_ram(void *vaddr)
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 #define flush_dcache_page(page)		__flush_page_to_ram(page_address(page))
-void flush_dcache_folio(struct folio *folio);
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 #define flush_icache_page(vma, page)	__flush_page_to_ram(page_address(page))
diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h
index f207388541d5..b3dc9c589442 100644
--- a/arch/mips/include/asm/cacheflush.h
+++ b/arch/mips/include/asm/cacheflush.h
@@ -61,8 +61,6 @@ static inline void flush_dcache_page(struct page *page)
 		SetPageDcacheDirty(page);
 }
 
-void flush_dcache_folio(struct folio *folio);
-
 #define flush_dcache_mmap_lock(mapping)		do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)	do { } while (0)
 
diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h
index 3fc0bb7d6487..c2a222ebfa2a 100644
--- a/arch/nds32/include/asm/cacheflush.h
+++ b/arch/nds32/include/asm/cacheflush.h
@@ -27,7 +27,6 @@ void flush_cache_vunmap(unsigned long start, unsigned long end);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
 		       unsigned long vaddr, void *dst, void *src, int len);
 void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h
index 1999561b22aa..d0b71dd71287 100644
--- a/arch/nios2/include/asm/cacheflush.h
+++ b/arch/nios2/include/asm/cacheflush.h
@@ -29,7 +29,6 @@ extern void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr,
 	unsigned long pfn);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 extern void flush_icache_range(unsigned long start, unsigned long end);
 extern void flush_icache_page(struct vm_area_struct *vma, struct page *page);
diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index da0cd4b3a28f..859b8a34adcf 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -50,7 +50,6 @@ void invalidate_kernel_vmap_range(void *vaddr, int size);
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 
 #define flush_dcache_mmap_lock(mapping)		xa_lock_irq(&mapping->i_pages)
 #define flush_dcache_mmap_unlock(mapping)	xa_unlock_irq(&mapping->i_pages)
diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h
index c7a97f32432f..481a664287e2 100644
--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -43,7 +43,6 @@ extern void flush_cache_range(struct vm_area_struct *vma,
 				 unsigned long start, unsigned long end);
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *page);
-void flush_dcache_folio(struct folio *folio);
 extern void flush_icache_range(unsigned long start, unsigned long end);
 #define flush_icache_user_range flush_icache_range
 extern void flush_icache_page(struct vm_area_struct *vma,
diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h
index a8a041609c5d..7b4359312c25 100644
--- a/arch/xtensa/include/asm/cacheflush.h
+++ b/arch/xtensa/include/asm/cacheflush.h
@@ -121,7 +121,6 @@ void flush_cache_page(struct vm_area_struct*,
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
 void flush_dcache_page(struct page *);
-void flush_dcache_folio(struct folio *);
 
 void local_flush_cache_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end);
@@ -138,9 +137,7 @@ void local_flush_cache_page(struct vm_area_struct *vma,
 #define flush_cache_vunmap(start,end)			do { } while (0)
 
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
 #define flush_dcache_page(page)				do { } while (0)
-static inline void flush_dcache_folio(struct folio *folio) { }
 
 #define flush_icache_range local_flush_icache_range
 #define flush_cache_page(vma, addr, pfn)		do { } while (0)
diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h
index fedc0dfa4877..4f07afacbc23 100644
--- a/include/asm-generic/cacheflush.h
+++ b/include/asm-generic/cacheflush.h
@@ -50,13 +50,7 @@ static inline void flush_dcache_page(struct page *page)
 {
 }
 
-static inline void flush_dcache_folio(struct folio *folio) { }
 #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
-#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-#endif
-
-#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
-void flush_dcache_folio(struct folio *folio);
 #endif
 
 #ifndef flush_dcache_mmap_lock
diff --git a/include/linux/cacheflush.h b/include/linux/cacheflush.h
new file mode 100644
index 000000000000..fef8b607f97e
--- /dev/null
+++ b/include/linux/cacheflush.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CACHEFLUSH_H
+#define _LINUX_CACHEFLUSH_H
+
+#include <asm/cacheflush.h>
+
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
+#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO
+void flush_dcache_folio(struct folio *folio);
+#endif
+#else
+static inline void flush_dcache_folio(struct folio *folio)
+{
+}
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_FOLIO 0
+#endif /* ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE */
+
+#endif /* _LINUX_CACHEFLUSH_H */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 25aff0f2ed0b..c944b3b70ee7 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -5,12 +5,11 @@
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/cacheflush.h>
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 #include <linux/hardirq.h>
 
-#include <asm/cacheflush.h>
-
 #include "highmem-internal.h"
 
 /**
-- 
cgit v1.2.3


From 9c3252152e8a6401c2b9e32490a5a16ec4472778 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 16 Nov 2021 21:17:14 -0500
Subject: mm: Rename folio_test_multi to folio_test_large

This is a better name.  Also add kernel-doc.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/page-flags.h | 8 +++++++-
 mm/memcontrol.c            | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 52ec4b5e5615..05510118fbb8 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -692,7 +692,13 @@ static inline bool folio_test_single(struct folio *folio)
 	return !folio_test_head(folio);
 }
 
-static inline bool folio_test_multi(struct folio *folio)
+/**
+ * folio_test_large() - Does this folio contain more than one page?
+ * @folio: The folio to test.
+ *
+ * Return: True if the folio is larger than one page.
+ */
+static inline bool folio_test_large(struct folio *folio)
 {
 	return folio_test_head(folio);
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 781605e92015..6863a834ed42 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5558,7 +5558,7 @@ static int mem_cgroup_move_account(struct page *page,
 
 	VM_BUG_ON(from == to);
 	VM_BUG_ON_FOLIO(folio_test_lru(folio), folio);
-	VM_BUG_ON(compound && !folio_test_multi(folio));
+	VM_BUG_ON(compound && !folio_test_large(folio));
 
 	/*
 	 * Prevent mem_cgroup_migrate() from looking at
-- 
cgit v1.2.3


From a1efe484dd8c04c4c2d4eb1ee6b04d01cfc07ccc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 16 Nov 2021 21:18:52 -0500
Subject: mm: Remove folio_test_single

There's no need for this predicate; callers can just use
!folio_test_large().

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/page-flags.h | 6 ------
 mm/util.c                  | 2 +-
 2 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 05510118fbb8..b5f14d581113 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -686,12 +686,6 @@ static inline bool test_set_page_writeback(struct page *page)
 
 __PAGEFLAG(Head, head, PF_ANY) CLEARPAGEFLAG(Head, head, PF_ANY)
 
-/* Whether there are one or multiple pages in a folio */
-static inline bool folio_test_single(struct folio *folio)
-{
-	return !folio_test_head(folio);
-}
-
 /**
  * folio_test_large() - Does this folio contain more than one page?
  * @folio: The folio to test.
diff --git a/mm/util.c b/mm/util.c
index e58151a61255..741ba32a43ac 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -670,7 +670,7 @@ bool folio_mapped(struct folio *folio)
 {
 	long i, nr;
 
-	if (folio_test_single(folio))
+	if (!folio_test_large(folio))
 		return atomic_read(&folio->_mapcount) >= 0;
 	if (atomic_read(folio_mapcount_ptr(folio)) >= 0)
 		return true;
-- 
cgit v1.2.3


From ff36da69bc90d80b0c73f47f4b2e270b3ff6da99 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 29 Aug 2021 06:07:03 -0400
Subject: fs: Remove FS_THP_SUPPORT

Instead of setting a bit in the fs_flags to set a bit in the
address_space, set the bit in the address_space directly.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/inode.c              |  2 --
 include/linux/fs.h      |  1 -
 include/linux/pagemap.h | 16 ++++++++++++++++
 mm/shmem.c              |  3 ++-
 4 files changed, 18 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 3eba0940ffcf..6b80a51129d5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -180,8 +180,6 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
 	mapping->flags = 0;
-	if (sb->s_type->fs_flags & FS_THP_SUPPORT)
-		__set_bit(AS_THP_SUPPORT, &mapping->flags);
 	mapping->wb_err = 0;
 	atomic_set(&mapping->i_mmap_writable, 0);
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1cb616fc1105..bbf812ce89a8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2518,7 +2518,6 @@ struct file_system_type {
 #define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM	16	/* Disable fanotify permission events */
 #define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
-#define FS_THP_SUPPORT		8192	/* Remove once all fs converted */
 #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
 	int (*init_fs_context)(struct fs_context *);
 	const struct fs_parameter_spec *parameters;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1a0c646eb6ff..9e33878bf23b 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -176,6 +176,22 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
 	m->gfp_mask = mask;
 }
 
+/**
+ * mapping_set_large_folios() - Indicate the file supports large folios.
+ * @mapping: The file.
+ *
+ * The filesystem should call this function in its inode constructor to
+ * indicate that the VFS can use large folios to cache the contents of
+ * the file.
+ *
+ * Context: This should not be called while the inode is active as it
+ * is non-atomic.
+ */
+static inline void mapping_set_large_folios(struct address_space *mapping)
+{
+	__set_bit(AS_THP_SUPPORT, &mapping->flags);
+}
+
 static inline bool mapping_thp_support(struct address_space *mapping)
 {
 	return test_bit(AS_THP_SUPPORT, &mapping->flags);
diff --git a/mm/shmem.c b/mm/shmem.c
index dc038ce78700..18f93c2d68f1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2303,6 +2303,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		INIT_LIST_HEAD(&info->swaplist);
 		simple_xattrs_init(&info->xattrs);
 		cache_no_acl(inode);
+		mapping_set_large_folios(inode->i_mapping);
 
 		switch (mode & S_IFMT) {
 		default:
@@ -3870,7 +3871,7 @@ static struct file_system_type shmem_fs_type = {
 	.parameters	= shmem_fs_parameters,
 #endif
 	.kill_sb	= kill_litter_super,
-	.fs_flags	= FS_USERNS_MOUNT | FS_THP_SUPPORT,
+	.fs_flags	= FS_USERNS_MOUNT,
 };
 
 int __init shmem_init(void)
-- 
cgit v1.2.3


From ed2145c474c9015bc634e35f6d1a9b7767f3fbfc Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 29 Aug 2021 06:28:19 -0400
Subject: fs: Rename AS_THP_SUPPORT and mapping_thp_support

These are now indicators of large folio support, not THP support.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/pagemap.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 9e33878bf23b..605246452305 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -84,7 +84,7 @@ enum mapping_flags {
 	AS_EXITING	= 4, 	/* final truncate in progress */
 	/* writeback related tags are not used */
 	AS_NO_WRITEBACK_TAGS = 5,
-	AS_THP_SUPPORT = 6,	/* THPs supported */
+	AS_LARGE_FOLIO_SUPPORT = 6,
 };
 
 /**
@@ -189,12 +189,12 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
  */
 static inline void mapping_set_large_folios(struct address_space *mapping)
 {
-	__set_bit(AS_THP_SUPPORT, &mapping->flags);
+	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
-static inline bool mapping_thp_support(struct address_space *mapping)
+static inline bool mapping_large_folio_support(struct address_space *mapping)
 {
-	return test_bit(AS_THP_SUPPORT, &mapping->flags);
+	return test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 }
 
 static inline int filemap_nr_thps(struct address_space *mapping)
@@ -209,7 +209,7 @@ static inline int filemap_nr_thps(struct address_space *mapping)
 static inline void filemap_nr_thps_inc(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-	if (!mapping_thp_support(mapping))
+	if (!mapping_large_folio_support(mapping))
 		atomic_inc(&mapping->nr_thps);
 #else
 	WARN_ON_ONCE(1);
@@ -219,7 +219,7 @@ static inline void filemap_nr_thps_inc(struct address_space *mapping)
 static inline void filemap_nr_thps_dec(struct address_space *mapping)
 {
 #ifdef CONFIG_READ_ONLY_THP_FOR_FS
-	if (!mapping_thp_support(mapping))
+	if (!mapping_large_folio_support(mapping))
 		atomic_dec(&mapping->nr_thps);
 #else
 	WARN_ON_ONCE(1);
-- 
cgit v1.2.3


From 357a18ad230f0867791b788d2b1d6f280f6f6e61 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Mon, 15 Nov 2021 16:50:27 +0000
Subject: KVM: Kill kvm_map_gfn() / kvm_unmap_gfn() and gfn_to_pfn_cache

In commit 7e2175ebd695 ("KVM: x86: Fix recording of guest steal time /
preempted status") I removed the only user of these functions because
it was basically impossible to use them safely.

There are two stages to the GFN->PFN mapping; first through the KVM
memslots to a userspace HVA and then through the page tables to
translate that HVA to an underlying PFN. Invalidations of the former
were being handled correctly, but no attempt was made to use the MMU
notifiers to invalidate the cache when the HVA->GFN mapping changed.

As a prelude to reinventing the gfn_to_pfn_cache with more usable
semantics, rip it out entirely and untangle the implementation of
the unsafe kvm_vcpu_map()/kvm_vcpu_unmap() functions from it.

All current users of kvm_vcpu_map() also look broken right now, and
will be dealt with separately. They broadly fall into two classes:

* Those which map, access the data and immediately unmap. This is
  mostly gratuitous and could just as well use the existing user
  HVA, and could probably benefit from a gfn_to_hva_cache as they
  do so.

* Those which keep the mapping around for a longer time, perhaps
  even using the PFN directly from the guest. These will need to
  be converted to the new gfn_to_pfn_cache and then kvm_vcpu_map()
  can be removed too.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20211115165030.7422-8-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h  |   6 +--
 include/linux/kvm_types.h |   7 ----
 virt/kvm/kvm_main.c       | 100 +++++-----------------------------------------
 3 files changed, 12 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 60a35d9fe259..eb625af4fc5e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -866,7 +866,7 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
@@ -942,12 +942,8 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
 kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
 kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
-		struct gfn_to_pfn_cache *cache, bool atomic);
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
-		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
 int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 2237abb93ccd..234eab059839 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -53,13 +53,6 @@ struct gfn_to_hva_cache {
 	struct kvm_memory_slot *memslot;
 };
 
-struct gfn_to_pfn_cache {
-	u64 generation;
-	gfn_t gfn;
-	kvm_pfn_t pfn;
-	bool dirty;
-};
-
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 /*
  * Memory caches are used to preallocate memory ahead of various MMU flows,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3f6d450355f0..7a28c29dca8a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2548,72 +2548,36 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty)
 {
 	if (pfn == 0)
 		return;
 
-	if (cache)
-		cache->pfn = cache->gfn = 0;
-
 	if (dirty)
 		kvm_release_pfn_dirty(pfn);
 	else
 		kvm_release_pfn_clean(pfn);
 }
 
-static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
-				 struct gfn_to_pfn_cache *cache, u64 gen)
-{
-	kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
-	cache->pfn = gfn_to_pfn_memslot(slot, gfn);
-	cache->gfn = gfn;
-	cache->dirty = false;
-	cache->generation = gen;
-}
-
-static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
-			 struct kvm_host_map *map,
-			 struct gfn_to_pfn_cache *cache,
-			 bool atomic)
+int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
 {
 	kvm_pfn_t pfn;
 	void *hva = NULL;
 	struct page *page = KVM_UNMAPPED_PAGE;
-	struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
-	u64 gen = slots->generation;
 
 	if (!map)
 		return -EINVAL;
 
-	if (cache) {
-		if (!cache->pfn || cache->gfn != gfn ||
-			cache->generation != gen) {
-			if (atomic)
-				return -EAGAIN;
-			kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
-		}
-		pfn = cache->pfn;
-	} else {
-		if (atomic)
-			return -EAGAIN;
-		pfn = gfn_to_pfn_memslot(slot, gfn);
-	}
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	if (is_error_noslot_pfn(pfn))
 		return -EINVAL;
 
 	if (pfn_valid(pfn)) {
 		page = pfn_to_page(pfn);
-		if (atomic)
-			hva = kmap_atomic(page);
-		else
-			hva = kmap(page);
+		hva = kmap(page);
 #ifdef CONFIG_HAS_IOMEM
-	} else if (!atomic) {
-		hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
 	} else {
-		return -EINVAL;
+		hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
 #endif
 	}
 
@@ -2627,27 +2591,9 @@ static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
 
 	return 0;
 }
-
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
-		struct gfn_to_pfn_cache *cache, bool atomic)
-{
-	return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
-			cache, atomic);
-}
-EXPORT_SYMBOL_GPL(kvm_map_gfn);
-
-int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
-{
-	return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
-		NULL, false);
-}
 EXPORT_SYMBOL_GPL(kvm_vcpu_map);
 
-static void __kvm_unmap_gfn(struct kvm *kvm,
-			struct kvm_memory_slot *memslot,
-			struct kvm_host_map *map,
-			struct gfn_to_pfn_cache *cache,
-			bool dirty, bool atomic)
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 {
 	if (!map)
 		return;
@@ -2655,45 +2601,21 @@ static void __kvm_unmap_gfn(struct kvm *kvm,
 	if (!map->hva)
 		return;
 
-	if (map->page != KVM_UNMAPPED_PAGE) {
-		if (atomic)
-			kunmap_atomic(map->hva);
-		else
-			kunmap(map->page);
-	}
+	if (map->page != KVM_UNMAPPED_PAGE)
+		kunmap(map->page);
 #ifdef CONFIG_HAS_IOMEM
-	else if (!atomic)
-		memunmap(map->hva);
 	else
-		WARN_ONCE(1, "Unexpected unmapping in atomic context");
+		memunmap(map->hva);
 #endif
 
 	if (dirty)
-		mark_page_dirty_in_slot(kvm, memslot, map->gfn);
+		kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
 
-	if (cache)
-		cache->dirty |= dirty;
-	else
-		kvm_release_pfn(map->pfn, dirty, NULL);
+	kvm_release_pfn(map->pfn, dirty);
 
 	map->hva = NULL;
 	map->page = NULL;
 }
-
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, 
-		  struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
-{
-	__kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
-			cache, dirty, atomic);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
-
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
-{
-	__kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
-			map, NULL, dirty, false);
-}
 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
-- 
cgit v1.2.3


From f915b75bffb7257bd8d26376b8e1cc67771927f8 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Wed, 17 Nov 2021 15:56:52 +0800
Subject: page_pool: Revert "page_pool: disable dma mapping support..."

This reverts commit d00e60ee54b12de945b8493cf18c1ada9e422514.

As reported by Guillaume in [1]:
Enabling LPAE always enables CONFIG_ARCH_DMA_ADDR_T_64BIT
in 32-bit systems, which breaks the bootup proceess when a
ethernet driver is using page pool with PP_FLAG_DMA_MAP flag.
As we were hoping we had no active consumers for such system
when we removed the dma mapping support, and LPAE seems like
a common feature for 32 bits system, so revert it.

1. https://www.spinics.net/lists/netdev/msg779890.html

Fixes: d00e60ee54b1 ("page_pool: disable dma mapping support for 32-bit arch with 64-bit DMA")
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Tested-by: "kernelci.org bot" <bot@kernelci.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mm_types.h | 13 ++++++++++++-
 include/net/page_pool.h  | 12 +++++++++++-
 net/core/page_pool.c     | 10 ++++------
 3 files changed, 27 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bb8c6f5f19bc..c3a6e6209600 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -105,7 +105,18 @@ struct page {
 			struct page_pool *pp;
 			unsigned long _pp_mapping_pad;
 			unsigned long dma_addr;
-			atomic_long_t pp_frag_count;
+			union {
+				/**
+				 * dma_addr_upper: might require a 64-bit
+				 * value on 32-bit architectures.
+				 */
+				unsigned long dma_addr_upper;
+				/**
+				 * For frag page support, not supported in
+				 * 32-bit architectures with 64-bit DMA.
+				 */
+				atomic_long_t pp_frag_count;
+			};
 		};
 		struct {	/* slab, slob and slub */
 			union {
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index 3855f069627f..a4082406a003 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -216,14 +216,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 	page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT	\
+		(sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-	return page->dma_addr;
+	dma_addr_t ret = page->dma_addr;
+
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
+	return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
 	page->dma_addr = addr;
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+		page->dma_addr_upper = upper_32_bits(addr);
 }
 
 static inline void page_pool_set_frag_count(struct page *page, long nr)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b60e4301a44..1a6978427d6c 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -49,12 +49,6 @@ static int page_pool_init(struct page_pool *pool,
 	 * which is the XDP_TX use-case.
 	 */
 	if (pool->p.flags & PP_FLAG_DMA_MAP) {
-		/* DMA-mapping is not supported on 32-bit systems with
-		 * 64-bit DMA mapping.
-		 */
-		if (sizeof(dma_addr_t) > sizeof(unsigned long))
-			return -EOPNOTSUPP;
-
 		if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
 		    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
 			return -EINVAL;
@@ -75,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
 		 */
 	}
 
+	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+	    pool->p.flags & PP_FLAG_PAGE_FRAG)
+		return -EINVAL;
+
 	if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
 		return -ENOMEM;
 
-- 
cgit v1.2.3


From 6966df483d7b5b218aeb0e13e7e334a8fc3c1744 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Thu, 18 Nov 2021 13:49:51 +0200
Subject: regulator: Update protection IRQ helper docs

The documentation of IRQ notification helper had still references to
first RFC implementation which called BUG() while trying to protect the
hardware. Behaviour was improved as calling the BUG() was not a proper
solution. Current implementation attempts to call poweroff if handling
of potentially damaging error notification fails. Update the
documentation to reflect the actual behaviour.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Link: https://lore.kernel.org/r/0c9cc4bcf20c3da66fd5a85c97ee4288e5727538.1637233864.git.matti.vaittinen@fi.rohmeurope.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/regulator/driver.h | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index bd7a73db2e66..54cf566616ae 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -499,7 +499,8 @@ struct regulator_irq_data {
  *		best to shut-down regulator(s) or reboot the SOC if error
  *		handling is repeatedly failing. If fatal_cnt is given the IRQ
  *		handling is aborted if it fails for fatal_cnt times and die()
- *		callback (if populated) or BUG() is called to try to prevent
+ *		callback (if populated) is called. If die() is not populated
+ *		poweroff for the system is attempted in order to prevent any
  *		further damage.
  * @reread_ms:	The time which is waited before attempting to re-read status
  *		at the worker if IC reading fails. Immediate re-read is done
@@ -516,11 +517,12 @@ struct regulator_irq_data {
  * @data:	Driver private data pointer which will be passed as such to
  *		the renable, map_event and die callbacks in regulator_irq_data.
  * @die:	Protection callback. If IC status reading or recovery actions
- *		fail fatal_cnt times this callback or BUG() is called. This
- *		callback should implement a final protection attempt like
- *		disabling the regulator. If protection succeeded this may
- *		return 0. If anything else is returned the core assumes final
- *		protection failed and calls BUG() as a last resort.
+ *		fail fatal_cnt times this callback is called or system is
+ *		powered off. This callback should implement a final protection
+ *		attempt like disabling the regulator. If protection succeeded
+ *		die() may return 0. If anything else is returned the core
+ *		assumes final protection failed and attempts to perform a
+ *		poweroff as a last resort.
  * @map_event:	Driver callback to map IRQ status into regulator devices with
  *		events / errors. NOTE: callback MUST initialize both the
  *		errors and notifs for all rdevs which it signals having
-- 
cgit v1.2.3


From c035713998700e8843c7d087f55bce3c54c0e3ec Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 5 Nov 2021 10:19:05 -0400
Subject: mm: Add functions to zero portions of a folio

These functions are wrappers around zero_user_segments(), which means
that zero_user_segments() can now be called for compound pages even when
CONFIG_TRANSPARENT_HUGEPAGE is disabled.

Use 'xend' as the name of the parameter to indicate that this is an
excluded end, not the more usual included end.  Excluding the end makes
more sense to the callers, but can cause confusion to readers who are
more used to seeing included ends.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 include/linux/highmem.h | 44 +++++++++++++++++++++++++++++++++++++++++---
 mm/highmem.c            |  2 --
 2 files changed, 41 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index c944b3b70ee7..39bb9b47fa9c 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -230,10 +230,10 @@ static inline void tag_clear_highpage(struct page *page)
  * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
  * If we pass in a head page, we can zero up to the size of the compound page.
  */
-#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+#ifdef CONFIG_HIGHMEM
 void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2);
-#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#else
 static inline void zero_user_segments(struct page *page,
 		unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2)
@@ -253,7 +253,7 @@ static inline void zero_user_segments(struct page *page,
 	for (i = 0; i < compound_nr(page); i++)
 		flush_dcache_page(page + i);
 }
-#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
+#endif
 
 static inline void zero_user_segment(struct page *page,
 	unsigned start, unsigned end)
@@ -363,4 +363,42 @@ static inline void memzero_page(struct page *page, size_t offset, size_t len)
 	kunmap_local(addr);
 }
 
+/**
+ * folio_zero_segments() - Zero two byte ranges in a folio.
+ * @folio: The folio to write to.
+ * @start1: The first byte to zero.
+ * @xend1: One more than the last byte in the first range.
+ * @start2: The first byte to zero in the second range.
+ * @xend2: One more than the last byte in the second range.
+ */
+static inline void folio_zero_segments(struct folio *folio,
+		size_t start1, size_t xend1, size_t start2, size_t xend2)
+{
+	zero_user_segments(&folio->page, start1, xend1, start2, xend2);
+}
+
+/**
+ * folio_zero_segment() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @xend: One more than the last byte to zero.
+ */
+static inline void folio_zero_segment(struct folio *folio,
+		size_t start, size_t xend)
+{
+	zero_user_segments(&folio->page, start, xend, 0, 0);
+}
+
+/**
+ * folio_zero_range() - Zero a byte range in a folio.
+ * @folio: The folio to write to.
+ * @start: The first byte to zero.
+ * @length: The number of bytes to zero.
+ */
+static inline void folio_zero_range(struct folio *folio,
+		size_t start, size_t length)
+{
+	zero_user_segments(&folio->page, start, start + length, 0, 0);
+}
+
 #endif /* _LINUX_HIGHMEM_H */
diff --git a/mm/highmem.c b/mm/highmem.c
index 88f65f155845..819d41140e5b 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -359,7 +359,6 @@ void kunmap_high(struct page *page)
 }
 EXPORT_SYMBOL(kunmap_high);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 		unsigned start2, unsigned end2)
 {
@@ -416,7 +415,6 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 	BUG_ON((start1 | start2 | end1 | end2) != 0);
 }
 EXPORT_SYMBOL(zero_user_segments);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 #endif /* CONFIG_HIGHMEM */
 
 #ifdef CONFIG_KMAP_LOCAL
-- 
cgit v1.2.3


From fcb116bc43c8c37c052530ead79872f8b2615711 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 18 Nov 2021 14:23:21 -0600
Subject: signal: Replace force_fatal_sig with force_exit_sig when in doubt

Recently to prevent issues with SECCOMP_RET_KILL and similar signals
being changed before they are delivered SA_IMMUTABLE was added.

Unfortunately this broke debuggers[1][2] which reasonably expect
to be able to trap synchronous SIGTRAP and SIGSEGV even when
the target process is not configured to handle those signals.

Add force_exit_sig and use it instead of force_fatal_sig where
historically the code has directly called do_exit.  This has the
implementation benefits of going through the signal exit path
(including generating core dumps) without the danger of allowing
userspace to ignore or change these signals.

This avoids userspace regressions as older kernels exited with do_exit
which debuggers also can not intercept.

In the future is should be possible to improve the quality of
implementation of the kernel by changing some of these force_exit_sig
calls to force_fatal_sig.  That can be done where it matters on
a case-by-case basis with careful analysis.

Reported-by: Kyle Huey <me@kylehuey.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
[1] https://lkml.kernel.org/r/CAP045AoMY4xf8aC_4QU_-j7obuEPYgTcnQQP3Yxk=2X90jtpjw@mail.gmail.com
[2] https://lkml.kernel.org/r/20211117150258.GB5403@xsang-OptiPlex-9020
Fixes: 00b06da29cf9 ("signal: Add SA_IMMUTABLE to ensure forced siganls do not get changed")
Fixes: a3616a3c0272 ("signal/m68k: Use force_sigsegv(SIGSEGV) in fpsp040_die")
Fixes: 83a1f27ad773 ("signal/powerpc: On swapcontext failure force SIGSEGV")
Fixes: 9bc508cf0791 ("signal/s390: Use force_sigsegv in default_trap_handler")
Fixes: 086ec444f866 ("signal/sparc32: In setup_rt_frame and setup_fram use force_fatal_sig")
Fixes: c317d306d550 ("signal/sparc32: Exit with a fatal signal when try_to_clear_window_buffer fails")
Fixes: 695dd0d634df ("signal/x86: In emulate_vsyscall force a signal instead of calling do_exit")
Fixes: 1fbd60df8a85 ("signal/vm86_32: Properly send SIGSEGV when the vm86 state cannot be saved.")
Fixes: 941edc5bf174 ("exit/syscall_user_dispatch: Send ordinary signals on failure")
Link: https://lkml.kernel.org/r/871r3dqfv8.fsf_-_@email.froward.int.ebiederm.org
Reviewed-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>
Tested-by: Kyle Huey <khuey@kylehuey.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/m68k/kernel/traps.c              |  2 +-
 arch/powerpc/kernel/signal_32.c       |  2 +-
 arch/powerpc/kernel/signal_64.c       |  4 ++--
 arch/s390/kernel/traps.c              |  2 +-
 arch/sparc/kernel/signal_32.c         |  4 ++--
 arch/sparc/kernel/windows.c           |  2 +-
 arch/x86/entry/vsyscall/vsyscall_64.c |  2 +-
 arch/x86/kernel/vm86_32.c             |  2 +-
 include/linux/sched/signal.h          |  1 +
 kernel/entry/syscall_user_dispatch.c  |  4 ++--
 kernel/signal.c                       | 13 +++++++++++++
 11 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
index 99058a6da956..34d6458340b0 100644
--- a/arch/m68k/kernel/traps.c
+++ b/arch/m68k/kernel/traps.c
@@ -1145,7 +1145,7 @@ asmlinkage void set_esp0(unsigned long ssp)
  */
 asmlinkage void fpsp040_die(void)
 {
-	force_fatal_sig(SIGSEGV);
+	force_exit_sig(SIGSEGV);
 }
 
 #ifdef CONFIG_M68KFPU_EMU
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 00a9c9cd6d42..3e053e2fd6b6 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1063,7 +1063,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 * We kill the task with a SIGSEGV in this situation.
 	 */
 	if (do_setcontext(new_ctx, regs, 0)) {
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 		return -EFAULT;
 	}
 
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index ef518535d436..d1e1fc0acbea 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -704,7 +704,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 	 */
 
 	if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 		return -EFAULT;
 	}
 	set_current_blocked(&set);
@@ -713,7 +713,7 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		return -EFAULT;
 	if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
 		user_read_access_end();
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 		return -EFAULT;
 	}
 	user_read_access_end();
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 035705c9f23e..2b780786fc68 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -84,7 +84,7 @@ static void default_trap_handler(struct pt_regs *regs)
 {
 	if (user_mode(regs)) {
 		report_user_fault(regs, SIGSEGV, 0);
-		force_fatal_sig(SIGSEGV);
+		force_exit_sig(SIGSEGV);
 	} else
 		die(regs, "Unknown program exception");
 }
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index cd677bc564a7..ffab16369bea 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -244,7 +244,7 @@ static int setup_frame(struct ksignal *ksig, struct pt_regs *regs,
 		get_sigframe(ksig, regs, sigframe_size);
 
 	if (invalid_frame_pointer(sf, sigframe_size)) {
-		force_fatal_sig(SIGILL);
+		force_exit_sig(SIGILL);
 		return -EINVAL;
 	}
 
@@ -336,7 +336,7 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs,
 	sf = (struct rt_signal_frame __user *)
 		get_sigframe(ksig, regs, sigframe_size);
 	if (invalid_frame_pointer(sf, sigframe_size)) {
-		force_fatal_sig(SIGILL);
+		force_exit_sig(SIGILL);
 		return -EINVAL;
 	}
 
diff --git a/arch/sparc/kernel/windows.c b/arch/sparc/kernel/windows.c
index bbbd40cc6b28..8f20862ccc83 100644
--- a/arch/sparc/kernel/windows.c
+++ b/arch/sparc/kernel/windows.c
@@ -122,7 +122,7 @@ void try_to_clear_window_buffer(struct pt_regs *regs, int who)
 		if ((sp & 7) ||
 		    copy_to_user((char __user *) sp, &tp->reg_window[window],
 				 sizeof(struct reg_window32))) {
-			force_fatal_sig(SIGILL);
+			force_exit_sig(SIGILL);
 			return;
 		}
 	}
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 0b6b277ee050..fd2ee9408e91 100644
--- a/arch/x86/entry/vsyscall/vsyscall_64.c
+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
@@ -226,7 +226,7 @@ bool emulate_vsyscall(unsigned long error_code,
 	if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
 		warn_bad_vsyscall(KERN_DEBUG, regs,
 				  "seccomp tried to change syscall nr or ip");
-		force_fatal_sig(SIGSYS);
+		force_exit_sig(SIGSYS);
 		return true;
 	}
 	regs->orig_ax = -1;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index cce1c89cb7df..c21bcd668284 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -160,7 +160,7 @@ Efault_end:
 	user_access_end();
 Efault:
 	pr_alert("could not access userspace vm86 info\n");
-	force_fatal_sig(SIGSEGV);
+	force_exit_sig(SIGSEGV);
 	goto exit_vm86;
 }
 
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 23505394ef70..33a50642cf41 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -352,6 +352,7 @@ extern __must_check bool do_notify_parent(struct task_struct *, int);
 extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent);
 extern void force_sig(int);
 extern void force_fatal_sig(int);
+extern void force_exit_sig(int);
 extern int send_sig(int, struct task_struct *, int);
 extern int zap_other_threads(struct task_struct *p);
 extern struct sigqueue *sigqueue_alloc(void);
diff --git a/kernel/entry/syscall_user_dispatch.c b/kernel/entry/syscall_user_dispatch.c
index 4508201847d2..0b6379adff6b 100644
--- a/kernel/entry/syscall_user_dispatch.c
+++ b/kernel/entry/syscall_user_dispatch.c
@@ -48,7 +48,7 @@ bool syscall_user_dispatch(struct pt_regs *regs)
 		 * the selector is loaded by userspace.
 		 */
 		if (unlikely(__get_user(state, sd->selector))) {
-			force_fatal_sig(SIGSEGV);
+			force_exit_sig(SIGSEGV);
 			return true;
 		}
 
@@ -56,7 +56,7 @@ bool syscall_user_dispatch(struct pt_regs *regs)
 			return false;
 
 		if (state != SYSCALL_DISPATCH_FILTER_BLOCK) {
-			force_fatal_sig(SIGSYS);
+			force_exit_sig(SIGSYS);
 			return true;
 		}
 	}
diff --git a/kernel/signal.c b/kernel/signal.c
index 7815e1bbeddc..a629b11bf3e0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1671,6 +1671,19 @@ void force_fatal_sig(int sig)
 	force_sig_info_to_task(&info, current, HANDLER_SIG_DFL);
 }
 
+void force_exit_sig(int sig)
+{
+	struct kernel_siginfo info;
+
+	clear_siginfo(&info);
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_KERNEL;
+	info.si_pid = 0;
+	info.si_uid = 0;
+	force_sig_info_to_task(&info, current, HANDLER_EXIT);
+}
+
 /*
  * When things go south during signal handling, we
  * will force a SIGSEGV. And if the signal that caused
-- 
cgit v1.2.3


From 85b6d24646e4125c591639841169baa98a2da503 Mon Sep 17 00:00:00 2001
From: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Date: Fri, 19 Nov 2021 16:43:21 -0800
Subject: shm: extend forced shm destroy to support objects from several IPC
 nses

Currently, the exit_shm() function not designed to work properly when
task->sysvshm.shm_clist holds shm objects from different IPC namespaces.

This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it
leads to use-after-free (reproducer exists).

This is an attempt to fix the problem by extending exit_shm mechanism to
handle shm's destroy from several IPC ns'es.

To achieve that we do several things:

1. add a namespace (non-refcounted) pointer to the struct shmid_kernel

2. during new shm object creation (newseg()/shmget syscall) we
   initialize this pointer by current task IPC ns

3. exit_shm() fully reworked such that it traverses over all shp's in
   task->sysvshm.shm_clist and gets IPC namespace not from current task
   as it was before but from shp's object itself, then call
   shm_destroy(shp, ns).

Note: We need to be really careful here, because as it was said before
(1), our pointer to IPC ns non-refcnt'ed.  To be on the safe side we
using special helper get_ipc_ns_not_zero() which allows to get IPC ns
refcounter only if IPC ns not in the "state of destruction".

Q/A

Q: Why can we access shp->ns memory using non-refcounted pointer?
A: Because shp object lifetime is always shorther than IPC namespace
   lifetime, so, if we get shp object from the task->sysvshm.shm_clist
   while holding task_lock(task) nobody can steal our namespace.

Q: Does this patch change semantics of unshare/setns/clone syscalls?
A: No. It's just fixes non-covered case when process may leave IPC
   namespace without getting task->sysvshm.shm_clist list cleaned up.

Link: https://lkml.kernel.org/r/67bb03e5-f79c-1815-e2bf-949c67047418@colorfullife.com
Link: https://lkml.kernel.org/r/20211109151501.4921-1-manfred@colorfullife.com
Fixes: ab602f79915 ("shm: make exit_shm work proportional to task activity")
Co-developed-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Cc: Vasily Averin <vvs@virtuozzo.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  15 ++++
 include/linux/sched/task.h    |   2 +-
 ipc/shm.c                     | 189 ++++++++++++++++++++++++++++++++----------
 3 files changed, 159 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 05e22770af51..b75395ec8d52 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -131,6 +131,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	if (ns) {
+		if (refcount_inc_not_zero(&ns->ns.count))
+			return ns;
+	}
+
+	return NULL;
+}
+
 extern void put_ipc_ns(struct ipc_namespace *ns);
 #else
 static inline struct ipc_namespace *copy_ipcs(unsigned long flags,
@@ -147,6 +157,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
 	return ns;
 }
 
+static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns)
+{
+	return ns;
+}
+
 static inline void put_ipc_ns(struct ipc_namespace *ns)
 {
 }
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index ba88a6987400..058d7f371e25 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -158,7 +158,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
  * pins the final release of task.io_context.  Also protects ->cpuset and
- * ->cgroup.subsys[]. And ->vfork_done.
+ * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist.
  *
  * Nests both inside and outside of read_lock(&tasklist_lock).
  * It must not be nested with write_lock_irq(&tasklist_lock),
diff --git a/ipc/shm.c b/ipc/shm.c
index 4942bdd65748..b3048ebd5c31 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -62,9 +62,18 @@ struct shmid_kernel /* private to the kernel */
 	struct pid		*shm_lprid;
 	struct ucounts		*mlock_ucounts;
 
-	/* The task created the shm object.  NULL if the task is dead. */
+	/*
+	 * The task created the shm object, for
+	 * task_lock(shp->shm_creator)
+	 */
 	struct task_struct	*shm_creator;
-	struct list_head	shm_clist;	/* list by creator */
+
+	/*
+	 * List by creator. task_lock(->shm_creator) required for read/write.
+	 * If list_empty(), then the creator is dead already.
+	 */
+	struct list_head	shm_clist;
+	struct ipc_namespace	*ns;
 } __randomize_layout;
 
 /* shm_mode upper byte flags */
@@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 	struct shmid_kernel *shp;
 
 	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
+	WARN_ON(ns != shp->ns);
 
 	if (shp->shm_nattch) {
 		shp->shm_perm.mode |= SHM_DEST;
@@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head *head)
 	kfree(shp);
 }
 
-static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
+/*
+ * It has to be called with shp locked.
+ * It must be called before ipc_rmid()
+ */
+static inline void shm_clist_rm(struct shmid_kernel *shp)
 {
-	list_del(&s->shm_clist);
-	ipc_rmid(&shm_ids(ns), &s->shm_perm);
+	struct task_struct *creator;
+
+	/* ensure that shm_creator does not disappear */
+	rcu_read_lock();
+
+	/*
+	 * A concurrent exit_shm may do a list_del_init() as well.
+	 * Just do nothing if exit_shm already did the work
+	 */
+	if (!list_empty(&shp->shm_clist)) {
+		/*
+		 * shp->shm_creator is guaranteed to be valid *only*
+		 * if shp->shm_clist is not empty.
+		 */
+		creator = shp->shm_creator;
+
+		task_lock(creator);
+		/*
+		 * list_del_init() is a nop if the entry was already removed
+		 * from the list.
+		 */
+		list_del_init(&shp->shm_clist);
+		task_unlock(creator);
+	}
+	rcu_read_unlock();
+}
+
+static inline void shm_rmid(struct shmid_kernel *s)
+{
+	shm_clist_rm(s);
+	ipc_rmid(&shm_ids(s->ns), &s->shm_perm);
 }
 
 
@@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 	shm_file = shp->shm_file;
 	shp->shm_file = NULL;
 	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	shm_rmid(ns, shp);
+	shm_rmid(shp);
 	shm_unlock(shp);
 	if (!is_file_hugepages(shm_file))
 		shmem_lock(shm_file, 0, shp->mlock_ucounts);
@@ -303,10 +346,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
  *
  * 2) sysctl kernel.shm_rmid_forced is set to 1.
  */
-static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
+static bool shm_may_destroy(struct shmid_kernel *shp)
 {
 	return (shp->shm_nattch == 0) &&
-	       (ns->shm_rmid_forced ||
+	       (shp->ns->shm_rmid_forced ||
 		(shp->shm_perm.mode & SHM_DEST));
 }
 
@@ -337,7 +380,7 @@ static void shm_close(struct vm_area_struct *vma)
 	ipc_update_pid(&shp->shm_lprid, task_tgid(current));
 	shp->shm_dtim = ktime_get_real_seconds();
 	shp->shm_nattch--;
-	if (shm_may_destroy(ns, shp))
+	if (shm_may_destroy(shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
@@ -358,10 +401,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 	 *
 	 * As shp->* are changed under rwsem, it's safe to skip shp locking.
 	 */
-	if (shp->shm_creator != NULL)
+	if (!list_empty(&shp->shm_clist))
 		return 0;
 
-	if (shm_may_destroy(ns, shp)) {
+	if (shm_may_destroy(shp)) {
 		shm_lock_by_ptr(shp);
 		shm_destroy(ns, shp);
 	}
@@ -379,48 +422,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)
 /* Locking assumes this will only be called with task == current */
 void exit_shm(struct task_struct *task)
 {
-	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
-	struct shmid_kernel *shp, *n;
+	for (;;) {
+		struct shmid_kernel *shp;
+		struct ipc_namespace *ns;
 
-	if (list_empty(&task->sysvshm.shm_clist))
-		return;
+		task_lock(task);
+
+		if (list_empty(&task->sysvshm.shm_clist)) {
+			task_unlock(task);
+			break;
+		}
+
+		shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel,
+				shm_clist);
 
-	/*
-	 * If kernel.shm_rmid_forced is not set then only keep track of
-	 * which shmids are orphaned, so that a later set of the sysctl
-	 * can clean them up.
-	 */
-	if (!ns->shm_rmid_forced) {
-		down_read(&shm_ids(ns).rwsem);
-		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
-			shp->shm_creator = NULL;
 		/*
-		 * Only under read lock but we are only called on current
-		 * so no entry on the list will be shared.
+		 * 1) Get pointer to the ipc namespace. It is worth to say
+		 * that this pointer is guaranteed to be valid because
+		 * shp lifetime is always shorter than namespace lifetime
+		 * in which shp lives.
+		 * We taken task_lock it means that shp won't be freed.
 		 */
-		list_del(&task->sysvshm.shm_clist);
-		up_read(&shm_ids(ns).rwsem);
-		return;
-	}
+		ns = shp->ns;
 
-	/*
-	 * Destroy all already created segments, that were not yet mapped,
-	 * and mark any mapped as orphan to cover the sysctl toggling.
-	 * Destroy is skipped if shm_may_destroy() returns false.
-	 */
-	down_write(&shm_ids(ns).rwsem);
-	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
-		shp->shm_creator = NULL;
+		/*
+		 * 2) If kernel.shm_rmid_forced is not set then only keep track of
+		 * which shmids are orphaned, so that a later set of the sysctl
+		 * can clean them up.
+		 */
+		if (!ns->shm_rmid_forced)
+			goto unlink_continue;
 
-		if (shm_may_destroy(ns, shp)) {
-			shm_lock_by_ptr(shp);
-			shm_destroy(ns, shp);
+		/*
+		 * 3) get a reference to the namespace.
+		 *    The refcount could be already 0. If it is 0, then
+		 *    the shm objects will be free by free_ipc_work().
+		 */
+		ns = get_ipc_ns_not_zero(ns);
+		if (!ns) {
+unlink_continue:
+			list_del_init(&shp->shm_clist);
+			task_unlock(task);
+			continue;
 		}
-	}
 
-	/* Remove the list head from any segments still attached. */
-	list_del(&task->sysvshm.shm_clist);
-	up_write(&shm_ids(ns).rwsem);
+		/*
+		 * 4) get a reference to shp.
+		 *   This cannot fail: shm_clist_rm() is called before
+		 *   ipc_rmid(), thus the refcount cannot be 0.
+		 */
+		WARN_ON(!ipc_rcu_getref(&shp->shm_perm));
+
+		/*
+		 * 5) unlink the shm segment from the list of segments
+		 *    created by current.
+		 *    This must be done last. After unlinking,
+		 *    only the refcounts obtained above prevent IPC_RMID
+		 *    from destroying the segment or the namespace.
+		 */
+		list_del_init(&shp->shm_clist);
+
+		task_unlock(task);
+
+		/*
+		 * 6) we have all references
+		 *    Thus lock & if needed destroy shp.
+		 */
+		down_write(&shm_ids(ns).rwsem);
+		shm_lock_by_ptr(shp);
+		/*
+		 * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's
+		 * safe to call ipc_rcu_putref here
+		 */
+		ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
+
+		if (ipc_valid_object(&shp->shm_perm)) {
+			if (shm_may_destroy(shp))
+				shm_destroy(ns, shp);
+			else
+				shm_unlock(shp);
+		} else {
+			/*
+			 * Someone else deleted the shp from namespace
+			 * idr/kht while we have waited.
+			 * Just unlock and continue.
+			 */
+			shm_unlock(shp);
+		}
+
+		up_write(&shm_ids(ns).rwsem);
+		put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */
+	}
 }
 
 static vm_fault_t shm_fault(struct vm_fault *vmf)
@@ -676,7 +768,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	if (error < 0)
 		goto no_id;
 
+	shp->ns = ns;
+
+	task_lock(current);
 	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
+	task_unlock(current);
 
 	/*
 	 * shmid gets reported as "inode#" in /proc/pid/maps.
@@ -1567,7 +1663,8 @@ out_nattch:
 	down_write(&shm_ids(ns).rwsem);
 	shp = shm_lock(ns, shmid);
 	shp->shm_nattch--;
-	if (shm_may_destroy(ns, shp))
+
+	if (shm_may_destroy(shp))
 		shm_destroy(ns, shp);
 	else
 		shm_unlock(shp);
-- 
cgit v1.2.3


From afe041c2d0febd83698b8b0164e6b3b1dfae0b66 Mon Sep 17 00:00:00 2001
From: Bui Quang Minh <minhquangbui99@gmail.com>
Date: Fri, 19 Nov 2021 16:43:40 -0800
Subject: hugetlb: fix hugetlb cgroup refcounting during mremap

When hugetlb_vm_op_open() is called during copy_vma(), we may take the
reference to resv_map->css.  Later, when clearing the reservation
pointer of old_vma after transferring it to new_vma, we forget to drop
the reference to resv_map->css.  This leads to a reference leak of css.

Fixes this by adding a check to drop reservation css reference in
clear_vma_resv_huge_pages()

Link: https://lkml.kernel.org/r/20211113154412.91134-1-minhquangbui99@gmail.com
Fixes: 550a7d60bd5e35 ("mm, hugepages: add mremap() support for hugepage backed vma")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Mina Almasry <almasrymina@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 12 ++++++++++++
 mm/hugetlb.c                   |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index c137396129db..ba025ae27882 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -128,6 +128,13 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
 		css_get(resv_map->css);
 }
 
+static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+	if (resv_map->css)
+		css_put(resv_map->css);
+}
+
 extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					struct hugetlb_cgroup **ptr);
 extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
@@ -211,6 +218,11 @@ static inline void resv_map_dup_hugetlb_cgroup_uncharge_info(
 {
 }
 
+static inline void resv_map_put_hugetlb_cgroup_uncharge_info(
+						struct resv_map *resv_map)
+{
+}
+
 static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
 					       struct hugetlb_cgroup **ptr)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e09159c957e3..3a2479003ddf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1037,8 +1037,10 @@ void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
 	 */
 	struct resv_map *reservations = vma_resv_map(vma);
 
-	if (reservations && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
+	if (reservations && is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+		resv_map_put_hugetlb_cgroup_uncharge_info(reservations);
 		kref_put(&reservations->refs, resv_map_release);
+	}
 
 	reset_vma_resv_huge_pages(vma);
 }
-- 
cgit v1.2.3


From 985e9ece1e55a94da842f6c1f9ff84d587b26267 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Wed, 17 Nov 2021 20:07:35 +0200
Subject: ACPI: Make acpi_node_get_parent() local

acpi_node_get_parent() isn't used outside drivers/acpi/property.c.

Make it local.

Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 3 ++-
 include/linux/acpi.h    | 7 -------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 781e312f4534..2366f54d8e9c 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1084,7 +1084,8 @@ struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
  * Returns parent node of an ACPI device or data firmware node or %NULL if
  * not available.
  */
-struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode)
+static struct fwnode_handle *
+acpi_node_get_parent(const struct fwnode_handle *fwnode)
 {
 	if (is_acpi_data_node(fwnode)) {
 		/* All data nodes have parent pointer so just return that */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 668d007f0917..b28f8790192a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1182,7 +1182,6 @@ int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
 
 struct fwnode_handle *acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 					    struct fwnode_handle *child);
-struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode);
 
 struct acpi_probe_entry;
 typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *,
@@ -1287,12 +1286,6 @@ acpi_get_next_subnode(const struct fwnode_handle *fwnode,
 	return NULL;
 }
 
-static inline struct fwnode_handle *
-acpi_node_get_parent(const struct fwnode_handle *fwnode)
-{
-	return NULL;
-}
-
 static inline struct fwnode_handle *
 acpi_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
 			     struct fwnode_handle *prev)
-- 
cgit v1.2.3


From f124034faa911ed534bf8c4881ad98dbbde2a966 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 24 Nov 2021 18:44:17 -0500
Subject: Revert "virtio_ring: validate used buffer length"

This reverts commit 939779f5152d161b34f612af29e7dc1ac4472fcf.

Attempts to validate length in the core did not work out: there turn out
to exist multiple broken devices, and in particular legacy devices are
known to be broken in this respect.

We have ideas for handling this better in the next version but for now
let's revert to a known good state to make sure drivers work for people.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/virtio/virtio_ring.c | 60 --------------------------------------------
 include/linux/virtio.h       |  2 --
 2 files changed, 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 00f64f2f8b72..6d2614e34470 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -14,9 +14,6 @@
 #include <linux/spinlock.h>
 #include <xen/xen.h>
 
-static bool force_used_validation = false;
-module_param(force_used_validation, bool, 0444);
-
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
 #define BAD_RING(_vq, fmt, args...)				\
@@ -185,9 +182,6 @@ struct vring_virtqueue {
 		} packed;
 	};
 
-	/* Per-descriptor in buffer length */
-	u32 *buflen;
-
 	/* How to notify other side. FIXME: commonalize hcalls! */
 	bool (*notify)(struct virtqueue *vq);
 
@@ -496,7 +490,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	unsigned int i, n, avail, descs_used, prev, err_idx;
 	int head;
 	bool indirect;
-	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -578,7 +571,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 						     VRING_DESC_F_NEXT |
 						     VRING_DESC_F_WRITE,
 						     indirect);
-			buflen += sg->length;
 		}
 	}
 	/* Last one doesn't continue. */
@@ -618,10 +610,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
 	else
 		vq->split.desc_state[head].indir_desc = ctx;
 
-	/* Store in buffer length if necessary */
-	if (vq->buflen)
-		vq->buflen[head] = buflen;
-
 	/* Put entry in available array (but don't update avail->idx until they
 	 * do sync). */
 	avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
@@ -796,11 +784,6 @@ static void *virtqueue_get_buf_ctx_split(struct virtqueue *_vq,
 		BAD_RING(vq, "id %u is not a head!\n", i);
 		return NULL;
 	}
-	if (vq->buflen && unlikely(*len > vq->buflen[i])) {
-		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
-			*len, vq->buflen[i]);
-		return NULL;
-	}
 
 	/* detach_buf_split clears data, so grab it now. */
 	ret = vq->split.desc_state[i].data;
@@ -1079,7 +1062,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	unsigned int i, n, err_idx;
 	u16 head, id;
 	dma_addr_t addr;
-	u32 buflen = 0;
 
 	head = vq->packed.next_avail_idx;
 	desc = alloc_indirect_packed(total_sg, gfp);
@@ -1109,8 +1091,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 			desc[i].addr = cpu_to_le64(addr);
 			desc[i].len = cpu_to_le32(sg->length);
 			i++;
-			if (n >= out_sgs)
-				buflen += sg->length;
 		}
 	}
 
@@ -1164,10 +1144,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
 	vq->packed.desc_state[id].indir_desc = desc;
 	vq->packed.desc_state[id].last = id;
 
-	/* Store in buffer length if necessary */
-	if (vq->buflen)
-		vq->buflen[id] = buflen;
-
 	vq->num_added += 1;
 
 	pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -1203,7 +1179,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	__le16 head_flags, flags;
 	u16 head, id, prev, curr, avail_used_flags;
 	int err;
-	u32 buflen = 0;
 
 	START_USE(vq);
 
@@ -1283,8 +1258,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 					1 << VRING_PACKED_DESC_F_AVAIL |
 					1 << VRING_PACKED_DESC_F_USED;
 			}
-			if (n >= out_sgs)
-				buflen += sg->length;
 		}
 	}
 
@@ -1304,10 +1277,6 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
 	vq->packed.desc_state[id].indir_desc = ctx;
 	vq->packed.desc_state[id].last = prev;
 
-	/* Store in buffer length if necessary */
-	if (vq->buflen)
-		vq->buflen[id] = buflen;
-
 	/*
 	 * A driver MUST NOT make the first descriptor in the list
 	 * available before all subsequent descriptors comprising
@@ -1494,11 +1463,6 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
 		BAD_RING(vq, "id %u is not a head!\n", id);
 		return NULL;
 	}
-	if (vq->buflen && unlikely(*len > vq->buflen[id])) {
-		BAD_RING(vq, "used len %d is larger than in buflen %u\n",
-			*len, vq->buflen[id]);
-		return NULL;
-	}
 
 	/* detach_buf_packed clears data, so grab it now. */
 	ret = vq->packed.desc_state[id].data;
@@ -1704,7 +1668,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	struct vring_virtqueue *vq;
 	struct vring_packed_desc *ring;
 	struct vring_packed_desc_event *driver, *device;
-	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr;
 	size_t ring_size_in_bytes, event_size_in_bytes;
 
@@ -1794,15 +1757,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	if (!vq->packed.desc_extra)
 		goto err_desc_extra;
 
-	if (!drv->suppress_used_validation || force_used_validation) {
-		vq->buflen = kmalloc_array(num, sizeof(*vq->buflen),
-					   GFP_KERNEL);
-		if (!vq->buflen)
-			goto err_buflen;
-	} else {
-		vq->buflen = NULL;
-	}
-
 	/* No callback?  Tell other side not to bother us. */
 	if (!callback) {
 		vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE;
@@ -1815,8 +1769,6 @@ static struct virtqueue *vring_create_virtqueue_packed(
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
-err_buflen:
-	kfree(vq->packed.desc_extra);
 err_desc_extra:
 	kfree(vq->packed.desc_state);
 err_desc_state:
@@ -2224,7 +2176,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 					void (*callback)(struct virtqueue *),
 					const char *name)
 {
-	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
 	struct vring_virtqueue *vq;
 
 	if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED))
@@ -2284,15 +2235,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	if (!vq->split.desc_extra)
 		goto err_extra;
 
-	if (!drv->suppress_used_validation || force_used_validation) {
-		vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen),
-					   GFP_KERNEL);
-		if (!vq->buflen)
-			goto err_buflen;
-	} else {
-		vq->buflen = NULL;
-	}
-
 	/* Put everything in free lists. */
 	vq->free_head = 0;
 	memset(vq->split.desc_state, 0, vring.num *
@@ -2303,8 +2245,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index,
 	spin_unlock(&vdev->vqs_list_lock);
 	return &vq->vq;
 
-err_buflen:
-	kfree(vq->split.desc_extra);
 err_extra:
 	kfree(vq->split.desc_state);
 err_state:
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 44d0e09da2d9..41edbc01ffa4 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -152,7 +152,6 @@ size_t virtio_max_dma_size(struct virtio_device *vdev);
  * @feature_table_size: number of entries in the feature table array.
  * @feature_table_legacy: same as feature_table but when working in legacy mode.
  * @feature_table_size_legacy: number of entries in feature table legacy array.
- * @suppress_used_validation: set to not have core validate used length
  * @probe: the function to call when a device is found.  Returns 0 or -errno.
  * @scan: optional function to call after successful probe; intended
  *    for virtio-scsi to invoke a scan.
@@ -169,7 +168,6 @@ struct virtio_driver {
 	unsigned int feature_table_size;
 	const unsigned int *feature_table_legacy;
 	unsigned int feature_table_size_legacy;
-	bool suppress_used_validation;
 	int (*validate)(struct virtio_device *dev);
 	int (*probe)(struct virtio_device *dev);
 	void (*scan)(struct virtio_device *dev);
-- 
cgit v1.2.3


From ec15baec3272bbec576f2ce7ce47765a8e9b7b1c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 26 Nov 2021 19:28:43 +0200
Subject: net: ptp: add a definition for the UDP port for IEEE 1588 general
 messages

As opposed to event messages (Sync, PdelayReq etc) which require
timestamping, general messages (Announce, FollowUp etc) do not.
In PTP they are part of different streams of data.

IEEE 1588-2008 Annex D.2 "UDP port numbers" states that the UDP
destination port assigned by IANA is 319 for event messages, and 320 for
general messages. Yet the kernel seems to be missing the definition for
general messages. This patch adds it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ptp_classify.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h
index ae04968a3a47..9afd34a2d36c 100644
--- a/include/linux/ptp_classify.h
+++ b/include/linux/ptp_classify.h
@@ -37,6 +37,7 @@
 #define PTP_MSGTYPE_PDELAY_RESP 0x3
 
 #define PTP_EV_PORT 319
+#define PTP_GEN_PORT 320
 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */
 
 #define OFF_PTP_SOURCE_UUID	22 /* PTPv1 only */
-- 
cgit v1.2.3


From 6a2d2ddf2c345e0149bfbffdddc4768a9ab0a741 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 12 Nov 2021 14:32:27 +0100
Subject: drm: Move nomodeset kernel parameter to the DRM subsystem

The "nomodeset" kernel cmdline parameter is handled by the vgacon driver
but the exported vgacon_text_force() symbol is only used by DRM drivers.

It makes much more sense for the parameter logic to be in the subsystem
of the drivers that are making use of it.

Let's move the vgacon_text_force() function and related logic to the DRM
subsystem. While doing that, rename it to drm_firmware_drivers_only() and
make it return true if "nomodeset" was used and false otherwise. This is
a better description of the condition that the drivers are testing for.

Suggested-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Pekka Paalanen <pekka.paalanen@collabora.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20211112133230.1595307-4-javierm@redhat.com
---
 drivers/gpu/drm/Makefile                |  2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c |  3 +--
 drivers/gpu/drm/ast/ast_drv.c           |  3 +--
 drivers/gpu/drm/i915/i915_module.c      |  4 ++--
 drivers/gpu/drm/mgag200/mgag200_drv.c   |  3 +--
 drivers/gpu/drm/nouveau/nouveau_drm.c   |  4 ++--
 drivers/gpu/drm/qxl/qxl_drv.c           |  3 +--
 drivers/gpu/drm/radeon/radeon_drv.c     |  3 +--
 drivers/gpu/drm/tiny/bochs.c            |  3 +--
 drivers/gpu/drm/tiny/cirrus.c           |  4 ++--
 drivers/gpu/drm/vboxvideo/vbox_drv.c    |  3 +--
 drivers/gpu/drm/virtio/virtgpu_drv.c    |  3 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c     |  3 +--
 drivers/video/console/vgacon.c          | 21 ---------------------
 gpu/drm/drm_nomodeset.c                 | 26 ++++++++++++++++++++++++++
 include/drm/drm_drv.h                   |  5 +++++
 include/linux/console.h                 |  6 ------
 17 files changed, 48 insertions(+), 51 deletions(-)
 create mode 100644 gpu/drm/drm_nomodeset.c

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 1c41156deb5f..c74810c285af 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -33,6 +33,8 @@ drm-$(CONFIG_DRM_PRIVACY_SCREEN) += drm_privacy_screen.o drm_privacy_screen_x86.
 
 obj-$(CONFIG_DRM_DP_AUX_BUS) += drm_dp_aux_bus.o
 
+obj-$(CONFIG_VGA_CONSOLE) += drm_nomodeset.o
+
 drm_cma_helper-y := drm_gem_cma_helper.o
 obj-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_cma_helper.o
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e8c23e37c83c..438468b82eb6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -31,7 +31,6 @@
 #include "amdgpu_drv.h"
 
 #include <drm/drm_pciids.h>
-#include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
@@ -2516,7 +2515,7 @@ static int __init amdgpu_init(void)
 {
 	int r;
 
-	if (vgacon_text_force())
+	if (drm_firmware_drivers_only())
 		return -EINVAL;
 
 	r = amdgpu_sync_init();
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index 86d5cd7b6318..6d8613f6fe1c 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -26,7 +26,6 @@
  * Authors: Dave Airlie <airlied@redhat.com>
  */
 
-#include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 
@@ -233,7 +232,7 @@ static struct pci_driver ast_pci_driver = {
 
 static int __init ast_init(void)
 {
-	if (vgacon_text_force() && ast_modeset == -1)
+	if (drm_firmware_drivers_only() && ast_modeset == -1)
 		return -EINVAL;
 
 	if (ast_modeset == 0)
diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c
index ab2295dd4500..1df66bf276ce 100644
--- a/drivers/gpu/drm/i915/i915_module.c
+++ b/drivers/gpu/drm/i915/i915_module.c
@@ -4,7 +4,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
-#include <linux/console.h>
+#include <drm/drm_drv.h>
 
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_object.h"
@@ -31,7 +31,7 @@ static int i915_check_nomodeset(void)
 	if (i915_modparams.modeset == 0)
 		use_kms = false;
 
-	if (vgacon_text_force() && i915_modparams.modeset == -1)
+	if (drm_firmware_drivers_only() && i915_modparams.modeset == -1)
 		use_kms = false;
 
 	if (!use_kms) {
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index 6b9243713b3c..740108a006ba 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -6,7 +6,6 @@
  *          Dave Airlie
  */
 
-#include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/vmalloc.h>
@@ -378,7 +377,7 @@ static struct pci_driver mgag200_pci_driver = {
 
 static int __init mgag200_init(void)
 {
-	if (vgacon_text_force() && mgag200_modeset == -1)
+	if (drm_firmware_drivers_only() && mgag200_modeset == -1)
 		return -EINVAL;
 
 	if (mgag200_modeset == 0)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index e7efd9ede8e4..561309d447e0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -22,7 +22,6 @@
  * Authors: Ben Skeggs
  */
 
-#include <linux/console.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -32,6 +31,7 @@
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_vblank.h>
@@ -1358,7 +1358,7 @@ nouveau_drm_init(void)
 	nouveau_display_options();
 
 	if (nouveau_modeset == -1) {
-		if (vgacon_text_force())
+		if (drm_firmware_drivers_only())
 			nouveau_modeset = 0;
 	}
 
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index fc47b0deb021..e4b16421500b 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -29,7 +29,6 @@
 
 #include "qxl_drv.h"
 
-#include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/vgaarb.h>
@@ -295,7 +294,7 @@ static struct drm_driver qxl_driver = {
 
 static int __init qxl_init(void)
 {
-	if (vgacon_text_force() && qxl_modeset == -1)
+	if (drm_firmware_drivers_only() && qxl_modeset == -1)
 		return -EINVAL;
 
 	if (qxl_modeset == 0)
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 380adc61e71c..956c72b5aa33 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -31,7 +31,6 @@
 
 
 #include <linux/compat.h>
-#include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
@@ -637,7 +636,7 @@ static struct pci_driver radeon_kms_pci_driver = {
 
 static int __init radeon_module_init(void)
 {
-	if (vgacon_text_force() && radeon_modeset == -1)
+	if (drm_firmware_drivers_only() && radeon_modeset == -1)
 		radeon_modeset = 0;
 
 	if (radeon_modeset == 0)
diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c
index 2ce3bd903b70..fc26a1ce11ee 100644
--- a/drivers/gpu/drm/tiny/bochs.c
+++ b/drivers/gpu/drm/tiny/bochs.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <linux/console.h>
 #include <linux/pci.h>
 
 #include <drm/drm_aperture.h>
@@ -719,7 +718,7 @@ static struct pci_driver bochs_pci_driver = {
 
 static int __init bochs_init(void)
 {
-	if (vgacon_text_force() && bochs_modeset == -1)
+	if (drm_firmware_drivers_only() && bochs_modeset == -1)
 		return -EINVAL;
 
 	if (bochs_modeset == 0)
diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
index 9327001d35dd..c95d9ff7d600 100644
--- a/drivers/gpu/drm/tiny/cirrus.c
+++ b/drivers/gpu/drm/tiny/cirrus.c
@@ -16,7 +16,6 @@
  * Copyright 1999-2001 Jeff Garzik <jgarzik@pobox.com>
  */
 
-#include <linux/console.h>
 #include <linux/dma-buf-map.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -636,8 +635,9 @@ static struct pci_driver cirrus_pci_driver = {
 
 static int __init cirrus_init(void)
 {
-	if (vgacon_text_force())
+	if (drm_firmware_drivers_only())
 		return -EINVAL;
+
 	return pci_register_driver(&cirrus_pci_driver);
 }
 
diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c
index fd7abb029c65..f35d9e44c6b7 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_drv.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c
@@ -7,7 +7,6 @@
  *          Michael Thayer <michael.thayer@oracle.com,
  *          Hans de Goede <hdegoede@redhat.com>
  */
-#include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/vt_kern.h>
@@ -193,7 +192,7 @@ static const struct drm_driver driver = {
 
 static int __init vbox_init(void)
 {
-	if (vgacon_text_force() && vbox_modeset == -1)
+	if (drm_firmware_drivers_only() && vbox_modeset == -1)
 		return -EINVAL;
 
 	if (vbox_modeset == 0)
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index d86e1ad4a972..c20c96a97a6c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -27,7 +27,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/console.h>
 #include <linux/pci.h>
 #include <linux/poll.h>
 #include <linux/wait.h>
@@ -104,7 +103,7 @@ static int virtio_gpu_probe(struct virtio_device *vdev)
 	struct drm_device *dev;
 	int ret;
 
-	if (vgacon_text_force() && virtio_gpu_modeset == -1)
+	if (drm_firmware_drivers_only() && virtio_gpu_modeset == -1)
 		return -EINVAL;
 
 	if (virtio_gpu_modeset == 0)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index bfd71c86faa5..75d1e032cf16 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -25,7 +25,6 @@
  *
  **************************************************************************/
 
-#include <linux/console.h>
 #include <linux/dma-mapping.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -1651,7 +1650,7 @@ static int __init vmwgfx_init(void)
 {
 	int ret;
 
-	if (vgacon_text_force())
+	if (drm_firmware_drivers_only())
 		return -EINVAL;
 
 	ret = pci_register_driver(&vmw_pci_driver);
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index ef9c57ce0906..d4320b147956 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -97,30 +97,9 @@ static int 		vga_video_font_height;
 static int 		vga_scan_lines		__read_mostly;
 static unsigned int 	vga_rolled_over; /* last vc_origin offset before wrap */
 
-static bool vgacon_text_mode_force;
 static bool vga_hardscroll_enabled;
 static bool vga_hardscroll_user_enable = true;
 
-bool vgacon_text_force(void)
-{
-	return vgacon_text_mode_force;
-}
-EXPORT_SYMBOL(vgacon_text_force);
-
-static int __init text_mode(char *str)
-{
-	vgacon_text_mode_force = true;
-
-	pr_warn("You have booted with nomodeset. This means your GPU drivers are DISABLED\n");
-	pr_warn("Any video related functionality will be severely degraded, and you may not even be able to suspend the system properly\n");
-	pr_warn("Unless you actually understand what nomodeset does, you should reboot without enabling it\n");
-
-	return 1;
-}
-
-/* force text mode - used by kernel modesetting */
-__setup("nomodeset", text_mode);
-
 static int __init no_scroll(char *str)
 {
 	/*
diff --git a/gpu/drm/drm_nomodeset.c b/gpu/drm/drm_nomodeset.c
new file mode 100644
index 000000000000..287edfb18b5d
--- /dev/null
+++ b/gpu/drm/drm_nomodeset.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/module.h>
+#include <linux/types.h>
+
+static bool drm_nomodeset;
+
+bool drm_firmware_drivers_only(void)
+{
+	return drm_nomodeset;
+}
+EXPORT_SYMBOL(drm_firmware_drivers_only);
+
+static int __init disable_modeset(char *str)
+{
+	drm_nomodeset = true;
+
+	pr_warn("You have booted with nomodeset. This means your GPU drivers are DISABLED\n");
+	pr_warn("Any video related functionality will be severely degraded, and you may not even be able to suspend the system properly\n");
+	pr_warn("Unless you actually understand what nomodeset does, you should reboot without enabling it\n");
+
+	return 1;
+}
+
+/* Disable kernel modesetting */
+__setup("nomodeset", disable_modeset);
diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index a84eb4028e5b..89e26a732175 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -601,5 +601,10 @@ static inline bool drm_drv_uses_atomic_modeset(struct drm_device *dev)
 
 int drm_dev_set_unique(struct drm_device *dev, const char *name);
 
+#ifdef CONFIG_VGA_CONSOLE
+extern bool drm_firmware_drivers_only(void);
+#else
+static inline bool drm_firmware_drivers_only(void) { return false; }
+#endif
 
 #endif
diff --git a/include/linux/console.h b/include/linux/console.h
index a97f277cfdfa..7cd758a4f44e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -219,12 +219,6 @@ extern atomic_t ignore_console_lock_warning;
 #define VESA_HSYNC_SUSPEND      2
 #define VESA_POWERDOWN          3
 
-#ifdef CONFIG_VGA_CONSOLE
-extern bool vgacon_text_force(void);
-#else
-static inline bool vgacon_text_force(void) { return false; }
-#endif
-
 extern void console_init(void);
 
 /* For deferred console takeover */
-- 
cgit v1.2.3


From f7e5b9bfa6c8820407b64eabc1f29c9a87e8993d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 29 Nov 2021 10:39:29 -0500
Subject: siphash: use _unaligned version by default

On ARM v6 and later, we define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
because the ordinary load/store instructions (ldr, ldrh, ldrb) can
tolerate any misalignment of the memory address. However, load/store
double and load/store multiple instructions (ldrd, ldm) may still only
be used on memory addresses that are 32-bit aligned, and so we have to
use the CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS macro with care, or we
may end up with a severe performance hit due to alignment traps that
require fixups by the kernel. Testing shows that this currently happens
with clang-13 but not gcc-11. In theory, any compiler version can
produce this bug or other problems, as we are dealing with undefined
behavior in C99 even on architectures that support this in hardware,
see also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100363.

Fortunately, the get_unaligned() accessors do the right thing: when
building for ARMv6 or later, the compiler will emit unaligned accesses
using the ordinary load/store instructions (but avoid the ones that
require 32-bit alignment). When building for older ARM, those accessors
will emit the appropriate sequence of ldrb/mov/orr instructions. And on
architectures that can truly tolerate any kind of misalignment, the
get_unaligned() accessors resolve to the leXX_to_cpup accessors that
operate on aligned addresses.

Since the compiler will in fact emit ldrd or ldm instructions when
building this code for ARM v6 or later, the solution is to use the
unaligned accessors unconditionally on architectures where this is
known to be fast. The _aligned version of the hash function is
however still needed to get the best performance on architectures
that cannot do any unaligned access in hardware.

This new version avoids the undefined behavior and should produce
the fastest hash on all architectures we support.

Link: https://lore.kernel.org/linux-arm-kernel/20181008211554.5355-4-ard.biesheuvel@linaro.org/
Link: https://lore.kernel.org/linux-crypto/CAK8P3a2KfmmGDbVHULWevB0hv71P2oi2ZCHEAqT=8dQfa0=cqQ@mail.gmail.com/
Reported-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Fixes: 2c956a60778c ("siphash: add cryptographically secure PRF")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/siphash.h | 14 ++++----------
 lib/siphash.c           | 12 ++++++------
 2 files changed, 10 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/siphash.h b/include/linux/siphash.h
index bf21591a9e5e..0cda61855d90 100644
--- a/include/linux/siphash.h
+++ b/include/linux/siphash.h
@@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key)
 }
 
 u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
-#endif
 
 u64 siphash_1u64(const u64 a, const siphash_key_t *key);
 u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
@@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
 static inline u64 siphash(const void *data, size_t len,
 			  const siphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
 		return __siphash_unaligned(data, len, key);
-#endif
 	return ___siphash_aligned(data, len, key);
 }
 
@@ -96,10 +93,8 @@ typedef struct {
 
 u32 __hsiphash_aligned(const void *data, size_t len,
 		       const hsiphash_key_t *key);
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
 			 const hsiphash_key_t *key);
-#endif
 
 u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
 u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
@@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
 static inline u32 hsiphash(const void *data, size_t len,
 			   const hsiphash_key_t *key)
 {
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
-	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
+	if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+	    !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
 		return __hsiphash_unaligned(data, len, key);
-#endif
 	return ___hsiphash_aligned(data, len, key);
 }
 
diff --git a/lib/siphash.c b/lib/siphash.c
index a90112ee72a1..72b9068ab57b 100644
--- a/lib/siphash.c
+++ b/lib/siphash.c
@@ -49,6 +49,7 @@
 	SIPROUND; \
 	return (v0 ^ v1) ^ (v2 ^ v3);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
 {
 	const u8 *end = data + len - (len % sizeof(u64));
@@ -80,8 +81,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
 	POSTAMBLE
 }
 EXPORT_SYMBOL(__siphash_aligned);
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
 {
 	const u8 *end = data + len - (len % sizeof(u64));
@@ -113,7 +114,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
 	POSTAMBLE
 }
 EXPORT_SYMBOL(__siphash_unaligned);
-#endif
 
 /**
  * siphash_1u64 - compute 64-bit siphash PRF value of a u64
@@ -250,6 +250,7 @@ EXPORT_SYMBOL(siphash_3u32);
 	HSIPROUND; \
 	return (v0 ^ v1) ^ (v2 ^ v3);
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 {
 	const u8 *end = data + len - (len % sizeof(u64));
@@ -280,8 +281,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 	HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_aligned);
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
 			 const hsiphash_key_t *key)
 {
@@ -313,7 +314,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
 	HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_unaligned);
-#endif
 
 /**
  * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32
@@ -418,6 +418,7 @@ EXPORT_SYMBOL(hsiphash_4u32);
 	HSIPROUND; \
 	return v1 ^ v3;
 
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 {
 	const u8 *end = data + len - (len % sizeof(u32));
@@ -438,8 +439,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 	HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_aligned);
+#endif
 
-#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 u32 __hsiphash_unaligned(const void *data, size_t len,
 			 const hsiphash_key_t *key)
 {
@@ -461,7 +462,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len,
 	HPOSTAMBLE
 }
 EXPORT_SYMBOL(__hsiphash_unaligned);
-#endif
 
 /**
  * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
-- 
cgit v1.2.3


From 79364031c5b4365ca28ac0fa00acfab5bf465be1 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 27 Nov 2021 17:32:00 +0100
Subject: bpf: Make sure bpf_disable_instrumentation() is safe vs preemption.

The initial implementation of migrate_disable() for mainline was a
wrapper around preempt_disable(). RT kernels substituted this with a
real migrate disable implementation.

Later on mainline gained true migrate disable support, but neither
documentation nor affected code were updated.

Remove stale comments claiming that migrate_disable() is PREEMPT_RT only.

Don't use __this_cpu_inc() in the !PREEMPT_RT path because preemption is
not disabled and the RMW operation can be preempted.

Fixes: 74d862b682f51 ("sched: Make migrate_disable/enable() independent of RT")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211127163200.10466-3-bigeasy@linutronix.de
---
 include/linux/bpf.h    | 16 ++--------------
 include/linux/filter.h |  3 ---
 2 files changed, 2 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 84ff6ef49462..755f38e893be 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1353,28 +1353,16 @@ extern struct mutex bpf_stats_enabled_mutex;
  * kprobes, tracepoints) to prevent deadlocks on map operations as any of
  * these events can happen inside a region which holds a map bucket lock
  * and can deadlock on it.
- *
- * Use the preemption safe inc/dec variants on RT because migrate disable
- * is preemptible on RT and preemption in the middle of the RMW operation
- * might lead to inconsistent state. Use the raw variants for non RT
- * kernels as migrate_disable() maps to preempt_disable() so the slightly
- * more expensive save operation can be avoided.
  */
 static inline void bpf_disable_instrumentation(void)
 {
 	migrate_disable();
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_inc(bpf_prog_active);
-	else
-		__this_cpu_inc(bpf_prog_active);
+	this_cpu_inc(bpf_prog_active);
 }
 
 static inline void bpf_enable_instrumentation(void)
 {
-	if (IS_ENABLED(CONFIG_PREEMPT_RT))
-		this_cpu_dec(bpf_prog_active);
-	else
-		__this_cpu_dec(bpf_prog_active);
+	this_cpu_dec(bpf_prog_active);
 	migrate_enable();
 }
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 24b7ed2677af..534f678ca50f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -640,9 +640,6 @@ static __always_inline u32 bpf_prog_run(const struct bpf_prog *prog, const void
  * This uses migrate_disable/enable() explicitly to document that the
  * invocation of a BPF program does not require reentrancy protection
  * against a BPF program which is invoked from a preempting task.
- *
- * For non RT enabled kernels migrate_disable/enable() maps to
- * preempt_disable/enable(), i.e. it disables also preemption.
  */
 static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
 					  const void *ctx)
-- 
cgit v1.2.3


From 502e82b91361955c66c8453b5b7a905b0b5bd5a1 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 7 Nov 2021 17:21:45 +0200
Subject: net/mlx5: Fix access to a non-supported register

Validate MRTC register is supported before triggering a delayed work
which accesses it.

Fixes: 5a1023deeed0 ("net/mlx5: Add periodic update of host time to firmware")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c   | 8 +++-----
 include/linux/mlx5/mlx5_ifc.h                    | 5 ++++-
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 380f50d5462d..3ca998874c50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -836,7 +836,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 	health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
 	add_timer(&health->timer);
 
-	if (mlx5_core_is_pf(dev))
+	if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
 		queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index e127c0530b3a..7df9c7f8d9c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1071,18 +1071,16 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
 
 	mlx5_set_driver_version(dev);
 
-	mlx5_start_health_poll(dev);
-
 	err = mlx5_query_hca_caps(dev);
 	if (err) {
 		mlx5_core_err(dev, "query hca failed\n");
-		goto stop_health;
+		goto reclaim_boot_pages;
 	}
 
+	mlx5_start_health_poll(dev);
+
 	return 0;
 
-stop_health:
-	mlx5_stop_health_poll(dev, boot);
 reclaim_boot_pages:
 	mlx5_reclaim_startup_pages(dev);
 err_disable_hca:
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3636df90899a..fbaab440a484 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9698,7 +9698,10 @@ struct mlx5_ifc_mcam_access_reg_bits {
 	u8         regs_84_to_68[0x11];
 	u8         tracer_registers[0x4];
 
-	u8         regs_63_to_32[0x20];
+	u8         regs_63_to_46[0x12];
+	u8         mrtc[0x1];
+	u8         regs_44_to_32[0xd];
+
 	u8         regs_31_to_0[0x20];
 };
 
-- 
cgit v1.2.3


From 6bbfa44116689469267f1a6e3d233b52114139d2 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 1 Dec 2021 23:45:50 +0900
Subject: kprobes: Limit max data_size of the kretprobe instances

The 'kprobe::data_size' is unsigned, thus it can not be negative.  But if
user sets it enough big number (e.g. (size_t)-8), the result of 'data_size
+ sizeof(struct kretprobe_instance)' becomes smaller than sizeof(struct
kretprobe_instance) or zero. In result, the kretprobe_instance are
allocated without enough memory, and kretprobe accesses outside of
allocated memory.

To avoid this issue, introduce a max limitation of the
kretprobe::data_size. 4KB per instance should be OK.

Link: https://lkml.kernel.org/r/163836995040.432120.10322772773821182925.stgit@devnote2

Cc: stable@vger.kernel.org
Fixes: f47cd9b553aa ("kprobes: kretprobe user entry-handler")
Reported-by: zhangyue <zhangyue1@kylinos.cn>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/kprobes.h | 2 ++
 kernel/kprobes.c        | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e974caf39d3e..8c8f7a4d93af 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -153,6 +153,8 @@ struct kretprobe {
 	struct kretprobe_holder *rph;
 };
 
+#define KRETPROBE_MAX_DATA_SIZE	4096
+
 struct kretprobe_instance {
 	union {
 		struct freelist_node freelist;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e9db0c810554..21eccc961bba 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2086,6 +2086,9 @@ int register_kretprobe(struct kretprobe *rp)
 		}
 	}
 
+	if (rp->data_size > KRETPROBE_MAX_DATA_SIZE)
+		return -E2BIG;
+
 	rp->kp.pre_handler = pre_handler_kretprobe;
 	rp->kp.post_handler = NULL;
 
-- 
cgit v1.2.3


From 7a10d8c810cfad3e79372d7d1c77899d86cd6662 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 30 Nov 2021 09:01:55 -0800
Subject: net: annotate data-races on txq->xmit_lock_owner

syzbot found that __dev_queue_xmit() is reading txq->xmit_lock_owner
without annotations.

No serious issue there, let's document what is happening there.

BUG: KCSAN: data-race in __dev_queue_xmit / __dev_queue_xmit

write to 0xffff888139d09484 of 4 bytes by interrupt on cpu 0:
 __netif_tx_unlock include/linux/netdevice.h:4437 [inline]
 __dev_queue_xmit+0x948/0xf70 net/core/dev.c:4229
 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265
 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline]
 macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567
 __netdev_start_xmit include/linux/netdevice.h:4987 [inline]
 netdev_start_xmit include/linux/netdevice.h:5001 [inline]
 xmit_one+0x105/0x2f0 net/core/dev.c:3590
 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606
 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342
 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817
 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194
 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259
 neigh_hh_output include/net/neighbour.h:511 [inline]
 neigh_output include/net/neighbour.h:525 [inline]
 ip6_finish_output2+0x995/0xbb0 net/ipv6/ip6_output.c:126
 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
 ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201
 NF_HOOK_COND include/linux/netfilter.h:296 [inline]
 ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224
 dst_output include/net/dst.h:450 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508
 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702
 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898
 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421
 expire_timers+0x116/0x240 kernel/time/timer.c:1466
 __run_timers+0x368/0x410 kernel/time/timer.c:1734
 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747
 __do_softirq+0x158/0x2de kernel/softirq.c:558
 __irq_exit_rcu kernel/softirq.c:636 [inline]
 irq_exit_rcu+0x37/0x70 kernel/softirq.c:648
 sysvec_apic_timer_interrupt+0x3e/0xb0 arch/x86/kernel/apic/apic.c:1097
 asm_sysvec_apic_timer_interrupt+0x12/0x20

read to 0xffff888139d09484 of 4 bytes by interrupt on cpu 1:
 __dev_queue_xmit+0x5e3/0xf70 net/core/dev.c:4213
 dev_queue_xmit_accel+0x19/0x20 net/core/dev.c:4265
 macvlan_queue_xmit drivers/net/macvlan.c:543 [inline]
 macvlan_start_xmit+0x2b3/0x3d0 drivers/net/macvlan.c:567
 __netdev_start_xmit include/linux/netdevice.h:4987 [inline]
 netdev_start_xmit include/linux/netdevice.h:5001 [inline]
 xmit_one+0x105/0x2f0 net/core/dev.c:3590
 dev_hard_start_xmit+0x72/0x120 net/core/dev.c:3606
 sch_direct_xmit+0x1b2/0x7c0 net/sched/sch_generic.c:342
 __dev_xmit_skb+0x83d/0x1370 net/core/dev.c:3817
 __dev_queue_xmit+0x590/0xf70 net/core/dev.c:4194
 dev_queue_xmit+0x13/0x20 net/core/dev.c:4259
 neigh_resolve_output+0x3db/0x410 net/core/neighbour.c:1523
 neigh_output include/net/neighbour.h:527 [inline]
 ip6_finish_output2+0x9be/0xbb0 net/ipv6/ip6_output.c:126
 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline]
 ip6_finish_output+0x444/0x4c0 net/ipv6/ip6_output.c:201
 NF_HOOK_COND include/linux/netfilter.h:296 [inline]
 ip6_output+0x10e/0x210 net/ipv6/ip6_output.c:224
 dst_output include/net/dst.h:450 [inline]
 NF_HOOK include/linux/netfilter.h:307 [inline]
 ndisc_send_skb+0x486/0x610 net/ipv6/ndisc.c:508
 ndisc_send_rs+0x3b0/0x3e0 net/ipv6/ndisc.c:702
 addrconf_rs_timer+0x370/0x540 net/ipv6/addrconf.c:3898
 call_timer_fn+0x2e/0x240 kernel/time/timer.c:1421
 expire_timers+0x116/0x240 kernel/time/timer.c:1466
 __run_timers+0x368/0x410 kernel/time/timer.c:1734
 run_timer_softirq+0x2e/0x60 kernel/time/timer.c:1747
 __do_softirq+0x158/0x2de kernel/softirq.c:558
 __irq_exit_rcu kernel/softirq.c:636 [inline]
 irq_exit_rcu+0x37/0x70 kernel/softirq.c:648
 sysvec_apic_timer_interrupt+0x8d/0xb0 arch/x86/kernel/apic/apic.c:1097
 asm_sysvec_apic_timer_interrupt+0x12/0x20
 kcsan_setup_watchpoint+0x94/0x420 kernel/kcsan/core.c:443
 folio_test_anon include/linux/page-flags.h:581 [inline]
 PageAnon include/linux/page-flags.h:586 [inline]
 zap_pte_range+0x5ac/0x10e0 mm/memory.c:1347
 zap_pmd_range mm/memory.c:1467 [inline]
 zap_pud_range mm/memory.c:1496 [inline]
 zap_p4d_range mm/memory.c:1517 [inline]
 unmap_page_range+0x2dc/0x3d0 mm/memory.c:1538
 unmap_single_vma+0x157/0x210 mm/memory.c:1583
 unmap_vmas+0xd0/0x180 mm/memory.c:1615
 exit_mmap+0x23d/0x470 mm/mmap.c:3170
 __mmput+0x27/0x1b0 kernel/fork.c:1113
 mmput+0x3d/0x50 kernel/fork.c:1134
 exit_mm+0xdb/0x170 kernel/exit.c:507
 do_exit+0x608/0x17a0 kernel/exit.c:819
 do_group_exit+0xce/0x180 kernel/exit.c:929
 get_signal+0xfc3/0x1550 kernel/signal.c:2852
 arch_do_signal_or_restart+0x8c/0x2e0 arch/x86/kernel/signal.c:868
 handle_signal_work kernel/entry/common.c:148 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
 exit_to_user_mode_prepare+0x113/0x190 kernel/entry/common.c:207
 __syscall_exit_to_user_mode_work kernel/entry/common.c:289 [inline]
 syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:300
 do_syscall_64+0x50/0xd0 arch/x86/entry/common.c:86
 entry_SYSCALL_64_after_hwframe+0x44/0xae

value changed: 0x00000000 -> 0xffffffff

Reported by Kernel Concurrency Sanitizer on:
CPU: 1 PID: 28712 Comm: syz-executor.0 Tainted: G        W         5.16.0-rc1-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Link: https://lore.kernel.org/r/20211130170155.2331929-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 19 +++++++++++++------
 net/core/dev.c            |  5 ++++-
 2 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3ec42495a43a..be5cb3360b94 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4404,7 +4404,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
 {
 	spin_lock(&txq->_xmit_lock);
-	txq->xmit_lock_owner = cpu;
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, cpu);
 }
 
 static inline bool __netif_tx_acquire(struct netdev_queue *txq)
@@ -4421,26 +4422,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq)
 static inline void __netif_tx_lock_bh(struct netdev_queue *txq)
 {
 	spin_lock_bh(&txq->_xmit_lock);
-	txq->xmit_lock_owner = smp_processor_id();
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
 }
 
 static inline bool __netif_tx_trylock(struct netdev_queue *txq)
 {
 	bool ok = spin_trylock(&txq->_xmit_lock);
-	if (likely(ok))
-		txq->xmit_lock_owner = smp_processor_id();
+
+	if (likely(ok)) {
+		/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+		WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id());
+	}
 	return ok;
 }
 
 static inline void __netif_tx_unlock(struct netdev_queue *txq)
 {
-	txq->xmit_lock_owner = -1;
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock(&txq->_xmit_lock);
 }
 
 static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
 {
-	txq->xmit_lock_owner = -1;
+	/* Pairs with READ_ONCE() in __dev_queue_xmit() */
+	WRITE_ONCE(txq->xmit_lock_owner, -1);
 	spin_unlock_bh(&txq->_xmit_lock);
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 15ac064b5562..2a352e668d10 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4210,7 +4210,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 	if (dev->flags & IFF_UP) {
 		int cpu = smp_processor_id(); /* ok because BHs are off */
 
-		if (txq->xmit_lock_owner != cpu) {
+		/* Other cpus might concurrently change txq->xmit_lock_owner
+		 * to -1 or to their cpu id, but not to our id.
+		 */
+		if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
 			if (dev_xmit_recursion())
 				goto recursion_alert;
 
-- 
cgit v1.2.3


From e7f2be115f0746b969c0df14c0d182f65f005ca5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Tue, 26 Oct 2021 16:10:55 +0200
Subject: sched/cputime: Fix getrusage(RUSAGE_THREAD) with nohz_full

getrusage(RUSAGE_THREAD) with nohz_full may return shorter utime/stime
than the actual time.

task_cputime_adjusted() snapshots utime and stime and then adjust their
sum to match the scheduler maintained cputime.sum_exec_runtime.
Unfortunately in nohz_full, sum_exec_runtime is only updated once per
second in the worst case, causing a discrepancy against utime and stime
that can be updated anytime by the reader using vtime.

To fix this situation, perform an update of cputime.sum_exec_runtime
when the cputime snapshot reports the task as actually running while
the tick is disabled. The related overhead is then contained within the
relevant situations.

Reported-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Acked-by: Phil Auld <pauld@redhat.com>
Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org
---
 include/linux/sched/cputime.h |  5 +++--
 kernel/sched/cputime.c        | 12 +++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 6c9f19a33865..ce3c58286062 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -18,15 +18,16 @@
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void task_cputime(struct task_struct *t,
+extern bool task_cputime(struct task_struct *t,
 			 u64 *utime, u64 *stime);
 extern u64 task_gtime(struct task_struct *t);
 #else
-static inline void task_cputime(struct task_struct *t,
+static inline bool task_cputime(struct task_struct *t,
 				u64 *utime, u64 *stime)
 {
 	*utime = t->utime;
 	*stime = t->stime;
+	return false;
 }
 
 static inline u64 task_gtime(struct task_struct *t)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 872e481d5098..9392aea1804e 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -615,7 +615,8 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};
 
-	task_cputime(p, &cputime.utime, &cputime.stime);
+	if (task_cputime(p, &cputime.utime, &cputime.stime))
+		cputime.sum_exec_runtime = task_sched_runtime(p);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 EXPORT_SYMBOL_GPL(task_cputime_adjusted);
@@ -828,19 +829,21 @@ u64 task_gtime(struct task_struct *t)
  * add up the pending nohz execution time since the last
  * cputime snapshot.
  */
-void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
+bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 {
 	struct vtime *vtime = &t->vtime;
 	unsigned int seq;
 	u64 delta;
+	int ret;
 
 	if (!vtime_accounting_enabled()) {
 		*utime = t->utime;
 		*stime = t->stime;
-		return;
+		return false;
 	}
 
 	do {
+		ret = false;
 		seq = read_seqcount_begin(&vtime->seqcount);
 
 		*utime = t->utime;
@@ -850,6 +853,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		if (vtime->state < VTIME_SYS)
 			continue;
 
+		ret = true;
 		delta = vtime_delta(vtime);
 
 		/*
@@ -861,6 +865,8 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		else
 			*utime += vtime->utime + delta;
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return ret;
 }
 
 static int vtime_state_fetch(struct vtime *vtime, int cpu)
-- 
cgit v1.2.3


From f83baa0cb6cfc92ebaf7f9d3a99d7e34f2e77a8a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 1 Dec 2021 19:35:01 +0100
Subject: HID: add hid_is_usb() function to make it simpler for USB detection

A number of HID drivers already call hid_is_using_ll_driver() but only
for the detection of if this is a USB device or not.  Make this more
obvious by creating hid_is_usb() and calling the function that way.

Also converts the existing hid_is_using_ll_driver() functions to use the
new call.

Cc: Jiri Kosina <jikos@kernel.org>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: linux-input@vger.kernel.org
Cc: stable@vger.kernel.org
Tested-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20211201183503.2373082-1-gregkh@linuxfoundation.org
---
 drivers/hid/hid-asus.c           | 6 ++----
 drivers/hid/hid-logitech-dj.c    | 2 +-
 drivers/hid/hid-u2fzero.c        | 2 +-
 drivers/hid/hid-uclogic-params.c | 3 +--
 drivers/hid/wacom_sys.c          | 2 +-
 include/linux/hid.h              | 5 +++++
 6 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index f3ecddc519ee..08c9a9a60ae4 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -1028,8 +1028,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	if (drvdata->quirks & QUIRK_IS_MULTITOUCH)
 		drvdata->tp = &asus_i2c_tp;
 
-	if ((drvdata->quirks & QUIRK_T100_KEYBOARD) &&
-	    hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
+	if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && hid_is_usb(hdev)) {
 		struct usb_interface *intf = to_usb_interface(hdev->dev.parent);
 
 		if (intf->altsetting->desc.bInterfaceNumber == T100_TPAD_INTF) {
@@ -1057,8 +1056,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id)
 		drvdata->tp = &asus_t100chi_tp;
 	}
 
-	if ((drvdata->quirks & QUIRK_MEDION_E1239T) &&
-	    hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
+	if ((drvdata->quirks & QUIRK_MEDION_E1239T) && hid_is_usb(hdev)) {
 		struct usb_host_interface *alt =
 			to_usb_interface(hdev->dev.parent)->altsetting;
 
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index a0017b010c34..7106b921b53c 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -1777,7 +1777,7 @@ static int logi_dj_probe(struct hid_device *hdev,
 	case recvr_type_bluetooth:	no_dj_interfaces = 2; break;
 	case recvr_type_dinovo:		no_dj_interfaces = 2; break;
 	}
-	if (hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
+	if (hid_is_usb(hdev)) {
 		intf = to_usb_interface(hdev->dev.parent);
 		if (intf && intf->altsetting->desc.bInterfaceNumber >=
 							no_dj_interfaces) {
diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c
index 31ea7fc69916..ad489caf53ad 100644
--- a/drivers/hid/hid-u2fzero.c
+++ b/drivers/hid/hid-u2fzero.c
@@ -311,7 +311,7 @@ static int u2fzero_probe(struct hid_device *hdev,
 	unsigned int minor;
 	int ret;
 
-	if (!hid_is_using_ll_driver(hdev, &usb_hid_driver))
+	if (!hid_is_usb(hdev))
 		return -EINVAL;
 
 	dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL);
diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c
index 3d67b748a3b9..adff1bd68d9f 100644
--- a/drivers/hid/hid-uclogic-params.c
+++ b/drivers/hid/hid-uclogic-params.c
@@ -843,8 +843,7 @@ int uclogic_params_init(struct uclogic_params *params,
 	struct uclogic_params p = {0, };
 
 	/* Check arguments */
-	if (params == NULL || hdev == NULL ||
-	    !hid_is_using_ll_driver(hdev, &usb_hid_driver)) {
+	if (params == NULL || hdev == NULL || !hid_is_usb(hdev)) {
 		rc = -EINVAL;
 		goto cleanup;
 	}
diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
index 2717d39600b4..22d73772fbc5 100644
--- a/drivers/hid/wacom_sys.c
+++ b/drivers/hid/wacom_sys.c
@@ -2214,7 +2214,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix)
 	if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) {
 		char *product_name = wacom->hdev->name;
 
-		if (hid_is_using_ll_driver(wacom->hdev, &usb_hid_driver)) {
+		if (hid_is_usb(wacom->hdev)) {
 			struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent);
 			struct usb_device *dev = interface_to_usbdev(intf);
 			product_name = dev->product;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 9e067f937dbc..f453be385bd4 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -840,6 +840,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev,
 	return hdev->ll_driver == driver;
 }
 
+static inline bool hid_is_usb(struct hid_device *hdev)
+{
+	return hid_is_using_ll_driver(hdev, &usb_hid_driver);
+}
+
 #define	PM_HINT_FULLON	1<<5
 #define PM_HINT_NORMAL	1<<1
 
-- 
cgit v1.2.3


From d9847eb8be3d895b2b5f514fdf3885d47a0b92a2 Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Mon, 22 Nov 2021 20:17:40 +0530
Subject: bpf: Make CONFIG_DEBUG_INFO_BTF depend upon CONFIG_BPF_SYSCALL

Vinicius Costa Gomes reported [0] that build fails when
CONFIG_DEBUG_INFO_BTF is enabled and CONFIG_BPF_SYSCALL is disabled.
This leads to btf.c not being compiled, and then no symbol being present
in vmlinux for the declarations in btf.h. Since BTF is not useful
without enabling BPF subsystem, disallow this combination.

However, theoretically disabling both now could still fail, as the
symbol for kfunc_btf_id_list variables is not available. This isn't a
problem as the compiler usually optimizes the whole register/unregister
call, but at lower optimization levels it can fail the build in linking
stage.

Fix that by adding dummy variables so that modules taking address of
them still work, but the whole thing is a noop.

  [0]: https://lore.kernel.org/bpf/20211110205418.332403-1-vinicius.gomes@intel.com

Fixes: 14f267d95fe4 ("bpf: btf: Introduce helpers for dynamic BTF set registration")
Reported-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20211122144742.477787-2-memxor@gmail.com
---
 include/linux/btf.h | 14 ++++++++++----
 kernel/bpf/btf.c    |  9 ++-------
 lib/Kconfig.debug   |  1 +
 3 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/btf.h b/include/linux/btf.h
index 203eef993d76..0e1b6281fd8f 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -245,7 +245,10 @@ struct kfunc_btf_id_set {
 	struct module *owner;
 };
 
-struct kfunc_btf_id_list;
+struct kfunc_btf_id_list {
+	struct list_head list;
+	struct mutex mutex;
+};
 
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
@@ -254,6 +257,9 @@ void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
 				 struct kfunc_btf_id_set *s);
 bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 			      struct module *owner);
+
+extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
+extern struct kfunc_btf_id_list prog_test_kfunc_list;
 #else
 static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
 					     struct kfunc_btf_id_set *s)
@@ -268,13 +274,13 @@ static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist,
 {
 	return false;
 }
+
+static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused;
+static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused;
 #endif
 
 #define DEFINE_KFUNC_BTF_ID_SET(set, name)                                     \
 	struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set),     \
 					 THIS_MODULE }
 
-extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list;
-extern struct kfunc_btf_id_list prog_test_kfunc_list;
-
 #endif
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index dbc3ad07e21b..ea3df9867cec 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6346,11 +6346,6 @@ BTF_ID_LIST_GLOBAL_SINGLE(btf_task_struct_ids, struct, task_struct)
 
 /* BTF ID set registration API for modules */
 
-struct kfunc_btf_id_list {
-	struct list_head list;
-	struct mutex mutex;
-};
-
 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 
 void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l,
@@ -6389,8 +6384,6 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 	return false;
 }
 
-#endif
-
 #define DEFINE_KFUNC_BTF_ID_LIST(name)                                         \
 	struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list),           \
 					  __MUTEX_INITIALIZER(name.mutex) };   \
@@ -6398,3 +6391,5 @@ bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id,
 
 DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list);
 DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list);
+
+#endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9ef7ce18b4f5..596bb5e4790c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -316,6 +316,7 @@ config DEBUG_INFO_BTF
 	bool "Generate BTF typeinfo"
 	depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
 	depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
+	depends on BPF_SYSCALL
 	help
 	  Generate deduplicated BTF type information from DWARF debug info.
 	  Turning this on expects presence of pahole tool, which will convert
-- 
cgit v1.2.3


From 8581fd402a0cf80b5298e3b225e7a7bd8f110e69 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 2 Dec 2021 12:34:00 -0800
Subject: treewide: Add missing includes masked by cgroup -> bpf dependency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

cgroup.h (therefore swap.h, therefore half of the universe)
includes bpf.h which in turn includes module.h and slab.h.
Since we're about to get rid of that dependency we need
to clean things up.

v2: drop the cpu.h include from cacheinfo.h, it's not necessary
and it makes riscv sensitive to ordering of include files.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Krzysztof Wilczyński <kw@linux.com>
Acked-by: Peter Chen <peter.chen@kernel.org>
Acked-by: SeongJae Park <sj@kernel.org>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/all/20211120035253.72074-1-kuba@kernel.org/  # v1
Link: https://lore.kernel.org/all/20211120165528.197359-1-kuba@kernel.org/ # cacheinfo discussion
Link: https://lore.kernel.org/bpf/20211202203400.1208663-1-kuba@kernel.org
---
 block/fops.c                                          | 1 +
 drivers/gpu/drm/drm_gem_shmem_helper.c                | 1 +
 drivers/gpu/drm/i915/gt/intel_gtt.c                   | 1 +
 drivers/gpu/drm/i915/i915_request.c                   | 1 +
 drivers/gpu/drm/lima/lima_device.c                    | 1 +
 drivers/gpu/drm/msm/msm_gem_shrinker.c                | 1 +
 drivers/gpu/drm/ttm/ttm_tt.c                          | 1 +
 drivers/net/ethernet/huawei/hinic/hinic_sriov.c       | 1 +
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c | 2 ++
 drivers/pci/controller/dwc/pci-exynos.c               | 1 +
 drivers/pci/controller/dwc/pcie-qcom-ep.c             | 1 +
 drivers/usb/cdns3/host.c                              | 1 +
 include/linux/cacheinfo.h                             | 1 -
 include/linux/device/driver.h                         | 1 +
 include/linux/filter.h                                | 2 +-
 mm/damon/vaddr.c                                      | 1 +
 mm/memory_hotplug.c                                   | 1 +
 mm/swap_slots.c                                       | 1 +
 18 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/fops.c b/block/fops.c
index ad732a36f9b3..3cb1e81929bc 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -15,6 +15,7 @@
 #include <linux/falloc.h>
 #include <linux/suspend.h>
 #include <linux/fs.h>
+#include <linux/module.h>
 #include "blk.h"
 
 static inline struct inode *bdev_file_inode(struct file *file)
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 7b9f69f21f1e..bca0de92802e 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -9,6 +9,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/module.h>
 
 #ifdef CONFIG_X86
 #include <asm/set_memory.h>
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 67d14afa6623..b67f620c3d93 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -6,6 +6,7 @@
 #include <linux/slab.h> /* fault-inject.h is not standalone! */
 
 #include <linux/fault-inject.h>
+#include <linux/sched/mm.h>
 
 #include "gem/i915_gem_lmem.h"
 #include "i915_trace.h"
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 820a1f38b271..89cccefeea63 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -29,6 +29,7 @@
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_breadcrumbs.h"
diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c
index 65fdca366e41..f74f8048af8f 100644
--- a/drivers/gpu/drm/lima/lima_device.c
+++ b/drivers/gpu/drm/lima/lima_device.c
@@ -4,6 +4,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/clk.h>
+#include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 4a1420b05e97..086dacf2f26a 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/vmalloc.h>
+#include <linux/sched/mm.h>
 
 #include "msm_drv.h"
 #include "msm_gem.h"
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 7e83c00a3f48..79c870a3bef8 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -34,6 +34,7 @@
 #include <linux/sched.h>
 #include <linux/shmem_fs.h>
 #include <linux/file.h>
+#include <linux/module.h>
 #include <drm/drm_cache.h>
 #include <drm/ttm/ttm_bo_driver.h>
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
index a78c398bf5b2..01e7d3c0b68e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -8,6 +8,7 @@
 #include <linux/interrupt.h>
 #include <linux/etherdevice.h>
 #include <linux/netdevice.h>
+#include <linux/module.h>
 
 #include "hinic_hw_dev.h"
 #include "hinic_dev.h"
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
index 0ef68fdd1f26..61c20907315f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c
@@ -5,6 +5,8 @@
  *
  */
 
+#include <linux/module.h>
+
 #include "otx2_common.h"
 #include "otx2_ptp.h"
 
diff --git a/drivers/pci/controller/dwc/pci-exynos.c b/drivers/pci/controller/dwc/pci-exynos.c
index c24dab383654..722dacdd5a17 100644
--- a/drivers/pci/controller/dwc/pci-exynos.c
+++ b/drivers/pci/controller/dwc/pci-exynos.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
 #include <linux/regulator/consumer.h>
+#include <linux/module.h>
 
 #include "pcie-designware.h"
 
diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c
index 7b17da2f9b3f..cfe66bf04c1d 100644
--- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
+++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
@@ -18,6 +18,7 @@
 #include <linux/pm_domain.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
+#include <linux/module.h>
 
 #include "pcie-designware.h"
 
diff --git a/drivers/usb/cdns3/host.c b/drivers/usb/cdns3/host.c
index 84dadfa726aa..9643b905e2d8 100644
--- a/drivers/usb/cdns3/host.c
+++ b/drivers/usb/cdns3/host.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/slab.h>
 #include "core.h"
 #include "drd.h"
 #include "host-export.h"
diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h
index 2f909ed084c6..4ff37cb763ae 100644
--- a/include/linux/cacheinfo.h
+++ b/include/linux/cacheinfo.h
@@ -3,7 +3,6 @@
 #define _LINUX_CACHEINFO_H
 
 #include <linux/bitops.h>
-#include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/smp.h>
 
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index a498ebcf4993..15e7c5e15d62 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -18,6 +18,7 @@
 #include <linux/klist.h>
 #include <linux/pm.h>
 #include <linux/device/bus.h>
+#include <linux/module.h>
 
 /**
  * enum probe_type - device driver probe type to try
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 534f678ca50f..7f1e88e3e2b5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -6,6 +6,7 @@
 #define __LINUX_FILTER_H__
 
 #include <linux/atomic.h>
+#include <linux/bpf.h>
 #include <linux/refcount.h>
 #include <linux/compat.h>
 #include <linux/skbuff.h>
@@ -26,7 +27,6 @@
 
 #include <asm/byteorder.h>
 #include <uapi/linux/filter.h>
-#include <uapi/linux/bpf.h>
 
 struct sk_buff;
 struct sock;
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 35fe49080ee9..47f47f60440e 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -13,6 +13,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/page_idle.h>
 #include <linux/pagewalk.h>
+#include <linux/sched/mm.h>
 
 #include "prmtv-common.h"
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 852041f6be41..2a9627dc784c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -35,6 +35,7 @@
 #include <linux/memblock.h>
 #include <linux/compaction.h>
 #include <linux/rmap.h>
+#include <linux/module.h>
 
 #include <asm/tlbflush.h>
 
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 16f706c55d92..2b5531840583 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -30,6 +30,7 @@
 #include <linux/swap_slots.h>
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
+#include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mutex.h>
 #include <linux/mm.h>
-- 
cgit v1.2.3


From a97770cc4016c2733bcef9dbe3d5b1ad02d13356 Mon Sep 17 00:00:00 2001
From: Yanteng Si <siyanteng01@gmail.com>
Date: Mon, 6 Dec 2021 16:12:27 +0800
Subject: net: phy: Remove unnecessary indentation in the comments of
 phy_device

Fix warning as:

linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:543: WARNING: Unexpected indentation.
linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:544: WARNING: Block quote ends without a blank line; unexpected unindent.
linux-next/Documentation/networking/kapi:122: ./include/linux/phy.h:546: WARNING: Unexpected indentation.

Suggested-by: Akira Yokosawa <akiyks@gmail.com>
Signed-off-by: Yanteng Si <siyanteng@loongson.cn>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/phy.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 96e43fbb2dd8..cbf03a5f9cf5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -538,11 +538,12 @@ struct macsec_ops;
  * @mac_managed_pm: Set true if MAC driver takes of suspending/resuming PHY
  * @state: State of the PHY for management purposes
  * @dev_flags: Device-specific flags used by the PHY driver.
- *		Bits [15:0] are free to use by the PHY driver to communicate
- *			    driver specific behavior.
- *		Bits [23:16] are currently reserved for future use.
- *		Bits [31:24] are reserved for defining generic
- *			     PHY driver behavior.
+ *
+ *      - Bits [15:0] are free to use by the PHY driver to communicate
+ *        driver specific behavior.
+ *      - Bits [23:16] are currently reserved for future use.
+ *      - Bits [31:24] are reserved for defining generic
+ *        PHY driver behavior.
  * @irq: IRQ number of the PHY's interrupt (-1 if none)
  * @phy_timer: The timer for handling the state machine
  * @phylink: Pointer to phylink instance for this PHY
-- 
cgit v1.2.3


From 444dd878e85fb33fcfb2682cfdab4c236f33ea3e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 3 Dec 2021 17:19:47 +0100
Subject: PM: runtime: Fix pm_runtime_active() kerneldoc comment

The kerneldoc comment of pm_runtime_active() does not reflect the
behavior of the function, so update it accordingly.

Fixes: 403d2d116ec0 ("PM: runtime: Add kerneldoc comments to multiple helpers")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/pm_runtime.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index 222da43b7096..eddd66d426ca 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -129,7 +129,7 @@ static inline bool pm_runtime_suspended(struct device *dev)
  * pm_runtime_active - Check whether or not a device is runtime-active.
  * @dev: Target device.
  *
- * Return %true if runtime PM is enabled for @dev and its runtime PM status is
+ * Return %true if runtime PM is disabled for @dev or its runtime PM status is
  * %RPM_ACTIVE, or %false otherwise.
  *
  * Note that the return value of this function can only be trusted if it is
-- 
cgit v1.2.3


From cab2d3fd6866e089b5c50db09dece131f85bfebd Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Thu, 9 Dec 2021 18:46:33 +0530
Subject: bus: mhi: core: Add support for forced PM resume

For whatever reason, some devices like QCA6390, WCN6855 using ath11k
are not in M3 state during PM resume, but still functional. The
mhi_pm_resume should then not fail in those cases, and let the higher
level device specific stack continue resuming process.

Add an API mhi_pm_resume_force(), to force resuming irrespective of the
current MHI state. This fixes a regression with non functional ath11k WiFi
after suspend/resume cycle on some machines.

Bug report: https://bugzilla.kernel.org/show_bug.cgi?id=214179

Link: https://lore.kernel.org/regressions/871r5p0x2u.fsf@codeaurora.org/
Fixes: 020d3b26c07a ("bus: mhi: Early MHI resume failure in non M3 state")
Cc: stable@vger.kernel.org #5.13
Reported-by: Kalle Valo <kvalo@codeaurora.org>
Reported-by: Pengyu Ma <mapengyu@gmail.com>
Tested-by: Kalle Valo <kvalo@kernel.org>
Acked-by: Kalle Valo <kvalo@kernel.org>
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
[mani: Switched to API, added bug report, reported-by tags and CCed stable]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20211209131633.4168-1-manivannan.sadhasivam@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/mhi/core/pm.c             | 21 ++++++++++++++++++---
 drivers/net/wireless/ath/ath11k/mhi.c |  6 +++++-
 include/linux/mhi.h                   | 13 +++++++++++++
 3 files changed, 36 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bus/mhi/core/pm.c b/drivers/bus/mhi/core/pm.c
index fb99e3727155..547e6e769546 100644
--- a/drivers/bus/mhi/core/pm.c
+++ b/drivers/bus/mhi/core/pm.c
@@ -881,7 +881,7 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl)
 }
 EXPORT_SYMBOL_GPL(mhi_pm_suspend);
 
-int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
+static int __mhi_pm_resume(struct mhi_controller *mhi_cntrl, bool force)
 {
 	struct mhi_chan *itr, *tmp;
 	struct device *dev = &mhi_cntrl->mhi_dev->dev;
@@ -898,8 +898,12 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
 	if (MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))
 		return -EIO;
 
-	if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3)
-		return -EINVAL;
+	if (mhi_get_mhi_state(mhi_cntrl) != MHI_STATE_M3) {
+		dev_warn(dev, "Resuming from non M3 state (%s)\n",
+			 TO_MHI_STATE_STR(mhi_get_mhi_state(mhi_cntrl)));
+		if (!force)
+			return -EINVAL;
+	}
 
 	/* Notify clients about exiting LPM */
 	list_for_each_entry_safe(itr, tmp, &mhi_cntrl->lpm_chans, node) {
@@ -940,8 +944,19 @@ int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
 
 	return 0;
 }
+
+int mhi_pm_resume(struct mhi_controller *mhi_cntrl)
+{
+	return __mhi_pm_resume(mhi_cntrl, false);
+}
 EXPORT_SYMBOL_GPL(mhi_pm_resume);
 
+int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl)
+{
+	return __mhi_pm_resume(mhi_cntrl, true);
+}
+EXPORT_SYMBOL_GPL(mhi_pm_resume_force);
+
 int __mhi_device_get_sync(struct mhi_controller *mhi_cntrl)
 {
 	int ret;
diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c
index 26c7ae242db6..49c0b1ad40a0 100644
--- a/drivers/net/wireless/ath/ath11k/mhi.c
+++ b/drivers/net/wireless/ath/ath11k/mhi.c
@@ -533,7 +533,11 @@ static int ath11k_mhi_set_state(struct ath11k_pci *ab_pci,
 		ret = mhi_pm_suspend(ab_pci->mhi_ctrl);
 		break;
 	case ATH11K_MHI_RESUME:
-		ret = mhi_pm_resume(ab_pci->mhi_ctrl);
+		/* Do force MHI resume as some devices like QCA6390, WCN6855
+		 * are not in M3 state but they are functional. So just ignore
+		 * the MHI state while resuming.
+		 */
+		ret = mhi_pm_resume_force(ab_pci->mhi_ctrl);
 		break;
 	case ATH11K_MHI_TRIGGER_RDDM:
 		ret = mhi_force_rddm_mode(ab_pci->mhi_ctrl);
diff --git a/include/linux/mhi.h b/include/linux/mhi.h
index 723985879035..a5cc4cdf9cc8 100644
--- a/include/linux/mhi.h
+++ b/include/linux/mhi.h
@@ -663,6 +663,19 @@ int mhi_pm_suspend(struct mhi_controller *mhi_cntrl);
  */
 int mhi_pm_resume(struct mhi_controller *mhi_cntrl);
 
+/**
+ * mhi_pm_resume_force - Force resume MHI from suspended state
+ * @mhi_cntrl: MHI controller
+ *
+ * Resume the device irrespective of its MHI state. As per the MHI spec, devices
+ * has to be in M3 state during resume. But some devices seem to be in a
+ * different MHI state other than M3 but they continue working fine if allowed.
+ * This API is intented to be used for such devices.
+ *
+ * Return: 0 if the resume succeeds, a negative error code otherwise
+ */
+int mhi_pm_resume_force(struct mhi_controller *mhi_cntrl);
+
 /**
  * mhi_download_rddm_image - Download ramdump image from device for
  *                           debugging purpose.
-- 
cgit v1.2.3


From 42288cb44c4b5fff7653bc392b583a2b8bd6a8c0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 8 Dec 2021 17:04:51 -0800
Subject: wait: add wake_up_pollfree()

Several ->poll() implementations are special in that they use a
waitqueue whose lifetime is the current task, rather than the struct
file as is normally the case.  This is okay for blocking polls, since a
blocking poll occurs within one task; however, non-blocking polls
require another solution.  This solution is for the queue to be cleared
before it is freed, using 'wake_up_poll(wq, EPOLLHUP | POLLFREE);'.

However, that has a bug: wake_up_poll() calls __wake_up() with
nr_exclusive=1.  Therefore, if there are multiple "exclusive" waiters,
and the wakeup function for the first one returns a positive value, only
that one will be called.  That's *not* what's needed for POLLFREE;
POLLFREE is special in that it really needs to wake up everyone.

Considering the three non-blocking poll systems:

- io_uring poll doesn't handle POLLFREE at all, so it is broken anyway.

- aio poll is unaffected, since it doesn't support exclusive waits.
  However, that's fragile, as someone could add this feature later.

- epoll doesn't appear to be broken by this, since its wakeup function
  returns 0 when it sees POLLFREE.  But this is fragile.

Although there is a workaround (see epoll), it's better to define a
function which always sends POLLFREE to all waiters.  Add such a
function.  Also make it verify that the queue really becomes empty after
all waiters have been woken up.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20211209010455.42744-2-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 include/linux/wait.h | 26 ++++++++++++++++++++++++++
 kernel/sched/wait.c  |  7 +++++++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index 2d0df57c9902..851e07da2583 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -217,6 +217,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void
 void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
 void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
+void __wake_up_pollfree(struct wait_queue_head *wq_head);
 
 #define wake_up(x)			__wake_up(x, TASK_NORMAL, 1, NULL)
 #define wake_up_nr(x, nr)		__wake_up(x, TASK_NORMAL, nr, NULL)
@@ -245,6 +246,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
 #define wake_up_interruptible_sync_poll_locked(x, m)				\
 	__wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
 
+/**
+ * wake_up_pollfree - signal that a polled waitqueue is going away
+ * @wq_head: the wait queue head
+ *
+ * In the very rare cases where a ->poll() implementation uses a waitqueue whose
+ * lifetime is tied to a task rather than to the 'struct file' being polled,
+ * this function must be called before the waitqueue is freed so that
+ * non-blocking polls (e.g. epoll) are notified that the queue is going away.
+ *
+ * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via
+ * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU.
+ */
+static inline void wake_up_pollfree(struct wait_queue_head *wq_head)
+{
+	/*
+	 * For performance reasons, we don't always take the queue lock here.
+	 * Therefore, we might race with someone removing the last entry from
+	 * the queue, and proceed while they still hold the queue lock.
+	 * However, rcu_read_lock() is required to be held in such cases, so we
+	 * can safely proceed with an RCU-delayed free.
+	 */
+	if (waitqueue_active(wq_head))
+		__wake_up_pollfree(wq_head);
+}
+
 #define ___wait_cond_timeout(condition)						\
 ({										\
 	bool __cond = (condition);						\
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 76577d1642a5..eca38107b32f 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -238,6 +238,13 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 
+void __wake_up_pollfree(struct wait_queue_head *wq_head)
+{
+	__wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
+	/* POLLFREE must have cleared the queue. */
+	WARN_ON_ONCE(waitqueue_active(wq_head));
+}
+
 /*
  * Note: we use "set_current_state()" _after_ the wait-queue add,
  * because we need a memory barrier there on SMP, so that any
-- 
cgit v1.2.3


From a4f1192cb53758a7210ed5a9ee695aeba22f75fb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 9 Dec 2021 14:30:33 +0200
Subject: percpu_ref: Replace kernel.h with the necessary inclusions

When kernel.h is used in the headers it adds a lot into dependency hell,
especially when there are circular dependencies are involved.

Replace kernel.h inclusion with the list of what is really being used.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 include/linux/percpu-refcount.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index b31d3f3312ce..d73a1c08c3e3 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -51,9 +51,9 @@
 #define _LINUX_PERCPU_REFCOUNT_H
 
 #include <linux/atomic.h>
-#include <linux/kernel.h>
 #include <linux/percpu.h>
 #include <linux/rcupdate.h>
+#include <linux/types.h>
 #include <linux/gfp.h>
 
 struct percpu_ref;
-- 
cgit v1.2.3


From e4779015fd5d2fb8390c258268addff24d6077c7 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Fri, 10 Dec 2021 14:46:22 -0800
Subject: timers: implement usleep_idle_range()

Patch series "mm/damon: Fix fake /proc/loadavg reports", v3.

This patchset fixes DAMON's fake load report issue.  The first patch
makes yet another variant of usleep_range() for this fix, and the second
patch fixes the issue of DAMON by making it using the newly introduced
function.

This patch (of 2):

Some kernel threads such as DAMON could need to repeatedly sleep in
micro seconds level.  Because usleep_range() sleeps in uninterruptible
state, however, such threads would make /proc/loadavg reports fake load.

To help such cases, this commit implements a variant of usleep_range()
called usleep_idle_range().  It is same to usleep_range() but sets the
state of the current task as TASK_IDLE while sleeping.

Link: https://lkml.kernel.org/r/20211126145015.15862-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20211126145015.15862-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Cc: John Stultz <john.stultz@linaro.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delay.h | 14 +++++++++++++-
 kernel/time/timer.c   | 16 +++++++++-------
 2 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index 8eacf67eb212..039e7e0c7378 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -20,6 +20,7 @@
  */
 
 #include <linux/math.h>
+#include <linux/sched.h>
 
 extern unsigned long loops_per_jiffy;
 
@@ -58,7 +59,18 @@ void calibrate_delay(void);
 void __attribute__((weak)) calibration_delay_done(void);
 void msleep(unsigned int msecs);
 unsigned long msleep_interruptible(unsigned int msecs);
-void usleep_range(unsigned long min, unsigned long max);
+void usleep_range_state(unsigned long min, unsigned long max,
+			unsigned int state);
+
+static inline void usleep_range(unsigned long min, unsigned long max)
+{
+	usleep_range_state(min, max, TASK_UNINTERRUPTIBLE);
+}
+
+static inline void usleep_idle_range(unsigned long min, unsigned long max)
+{
+	usleep_range_state(min, max, TASK_IDLE);
+}
 
 static inline void ssleep(unsigned int seconds)
 {
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index e3d2c23c413d..85f1021ad459 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -2054,26 +2054,28 @@ unsigned long msleep_interruptible(unsigned int msecs)
 EXPORT_SYMBOL(msleep_interruptible);
 
 /**
- * usleep_range - Sleep for an approximate time
- * @min: Minimum time in usecs to sleep
- * @max: Maximum time in usecs to sleep
+ * usleep_range_state - Sleep for an approximate time in a given state
+ * @min:	Minimum time in usecs to sleep
+ * @max:	Maximum time in usecs to sleep
+ * @state:	State of the current task that will be while sleeping
  *
  * In non-atomic context where the exact wakeup time is flexible, use
- * usleep_range() instead of udelay().  The sleep improves responsiveness
+ * usleep_range_state() instead of udelay().  The sleep improves responsiveness
  * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces
  * power usage by allowing hrtimers to take advantage of an already-
  * scheduled interrupt instead of scheduling a new one just for this sleep.
  */
-void __sched usleep_range(unsigned long min, unsigned long max)
+void __sched usleep_range_state(unsigned long min, unsigned long max,
+				unsigned int state)
 {
 	ktime_t exp = ktime_add_us(ktime_get(), min);
 	u64 delta = (u64)(max - min) * NSEC_PER_USEC;
 
 	for (;;) {
-		__set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(state);
 		/* Do not return before the requested sleep time has elapsed */
 		if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS))
 			break;
 	}
 }
-EXPORT_SYMBOL(usleep_range);
+EXPORT_SYMBOL(usleep_range_state);
-- 
cgit v1.2.3